# AutoEncoders

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## Data Preprocessing

In [3]:
# importing the dataset  
movies = pd.read_csv('ml-1m/movies.dat', sep='::', header = None, engine = 'python', encoding='latin-1')
users = pd.read_csv('ml-1m/users.dat', sep='::', header = None, engine = 'python', encoding='latin-1')
ratings = pd.read_csv('ml-1m/ratings.dat', sep='::', header = None, engine = 'python', encoding='latin-1')

In [4]:
# Preparing the training and the test set
training_set = pd.read_csv('ml-100k/u1.base', header = None, delimiter = '\t')
training_set = np.array(training_set, dtype='int')

In [5]:
test_set = pd.read_csv('ml-100k/u1.test', header = None, delimiter = '\t')
test_set = np.array(test_set, dtype = 'int')

In [6]:
# Getting the number of users and movies
nb_users = max(max(training_set[:,0]),max(test_set[:,0]))
nb_movies = max(max(training_set[:,1]),max(test_set[:,1]))

In [7]:
def convert(data):
    new_data = []
    for id_users in range(1, nb_users + 1):
        id_movies = data[:,1][data[:,0] == id_users]
        id_ratings = data[:,2][data[:,0] == id_users]
        ratings = np.zeros(nb_movies)
        # substitude a list of ratings for a paticular movie
        # print id_movies-1, id_ratings
        ratings[id_movies - 1] = id_ratings
        # print "ratings: {}".format(ratings[id_movies-1])
        
        # Since each user doesn't watch all the movies, some watch 0, 1, 4, 5 with ratings 1, 1, 1, 1
        # when casted as list it would be 1, 1, 0, 0, 1, 1 will include movies not watched  
        new_data.append(list(ratings))
    return new_data

In [8]:
training_set = convert(training_set)
test_set = convert(test_set)

In [9]:
# similar to df.head()
# 1st list: for customer 1, you have list of ratings with index movies - 1, meaning for movie 1 (index 0) has rating 5
# 2nd list: for customer 2, you have list of ratings with index movies - 1, meaning for movie 1 (index 0) has rating 4
def peek(data):
    for i in range(0,10):
        print data[i]
peek(training_set)

[5.0, 3.0, 4.0, 3.0, 3.0, 0.0, 4.0, 1.0, 5.0, 0.0, 2.0, 0.0, 5.0, 0.0, 5.0, 5.0, 0.0, 4.0, 5.0, 0.0, 1.0, 4.0, 0.0, 0.0, 4.0, 3.0, 0.0, 4.0, 1.0, 3.0, 0.0, 5.0, 0.0, 2.0, 1.0, 0.0, 2.0, 3.0, 0.0, 3.0, 2.0, 5.0, 4.0, 0.0, 5.0, 4.0, 0.0, 5.0, 0.0, 5.0, 0.0, 4.0, 0.0, 0.0, 5.0, 0.0, 5.0, 4.0, 5.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 0.0, 4.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 0.0, 4.0, 0.0, 4.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 5.0, 4.0, 5.0, 0.0, 0.0, 0.0, 5.0, 2.0, 4.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 4.0, 0.0, 0.0, 5.0, 1.0, 5.0, 0.0, 0.0, 0.0, 5.0, 3.0, 0.0, 0.0, 5.0, 0.0, 0.0, 3.0, 4.0, 5.0, 0.0, 2.0, 5.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 0.0, 4.0, 3.0, 5.0, 1.0, 3.0, 0.0, 3.0, 2.0, 0.0, 4.0, 0.0, 4.0, 3.0, 0.0, 2.0, 0.0, 0.0, 5.0, 3.0, 0.0, 0.0, 4.0, 0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 5.0, 5.0, 2.0, 5.0, 5.0, 0.0, 0.0, 5.0, 5.0, 0.0, 0.0, 5.0, 0.0, 5.0, 3.0, 0.0, 5.0, 4.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 5.0, 4.0, 0.0, 4.0, 5.0, 0.0, 5.0, 5.0, 4.0, 0.0,

In [10]:
# tensors are multi-d matrix (pytorch array) with with elements of single data type
# convert data into torch sensors
training_set =  torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

In [11]:
training_set
test_set


    5     3     4  ...      0     0     0
    4     0     0  ...      0     0     0
    0     0     0  ...      0     0     0
       ...          ⋱          ...       
    5     0     0  ...      0     0     0
    0     0     0  ...      0     0     0
    0     5     0  ...      0     0     0
[torch.FloatTensor of size 943x1682]


    0     0     0  ...      0     0     0
    0     0     0  ...      0     0     0
    0     0     0  ...      0     0     0
       ...          ⋱          ...       
    0     0     0  ...      0     0     0
    0     0     0  ...      0     0     0
    0     0     0  ...      0     0     0
[torch.FloatTensor of size 943x1682]

In [12]:
# create the architecture of neural network
# stacked autoencoder
class SAE(nn.Module):
    def __init__(self,):
        # get all the inherited classes
        super(SAE, self).__init__()
        # linear transformation/regression
        # number of movies, number of neurons
        # fc1, fc2, fc3 = full connection 1,2,3
        self.fc1 = nn.Linear(nb_movies, 20)
        # number of neurons between first hidden layer and second
        self.fc2 = nn.Linear(20, 10)
        # starting to decode and reconstruct our input layer
        self.fc3 = nn.Linear(10, 20)
        self.fc4 = nn.Linear(20, nb_movies)
        # specify activation function
        self.activation = nn.Sigmoid()
    
    # different encoding and decoding when forwarding into network
    # forward propogation
    def forward(self, x): # x - input vector
        # activation function on first full connection
        x = self.activation(self.fc1(x)) # returns encoded vector 
        x = self.activation(self.fc2(x)) # x in first hidden layer 20 to 10 while applying sigmoid activation function
        x = self.activation(self.fc3(x)) # decoding now from 10 to larger output 20 before getting back reconstructed vector with original features
        x = self.fc4(x)
        return x
        

In [13]:
sae = SAE()
criterion = nn.MSELoss() # criteron object of class to measure Mean Sq Error 
optimizer = optim.RMSprop(sae.parameters(), lr = 0.01, weight_decay = 0.5) # lr = learning rate

In [16]:
# training the SAE 
nb_epoch = 200
for epoch in range(1, nb_epoch+1):
    train_loss = 0 
    s = 0. # RMSE
    for id_user in range(nb_users):
        input = Variable(training_set[id_user]).unsqueeze(0) # create additional dimension
        target = input.clone() # copy input into target 
        if torch.sum(target.data > 0) > 0: # check user rated at least one movie 
            output = sae.forward(input) # forward propagation 
            target.require_grad = False # do not compute gradient w.r.t target
            output[target == 0] = 0 # replace the ones with 0e+00 to 0 to conserve memory
            loss = criterion(output, target) # criterion that measures the mean squared error
            mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10) # purpose of mean corrector to consider only movies that got rated and having 1+e-10 helps prevents denominator equal to zero also 
            loss.backward() # switch direction to update weights 
            train_loss += np.sqrt(loss.data[0]*mean_corrector)
            s += 1.
            optimizer.step()
    print 'epoch: {} train_loss: {}'.format(str(epoch), str(train_loss/s)) # loss - average of differences between real rating and predicted rating

epoch: 1 train_loss: 1.0166533591
epoch: 2 train_loss: 1.01648816722
epoch: 3 train_loss: 1.01728712234
epoch: 4 train_loss: 1.01692347657
epoch: 5 train_loss: 1.01676498139
epoch: 6 train_loss: 1.01636142532
epoch: 7 train_loss: 1.01621332291
epoch: 8 train_loss: 1.01587299677
epoch: 9 train_loss: 1.01617079431
epoch: 10 train_loss: 1.01583682413
epoch: 11 train_loss: 1.0160332278
epoch: 12 train_loss: 1.01577860672
epoch: 13 train_loss: 1.01556157728
epoch: 14 train_loss: 1.01544236491
epoch: 15 train_loss: 1.01518248168
epoch: 16 train_loss: 1.01378080219
epoch: 17 train_loss: 1.01231084662
epoch: 18 train_loss: 1.00973034634
epoch: 19 train_loss: 1.00871025556
epoch: 20 train_loss: 1.00567755799
epoch: 21 train_loss: 1.00574068741
epoch: 22 train_loss: 1.00116895156
epoch: 23 train_loss: 0.999807249491
epoch: 24 train_loss: 0.997392474965
epoch: 25 train_loss: 0.994696954089
epoch: 26 train_loss: 0.991578678004
epoch: 27 train_loss: 0.992610588167
epoch: 28 train_loss: 0.9893223076

In [17]:
# Testing the SAE
test_loss = 0
s = 0
for id_user in range(nb_users):
    input = Variable(training_set[id_user]).unsqueeze(0) # create additional dimension
    target = Variable(test_set[id_user]) # target contains real answers 
    if torch.sum(target.data > 0) > 0: # check user rated at least one movie 
        output = sae.forward(input) # forward propagation, output -> predictions of other movies that have not been watched 
        target.require_grad = False # do not compute gradient w.r.t target
        output[target == 0] = 0 # replace the ones with 0e+00 to 0 to conserve memory
        loss = criterion(output, target) # criterion that measures the mean squared error
        mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10) # purpose of mean corrector to consider only movies that got rated and having 1+e-10 helps prevents denominator equal to zero also 
        # loss.backward() don't need back propagation (update weights)
        test_loss += np.sqrt(loss.data[0]*mean_corrector)
        s += 1.
        # optimizer.step() related to backward propagation which is related to training
print 'test loss: {}'.format(str(test_loss/s)) # average test loss over all users that gave at least one non-zero rating

# Test loss = 0.95 stars 
# meaning predict within 0.95 

test loss: 0.948665172295
