# Stacked AutoEncoders

### Importing the libraries

In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

### Importing the dataset

In [5]:
movies = pd.read_csv('ml-1m/movies.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
users = pd.read_csv('ml-1m/users.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
ratings = pd.read_csv('ml-1m/ratings.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')

### Preparing the training set and the test set

In [7]:
training_set = pd.read_csv('ml-100k/u1.base', delimiter = '\t')
training_set = np.array(training_set, dtype = 'int')
test_set = pd.read_csv('ml-100k/u1.test', delimiter = '\t')
test_set = np.array(test_set, dtype = 'int')

### Getting the number of users and movies

In [9]:
nb_users = int(max(max(training_set[:,0]), max(test_set[:,0])))
nb_movies = int(max(max(training_set[:,1]), max(test_set[:,1])))

### Converting the data into an array with users in rows and movies in columns

In [11]:
def convert(data):
    new_data = []
    for id_users in range(1, nb_users + 1):
        id_movies = data[:,1][data[:,0] == id_users]
        id_ratings = data[:,2][data[:,0] == id_users]
        ratings = np.zeros(nb_movies)
        ratings[id_movies - 1] = id_ratings
        new_data.append(list(ratings))
    return new_data
training_set = convert(training_set)
test_set = convert(test_set)

### Converting the data into Torch tensors

In [13]:
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

### Creating the architecture of the Neural Network

In [15]:
class SAE(nn.Module):
    def __init__(self, ):
        # Defining full connections and the activation function
        super(SAE, self).__init__()
        self.fc1 = nn.Linear(nb_movies, 20)
        self.fc2 = nn.Linear(20, 10) # Encoder layer
        self.fc3 = nn.Linear(10, 20) # Decoder layer
        self.fc4 = nn.Linear(20, nb_movies)
        self.activation = nn.Sigmoid()
        
    def forward(self, x):
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        x = self.fc4(x)
        return x
sae = SAE()
criterion = nn.MSELoss()
optimizer = optim.RMSprop(sae.parameters(), lr = 0.01, weight_decay = 0.5)

### Training the SAE

In [19]:
nb_epoch = 200
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0
    # Implementing Stochastic Gradient Descent (SGD) in the training set
    for id_user in range(nb_users):
        input = Variable(training_set[id_user]).unsqueeze(0)
        target = input.clone() # Predicted ratings for movies
        if torch.sum(target.data > 0) > 0:
            output = sae(input)
            target.require_grad = False # No SGD
            output[target == 0] = 0
            loss = criterion(output, target)
            mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10) # Average of the loss between real and predicted ratings
            loss.backward()
            train_loss += np.sqrt(loss.data*mean_corrector)
            s += 1
            optimizer.step()
    print('epoch: '+str(epoch)+' loss: '+str(train_loss/s))

epoch: 1 loss: tensor(1.7671)
epoch: 2 loss: tensor(1.0966)
epoch: 3 loss: tensor(1.0534)
epoch: 4 loss: tensor(1.0382)
epoch: 5 loss: tensor(1.0308)
epoch: 6 loss: tensor(1.0267)
epoch: 7 loss: tensor(1.0236)
epoch: 8 loss: tensor(1.0221)
epoch: 9 loss: tensor(1.0206)
epoch: 10 loss: tensor(1.0195)
epoch: 11 loss: tensor(1.0190)
epoch: 12 loss: tensor(1.0184)
epoch: 13 loss: tensor(1.0180)
epoch: 14 loss: tensor(1.0174)
epoch: 15 loss: tensor(1.0172)
epoch: 16 loss: tensor(1.0170)
epoch: 17 loss: tensor(1.0169)
epoch: 18 loss: tensor(1.0165)
epoch: 19 loss: tensor(1.0165)
epoch: 20 loss: tensor(1.0162)
epoch: 21 loss: tensor(1.0160)
epoch: 22 loss: tensor(1.0157)
epoch: 23 loss: tensor(1.0159)
epoch: 24 loss: tensor(1.0159)
epoch: 25 loss: tensor(1.0157)
epoch: 26 loss: tensor(1.0158)
epoch: 27 loss: tensor(1.0155)
epoch: 28 loss: tensor(1.0152)
epoch: 29 loss: tensor(1.0129)
epoch: 30 loss: tensor(1.0117)
epoch: 31 loss: tensor(1.0094)
epoch: 32 loss: tensor(1.0095)
epoch: 33 loss: t

**Loss Interpretation**

If we get an epoch loss of 1, that means the average difference between the real ratings of the movies by the users and the predicted ratings, will be 1. That means the error in predicting whether the user would like the movie or not, would be 1 star out of 5 stars. Here we see that the training loss is less than 1 and hence, the model gives good predictions. 

### Testing the SAE

In [22]:
test_loss = 0
s = 0
for id_user in range(nb_users):
    input = Variable(training_set[id_user]).unsqueeze(0)
    target = Variable(test_set[id_user]).unsqueeze(0)
    if torch.sum(target.data > 0) > 0:
        output = sae(input)
        target.require_grad = False
        output[target == 0] = 0
        loss = criterion(output, target)
        mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10)
        test_loss += np.sqrt(loss.data*mean_corrector)
        s += 1.
print('test loss: '+str(test_loss/s))

test loss: tensor(0.9561)


**Loss Interpretation**

We see that the error/loss in the test set is less than 1, so we can say that the model performs well with little error and predicts correctly majority of the times about user's opinion on the movie.