# AutoEncoders

**1. Importing the Libraries**

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
# for parallel computations
import torch.nn.parallel
# for optimizer
import torch.optim as optim
# for tools that we use
import torch.utils.data
# for stochastic gradient descent
from torch.autograd import Variable

**2.1. Importing the 'Movies' dataset**

In [2]:
# We won't be using this dataset.
movies = pd.read_csv('ml-1m/movies.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
movies.head(5)

Unnamed: 0,0,1,2
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


**2.2. Importing the 'Users' dataset**

In [3]:
# We won't be using this dataset.
users = pd.read_csv('ml-1m/users.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
users.head(5)

Unnamed: 0,0,1,2,3,4
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


**2.3. Importing the 'Ratings' dataset**

In [4]:
# We won't be using this dataset.
ratings = pd.read_csv('ml-1m/ratings.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
ratings.head(5)

Unnamed: 0,0,1,2,3
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


**3. Preparing the training set and the test set**

In [5]:
training_set = pd.read_csv('ml-100k/u1.base', delimiter = '\t')
training_set = np.array(training_set, dtype = 'int')
training_set.shape

(79999, 4)

In [6]:
pd.DataFrame(training_set).head(5)

Unnamed: 0,0,1,2,3
0,1,2,3,876893171
1,1,3,4,878542960
2,1,4,3,876893119
3,1,5,3,889751712
4,1,7,4,875071561


In [7]:
test_set = pd.read_csv('ml-100k/u1.test', delimiter = '\t')
test_set = np.array(test_set, dtype = 'int')
test_set.shape

(19999, 4)

In [8]:
pd.DataFrame(test_set).head(5)

Unnamed: 0,0,1,2,3
0,1,10,3,875693118
1,1,12,5,878542960
2,1,14,5,874965706
3,1,17,3,875073198
4,1,20,4,887431883


**4. Getting the number of users and movies**

In [9]:
number_of_users = int(max(max(training_set[:, 0]), max(test_set[:, 0])))
number_of_movies = int(max(max(training_set[:, 1]), max(test_set[:, 1])))

**5. Coverting the data into a** m(number of the users), n(number of the movies) **matrix**

In [10]:
# because we are going to use Torch library, we won't use numpy 2D array. We'll use list of list.
# one list for each user and the length of the lists are equal to number of all movies.
def convert(data):
    matrix = []
    for user_id in range(1, number_of_users + 1):
        movies_id = data[:, 1] [data[:, 0] == user_id]
        ratings = data[:, 2] [data[:, 0] == user_id]
        user_ratings = np.zeros(number_of_movies)
        user_ratings[movies_id - 1] = ratings
        matrix.append(list(user_ratings))
    return matrix

In [11]:
training_set = convert(training_set)
test_set = convert(test_set)

In [12]:
pd.DataFrame(training_set).head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
0,0.0,3.0,4.0,3.0,3.0,0.0,4.0,1.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


**6. Covert data to Torch Tensor**<br> for better efficiency, we're going to convert numpy array to pytorch array (tensor)

In [13]:
training_set = torch.FloatTensor(training_set)
type(training_set)

torch.Tensor

In [14]:
test_set = torch.FloatTensor(test_set)
test_set

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

**7. Create Neural Network Architecture**

In [15]:
class Stacked_Autoencoders(nn.Module):
    def __init__(self, ):
        super(Stacked_Autoencoders, self).__init__()
        # For encoding         
        # features: movies 
        # Auto encoder => neuron in input layer < number of the hidden layers     
        # number of the hidden layer = 20: detect 20 features
        self.full_connection1 = nn.Linear(number_of_movies, 20)
        # It's Stacked AutoEncoder => we have several hidden layers         
        self.full_connection2 = nn.Linear(20, 10)
        # For decoding    
        self.full_connection3 = nn.Linear(10, 20)
        self.full_connection4 = nn.Linear(20, number_of_movies)
        # you have to do some experiece with type of the activation function, such as relu, sigmoid, etc.
        # Better result with sigmoid
        self.activation = nn.Sigmoid()
        
    
    def forward(self, x):
        x = self.activation(self.full_connection1(x))
        x = self.activation(self.full_connection2(x))
        x = self.activation(self.full_connection3(x))
        # In decoding we don't use activation in autoencoders
        x = self.full_connection4(x)
        return x

In [16]:
stackedAutoEncoder = Stacked_Autoencoders()
criterion = nn.MSELoss()
# we get better result with RMSProp
# decay: reduce learning rate after each epoch
# lr: learning rate
optimizer = optim.RMSprop(stackedAutoEncoder.parameters(), lr = 0.01, weight_decay = 0.5)

**8. Training the SAE**

In [18]:
epochs = 200
for epoch in range(1, epochs + 1):
    train_loss = 0
    counter = 0.
    for user_id in range(number_of_users):
    # Get the input vector features
    # Torch don't accept vector with one dimension as input => add dimension for batch with Variable() and use unsqueeze(index) 
        input = Variable(training_set[user_id]).unsqueeze(0)
        target = input.clone()
        # if user rate a movie or not
        if torch.sum(target.data > 0) > 0:
            output = stackedAutoEncoder.forward(input)
            # we want to make sure that the gradient is computed only with respect to the input, not target => optimize the code
            target.require_grad = False
            output[target == 0] = 0
            loss = criterion(output, target)
            # average of the errors for rated movies         
            mean_corrector = number_of_movies/float(torch.sum(target.data > 0) + 1e-10)
            # to find out to decrease or increase the weights
            loss.backward()
            train_loss += np.sqrt(loss.data*mean_corrector)
            counter += 1.
            optimizer.step()
    print('epoch: '+str(epoch)+'loss: '+ str(train_loss/counter))

epoch: 1loss: tensor(1.0198)
epoch: 2loss: tensor(1.0200)
epoch: 3loss: tensor(1.0194)
epoch: 4loss: tensor(1.0189)
epoch: 5loss: tensor(1.0181)
epoch: 6loss: tensor(1.0180)
epoch: 7loss: tensor(1.0176)
epoch: 8loss: tensor(1.0172)
epoch: 9loss: tensor(1.0167)
epoch: 10loss: tensor(1.0169)
epoch: 11loss: tensor(1.0167)
epoch: 12loss: tensor(1.0167)
epoch: 13loss: tensor(1.0165)
epoch: 14loss: tensor(1.0161)
epoch: 15loss: tensor(1.0158)
epoch: 16loss: tensor(1.0160)
epoch: 17loss: tensor(1.0156)
epoch: 18loss: tensor(1.0159)
epoch: 19loss: tensor(1.0157)
epoch: 20loss: tensor(1.0152)
epoch: 21loss: tensor(1.0152)
epoch: 22loss: tensor(1.0132)
epoch: 23loss: tensor(1.0119)
epoch: 24loss: tensor(1.0089)
epoch: 25loss: tensor(1.0094)
epoch: 26loss: tensor(1.0051)
epoch: 27loss: tensor(1.0047)
epoch: 28loss: tensor(1.0009)
epoch: 29loss: tensor(1.0000)
epoch: 30loss: tensor(0.9962)
epoch: 31loss: tensor(0.9945)
epoch: 32loss: tensor(0.9933)
epoch: 33loss: tensor(0.9956)
epoch: 34loss: tens

**9. Testing the SAE**

In [21]:
test_loss = 0
counter = 0.
for user_id in range(number_of_users):
    input = Variable(training_set[user_id]).unsqueeze(0)
    target = Variable(test_set[user_id]).unsqueeze(0)
    if torch.sum(target.data > 0) > 0:
        output = stackedAutoEncoder(input)
        target.require_grad = False
        output[target == 0] = 0
        loss = criterion(output, target)
        mean_corrector = number_of_movies/float(torch.sum(target.data > 0) + 1e-10)
        test_loss += np.sqrt(loss.data*mean_corrector)
        counter += 1.
print('test loss: '+str(test_loss/counter))

test loss: tensor(0.9480)
