## Auto Encoders (AE)

---

In [3]:
# Importing libraries

import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [4]:
# Changing directory

os.chdir('//Users/mareksturek/Documents/GitHub/deep-learning/data')

In [5]:
# Import data

movies = pd.read_csv('bm/ml-1m/movies.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
users = pd.read_csv('bm/ml-1m/users.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
ratings = pd.read_csv('bm/ml-1m/ratings.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')

## Preparing the training set and the test set


In [6]:
# Preparing the training & test set

training_set = pd.read_csv('bm/ml-100k/u1.base', delimiter = '\t')
training_set = np.array(training_set, dtype = 'int')
test_set = pd.read_csv('bm/ml-100k/u1.test', delimiter = '\t')
test_set = np.array(test_set, dtype = 'int')

In [7]:
# Getting the number of users and movies

nb_users = int(max(max(training_set[:, 0], ), max(test_set[:, 0])))
nb_movies = int(max(max(training_set[:, 1], ), max(test_set[:, 1])))

In [8]:
# Converting the data into an array with users in lines and movies in columns¶

def convert(data):
  new_data = []
  for id_users in range(1, nb_users + 1):
    id_movies = data[:, 1] [data[:, 0] == id_users]
    id_ratings = data[:, 2] [data[:, 0] == id_users]
    ratings = np.zeros(nb_movies)
    ratings[id_movies - 1] = id_ratings
    new_data.append(list(ratings))
  return new_data
training_set = convert(training_set)
test_set = convert(test_set)

In [9]:
# Converting the data into Torch tensors

training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

In [10]:
# Creating the architecture of the Neural Network

class SAE(nn.Module):
    def __init__(self, ):
        super(SAE, self).__init__()
        self.fc1 = nn.Linear(nb_movies, 20)
        self.fc2 = nn.Linear(20, 10)
        self.fc3 = nn.Linear(10, 20)
        self.fc4 = nn.Linear(20, nb_movies)
        self.activation = nn.Sigmoid()
    def forward(self, x):
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        x = self.fc4(x)
        return x
sae = SAE()
criterion = nn.MSELoss()
optimizer = optim.RMSprop(sae.parameters(), lr = 0.01, weight_decay = 0.5)

In [11]:
# Training the AE

nb_epoch = 200
for epoch in range(1, nb_epoch + 1):
  train_loss = 0
  s = 0.
  for id_user in range(nb_users):
    input = Variable(training_set[id_user]).unsqueeze(0)
    target = input.clone()
    if torch.sum(target.data > 0) > 0:
      output = sae(input)
      target.require_grad = False
      output[target == 0] = 0
      loss = criterion(output, target)
      mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10)
      loss.backward()
      train_loss += np.sqrt(loss.data*mean_corrector)
      s += 1.
      optimizer.step()
  print('epoch: '+str(epoch)+'loss: '+ str(train_loss/s))

epoch: 1loss: tensor(1.7715)
epoch: 2loss: tensor(1.0967)
epoch: 3loss: tensor(1.0534)
epoch: 4loss: tensor(1.0382)
epoch: 5loss: tensor(1.0308)
epoch: 6loss: tensor(1.0266)
epoch: 7loss: tensor(1.0240)
epoch: 8loss: tensor(1.0220)
epoch: 9loss: tensor(1.0208)
epoch: 10loss: tensor(1.0199)
epoch: 11loss: tensor(1.0189)
epoch: 12loss: tensor(1.0183)
epoch: 13loss: tensor(1.0179)
epoch: 14loss: tensor(1.0177)
epoch: 15loss: tensor(1.0171)
epoch: 16loss: tensor(1.0169)
epoch: 17loss: tensor(1.0166)
epoch: 18loss: tensor(1.0166)
epoch: 19loss: tensor(1.0164)
epoch: 20loss: tensor(1.0162)
epoch: 21loss: tensor(1.0162)
epoch: 22loss: tensor(1.0158)
epoch: 23loss: tensor(1.0161)
epoch: 24loss: tensor(1.0158)
epoch: 25loss: tensor(1.0157)
epoch: 26loss: tensor(1.0156)
epoch: 27loss: tensor(1.0152)
epoch: 28loss: tensor(1.0150)
epoch: 29loss: tensor(1.0124)
epoch: 30loss: tensor(1.0117)
epoch: 31loss: tensor(1.0092)
epoch: 32loss: tensor(1.0097)
epoch: 33loss: tensor(1.0054)
epoch: 34loss: tens

In [12]:
# Testing the AE

test_loss = 0
s = 0.
for id_user in range(nb_users):
  input = Variable(training_set[id_user]).unsqueeze(0)
  target = Variable(test_set[id_user]).unsqueeze(0)
  if torch.sum(target.data > 0) > 0:
    output = sae(input)
    target.require_grad = False
    output[target == 0] = 0
    loss = criterion(output, target)
    mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10)
    test_loss += np.sqrt(loss.data*mean_corrector)
    s += 1.
print('test loss: '+str(test_loss/s))

test loss: tensor(0.9501)
