[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/khetansarvesh/Tabular-Cross-Sectional-Modelling/blob/main/modelling/recommendation_systems/AutoEncoders.ipynb)

to create a recommendation engine to predict the rating that a user will give for a movies between 1 to 5 ( 1 means he did not like the movie while 5 means he liked the movie very much)


In [23]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [24]:
!git clone https://github.com/khetansarvesh/Tabular-Cross-Sectional-Modelling.git

Cloning into 'Tabular-Cross-Sectional-Modelling'...
remote: Enumerating objects: 528, done.[K
remote: Counting objects: 100% (265/265), done.[K
remote: Compressing objects: 100% (224/224), done.[K
remote: Total 528 (delta 184), reused 72 (delta 41), pack-reused 263[K
Receiving objects: 100% (528/528), 10.98 MiB | 14.01 MiB/s, done.
Resolving deltas: 100% (313/313), done.


In [25]:
%cd Tabular-Cross-Sectional-Modelling/dataset/movie_rating_dataset

/content/Tabular-Cross-Sectional-Modelling/dataset/movie_rating_dataset/Tabular-Cross-Sectional-Modelling/dataset/movie_rating_dataset


# **Dataset**


In [26]:
# train dataset
train_df = pd.read_csv('train_df.csv')
train_set = np.array(train_df)
training_set = torch.FloatTensor(train_set)

In [33]:
# test dataset
test_df = pd.read_csv('test_df.csv')
test_set = np.array(test_df)
test_set = torch.FloatTensor(test_set)

In [34]:
nb_users = train_df.shape[0]
nb_movies = train_df.shape[1]

# **Modelling**

In [29]:
class SAE(nn.Module):

    def __init__(self, ):
        super(SAE, self).__init__()
        self.fc1 = nn.Linear(nb_movies, 20)
        self.fc2 = nn.Linear(20, 10)
        self.fc3 = nn.Linear(10, 20)
        self.fc4 = nn.Linear(20, nb_movies)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        x = self.fc4(x)
        return x

# **Training**


In [30]:
sae = SAE()
criterion = nn.MSELoss()
optimizer = optim.RMSprop(sae.parameters(), lr = 0.01, weight_decay = 0.5)

In [31]:
nb_epoch = 200
for epoch in range(1, nb_epoch + 1):
  train_loss = 0
  s = 0.
  for id_user in range(nb_users):
    input = Variable(training_set[id_user]).unsqueeze(0)
    target = input.clone()
    if torch.sum(target.data > 0) > 0:
      output = sae(input)
      target.require_grad = False
      output[target == 0] = 0
      loss = criterion(output, target)
      mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10)
      loss.backward()
      train_loss += np.sqrt(loss.data*mean_corrector)
      s += 1.
      optimizer.step()
  print('epoch: '+str(epoch)+'loss: '+ str(train_loss/s))

epoch: 1loss: tensor(37.9259)
epoch: 2loss: tensor(27.7981)
epoch: 3loss: tensor(39.6664)
epoch: 4loss: tensor(60.9344)
epoch: 5loss: tensor(57.1118)
epoch: 6loss: tensor(55.8065)
epoch: 7loss: tensor(54.5778)
epoch: 8loss: tensor(53.4027)
epoch: 9loss: tensor(52.2731)
epoch: 10loss: tensor(51.1808)
epoch: 11loss: tensor(50.1242)
epoch: 12loss: tensor(49.1076)
epoch: 13loss: tensor(48.1262)
epoch: 14loss: tensor(47.1807)
epoch: 15loss: tensor(46.2780)
epoch: 16loss: tensor(45.4156)
epoch: 17loss: tensor(44.5909)
epoch: 18loss: tensor(43.8051)
epoch: 19loss: tensor(43.0571)
epoch: 20loss: tensor(42.3396)
epoch: 21loss: tensor(41.6539)
epoch: 22loss: tensor(41.0018)
epoch: 23loss: tensor(40.3857)
epoch: 24loss: tensor(39.8036)
epoch: 25loss: tensor(39.2559)
epoch: 26loss: tensor(38.7452)
epoch: 27loss: tensor(38.2678)
epoch: 28loss: tensor(37.8246)
epoch: 29loss: tensor(37.4105)
epoch: 30loss: tensor(37.0288)
epoch: 31loss: tensor(36.6786)
epoch: 32loss: tensor(36.3530)
epoch: 33loss: te

# **Inference**


In [35]:
test_loss = 0
s = 0.
for id_user in range(nb_users):
  input = Variable(training_set[id_user]).unsqueeze(0)
  target = Variable(test_set[id_user]).unsqueeze(0)
  if torch.sum(target.data > 0) > 0:
    output = sae(input)
    target.require_grad = False
    output[target == 0] = 0
    loss = criterion(output, target)
    mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10)
    test_loss += np.sqrt(loss.data*mean_corrector)
    s += 1.
print('test loss: '+str(test_loss/s))

test loss: tensor(252.1464)
