In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [2]:
#ENABLING CUDA
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [3]:
training_set = pd.read_csv('ml-100k/u1.base', delimiter = '\t')
training_set = np.array(training_set, dtype = 'int')
test_set = pd.read_csv('ml-100k/u1.test', delimiter = '\t')
test_set = np.array(test_set, dtype = 'int')

In [4]:
nb_users = int(max(max(training_set[:, 0], ), max(test_set[:, 0])))
nb_movies = int(max(max(training_set[:, 1], ), max(test_set[:, 1])))

In [5]:
def convert(data):
  new_data = []
  for id_users in range(1, nb_users + 1):
    id_movies = data[:, 1] [data[:, 0] == id_users]
    id_ratings = data[:, 2] [data[:, 0] == id_users]
    ratings = np.zeros(nb_movies)
    ratings[id_movies - 1] = id_ratings
    new_data.append(list(ratings))
  return new_data
training_set = convert(training_set)
test_set = convert(test_set)

training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set) 

In [6]:
# class SAE(nn.Module):
#     def __init__(self, ):
#         super(SAE, self).__init__()
#         self.encoder = nn.Sequential(
#             nn.Linear(nb_movies, 128),
#             nn.ReLU(),
#             nn.Linear(128, 64),
#             nn.ReLU(),
#             nn.Linear(64, 32),
#             nn.ReLU(),
#             # nn.Linear(32, 16),
#             # nn.ReLU()
#         )
#         self.decoder = nn.Sequential(
#             # nn.Linear(16, 32),
#             # nn.ReLU(),
#             nn.Linear(32, 64),
#             nn.ReLU(),
#             nn.Linear(64, 128),
#             nn.ReLU(),
#             nn.Linear(128, nb_movies),
#             nn.Sigmoid()
#         )

#     def forward(self, x):
#         x = self.encoder(x)
#         x = self.decoder(x)
#         return x
    
# sae = SAE()
# criterion = nn.MSELoss()
# optimizer = optim.RMSprop(sae.parameters(), lr = 0.01, weight_decay = 0.5)



class SAE(nn.Module):
    def __init__(self, ):  
        super(SAE, self).__init__() 
        self.fc1 = nn.Linear(nb_movies, 128)               
        self.fc2 = nn.Linear(128,64)
        self.fc3 = nn.Linear(64,32)    
        self.fc4 = nn.Linear(32,16)                           
        self.fc5 = nn.Linear(16,32)   
        self.fc6 = nn.Linear(32,64)    
        self.fc7 = nn.Linear(64,128)                
        self.fc8 = nn.Linear(128,nb_movies)               
        self.activation = nn.Sigmoid()

    def forward(self, x):           
        x = self.activation(self.fc1(x))  
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x)) 
        x = self.activation(self.fc4(x)) 
        x = self.activation(self.fc5(x)) 
        x = self.activation(self.fc6(x)) 
        x = self.activation(self.fc7(x))                                  
        x = self.fc8(x)                    
                                           
        return x
sae = SAE().to(device)
# sae = SAE()
criterion = nn.MSELoss()
optimizer = optim.RMSprop(sae.parameters(), lr= 0.01, weight_decay= 0.5) 

In [7]:
nb_epoch = 10
for epoch in range(1, nb_epoch + 1):
  train_loss = 0
  s = 0.
  for id_user in range(nb_users):
    input = Variable(training_set[id_user]).unsqueeze(0).to(device)
    target = input.clone().to(device)
    if torch.sum(target.data > 0) > 0:
      output = sae(input)
      target.require_grad = False
      output[target == 0] = 0
      loss = criterion(output, target)
      mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10)
      loss.backward()
      train_loss += torch.sqrt(loss.data*mean_corrector)
      s += 1.
      optimizer.step()
  print('epoch: '+str(epoch)+' loss: '+ str(train_loss/s))

epoch: 1 loss: tensor(1.4288, device='cuda:0')
epoch: 2 loss: tensor(1.1848, device='cuda:0')
epoch: 3 loss: tensor(1.1463, device='cuda:0')
epoch: 4 loss: tensor(1.1092, device='cuda:0')
epoch: 5 loss: tensor(1.0886, device='cuda:0')
epoch: 6 loss: tensor(1.0759, device='cuda:0')
epoch: 7 loss: tensor(1.0770, device='cuda:0')
epoch: 8 loss: tensor(1.0667, device='cuda:0')
epoch: 9 loss: tensor(1.0680, device='cuda:0')
epoch: 10 loss: tensor(1.0595, device='cuda:0')


## testing the SAE


In [49]:
test_loss = 0
s = 0.
for id_user in range(nb_users):
  input = Variable(training_set[id_user]).unsqueeze(0).to(device)
  target = Variable(test_set[id_user]).unsqueeze(0).to(device)
  if torch.sum(target.data > 0) > 0:
    output = sae(input)
    target.require_grad = False
    output[target == 0] = 0
    loss = criterion(output, target)
    mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10)
    test_loss += torch.sqrt(loss.data*mean_corrector)
    s += 1.
print('test loss: '+str(test_loss/s))


test loss: tensor(1.0403, device='cuda:0')


## saving the model


In [50]:
torch.save(sae.state_dict(), 'sae_model.pth')

## loading in the model


In [57]:
inference_sae = SAE()
inference_sae.load_state_dict(torch.load('sae_model.pth'))
inference_sae.eval()

SAE(
  (fc1): Linear(in_features=1682, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=32, bias=True)
  (fc4): Linear(in_features=32, out_features=16, bias=True)
  (fc5): Linear(in_features=16, out_features=32, bias=True)
  (fc6): Linear(in_features=32, out_features=64, bias=True)
  (fc7): Linear(in_features=64, out_features=128, bias=True)
  (fc8): Linear(in_features=128, out_features=1682, bias=True)
  (activation): Sigmoid()
)

## making inferences

In [58]:
with torch.inference_mode():
    inference_sae_preds = inference_sae(test_set)
inference_sae_preds

tensor([[3.7598, 3.8090, 3.2097,  ..., 1.2937, 2.2139, 2.2238],
        [3.7598, 3.8090, 3.2097,  ..., 1.2937, 2.2139, 2.2238],
        [3.7598, 3.8090, 3.2097,  ..., 1.2937, 2.2139, 2.2238],
        ...,
        [3.7598, 3.8090, 3.2097,  ..., 1.2937, 2.2139, 2.2238],
        [3.7598, 3.8090, 3.2097,  ..., 1.2937, 2.2139, 2.2238],
        [3.7598, 3.8090, 3.2097,  ..., 1.2937, 2.2139, 2.2238]])

In [61]:
#testing if same
sae.to('cpu')
sae.eval()
with torch.inference_mode():
    y_pred = sae(test_set)

In [62]:
inference_sae_preds == y_pred

tensor([[True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        ...,
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True]])

# ------------------------------- CORE CODE ENDS HERE-------------------------------------_

## testing 

In [8]:
customer = test_set[159,:]
customer

tensor([4., 0., 3.,  ..., 0., 0., 0.])

In [10]:
customer_input = Variable(customer).unsqueeze(0)

In [44]:
output = sae(customer_input)
# recomovo = torch.tensor(output[:][output[:]>4.5]).unsqueeze(0)
torch.where(output>4.5)[1]
indexes = torch.where(output>4.5)[1]
test_index = indexes.squeeze()


In [55]:
numpy_indexes = indexes.numpy()
print(numpy_indexes)
numpy_indexes.shape


[  63  126  168  356  656 1292 1366 1448]


(8,)

In [46]:
#printing out the recommended values 
print(output[:][output[:]>4.5])

tensor([4.5199, 4.5651, 4.8176, 4.5691, 4.5005, 4.6465, 4.5703, 4.6539],
       grad_fn=<IndexBackward0>)


In [None]:

def convert(data,cust_id):
  new_data = []
  for id_users in range(1, nb_users + 1):
    id_movies = data[:, 1] [data[:, 0] == cust_id]
    id_ratings = data[:, 2] [data[:, 0] == cust_id]
    ratings = np.zeros(nb_movies)
    ratings[id_movies - 1] = id_ratings
    new_data.append(list(ratings))
  return new_data
customer = convert(customer,nb_users)
customer = torch.FloatTensor(customer)

#for one customer ig
customer_input = Variable(customer).unsqueeze(0)
output = sae(input)

In [None]:
#original convert 
def convert(data):
  new_data = []
  for id_users in range(1, nb_users + 1):
    id_movies = data[:, 1] [data[:, 0] == id_users]
    id_ratings = data[:, 2] [data[:, 0] == id_users]
    ratings = np.zeros(nb_movies)
    ratings[id_movies - 1] = id_ratings
    new_data.append(list(ratings))
  return new_data
training_set = convert(training_set)
test_set = convert(test_set)

training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set) 

In [None]:
test_loss = 0
s = 0.
for id_user in range(nb_users):
  input = Variable(training_set[id_user]).unsqueeze(0)
  target = Variable(test_set[id_user]).unsqueeze(0)
  if torch.sum(target.data > 0) > 0:
    output = sae(input)
    target.require_grad = False
    output[target == 0] = 0
    loss = criterion(output, target)
    mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10)
    test_loss += np.sqrt(loss.data*mean_corrector)
    s += 1.
print('test loss: '+str(test_loss/s))


In [None]:
customer = np.zeros(nb_movies)
customer

In [None]:

def convert(data,cust_id):
  new_data = []
  for id_users in range(1, nb_users + 1):
    id_movies = data[:, 1] [data[:, 0] == cust_id]
    id_ratings = data[:, 2] [data[:, 0] == cust_id]
    ratings = np.zeros(nb_movies)
    ratings[id_movies - 1] = id_ratings
    new_data.append(list(ratings))
  return new_data
customer = convert(customer,nb_users)
customer = torch.FloatTensor(customer)

#for one customer ig
input = Variable(customer).unsqueeze(0)
output = sae(input)




In [None]:

def convert(data):
  new_data = []
  for id_users in range(1, nb_users + 1):
    id_movies = data[:, 1] [data[:, 0] == id_users]
    id_ratings = data[:, 2] [data[:, 0] == id_users]
    ratings = np.zeros(nb_movies)
    ratings[id_movies - 1] = id_ratings
    new_data.append(list(ratings))
  return new_data
training_set = convert(training_set)
test_set = convert(test_set)

training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set) 