In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable


# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
# loading datasets, index start from 1
movies = pd.read_csv('../input/movies.dat', sep='::', header=None, index_col=0)
movies.index = np.arange(1, len(movies) + 1)
users = pd.read_csv('../input/users.dat', sep='\t', header=0, index_col=0)
users.index = np.arange(1, len(users) + 1)
ratings = pd.read_csv('../input/ratings.dat', sep='::', header=None)
ratings.index = np.arange(1, len(ratings) + 1)

In [None]:
# drop some revies of movie not included in dataset
ratings = ratings[ratings[1] < 3883]
ratings[1].max()

In [None]:
# numbers of items
nb_users = users.shape[0]
nb_movies = movies.shape[0]
nb_ratings = ratings.shape[0]
print("nb_users: ", nb_users, " nb_movies: ", nb_movies, " nb_ratings: ", nb_ratings)

In [None]:
# matrix of rows-users, cols-movies
user_ratings = np.zeros((nb_users+1, nb_movies))

for (idx, record) in ratings.iterrows():
    user_id = record[0]
    movie_id = record[1]
    rating = record[2]
    user_ratings[user_id, movie_id] = rating
user_ratings.shape

In [None]:
# now we split data to train and test
train_set, test_set = train_test_split(user_ratings, test_size=0.2)

In [None]:
# convert numy arreys to pytorch tensors
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
train_set = torch.Tensor(train_set)
test_set = torch.Tensor(test_set)
print(device)

In [None]:
# hiperparameters
hidden_outer_size = 128
hidden_inner_size = 128
batch_size = 16
epoch = 50
learning_rate = 0.0001
weight_decay = 0.005

In [None]:
# class of deep autoencoder
class DAE(nn.Module):
    def __init__(self):
        # init pytorch nn module
        super(DAE, self).__init__()
        # 1 parameter is INsize 2 is OUTsize
        self.fc1 = nn.Linear(nb_movies, hidden_outer_size)
        self.fc2 = nn.Linear(hidden_outer_size, hidden_inner_size)
        self.fc3 = nn.Linear(hidden_inner_size, hidden_outer_size)
        self.fc4 = nn.Linear(hidden_outer_size, nb_movies)
        self.activation = nn.Sigmoid()
        
    def forward(self, x):
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        x = self.fc4(x)
        return x
    
# this is MSE, but averaged by number of rated movies, not total movies
def mse_loss_masked(input, target, num_labels):
    return torch.div(torch.sum((input - target) ** 2), num_labels)

In [None]:
dae = DAE()
dae.to(device)
optimizer = optim.Adam(dae.parameters(), lr=learning_rate, weight_decay=weight_decay)

In [None]:
# train model
for epoch in range(1, epoch+1):
    train_loss = 0.
    # Numbers of users who at least rated one movie
    step = 0
    row_idx = 0
    while row_idx < len(train_set):
        # add an empty dimension for batch size of 1
        input = train_set[row_idx:row_idx + batch_size,:]
        # target is copy of the input
        target = input.clone()
        # send target and input to device
        input, target = input.to(device), target.to(device)
        # we dont calculate grad of target
        target.require_grad = False
        # number of movies where rating is not zero
        num_labels = torch.sum(target > 0)
        # if user rate at least 1 movie
        if num_labels > 0:
            # get predicted ratings for this user
            output = dae(input)
            # zero to any movies that user dont rate
            # we dont wont that in ouer loss calculation
            output[target == 0] = 0
            loss = mse_loss_masked(output, target, num_labels)
            loss_value = loss.detach().cpu().numpy()
            train_loss = loss_value
            # backpropagete loss gradient to network
            loss.backward()
            # run oprimazer to update waights
            optimizer.step()
            step += 1
        row_idx += batch_size
    print("epoch: ", epoch, ' loss: ', str(train_loss/step))
    

In [13]:
# evaluate model
test_loss = 0
step = 0
for row_idx in range(len(test_set)):
    # unsqueeze (0) adds batch dimension to the matrix (size of 1)
    input = test_set[row_idx,:].unsqueeze(0)
    target = input.clone()
    target.require_grad = False
    input, target = input.to(device), target.to(device)
    num_labels = torch.sum(target > 0)
    if num_labels > 0:
        step += 1
        output = dae(input)
        output[target == 0] = 0
        loss = mse_loss_masked(output, target, num_labels)
        loss_value = loss.detach().cpu().numpy()
        test_loss += loss_value
print("test loss: ", str(test_loss / step))

test loss:  1.6341499020187593
