# MovieLens Recommendation system

In [1]:
# Import libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [2]:
# Load the MovieLens "latest-small" CSV files: one table of per-user ratings
# and one table of movie metadata.
ratings = pd.read_csv(
    'data/ml-latest-small/ratings.csv',
    sep=',',
    engine='python',
    encoding='latin-1',
)
movies = pd.read_csv(
    'data/ml-latest-small/movies.csv',
    sep=',',
    engine='python',
    encoding='latin-1',
)

In [3]:
# Attach the movie metadata to every rating row (left join on movieId keeps
# all ratings), then order the rows chronologically by rating timestamp.
df = ratings.merge(movies, how='left', on=['movieId'])
df = df.sort_values(by=['timestamp'], ascending=True)

In [4]:
# Discard the movie metadata columns; only the rating columns are used below.
df = df.drop(['title', 'genres'], axis=1)

In [5]:
# Chronological 80/20 split: df is sorted by timestamp, so the first 80% of
# rows (by position) form the training set and the remainder the test set.
training_set = df.iloc[:int(len(df) * 0.8)]
test_set = df.iloc[int(len(df) * 0.8):]

In [6]:
# Turn both DataFrame splits into plain int32 NumPy arrays.
# NOTE(review): the int32 cast truncates any fractional rating (e.g. a 0.5
# rating would become 0, which later counts as "not rated") -- confirm the
# ratings column only holds whole numbers or that truncation is intended.
training_set, test_set = (
    np.array(split, dtype='int32') for split in (training_set, test_set)
)

In [7]:
# Collect the sorted distinct user ids (column 0) and movie ids (column 1)
# seen in either split, then record how many of each there are.
unqiue_users, unqiue_movies = (
    np.unique(np.concatenate([training_set[:, column], test_set[:, column]]))
    for column in (0, 1)
)
nb_users, nb_movies = len(unqiue_users), len(unqiue_movies)

In [8]:
# Create pivot table
def convert(data):
    """Build a dense user-by-movie rating matrix from rating rows.

    Relies on the module-level ``unqiue_users``, ``unqiue_movies`` (both
    produced by ``np.unique`` and therefore sorted) and ``nb_movies``.

    Args:
        data: 2-D array whose columns 0, 1 and 2 are read as user id,
            movie id and rating.

    Returns:
        A list with one 1-D array of length ``nb_movies`` per entry of
        ``unqiue_users``. Column ``j`` holds the rating for
        ``unqiue_movies[j]`` and 0 where ``data`` has no rating row for
        that user and movie.
    """
    new_data = []
    for id_user in unqiue_users:
        # Compute the row mask once and reuse it for both columns.
        user_rows = data[:, 0] == id_user
        id_movies = data[:, 1][user_rows]
        id_ratings = data[:, 2][user_rows]

        # Raw movie ids are not valid column positions: the vector only has
        # one slot per distinct movie, so translate each id to its position
        # in the sorted ``unqiue_movies`` array.
        columns = np.searchsorted(unqiue_movies, id_movies)
        # searchsorted returns an insertion point even for ids that are not
        # present; keep only exact matches (clip first so the lookup below
        # cannot run past the end).
        clipped = np.minimum(columns, len(unqiue_movies) - 1)
        known = unqiue_movies[clipped] == id_movies

        ratings = np.zeros(nb_movies)
        ratings[clipped[known]] = id_ratings[known]
        new_data.append(ratings)

    return new_data

In [9]:
def convert(data, unique_movies, unique_users=None):
    """Build a dense user-by-movie rating matrix from rating rows.

    Args:
        data: 2-D array whose columns 0, 1 and 2 are read as user id,
            movie id and rating.
        unique_movies: Sequence of movie ids; the position of an id in this
            sequence is the column that movie occupies in every output row.
        unique_users: Optional sequence of user ids defining the output rows
            (one row per entry, in this order). When omitted, the rows are
            the sorted distinct user ids found in ``data``. Pass the same
            sequence for several splits to get row-aligned matrices.

    Returns:
        A list of 1-D arrays of length ``len(unique_movies)``, one per user.
        Entries are 0 where the user has no rating row for that movie.
        Rating rows whose movie id is not in ``unique_movies`` are ignored.
    """
    # Dictionary for constant-time movie id -> column lookup.
    movie_id_to_index = {movie_id: index for index, movie_id in enumerate(unique_movies)}

    if unique_users is None:
        unique_users = np.unique(data[:, 0])

    # Size rows from the argument itself rather than from a module global,
    # so the function stays correct for any ``unique_movies`` passed in.
    nb_columns = len(unique_movies)

    new_data = []
    for id_user in unique_users:
        # Compute the row mask once and reuse it for both columns.
        user_rows = data[:, 0] == id_user

        ratings = np.zeros(nb_columns)
        for movie_id, rating in zip(data[user_rows, 1], data[user_rows, 2]):
            index = movie_id_to_index.get(movie_id)
            if index is not None:  # skip movies absent from unique_movies
                ratings[index] = rating

        new_data.append(ratings)

    return new_data

In [10]:
# Pivot each split into a 2-D array (one row per user, one column per movie)
# using the two-argument convert(data, unique_movies).
# NOTE(review): that convert builds its rows from the users present in the
# split it is given, so the two matrices need not have the same number of
# rows or the same user on a given row -- confirm before indexing both by
# the same row number.
training_set, test_set = (
    np.array(convert(split, unqiue_movies)) for split in (training_set, test_set)
)

In [12]:
# Wrap both rating matrices as float tensors for use with torch.
training_set, test_set = map(torch.FloatTensor, (training_set, test_set))

In [13]:
# Display the training tensor to inspect the converted rating matrix.
training_set

tensor([[4., 0., 4.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [4., 0., 0.,  ..., 0., 0., 0.],
        [2., 2., 2.,  ..., 0., 0., 0.],
        [3., 0., 0.,  ..., 0., 0., 0.]])

In [14]:
# Recode ratings in place for both splits:
#   0 (no rating) -> -1, ratings 1 and 2 -> 0 (not liked), ratings >= 3 -> 1 (liked).
# The "no rating" step must run first: afterwards 0 means "not liked".
for rating_matrix in (training_set, test_set):
    rating_matrix[rating_matrix == 0] = -1
    rating_matrix[(rating_matrix == 1) | (rating_matrix == 2)] = 0
    rating_matrix[rating_matrix >= 3] = 1

In [15]:
# Display the training tensor after the binary recoding.
# NOTE(review): the output recorded below shows 0 for entries that were 0
# before recoding and 1 for entries that were 2, whereas the preceding cell
# maps 0 -> -1 and 2 -> 0; the saved output looks stale -- re-run to confirm.
training_set

tensor([[1., 0., 1.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 1., 1.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.]])

In [None]:
# Creating the architecture of the neural network
class RBM:
    """Restricted Boltzmann machine with randomly initialised parameters.

    Attributes are plain tensors (no autograd is used here):
      W: weight matrix of shape (nh, nv).
      a: hidden-unit bias of shape (1, nh).
      b: visible-unit bias of shape (1, nv).
    """

    def __init__(self, nv, nh):
        """Draw all parameters from a standard normal distribution.

        Args:
            nv: Number of visible units (columns of ``W``).
            nh: Number of hidden units (rows of ``W``).
        """
        self.W = torch.randn(nh, nv)
        self.a = torch.randn(1, nh)
        self.b = torch.randn(1, nv)

    def sample_h(self, x):
        """Sample the hidden units given visible values.

        Args:
            x: Tensor of shape (batch, nv).

        Returns:
            Tuple ``(p_h_given_v, h)``: the sigmoid activation probabilities
            of shape (batch, nh) and a Bernoulli sample drawn from them.
        """
        wx = torch.mm(x, self.W.t())
        activation = wx + self.a.expand_as(wx)
        p_h_given_v = torch.sigmoid(activation)

        return p_h_given_v, torch.bernoulli(p_h_given_v)

    def sample_v(self, y):
        """Sample the visible units given hidden values.

        Args:
            y: Tensor of shape (batch, nh).

        Returns:
            Tuple ``(p_v_given_h, v)``: the sigmoid activation probabilities
            of shape (batch, nv) and a Bernoulli sample drawn from them.
        """
        wy = torch.mm(y, self.W)
        activation = wy + self.b.expand_as(wy)
        p_v_given_h = torch.sigmoid(activation)

        return p_v_given_h, torch.bernoulli(p_v_given_h)

    def train(self, v0, vk, ph0, phk):
        """Update ``W``, ``a`` and ``b`` in place from one batch.

        The update is the difference between statistics of the initial
        visible values and of the values obtained after sampling.

        Args:
            v0: Initial visible values, shape (batch, nv).
            vk: Visible values after sampling, shape (batch, nv).
            ph0: Hidden probabilities given ``v0``, shape (batch, nh).
            phk: Hidden probabilities given ``vk``, shape (batch, nh).
        """
        # mm(v.t(), ph) has shape (nv, nh); transpose it so it matches W's
        # (nh, nv) layout instead of failing with a shape mismatch.
        self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
        # Sum over the batch dimension; the (n,) results broadcast onto the
        # (1, n) bias tensors.
        self.b += torch.sum((v0 - vk), 0)
        self.a += torch.sum((ph0 - phk), 0)

In [None]:
# Model configuration: one visible unit per movie column, 128 hidden units,
# and a batch size of 32; then instantiate the RBM with those sizes.
batch_size = 32
nh = 128
nv = nb_movies
rbm = RBM(nv, nh)