In [1]:
import torch
import torch.nn as nn
import pandas as pd

In [2]:
# Define the model
class MatrixFactorization(nn.Module):
    """Rating model that scores a (user, movie) pair by an embedding dot product.

    Two embedding tables are learned, one row per user and one row per movie.
    The predicted rating is the sum over the factor axis of the element-wise
    product of the selected user row and movie row.

    Args:
        n_users: Number of rows in the user embedding table.
        n_movies: Number of rows in the movie embedding table.
        n_factors: Length of every embedding vector (default 20).
    """

    def __init__(self, n_users, n_movies, n_factors=20):
        super().__init__()
        self.user_factors = nn.Embedding(n_users, n_factors)
        self.movie_factors = nn.Embedding(n_movies, n_factors)
        # Overwrite the default embedding initialisation with uniform values
        # between 0 and 0.5; per the original author's note, starting from
        # positive numbers generally yields better results.
        nn.init.uniform_(self.user_factors.weight, 0, 0.5)
        nn.init.uniform_(self.movie_factors.weight, 0, 0.5)

    def forward(self, user, movie):
        """Return the predicted rating for each (user, movie) index pair.

        Args:
            user: Integer tensor of row indices into the user table.
            movie: Integer tensor of row indices into the movie table. Its
                shape must be broadcast-compatible with ``user``.

        Returns:
            Tensor with the broadcast shape of ``user`` and ``movie``; each
            entry is the dot product of the corresponding factor vectors.
        """
        # Reduce over the last (factor) axis rather than a hard-coded axis 1 so
        # that scalar and multi-dimensional index tensors are handled as well;
        # for 1-D indices the two are the same axis.
        return (self.user_factors(user) * self.movie_factors(movie)).sum(dim=-1)

In [3]:
# Load the MovieLens dataset
movies = pd.read_csv('./ml-latest-small/movies.csv')
ratings = pd.read_csv('./ml-latest-small/ratings.csv')

# Preprocess the data: collect the distinct raw ids once, in order of first
# appearance, and reuse them for both the counts and the lookup tables.
unique_user_ids = ratings['userId'].unique()
unique_movie_ids = ratings['movieId'].unique()
n_users = len(unique_user_ids)
n_movies = len(unique_movie_ids)

# Convert movieId and userId into consecutive integers starting at 0 so they
# can be used directly as row numbers of the embedding tables.
user_map = {raw_id: index for index, raw_id in enumerate(unique_user_ids)}
ratings['user_id'] = ratings['userId'].map(user_map)

movie_map = {raw_id: index for index, raw_id in enumerate(unique_movie_ids)}
ratings['movie_id'] = ratings['movieId'].map(movie_map)

# Create a matrix with users as rows and movies as columns; cells that have no
# rating stay 0. It is filled with one vectorised indexed assignment instead of
# a Python loop over every row of `ratings`.
# NOTE(review): assumes each (user, movie) pair occurs at most once in
# ratings.csv - confirm; the previous row loop kept the last duplicate.
matrix = torch.zeros((n_users, n_movies))
matrix[
    torch.tensor(ratings['user_id'].to_numpy(), dtype=torch.long),
    torch.tensor(ratings['movie_id'].to_numpy(), dtype=torch.long),
] = torch.tensor(ratings['rating'].to_numpy(), dtype=torch.float32)

In [5]:
model = MatrixFactorization(n_users, n_movies)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# The training data does not change between iterations, so the index and
# target tensors are built once here instead of on every pass of the loop.
user = torch.tensor(ratings['user_id'].to_numpy(), dtype=torch.long)
movie = torch.tensor(ratings['movie_id'].to_numpy(), dtype=torch.long)
rating = torch.tensor(ratings['rating'].to_numpy(), dtype=torch.float32)

# Train the model: 100 gradient steps, each computed over all rows of `ratings`.
for i in range(100):
    optimizer.zero_grad()
    predictions = model(user, movie)
    loss = criterion(predictions, rating)
    loss.backward()
    optimizer.step()

    # Report the mean-squared error on every tenth step.
    if i % 10 == 0:
        print(loss)


tensor(6.2147, grad_fn=<MseLossBackward0>)


In [6]:
# Make recommendations for a given user
def recommend_movies(model, user_id, num_recommendations):
    """Return the highest-scoring movies for one user, best first.

    Args:
        model: Callable taking (user index tensor, movie index tensor) and
            returning one score per movie.
        user_id: Raw user id; it is translated through the module-level
            ``user_map`` (a missing id raises ``KeyError``).
        num_recommendations: How many movies to return. Values of zero or
            less yield an empty list; values above ``n_movies`` are clamped.

    Returns:
        List of 0-dim integer tensors. Each holds an internal movie index
        (a position in ``range(n_movies)``), not a raw ``movieId``.
    """
    # Without this guard a request for 0 items would slice with [-0:], which
    # selects every movie rather than none.
    if num_recommendations <= 0:
        return []

    with torch.no_grad():
        user_index = torch.LongTensor([user_map[user_id]])
        movie_indices = torch.arange(n_movies)
        # The single user index broadcasts against all movie indices, giving
        # one predicted score per movie.
        scores = model(user_index, movie_indices)

    # topk returns positions sorted by descending score.
    top_k = min(num_recommendations, n_movies)
    best_positions = torch.topk(scores, top_k).indices
    return list(movie_indices[best_positions])

3939    Clash of the Titans (1981)
Name: title, dtype: object

4197    Final Destination 2 (2003)
Name: title, dtype: object

504    Home Alone (1990)
Name: title, dtype: object



In [None]:
# Print the catalogue rows of the top recommendations for a given user
def getRecs(model, user_id, num_recs):
    """Print the ``movies`` row of each of the user's top recommendations.

    Args:
        model: Trained model, forwarded unchanged to ``recommend_movies``.
        user_id: Raw user id, forwarded unchanged to ``recommend_movies``.
        num_recs: Maximum number of recommendations to print.

    Each recommendation is printed as the list-of-lists form of its matching
    row in ``movies``. Recommendations with no matching row are skipped.
    """
    # recommend_movies() returns positions in range(n_movies), whereas the
    # `movies` frame is keyed by the raw movieId. movie_map goes raw id ->
    # position, so invert it to translate the recommendations back.
    index_to_movie_id = {index: raw_id for raw_id, index in movie_map.items()}

    # Iterate over what was actually returned instead of range(num_recs), so a
    # shorter result list cannot raise IndexError.
    for recommended in recommend_movies(model, user_id, num_recs):
        movie_id = index_to_movie_id[int(recommended)]
        movie = movies.loc[movies['movieId'] == movie_id].values.tolist()
        if len(movie) == 0:
            continue
        print(movie)
        
def getUserInfo(user):
    """Print the ``movies`` row of every film the user rated 5.0 or higher.

    Args:
        user: Raw ``userId`` value to look up in ``ratings``.

    For each qualifying rating, in the order it appears in ``ratings``, the
    matching row(s) of ``movies`` are printed in list-of-lists form, followed
    by a blank line.
    """
    by_this_user = ratings['userId'] == user
    top_rated = ratings['rating'] >= 5.0
    favourite_ids = ratings.loc[by_this_user & top_rated, 'movieId'].values

    for movie_id in favourite_ids:
        print(movies.loc[movies['movieId'] == movie_id].values.tolist())
        print()

In [None]:
# Raw userId whose recommendations and top-rated movies are shown below.
userID = 100

print("Recommendations for User {}: ".format(userID))
print()
getRecs(model, userID, 5)
print()
print("----------------------------------------------------------------------------------------")
print()
# getUserInfo() lists ratings >= 5.0, so the heading says "5.0 or higher"
# rather than "over 5.0".
print("Movies that User {} has rated 5.0 or higher:".format(userID))
print()
getUserInfo(userID)