In [1]:
import numpy as np
import pandas as pd
import matrix_factorization_utilities

In [2]:
# Read the raw ratings log from disk and preview its first rows.
# Each row carries a user_id, a movie_id and the rating value.
raw_dataset_df = pd.read_csv(filepath_or_buffer='movie_ratings_data_set.csv')
raw_dataset_df.head(n=5)

Unnamed: 0,user_id,movie_id,value
0,1,28,4
1,1,26,4
2,1,9,4
3,1,1,4
4,1,14,4


In [3]:
# Read the movie catalogue and key it by movie_id so that ratings can be
# matched to titles/genres by label later on.
movies_df = pd.read_csv('movies.csv').set_index('movie_id')
movies_df.head(n=5)

Unnamed: 0_level_0,title,genre
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,The Sheriff 1,"crime drama, western"
2,The Big City Judge 1,legal drama
3,The Sheriff 2,"crime drama, western"
4,Just a Regular Family,reality
5,The Big City Judge 2,legal drama


In [4]:
# Convert the running list of user ratings into a user x movie matrix.
# - values=['value'] pins the pivot to the rating column only, so any extra
#   column in the CSV cannot silently widen the matrix. The list form keeps
#   the two-level ('value', movie_id) column layout.
# - aggfunc='max' keeps the highest rating when a user rated a movie more than
#   once; the string form avoids the pandas FutureWarning raised for np.max.
# User/movie pairs without a rating are left as NaN.
ratings_df = pd.pivot_table(raw_dataset_df,
                            index='user_id',
                            columns='movie_id',
                            values=['value'],
                            aggfunc='max')

# Apply matrix factorization to find the latent features.
# num_features is the number of latent features to learn;
# regularization_amount is forwarded to the helper unchanged.
# NOTE(review): presumably U is (users x features) and M is (features x movies),
# with rows/columns in the same order as ratings_df -- confirm in the helper.
U, M = matrix_factorization_utilities.low_rank_matrix_factorization(ratings_df.values,
                                                                    num_features=15,
                                                                    regularization_amount=0.1)

# Find all predicted ratings by multiplying U and M matrices
predicted_ratings = np.matmul(U, M)

         Current function value: 32.504363
         Iterations: 3000
         Function evaluations: 4505
         Gradient evaluations: 4505


In [5]:
# Ask which user to build recommendations for; the typed text is parsed as an
# integer (non-numeric input raises ValueError).
print("Enter a user_id to get recommendations (Between 1 and 100):")
typed_user_id = input()
user_id_to_search = int(typed_user_id)

Enter a user_id to get recommendations (Between 1 and 100):
2


In [6]:
# List the ratings the selected user has already given, with each movie's
# catalogue columns (title, genre) attached via a movie_id lookup.
print("Movies previously reviewed by user_id {}:".format(user_id_to_search))

is_selected_user = raw_dataset_df['user_id'].eq(user_id_to_search)
reviewed_movies_df = raw_dataset_df.loc[is_selected_user].join(movies_df, on='movie_id')
reviewed_movies_df

Movies previously reviewed by user_id 2:


Unnamed: 0,user_id,movie_id,value,title,genre
6,2,2,5,The Big City Judge 1,legal drama
7,2,15,4,We Will Fight Those Aliens,"sci-fi, action"
8,2,1,5,The Sheriff 1,"crime drama, western"
9,2,21,5,Political Gaffs,"comedy, political satire"
10,2,34,4,The Serious Detective,detective drama
11,2,14,4,The Spy Family,spy drama
12,2,31,3,My Complicated Family,comedy-drama


In [7]:
print("Movies we will recommend:")

# Find this user's row in the ratings matrix by label instead of assuming that
# user_ids are exactly 1..N with no gaps. An unknown user_id raises KeyError
# here rather than silently returning another user's predictions.
user_row = ratings_df.index.get_loc(user_id_to_search)

# Label every predicted rating with the movie_id of its matrix column (the
# last level of the pivot-table columns), so ratings are matched to movies by
# id rather than by row position.
rated_movie_ids = ratings_df.columns.get_level_values(-1)
user_ratings = pd.Series(predicted_ratings[user_row], index=rated_movie_ids)

# Attach the ratings to a copy of the catalogue: movies_df itself stays
# untouched, so re-running earlier cells never sees a stale 'rating' column.
# Movies missing from the ratings matrix get NaN and sort to the bottom.
already_reviewed = reviewed_movies_df['movie_id']
recommended_df = (
    movies_df
    .assign(rating=user_ratings)
    .loc[lambda scored: ~scored.index.isin(already_reviewed)]
    .sort_values(by='rating', ascending=False)
)

recommended_df.head(5)

Movies we will recommend:


Unnamed: 0_level_0,title,genre,rating
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10,Surrounded by Zombies 1,"horror, zombie fiction",4.837406
5,The Big City Judge 2,legal drama,4.831207
13,The Sheriff 3,"crime drama, western",4.776908
24,The Big City Judge 3,legal drama,4.732256
6,Attack on Earth 1,"sci-fi, action",4.706611
