In [3]:
import pandas as pd
import numpy as np
import matrix_factorization_utilities
from sklearn.metrics.pairwise import cosine_similarity

In [4]:
R = pd.read_csv('movie_ratings_data_set.csv')

In [5]:
R.head(10)

Unnamed: 0,user_id,movie_id,value
0,1,28,4
1,1,26,4
2,1,9,4
3,1,1,4
4,1,14,4
5,1,13,5
6,2,2,5
7,2,15,4
8,2,1,5
9,2,21,5


In [6]:
print('Number of users in the dataset', len(pd.unique(R.user_id)))

Number of users in the dataset 100


In [7]:
rating_df = pd.pivot_table(R, index='user_id',columns='movie_id',aggfunc=np.max)

In [8]:
rating_df.head()

Unnamed: 0_level_0,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value
movie_id,1,2,3,4,5,6,7,8,9,10,...,25,26,27,28,29,30,31,32,33,34
user_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1,4.0,,,,,,,,4.0,,...,,4.0,,4.0,,,,,,
2,5.0,5.0,,,,,,,,,...,,,,,,,3.0,,,4.0
3,4.0,4.0,5.0,,,,,,,,...,,,,,,,,,,
4,5.0,5.0,,5.0,5.0,,,,,,...,,,,,,,,,,
5,5.0,,,,,,,,5.0,,...,,,,,3.0,,3.0,2.0,5.0,5.0


In [9]:
movies_df = pd.read_csv('movies.csv', index_col='movie_id')
movies_df.shape

(34, 2)

In [10]:
movies_df.head()

Unnamed: 0_level_0,title,genre
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,The Sheriff 1,"crime drama, western"
2,The Big City Judge 1,legal drama
3,The Sheriff 2,"crime drama, western"
4,Just a Regular Family,reality
5,The Big City Judge 2,legal drama


**Matrix Factorization**

For a lot of users, rating values are missing. Matrix factorization is used to predict the missing values. Rating matrix is factorized into U and M matrices. Since all elements of rating matrix are unknown, we will have to find U and M iteratively. 

* First elements of U and M are randomly chosen
* Product of U.M gives R* (approximate rating matrix)
* Difference between elements of R* and corresponding known elments of ratings matrix are compared for cost calculation
* Cost is minimized with optimiztion function to predict new element values

In [11]:
U, M = matrix_factorization_utilities.low_rank_matrix_factorization(rating_df.values, num_features=15,\
                                                                    regularization_amount=0.1)

         Current function value: 32.504359
         Iterations: 3000
         Function evaluations: 4518
         Gradient evaluations: 4518


In [41]:
predicted_ratings = np.round(np.matmul(U,M))

In [86]:
print ('Enter a user_id to obtain recommendations')
user_id_to_search = int(input())

Enter a user_id to obtain recommendations
3


In [87]:
print ('Movies previously watched by User id {}:'.format(user_id_to_search))

Movies previously watched by User id 3:


In [88]:
reviewed_movies_df = R[R['user_id'] == user_id_to_search]
reviewed_movies_df = R.join(movies_df, on='movie_id')

In [89]:
reviewd_movies_df.head(10)

Unnamed: 0,user_id,movie_id,value,title,genre,ratings
6,2,2,5,The Big City Judge 1,legal drama,5.0
7,2,15,4,We Will Fight Those Aliens,"sci-fi, action",4.0
8,2,1,5,The Sheriff 1,"crime drama, western",4.0
9,2,21,5,Political Gaffs,"comedy, political satire",5.0
10,2,34,4,The Serious Detective,detective drama,4.0
11,2,14,4,The Spy Family,spy drama,4.0
12,2,31,3,My Complicated Family,comedy-drama,2.0


In [90]:
user_ratings = predicted_ratings[user_id_to_search-1]

already_reviewed = reviewd_movies_df['movie_id']

# Predicted ratings for all movies by user selected
movies_df['ratings'] = user_ratings

In [91]:
recommended_df = movies_df[movies_df.index.isin(already_reviewed) == False]

In [92]:
recommended_df = recommended_df.sort_values(by=['ratings'], ascending=False)

In [93]:
print(recommended_df[['title', 'genre', 'ratings']].head(5))

                         title                          genre  ratings
movie_id                                                              
3                The Sheriff 2           crime drama, western      5.0
5         The Big City Judge 2                    legal drama      5.0
13               The Sheriff 3           crime drama, western      5.0
6            Attack on Earth 1                 sci-fi, action      4.0
16             Master Criminal  thriller, horror, crime drama      4.0
