In [39]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.sparse.linalg import svds

%matplotlib inline
np.random.seed(0)
plt.style.use('ggplot')
np.set_printoptions(suppress=True)
from sklearn.utils import shuffle

In [40]:
# Load the "ml-latest-small" CSV files. header=0 consumes each file's own header
# row, and `names` replaces it with the column labels used throughout this notebook.
movies_df = pd.read_csv(
    './ml-latest-small/movies.csv',
    header=0,
    names=['MovieID', 'Title', 'Genres'],
)
ratings_df = pd.read_csv(
    './ml-latest-small/ratings.csv',
    header=0,
    names=['UserID', 'MovieID', 'Rating', 'Timestamp'],
)
tags_df = pd.read_csv(
    './ml-latest-small/tags.csv',
    header=0,
    names=['UserID', 'MovieID', 'tag', 'Timestamp'],
)
links_df = pd.read_csv(
    './ml-latest-small/links.csv',
    header=0,
    names=['MovieID', 'imdbId', 'tmbdId'],
)

# Wide ratings matrix: one row per UserID, one column per MovieID, NaN where the
# user has not rated the movie.
r_df = ratings_df.pivot(values='Rating', index='UserID', columns='MovieID')


### Load the P and Q factor matrices from pickle to form the predictions matrix

In [41]:
# NOTE: unpickling can execute arbitrary code -- only load these files from a trusted source.
# presumably P holds the per-user factors and Q the per-movie factors -- verify
# against the notebook that wrote these pickles.
P = pd.read_pickle('result_P_df.pkl')
Q = pd.read_pickle('result_q_df.pkl')

In [42]:
# Sanity-check the factor shapes. The parenthesised form prints the same text on
# Python 2 and is valid syntax on Python 3.
print(P.shape)
print(Q.shape)

(671, 20)
(9066, 20)


In [43]:
# Predicted ratings are the product of the two factor matrices. Columns are labelled
# with r_df's MovieIDs; rows keep the default 0-based positional index.
# NOTE(review): assumes Q's rows are in the same order as r_df's columns -- confirm.
result_gd = pd.DataFrame(np.dot(P, Q.T), columns=r_df.columns)

### Import the SVD predictions matrix 

In [44]:
# NOTE: unpickling can execute arbitrary code -- only load this file from a trusted source.
result_svd = pd.read_pickle('result_SVD')
# NOTE(review): this names the ROW index 'MovieID', yet recommend_movies treats rows
# as users -- confirm whether 'UserID' was intended.
result_svd.index.name='MovieID'
# Label the prediction columns with the MovieIDs of the ratings matrix.
result_svd.columns = r_df.columns

### Define the top-K recommendations function

In [45]:
def recommend_movies(predictions_df, userID, movies_df, original_ratings_df, num_recommendations=5):
    '''
    Generates the top-k movie recommendations for a user.

    Args:
        predictions_df: pandas DataFrame of predicted ratings where columns = MovieIDs and
            rows = users. Rows are looked up by position, so row i must hold the
            predictions for UserID i + 1.
        userID: ID of the user to generate recommendations for (1-based).
        movies_df: DataFrame of movies, where the movie ID column is named "MovieID".
        original_ratings_df: Long-format ratings DataFrame with "UserID", "MovieID" and
            "Rating" columns (one row per rating).
        num_recommendations: Number of recommendations to return.

    Returns:
        A tuple (user_full, recommendations):
            user_full: the user's existing ratings joined with the movie information,
                sorted by Rating, highest first.
            recommendations: the num_recommendations movies the user has not rated with
                the highest predicted rating, best first, with the columns of movies_df.

    Raises:
        IndexError: if userID does not map to a row of predictions_df.
    '''
    user_row_number = userID - 1  # UserID starts at 1, not 0
    # Reject out-of-range IDs explicitly; a negative position would otherwise wrap
    # around silently and return another user's predictions.
    if not 0 <= user_row_number < len(predictions_df):
        raise IndexError('userID {0} is out of range for a predictions matrix with {1} rows.'
                         .format(userID, len(predictions_df)))

    # Get and sort the user's predictions.
    sorted_user_predictions = predictions_df.iloc[user_row_number].sort_values(ascending=False)
    # Name both columns explicitly so the merge below does not depend on how the
    # rows/columns of predictions_df happen to be labelled.
    predictions = pd.DataFrame({'MovieID': sorted_user_predictions.index.values,
                                'Predictions': sorted_user_predictions.values},
                               columns=['MovieID', 'Predictions'])

    # Get the user's data and merge in the movie information.
    user_data = original_ratings_df[original_ratings_df.UserID == userID]
    user_full = (user_data.merge(movies_df, how='left', on='MovieID')
                 .sort_values(['Rating'], ascending=False))

    # Parenthesised print works on both Python 2 and Python 3.
    print('User {0} has already rated {1} movies.'.format(userID, user_full.shape[0]))
    print('Recommending the highest {0} predicted ratings movies not already rated.'.format(num_recommendations))

    # Recommend the highest predicted rating movies that the user hasn't seen yet.
    unseen_movies = movies_df[~movies_df['MovieID'].isin(user_full['MovieID'])]
    recommendations = (unseen_movies.merge(predictions, how='left', on='MovieID')
                       .sort_values('Predictions', ascending=False)
                       # 'Predictions' is the last column; drop it so only movie info is returned.
                       .iloc[:num_recommendations, :-1])
    return user_full, recommendations



### 1. Predictions from GD

In [46]:
# Top-20 recommendations for user 419 from the GD prediction matrix.
rated, recs = recommend_movies(result_gd, 419, movies_df, ratings_df, 20)

User 419 has already rated 50 movies.
Recommending the highest 20 predicted ratings movies not already rated.


In [47]:
# Movies the user has already rated, highest rating first.
rated

Unnamed: 0,UserID,MovieID,Rating,Timestamp,Title,Genres
0,419,1,4.5,1110049948,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
12,419,1293,4.5,1110049704,Gandhi (1982),Drama
45,419,6870,4.5,1110050595,Mystic River (2003),Crime|Drama|Mystery
43,419,6331,4.5,1110050577,Spellbound (2002),Documentary
39,419,5876,4.5,1110050633,"Quiet American, The (2002)",Drama|Thriller|War
24,419,2762,4.5,1110050489,"Sixth Sense, The (1999)",Drama|Horror|Mystery
21,419,2302,4.5,1110049581,My Cousin Vinny (1992),Comedy
14,419,1641,4.5,1110050097,"Full Monty, The (1997)",Comedy|Drama
13,419,1304,4.5,1110050163,Butch Cassidy and the Sundance Kid (1969),Action|Western
49,419,8961,4.5,1110050125,"Incredibles, The (2004)",Action|Adventure|Animation|Children|Comedy


In [48]:
# GD recommendations: movies not yet rated, highest predicted rating first.
recs

Unnamed: 0,MovieID,Title,Genres
4500,6256,"House with Laughing Windows, The (Casa dalle f...",Horror|Mystery|Thriller
5381,8491,White Heat (1949),Crime|Drama|Film-Noir
4806,6869,Bus 174 (Ônibus 174) (2002),Crime|Documentary
1160,1450,Prisoner of the Mountains (Kavkazsky plennik) ...,War
7984,93855,God Bless America (2011),Comedy|Drama
2881,3655,Blow-Out (La grande bouffe) (1973),Drama
2733,3460,Hillbillys in a Haunted House (1967),Comedy
4831,6927,"Human Stain, The (2003)",Drama|Romance|Thriller
3451,4406,"Man Who Shot Liberty Valance, The (1962)",Crime|Drama|Western
7572,80553,Howl (2010),Drama


### 2. Predictions from SVD

In [49]:
# Top-20 recommendations for the same user from the SVD prediction matrix.
# Note: this rebinds `rated` and `recs`, replacing the GD results above.
rated, recs = recommend_movies(result_svd, 419, movies_df, ratings_df, 20)

User 419 has already rated 50 movies.
Recommending the highest 20 predicted ratings movies not already rated.


In [52]:
# Movies the user has already rated, highest rating first.
rated

Unnamed: 0,UserID,MovieID,Rating,Timestamp,Title,Genres
0,419,1,4.5,1110049948,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
12,419,1293,4.5,1110049704,Gandhi (1982),Drama
45,419,6870,4.5,1110050595,Mystic River (2003),Crime|Drama|Mystery
43,419,6331,4.5,1110050577,Spellbound (2002),Documentary
39,419,5876,4.5,1110050633,"Quiet American, The (2002)",Drama|Thriller|War
24,419,2762,4.5,1110050489,"Sixth Sense, The (1999)",Drama|Horror|Mystery
21,419,2302,4.5,1110049581,My Cousin Vinny (1992),Comedy
14,419,1641,4.5,1110050097,"Full Monty, The (1997)",Comedy|Drama
13,419,1304,4.5,1110050163,Butch Cassidy and the Sundance Kid (1969),Action|Western
49,419,8961,4.5,1110050125,"Incredibles, The (2004)",Action|Adventure|Animation|Children|Comedy


In [53]:
# SVD recommendations: movies not yet rated, highest predicted rating first.
recs

Unnamed: 0,MovieID,Title,Genres
279,318,"Shawshank Redemption, The (1994)",Crime|Drama
4355,5952,"Lord of the Rings: The Two Towers, The (2002)",Adventure|Fantasy
3835,4993,"Lord of the Rings: The Fellowship of the Ring,...",Adventure|Fantasy
3384,4306,Shrek (2001),Adventure|Animation|Children|Comedy|Fantasy|Ro...
1336,1704,Good Will Hunting (1997),Drama|Romance
3811,4963,Ocean's Eleven (2001),Crime|Thriller
4566,6377,Finding Nemo (2003),Adventure|Animation|Children|Comedy
2830,3578,Gladiator (2000),Action|Adventure|Drama
261,296,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
3837,4995,"Beautiful Mind, A (2001)",Drama|Romance
