In [1]:
import pandas as pd
import numpy as np

# Read the ratings and movies CSV tables from the ml-latest-small directory
# (ratings first, then movies).
ratings, movies = (
    pd.read_csv(csv_path)
    for csv_path in ('ml-latest-small/ratings.csv', 'ml-latest-small/movies.csv')
)

# Preview the first rows of the movie table.
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [2]:
# Reshape the long ratings table into a wide grid: one row per userId,
# one column per movieId, each cell holding that user's rating.
# Cells left empty by the pivot (NaN) are replaced with 0.
R_df = (
    ratings
    .pivot(index='userId', columns='movieId', values='rating')
    .fillna(0)
)
R_df.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [3]:
from scipy.sparse.linalg import svds

# Plain NumPy view of the user x movie rating grid.
# DataFrame.as_matrix() was deprecated and has since been removed from pandas;
# DataFrame.to_numpy() is the supported replacement.
R = R_df.to_numpy()

# Centre every user's row on that user's mean over all columns of R
# (the 0-filled cells are part of that mean).
user_ratings_mean = np.mean(R, axis = 1)
R_demeaned = R - user_ratings_mean.reshape(-1, 1)

# Truncated SVD keeping k = 50 singular values/vectors.
U, sigma, Vt = svds(R_demeaned, k = 50)

# svds returns the singular values as a 1-D array; expand them into a
# diagonal matrix so they can be used directly in matrix products.
sigma = np.diag(sigma)

  """Entry point for launching an IPython kernel.


In [4]:
# Multiply the SVD factors back together and add each user's mean rating
# back onto that user's row.
all_user_predicted_ratings = (U @ sigma @ Vt) + user_ratings_mean[:, np.newaxis]

# Label the columns with the movieIds of R_df. No row index is passed, so the
# rows keep the default positional index 0..n-1.
preds_df = pd.DataFrame(data=all_user_predicted_ratings, columns=R_df.columns)

In [5]:
def recommend_movies(predictions_df, userID, movies, original_ratings, num_recommendations=5):
    """Recommend the highest-predicted movies that a user has not rated yet.

    Parameters
    ----------
    predictions_df : pandas.DataFrame
        Predicted ratings with one row per user and one column per movieId.
        Rows are looked up by position: the row used for ``userID`` is row
        ``userID - 1``, whatever the frame's index labels are.
    userID : int
        1-based user id; also matched against ``original_ratings.userId``.
    movies : pandas.DataFrame
        Movie table containing a ``movieId`` column.
    original_ratings : pandas.DataFrame
        Ratings table containing ``userId``, ``movieId`` and ``rating`` columns.
    num_recommendations : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    user_full : pandas.DataFrame
        The user's existing ratings joined with ``movies``, sorted by rating
        in descending order.
    recommendations : pandas.DataFrame
        Up to ``num_recommendations`` rows of ``movies`` the user has not
        rated, ordered by predicted rating (highest first). The helper
        prediction column is dropped, so only the movie columns remain.

    Raises
    ------
    IndexError
        If ``userID - 1`` is not a valid row position in ``predictions_df``.
    """
    # Get and sort the user's predictions.
    user_row_number = userID - 1
    # Reject out-of-range ids explicitly; a negative position would otherwise
    # wrap around and select a row counted from the end of the frame.
    if not 0 <= user_row_number < len(predictions_df):
        raise IndexError(
            'userID {} is out of range for predictions with {} rows'.format(
                userID, len(predictions_df)))

    # Name the values and the axis explicitly so the merge below does not
    # depend on the row's index label or on the columns axis being named.
    user_predictions = (predictions_df.iloc[user_row_number]
                        .sort_values(ascending=False)
                        .rename('Predictions')
                        .rename_axis('movieId')
                        .reset_index())

    # Get the user's data and merge in the movie information.
    user_data = original_ratings[original_ratings.userId == userID]
    user_full = (user_data
                 .merge(movies, how='left', on='movieId')
                 .sort_values(['rating'], ascending=False))

    print('User',userID,'has already rated',user_full.shape[0],'movies.')
    print('Recommending the highest', num_recommendations ,'predicted ratings movies not already rated.')

    # Recommend the highest predicted rating movies that the user hasn't rated yet.
    unrated_movies = movies[~movies['movieId'].isin(user_full['movieId'])]
    recommendations = (unrated_movies
                       .merge(user_predictions, how='left', on='movieId')
                       .sort_values('Predictions', ascending=False)
                       # 'Predictions' is the last column after the merge; drop it
                       # so only the movie columns are returned.
                       .iloc[:num_recommendations, :-1])

    return user_full, recommendations

# Top 10 recommendations for user 130, together with that user's existing ratings.
already_rated, predictions = recommend_movies(
    preds_df,
    userID=130,
    movies=movies,
    original_ratings=ratings,
    num_recommendations=10,
)

User 130 has already rated 28 movies.
Recommending the highest 10 predicted ratings movies not already rated.


In [6]:
# First ten rows of the user's existing ratings; recommend_movies returns
# this frame sorted by rating in descending order.
already_rated.head(10)

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
16,130,318,5.0,832589583,"Shawshank Redemption, The (1994)",Crime|Drama
1,130,110,5.0,832589660,Braveheart (1995),Action|Drama|War
11,130,292,5.0,832589635,Outbreak (1995),Action|Drama|Sci-Fi|Thriller
8,130,225,5.0,832589660,Disclosure (1994),Drama|Thriller
9,130,231,4.0,832589583,Dumb & Dumber (Dumb and Dumber) (1994),Adventure|Comedy
19,130,344,4.0,832589546,Ace Ventura: Pet Detective (1994),Comedy
22,130,410,4.0,832589660,Addams Family Values (1993),Children|Comedy|Fantasy
12,130,296,4.0,832589516,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
10,130,253,4.0,832589635,Interview with the Vampire: The Vampire Chroni...,Drama|Horror
18,130,339,4.0,832589610,While You Were Sleeping (1995),Comedy|Romance


In [7]:
# Recommended movies for the user (up to the 10 requested), ordered by
# predicted rating; the prediction values themselves are not included.
predictions

Unnamed: 0,movieId,title,genres
293,356,Forrest Gump (1994),Comedy|Drama|Romance|War
374,457,"Fugitive, The (1993)",Thriller
394,480,Jurassic Park (1993),Action|Adventure|Sci-Fi|Thriller
482,589,Terminator 2: Judgment Day (1991),Action|Sci-Fi
371,454,"Firm, The (1993)",Drama|Thriller
483,593,"Silence of the Lambs, The (1991)",Crime|Horror|Thriller
301,364,"Lion King, The (1994)",Adventure|Animation|Children|Drama|Musical|IMAX
313,377,Speed (1994),Action|Romance|Thriller
42,47,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
8,10,GoldenEye (1995),Action|Adventure|Thriller
