# MovieLens Recommendations
By Tanay Trivedi and Jonathan Bair

In [1]:
import numpy as np
import pandas as pd

# Load the three tab-separated tables, keeping only the columns listed for each.
rat = pd.read_csv('ratings.csv', sep='\t').loc[:, ['movie_id', 'user_id', 'timestamp', 'rating']]
user = pd.read_csv('users.csv', sep='\t').loc[:, ['age_desc', 'occ_desc', 'user_id', 'gender', 'zipcode']]
movie = pd.read_csv('movies.csv', sep='\t').loc[:, ['movie_id', 'title', 'genres']]

In [2]:
# Report how many distinct users and movies occur in the ratings table.
print("Number of users: ", len(rat['user_id'].unique()), sep='')
print("Number of movies: ", len(rat['movie_id'].unique()), sep='')

Number of users: 6040
Number of movies: 3706


In [3]:
# Preview only the first rows instead of rendering the whole ratings table.
rat.head(10)

Unnamed: 0,movie_id,user_id,timestamp,rating
0,1193,1,978300760,5
1,661,1,978302109,3
2,914,1,978301968,3
3,3408,1,978300275,4
4,2355,1,978824291,5
5,1197,1,978302268,3
6,1287,1,978302039,5
7,2804,1,978300719,5
8,594,1,978302268,4
9,919,1,978301368,4


In [4]:
# Build the user x movie rating matrix (rows: user_id, columns: movie_id);
# user/movie pairs without a rating are filled with 0.
Rat = (
    rat.pivot(index='user_id', columns='movie_id', values='rating')
       .fillna(0)
)
Rat.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# Per-user mean across every movie column. Rat holds 0 for unrated movies, so
# those zeros are part of the average (it is not the mean of the ratings the
# user actually gave).
# NOTE(review): confirm this is intended before reading predictions on the rating scale.
user_mean = Rat.mean(axis='columns')
# Centre each user's row on that mean.
Rat_z = Rat.sub(user_mean, axis='index')

# SVD

In [6]:
from scipy.sparse.linalg import svds
# svds is documented for ndarray / sparse matrix / LinearOperator input, so hand
# it the underlying float array rather than the DataFrame wrapper.
# It returns the k largest singular triplets. The third value is already the
# transposed right factor (k rows, one column per movie), so the centred matrix
# is approximated by U @ diag(S) @ V with no further transpose.
U, S, V = svds(np.asarray(Rat_z, dtype=float), k = 50)


In [7]:
# Expand the vector of singular values into a square diagonal matrix.
Sigma = np.diagflat(S)

In [9]:
# Rebuild the low-rank approximation U . Sigma . V, then add each user's mean
# back on; the means are shaped as a column vector so they broadcast per row.
predicted_ratings = U.dot(Sigma).dot(V) + user_mean.values[:, np.newaxis]

In [11]:
# Wrap the prediction array in a DataFrame labelled with Rat's movie_id columns.
# No index is passed, so rows are labelled 0..n-1 by position.
preds = pd.DataFrame(data=predicted_ratings, columns=Rat.columns)
preds.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
0,4.288861,0.143055,-0.19508,-0.018843,0.012232,-0.176604,-0.07412,0.141358,-0.059553,-0.19595,...,0.027807,0.00164,0.026395,-0.022024,-0.085415,0.403529,0.105579,0.031912,0.05045,0.08891
1,0.744716,0.169659,0.335418,0.000758,0.022475,1.35305,0.051426,0.071258,0.161601,1.567246,...,-0.056502,-0.013733,-0.01058,0.062576,-0.016248,0.15579,-0.418737,-0.101102,-0.054098,-0.140188
2,1.818824,0.456136,0.090978,-0.043037,-0.025694,-0.158617,-0.131778,0.098977,0.030551,0.73547,...,0.040481,-0.005301,0.012832,0.029349,0.020866,0.121532,0.076205,0.012345,0.015148,-0.109956
3,0.408057,-0.07296,0.039642,0.089363,0.04195,0.237753,-0.049426,0.009467,0.045469,-0.11137,...,0.008571,-0.005425,-0.0085,-0.003417,-0.083982,0.094512,0.057557,-0.02605,0.014841,-0.034224
4,1.574272,0.021239,-0.0513,0.246884,-0.032406,1.552281,-0.19963,-0.01492,-0.060498,0.450512,...,0.110151,0.04601,0.006934,-0.01594,-0.05008,-0.052539,0.507189,0.03383,0.125706,0.199244


In [12]:
def recommend(userID, num_recommendations=None):
    """Return the movies a user has not rated yet, best predicted rating first.

    Reads the notebook-level frames ``preds`` (predicted ratings, one row per
    user, one column per movie_id), ``rat`` (observed ratings) and ``movie``
    (movie metadata).  Rows of ``preds`` are assumed to follow the sorted
    user ids found in ``rat``, which is the order produced by pivoting ``rat``
    on ``user_id``.

    Parameters
    ----------
    userID : int
        A user id that occurs in ``rat['user_id']``.
    num_recommendations : int, optional
        Keep only this many top rows.  ``None`` (the default) returns every
        unrated movie.

    Returns
    -------
    pandas.DataFrame
        Rows of ``movie`` (same columns) for the titles the user has no rating
        for, ordered by predicted rating, highest first.  Movies without a
        column in ``preds`` have no prediction and sort last.

    Raises
    ------
    ValueError
        If ``userID`` does not occur in ``rat``.
    """
    # preds carries a positional 0..n-1 index, so the user's row has to be
    # resolved by position.  Do it explicitly instead of assuming
    # ``userID - 1``: that shortcut silently returns the *last* user's
    # predictions for userID=0 and points at the wrong row (or fails with an
    # IndexError) as soon as the user ids have a gap.
    known_users = pd.Index(rat['user_id'].unique()).sort_values()
    if userID not in known_users:
        raise ValueError("unknown userID: {!r}".format(userID))
    row_pos = known_users.get_loc(userID)

    # Name the series and its index up front so the merged column is always
    # called 'Predictions', independent of how the preds row is labelled.
    user_preds = (preds.iloc[row_pos]
                       .rename('Predictions')
                       .rename_axis('movie_id')
                       .reset_index())

    # Only the ids of the movies the user has rated are needed to filter.
    already_rated = rat.loc[rat['user_id'] == userID, 'movie_id']
    candidates = movie[~movie['movie_id'].isin(already_rated)]

    recommendations = (candidates
                       .merge(user_preds, how='left', on='movie_id')
                       # stable sort keeps tied predictions in a deterministic order
                       .sort_values('Predictions', ascending=False, kind='mergesort')
                       # the score was only needed for ordering; drop it by name
                       .drop('Predictions', axis=1))

    if num_recommendations is not None:
        recommendations = recommendations.head(num_recommendations)
    return recommendations

In [15]:
# Recommendations for one example user.
predictions = recommend(userID=1024)

In [16]:
# Show only the top of the ranked list rather than every unrated movie.
predictions.head(10)

Unnamed: 0,movie_id,title,genres
1250,1296,"Room with a View, A (1986)",Drama|Romance
349,357,Four Weddings and a Funeral (1994),Comedy|Romance
36,39,Clueless (1995),Comedy|Romance
895,919,"Wizard of Oz, The (1939)",Adventure|Children's|Drama|Musical
1139,1172,Cinema Paradiso (1988),Comedy|Drama|Romance
2141,2248,Say Anything... (1989),Comedy|Drama|Romance
1345,1393,Jerry Maguire (1996),Drama|Romance
1963,2067,Doctor Zhivago (1965),Drama|Romance|War
892,916,Roman Holiday (1953),Comedy|Romance
55,58,"Postino, Il (The Postman) (1994)",Drama|Romance
