## Movie Recommendation using Matrix Factorization

In [11]:
# Necessary libraries
import numpy as np
import pandas as pd
# For matrix factorization
from scipy.sparse.linalg import svds

# Load Datasets

In [12]:
# Load ratings dataset
rating = pd.read_csv('/content/drive/MyDrive/MAR6669-data/ratings.csv')
rating.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [13]:
# More than 100,000 ratings
rating.shape

(100836, 4)

In [14]:
# Load movies dataset
movie = pd.read_csv('/content/drive/MyDrive/MAR6669-data/movies.csv')
movie.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [15]:
# More than 9700 movies
movie.shape

(9742, 3)

In [16]:
# Merge two datasets to have better picture
df = pd.merge(rating, movie, on='movieId')
df.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy


In [17]:
df.shape

(100836, 6)

# Matrix Factorization

In [18]:
# Construct a user-movie matrix
mtrx_df = rating.pivot(index = 'userId', columns ='movieId', values = 'rating').fillna(0)
mtrx_df.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [19]:
mtrx_df.shape

(610, 9724)

In [20]:
# Center the data
mtrx = mtrx_df.to_numpy()
ratings_mean = np.mean(mtrx, axis = 1)
normalized_mtrx = mtrx - ratings_mean.reshape(-1, 1)

In [21]:
# Matrix factorization by Singular value decomposition
U, sigma, Vt = svds(normalized_mtrx, k = 50)

In [22]:
# Make predictions for any user
sigma = np.diag(sigma)
all_predicted_ratings = np.dot(np.dot(U, sigma), Vt) + ratings_mean.reshape(-1, 1)
preds_df = pd.DataFrame(all_predicted_ratings, columns = mtrx_df.columns)

In [23]:
preds_df.shape

(610, 9724)

# Recommendations

In [25]:
user = 1
preds_df.iloc[user].sort_values(ascending=False)[:10]

movieId
79132    2.109064
58559    1.856581
318      1.629821
2959     1.516989
74458    1.462162
68157    1.421954
91529    1.387216
99114    1.289547
48516    1.248550
2571     1.179570
Name: 1, dtype: float64

In [38]:
movie_id = preds_df.iloc[user].sort_values(ascending=False)[:10].index
title = movie[movie['movieId'].isin(movie_id).values][['movieId','title']]
print(title.to_string(index=False))

 movieId                            title
     318 Shawshank Redemption, The (1994)
    2571               Matrix, The (1999)
    2959                Fight Club (1999)
   48516             Departed, The (2006)
   58559          Dark Knight, The (2008)
   68157      Inglourious Basterds (2009)
   74458            Shutter Island (2010)
   79132                 Inception (2010)
   91529    Dark Knight Rises, The (2012)
   99114          Django Unchained (2012)
