# Movie recommendation using **Collaborative Filtering**

In [147]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [148]:
# Read the two source tables from CSV files located relative to the
# notebook's working directory.
movies = pd.read_csv("movie-lens-dataset/movies.csv")
ratings = pd.read_csv("movie-lens-dataset/ratings.csv")

# Preview the first ten rows of each table.
print("Movies:")
display(movies.head(10))
print("User Ratings:")
display(ratings.head(10))

Movies:


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
5,6,Heat (1995),Action|Crime|Thriller
6,7,Sabrina (1995),Comedy|Romance
7,8,Tom and Huck (1995),Adventure|Children
8,9,Sudden Death (1995),Action
9,10,GoldenEye (1995),Action|Adventure|Thriller


User Ratings:


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
5,1,70,3.0,964982400
6,1,101,5.0,964980868
7,1,110,4.0,964982176
8,1,151,5.0,964984041
9,1,157,5.0,964984100


## *Combine movies and user ratings*

In [149]:
# Attach every rating to its movie (inner join on movieId), then reshape the
# result into a userId x title matrix holding the rating values. User/title
# combinations without a rating come out as NaN; if several ratings fall on
# the same (userId, title) cell, pivot_table's default aggregation (mean)
# combines them.
data = (
    movies
    .merge(ratings, on="movieId")
    .pivot_table(index="userId", columns="title", values="rating")
)
data

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,4.0,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,,,,,,,,,,,...,,,,,,,,,,
607,,,,,,,,,,,...,,,,,,,,,,
608,,,,,,,,,,,...,,,,,,4.5,3.5,,,
609,,,,,,,,,,,...,,,,,,,,,,


## *Remove movies with fewer than 10 user ratings*

In [150]:
# Keep only the movie columns that hold at least 10 non-missing ratings, then
# encode "not rated" as 0 so the matrix contains no NaN values.
data = data.dropna(axis="columns", thresh=10).fillna(value=0)
data

title,"'burbs, The (1989)",(500) Days of Summer (2009),10 Cloverfield Lane (2016),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),...,Zack and Miri Make a Porno (2008),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,3.0,0.0,4.5,3.5,0.0
609,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## *Normalize*

In [151]:
def normalize(row):
    """Mean-centre a vector of ratings and scale it by its value range.

    Computes ``(row - mean) / (max - min)``. Note that, despite the parameter
    name, this notebook calls it through ``data.apply(normalize)``, which with
    the default axis passes one *column* at a time.

    Parameters
    ----------
    row : pandas.Series or numpy.ndarray
        Numeric values to normalise.

    Returns
    -------
    Same type as ``row``
        The centred and range-scaled values. When every value in ``row`` is
        identical (range of zero) the result is all zeros instead of the NaN
        values a 0/0 division would produce.
    """
    centered = row - row.mean()
    value_range = row.max() - row.min()
    if value_range == 0:
        # Constant input: dividing would give 0/0 = NaN, and NaN values make
        # the later cosine-similarity step fail. `x - x` yields exact zeros
        # with the same shape/index as the input.
        return centered - centered
    return centered / value_range

# axis=0 (apply's default) hands `normalize` one movie column at a time, so
# every movie is centred and scaled independently of the others.
data_std = data.apply(normalize, axis=0)
data_std

title,"'burbs, The (1989)",(500) Days of Summer (2009),10 Cloverfield Lane (2016),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),...,Zack and Miri Make a Porno (2008),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-0.017705,-0.050492,-0.016885,-0.062459,-0.018852,-0.047377,-0.049508,-0.077541,-0.021129,-0.022623,...,-0.024262,-0.018852,-0.019508,-0.04623,-0.067377,-0.062131,-0.04082,-0.027869,-0.027254,0.773279
2,-0.017705,-0.050492,-0.016885,-0.062459,-0.018852,-0.047377,-0.049508,-0.077541,-0.021129,-0.022623,...,-0.024262,-0.018852,-0.019508,-0.04623,0.532623,-0.062131,-0.04082,-0.027869,-0.027254,-0.026721
3,-0.017705,-0.050492,-0.016885,-0.062459,-0.018852,-0.047377,-0.049508,-0.077541,-0.021129,-0.022623,...,-0.024262,-0.018852,-0.019508,-0.04623,-0.067377,-0.062131,-0.04082,-0.027869,-0.027254,-0.026721
4,-0.017705,-0.050492,-0.016885,-0.062459,-0.018852,-0.047377,-0.049508,0.922459,-0.021129,-0.022623,...,-0.024262,-0.018852,-0.019508,-0.04623,-0.067377,-0.062131,-0.04082,-0.027869,-0.027254,-0.026721
5,-0.017705,-0.050492,-0.016885,-0.062459,-0.018852,-0.047377,-0.049508,-0.077541,-0.021129,-0.022623,...,-0.024262,-0.018852,-0.019508,-0.04623,-0.067377,-0.062131,-0.04082,-0.027869,-0.027254,-0.026721
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,-0.017705,-0.050492,-0.016885,-0.062459,-0.018852,-0.047377,-0.049508,-0.077541,-0.021129,-0.022623,...,-0.024262,-0.018852,-0.019508,-0.04623,-0.067377,-0.062131,-0.04082,-0.027869,-0.027254,-0.026721
607,-0.017705,-0.050492,-0.016885,-0.062459,-0.018852,-0.047377,-0.049508,-0.077541,-0.021129,-0.022623,...,-0.024262,-0.018852,-0.019508,-0.04623,-0.067377,-0.062131,-0.04082,-0.027869,-0.027254,-0.026721
608,-0.017705,-0.050492,-0.016885,-0.062459,-0.018852,-0.047377,-0.049508,-0.077541,-0.021129,-0.022623,...,-0.024262,-0.018852,-0.019508,-0.04623,-0.067377,0.537869,-0.04082,0.872131,0.847746,-0.026721
609,-0.017705,-0.050492,-0.016885,-0.062459,-0.018852,-0.047377,-0.049508,-0.077541,-0.021129,-0.022623,...,-0.024262,-0.018852,-0.019508,-0.04623,-0.067377,-0.062131,-0.04082,-0.027869,-0.027254,-0.026721


## *Similarity Matrix*

In [152]:
# Movie-to-movie similarity: after transposing, each row is one movie's vector
# of normalised user scores, and cosine_similarity compares every pair of rows.
# The square result is labelled with the movie titles on both axes.
similarity = pd.DataFrame(
    cosine_similarity(data_std.T),
    index=data.columns,
    columns=data.columns,
)
similarity

title,"'burbs, The (1989)",(500) Days of Summer (2009),10 Cloverfield Lane (2016),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),...,Zack and Miri Make a Porno (2008),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"'burbs, The (1989)",1.000000,0.063117,-0.023768,0.143482,0.011998,0.087931,0.224052,0.034223,0.009277,0.008331,...,0.017477,0.032470,0.134701,0.153158,0.101301,0.049897,0.003233,0.187953,0.062174,0.353194
(500) Days of Summer (2009),0.063117,1.000000,0.142471,0.273989,0.193960,0.148903,0.142141,0.159756,0.135486,0.200135,...,0.374515,0.178655,0.068407,0.414585,0.355723,0.252226,0.216007,0.053614,0.241092,0.125905
10 Cloverfield Lane (2016),-0.023768,0.142471,1.000000,-0.005799,0.112396,0.006139,-0.016835,0.031704,-0.024275,0.272943,...,0.242663,0.099059,-0.023477,0.272347,0.241751,0.195054,0.319371,0.177846,0.096638,0.002733
10 Things I Hate About You (1999),0.143482,0.273989,-0.005799,1.000000,0.244670,0.223481,0.211473,0.011784,0.091964,0.043383,...,0.243118,0.104858,0.132460,0.091853,0.158637,0.281934,0.050031,0.121029,0.130813,0.110612
"10,000 BC (2008)",0.011998,0.193960,0.112396,0.244670,1.000000,0.234459,0.119132,0.059187,-0.025882,0.089328,...,0.260261,0.087592,0.094913,0.184521,0.242299,0.240231,0.094773,0.088045,0.203002,0.083518
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zoolander (2001),0.049897,0.252226,0.195054,0.281934,0.240231,0.184324,0.274260,0.122107,0.017351,0.091416,...,0.304364,0.243820,-0.006269,0.242033,0.299522,1.000000,0.108147,0.097147,0.338034,0.109455
Zootopia (2016),0.003233,0.216007,0.319371,0.050031,0.094773,0.054024,0.077594,0.056742,0.063325,0.225747,...,0.286213,0.156603,0.011418,0.214385,0.298504,0.108147,1.000000,0.046885,0.200762,0.020595
eXistenZ (1999),0.187953,0.053614,0.177846,0.121029,0.088045,0.047804,0.085606,-0.001708,0.002528,0.128638,...,0.088202,0.028566,0.167541,0.145741,0.068763,0.097147,0.046885,1.000000,0.163022,0.138611
xXx (2002),0.062174,0.241092,0.096638,0.130813,0.203002,0.156932,0.248820,0.074306,0.037469,0.153335,...,0.271180,0.193624,0.080585,0.209840,0.203285,0.338034,0.200762,0.163022,1.000000,0.065673


## *Get the similarity score of every movie, weighted by the user's rating of a given movie*

In [153]:
def get_similar_movies(movie_name, user_rating, neutral_rating=2.5):
    """Score every movie by its similarity to one movie the user has rated.

    Looks up the ``movie_name`` column of the module-level ``similarity``
    matrix and multiplies it by ``user_rating - neutral_rating``. Ratings above
    the neutral point therefore give positive scores to similar movies, while
    ratings below it give them negative scores.

    Parameters
    ----------
    movie_name : str
        Column label in ``similarity`` identifying the rated movie.
    user_rating : float
        The rating the user gave to ``movie_name``.
    neutral_rating : float, default 2.5
        Rating treated as "neither liked nor disliked"; it is subtracted from
        ``user_rating`` to obtain the weight.

    Returns
    -------
    numpy.ndarray
        Flat array of weighted similarity scores, in the same order as the
        rows of ``similarity``.

    Raises
    ------
    KeyError
        If ``movie_name`` is not a column of ``similarity``.
    """
    weight = user_rating - neutral_rating
    weighted_scores = similarity[movie_name] * weight
    # np.array copies, so callers receive an independent, flat array.
    return np.array(weighted_scores).reshape(-1)

# Example: a rating of 1.5 is below the 2.5 neutral point, so every similarity
# score comes back with its sign flipped.
print(get_similar_movies(movie_name="Zootopia (2016)", user_rating=1.5))

[-0.00323263 -0.21600662 -0.3193711  ... -0.04688521 -0.20076169
 -0.02059522]


## *Make prediction based on User ratings*

In [154]:
# Ratings supplied by the person we want recommendations for: one
# (movie title, rating) tuple per movie. Each title is later passed to
# get_similar_movies, which uses it as a column label of `similarity`, so it
# must match a title in that matrix exactly. Ratings are weighted relative to
# 2.5 there, i.e. values above 2.5 count as "liked".
user = [
    ("Good Will Hunting (1997)", 4.0),
    ("Godfather, The (1972)", 5.0),
    ("Serpico (1973)", 4.5),
    ("Dark Knight, The (2008)", 4.5),
    ("Jumanji (1995)", 3.5),
    ("Fight Club (1999)", 4.0),
    ("Matrix, The (1999)", 4.5),
    ("Kung Fu Panda (2008)", 5.0),
    ("Shutter Island (2010)", 4.5),
    ("Great Gatsby, The (2013)", 3.0),
    ("The Machinist (2004)", 3.0),
    ("Django Unchained (2012)", 4.5),
]

In [155]:
# One row of weighted similarity scores per movie the user rated, one column
# per movie title. The frame is built in a single step from the list of score
# arrays (rather than being grown one row at a time), which also gives plain
# float columns; the row index is 0..len(user)-1.
similar_movies = pd.DataFrame(
    [get_similar_movies(movie_name, user_rating) for movie_name, user_rating in user],
    columns=data.columns,
)

# Add up the contributions of all rated movies and show the 25 highest-scoring
# titles. NOTE: titles the user has already rated are not filtered out, so they
# can appear in this ranking.
similar_movies.sum().sort_values(ascending=False).head(25)

title
Dark Knight, The (2008)                                          7.454002
Fight Club (1999)                                                7.197354
Inception (2010)                                                 7.031055
Shutter Island (2010)                                            6.880304
Django Unchained (2012)                                          6.817600
Inglourious Basterds (2009)                                      6.756364
Departed, The (2006)                                             6.659953
Hangover, The (2009)                                             6.659583
Matrix, The (1999)                                               6.634945
Kill Bill: Vol. 1 (2003)                                         6.601387
Godfather, The (1972)                                            6.463206
Batman Begins (2005)                                             6.417965
Sherlock Holmes (2009)                                           6.372225
Iron Man (2008)                 