In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
ratings = pd.read_csv('ratings.csv')  #Ratings.csv contains userID, movieID, movie title, rating by each user to each movies if rated and timestamp.
movies = pd.read_csv('movies.csv')    #Movies.csv contains the movieID and title of the cooresponding movie
ratings = pd.merge(movies,ratings).drop(['genres','timestamp'],axis=1)  #We are using item to item collaborative filtering approach so we only require userID, movie title and rating to each (user, movie) pair
print(ratings.shape)
ratings.head()

(100836, 4)


Unnamed: 0,movieId,title,userId,rating
0,1,Toy Story (1995),1,4.0
1,1,Toy Story (1995),5,4.0
2,1,Toy Story (1995),7,4.5
3,1,Toy Story (1995),15,2.5
4,1,Toy Story (1995),17,4.5


In [4]:
#defining utility matrix
userRatings = ratings.pivot_table(index=['userId'],columns=['title'],values='rating')
print("Before: ",userRatings.shape)

#dropping those movies which got less than 10 ratings and filling nan values by zero
userRatings = userRatings.dropna(thresh=10, axis=1).fillna(0,axis=1)
print("After: ",userRatings.shape)
userRatings

Before:  (610, 9719)
After:  (610, 2269)


title,"'burbs, The (1989)",(500) Days of Summer (2009),10 Cloverfield Lane (2016),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),...,Zack and Miri Make a Porno (2008),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
R = np.where(userRatings>=1,1,userRatings)
R = R.astype(float)
def standardize(row):
    new_row = (row-row.mean())/(row.max()-row.min())
    return new_row

user_std_ratings = R*userRatings.apply(standardize)
user_std_ratings.head(10)

title,"'burbs, The (1989)",(500) Days of Summer (2009),10 Cloverfield Lane (2016),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),...,Zack and Miri Make a Porno (2008),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.773279
2,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,0.532623,-0.0,-0.0,-0.0,-0.0,-0.0
3,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
4,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.922459,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
5,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
6,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
7,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
8,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
9,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.222746,-0.0
10,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0


In [7]:
#Calculating similarity matrix
cos_similarity = cosine_similarity(user_std_ratings.T)

In [8]:
cos_similarity

array([[1.        , 0.099362  , 0.        , ..., 0.20812868, 0.08907783,
        0.36832822],
       [0.099362  , 1.        , 0.17066778, ..., 0.09528656, 0.27760952,
        0.16793361],
       [0.        , 0.17066778, 1.        , ..., 0.20117314, 0.12031877,
        0.03152171],
       ...,
       [0.20812868, 0.09528656, 0.20117314, ..., 1.        , 0.19254178,
        0.16825648],
       [0.08907783, 0.27760952, 0.12031877, ..., 0.19254178, 1.        ,
        0.0993799 ],
       [0.36832822, 0.16793361, 0.03152171, ..., 0.16825648, 0.0993799 ,
        1.        ]])

In [9]:
#It is easy to work with data frames, so converting above matrix into a data frame.
similarity_df = pd.DataFrame(cos_similarity,index = userRatings.columns, columns = userRatings.columns)
similarity_df

title,"'burbs, The (1989)",(500) Days of Summer (2009),10 Cloverfield Lane (2016),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),...,Zack and Miri Make a Porno (2008),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"'burbs, The (1989)",1.000000,0.099362,0.000000,0.175586,0.035771,0.121634,0.254107,0.078088,0.033189,0.033597,...,0.044805,0.054712,0.151827,0.182024,0.141339,0.090500,0.037920,0.208129,0.089078,0.368328
(500) Days of Summer (2009),0.099362,1.000000,0.170668,0.323552,0.224416,0.205295,0.197406,0.224471,0.167848,0.230498,...,0.398158,0.206225,0.103567,0.449989,0.399738,0.305062,0.258495,0.095287,0.277610,0.167934
10 Cloverfield Lane (2016),0.000000,0.170668,1.000000,0.036648,0.133145,0.045701,0.020693,0.074532,0.000000,0.291757,...,0.263504,0.118474,0.000000,0.299634,0.271077,0.227979,0.338834,0.201173,0.120319,0.031522
10 Things I Hate About You (1999),0.175586,0.323552,0.036648,1.000000,0.278207,0.277931,0.268958,0.093686,0.128758,0.087631,...,0.280563,0.142068,0.165959,0.150263,0.226764,0.338206,0.109814,0.162568,0.175047,0.154228
"10,000 BC (2008)",0.035771,0.224416,0.133145,0.278207,1.000000,0.264008,0.154820,0.102699,0.000000,0.113325,...,0.281425,0.108533,0.116632,0.211487,0.276865,0.272469,0.126041,0.113596,0.225395,0.112663
101 Dalmatians (1996),0.121634,0.205295,0.045701,0.277931,0.264008,1.000000,0.332076,0.188145,0.110619,0.071558,...,0.155897,0.113337,0.131495,0.125835,0.179977,0.244123,0.109538,0.094491,0.193499,0.123687
101 Dalmatians (One Hundred and One Dalmatians) (1961),0.254107,0.197406,0.020693,0.268958,0.154820,0.332076,1.000000,0.202177,0.059597,0.000000,...,0.161017,0.159587,0.088598,0.141409,0.235709,0.326781,0.129825,0.130266,0.280282,0.213268
12 Angry Men (1957),0.078088,0.224471,0.074532,0.093686,0.102699,0.188145,0.202177,1.000000,0.173039,0.105614,...,0.151929,0.071594,0.121800,0.295854,0.218276,0.195457,0.119620,0.050123,0.125613,0.154232
12 Years a Slave (2013),0.033189,0.167848,0.000000,0.128758,0.000000,0.110619,0.059597,0.173039,1.000000,0.270673,...,0.050606,0.060878,0.037385,0.219911,0.144687,0.061139,0.095398,0.031359,0.066663,0.035047
127 Hours (2010),0.033597,0.230498,0.291757,0.087631,0.113325,0.071558,0.000000,0.105614,0.270673,1.000000,...,0.247614,0.174570,0.037844,0.391118,0.235123,0.135138,0.254815,0.155700,0.179096,0.035477


In [14]:
#retreiving the list of similar movies with respect to rating provided to a given movie
def get_similar1(movie_name,rating):
    similar_ratings = similarity_df[movie_name]*(rating-2.5)  #instead of mean we used median value (2.5) to avoid zero row calculations
    #print(type(similar_ratings))
    return similar_ratings

In [11]:
#Suppose we have a user who loves romantic movies and not like other genre movies, let say that user is defined as romantic_lover
#and the list of movies he rated with rating is given as input and we will recommend movies on the basis of data provided

romantic_lover = [("(500) Days of Summer (2009)",5),("Alice in Wonderland (2010)",3),("Aliens (1986)",1),("2001: A Space Odyssey (1968)",2)]
similar_movies = pd.DataFrame()
for movie,rating in romantic_lover:
    similar_movies = similar_movies.append(get_similar1(movie,rating))

similar_movies.head()

Unnamed: 0,"'burbs, The (1989)",(500) Days of Summer (2009),10 Cloverfield Lane (2016),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),...,Zack and Miri Make a Porno (2008),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
(500) Days of Summer (2009),0.248405,2.5,0.426669,0.80888,0.561039,0.513238,0.493516,0.561178,0.419619,0.576244,...,0.995394,0.515562,0.258918,1.124972,0.999346,0.762655,0.646237,0.238216,0.694024,0.419834
Alice in Wonderland (2010),0.0,0.216912,0.136714,0.133869,0.105244,0.107055,0.095584,0.122285,0.095388,0.098922,...,0.170824,0.09662,0.026722,0.191476,0.187949,0.142461,0.091164,0.043384,0.097315,0.046683
Aliens (1986),-0.373432,-0.21348,-0.288068,-0.276852,-0.12714,-0.221626,-0.308668,-0.429224,-0.116025,-0.202605,...,-0.240618,-0.238114,-0.429378,-0.396864,-0.411659,-0.36818,-0.272131,-0.482965,-0.334748,-0.322116
2001: A Space Odyssey (1968),-0.123797,-0.098016,-0.073155,-0.090142,-0.048821,-0.100418,-0.093898,-0.140495,-0.086354,-0.098216,...,-0.101539,-0.073687,-0.149074,-0.203361,-0.14,-0.116809,-0.087871,-0.174769,-0.1098,-0.118538


In [12]:
#Here we recommend movies to romantic_lover user, where the movie at the top position is most likely to be like by him and the movie at least position is most likely to be disliked by him most
similar_movies.sum().sort_values(ascending=False).head(20)

(500) Days of Summer (2009)                2.405416
Alice in Wonderland (2010)                 1.324862
Silver Linings Playbook (2012)             1.179835
Adventureland (2009)                       1.055558
About Time (2013)                          1.039252
Yes Man (2008)                             1.038109
Marley & Me (2008)                         1.035291
50/50 (2011)                               1.026058
Crazy, Stupid, Love. (2011)                1.010425
Help, The (2011)                           1.003605
Up in the Air (2009)                       0.985961
Friends with Benefits (2011)               0.978460
Holiday, The (2006)                        0.977706
Secret Life of Walter Mitty, The (2013)    0.947109
Notebook, The (2004)                       0.940766
Easy A (2010)                              0.934985
Ugly Truth, The (2009)                     0.920143
Perks of Being a Wallflower, The (2012)    0.917591
Step Brothers (2008)                       0.883775
Bridesmaids 

In [15]:
action_lover = [("Amazing Spider-Man, The (2012)",5),("Mission: Impossible III (2006)",4),("Toy Story 3 (2010)",2),("2 Fast 2 Furious (Fast and the Furious 2, The) (2003)",4)]
similar_movies = pd.DataFrame()
for movie,rating in action_lover:
    similar_movies = similar_movies.append(get_similar1(movie,rating))

similar_movies.head(10)

Unnamed: 0,"'burbs, The (1989)",(500) Days of Summer (2009),10 Cloverfield Lane (2016),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),...,Zack and Miri Make a Porno (2008),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
"Amazing Spider-Man, The (2012)",0.279727,0.6513,0.437766,0.389622,0.755681,0.364605,0.50056,0.312209,0.351981,0.49435,...,0.929344,0.645107,0.293068,0.740262,1.103359,0.501231,1.002531,0.151524,0.557347,0.445585
Mission: Impossible III (2006),0.177894,0.290724,0.200142,0.371624,0.347944,0.323761,0.422709,0.233642,0.312764,0.335752,...,0.519024,0.452253,0.105923,0.481025,0.554682,0.524177,0.314923,0.221963,0.587198,0.218402
Toy Story 3 (2010),-0.046272,-0.231686,-0.062189,-0.087372,-0.10901,-0.101666,-0.116803,-0.13751,-0.11881,-0.128661,...,-0.174922,-0.110148,-0.026879,-0.17495,-0.213187,-0.163326,-0.205654,-0.022666,-0.179583,-0.079302
"2 Fast 2 Furious (Fast and the Furious 2, The) (2003)",0.161716,0.411171,0.179474,0.411075,0.603061,0.387074,0.462496,0.128811,0.103179,0.342796,...,0.517197,0.381056,0.080493,0.410914,0.438245,0.627326,0.386496,0.133379,0.815794,0.147861


In [16]:
similar_movies.sum().sort_values(ascending=False).head(20)

Amazing Spider-Man, The (2012)                           3.290386
Mission: Impossible III (2006)                           2.940596
2 Fast 2 Furious (Fast and the Furious 2, The) (2003)    2.775387
Over the Hedge (2006)                                    2.300411
Hellboy (2004)                                           2.259184
Hancock (2008)                                           2.256348
Mission: Impossible - Ghost Protocol (2011)              2.240418
Crank (2006)                                             2.228974
Jumper (2008)                                            2.216287
The Amazing Spider-Man 2 (2014)                          2.211730
X-Men: The Last Stand (2006)                             2.201531
Fantastic Four (2005)                                    2.198163
Chronicles of Riddick, The (2004)                        2.196654
Snakes on a Plane (2006)                                 2.187058
Tron: Legacy (2010)                                      2.182092
Incredible

In [17]:
corrMatrix = userRatings.corr(method='pearson')
corrMatrix.head(100)

title,"'burbs, The (1989)",(500) Days of Summer (2009),10 Cloverfield Lane (2016),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),...,Zack and Miri Make a Porno (2008),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"'burbs, The (1989)",1.000000,0.063117,-0.023768,0.143482,0.011998,0.087931,0.224052,0.034223,0.009277,0.008331,...,0.017477,0.032470,0.134701,0.153158,0.101301,0.049897,0.003233,0.187953,0.062174,0.353194
(500) Days of Summer (2009),0.063117,1.000000,0.142471,0.273989,0.193960,0.148903,0.142141,0.159756,0.135486,0.200135,...,0.374515,0.178655,0.068407,0.414585,0.355723,0.252226,0.216007,0.053614,0.241092,0.125905
10 Cloverfield Lane (2016),-0.023768,0.142471,1.000000,-0.005799,0.112396,0.006139,-0.016835,0.031704,-0.024275,0.272943,...,0.242663,0.099059,-0.023477,0.272347,0.241751,0.195054,0.319371,0.177846,0.096638,0.002733
10 Things I Hate About You (1999),0.143482,0.273989,-0.005799,1.000000,0.244670,0.223481,0.211473,0.011784,0.091964,0.043383,...,0.243118,0.104858,0.132460,0.091853,0.158637,0.281934,0.050031,0.121029,0.130813,0.110612
"10,000 BC (2008)",0.011998,0.193960,0.112396,0.244670,1.000000,0.234459,0.119132,0.059187,-0.025882,0.089328,...,0.260261,0.087592,0.094913,0.184521,0.242299,0.240231,0.094773,0.088045,0.203002,0.083518
101 Dalmatians (1996),0.087931,0.148903,0.006139,0.223481,0.234459,1.000000,0.285112,0.119843,0.072399,0.029967,...,0.114968,0.077232,0.096294,0.067134,0.113224,0.184324,0.054024,0.047804,0.156932,0.078734
101 Dalmatians (One Hundred and One Dalmatians) (1961),0.224052,0.142141,-0.016835,0.211473,0.119132,0.285112,1.000000,0.134037,0.017264,-0.046277,...,0.120302,0.125816,0.049818,0.083650,0.171654,0.274260,0.077594,0.085606,0.248820,0.171118
12 Angry Men (1957),0.034223,0.159756,0.031704,0.011784,0.059187,0.119843,0.134037,1.000000,0.132979,0.058862,...,0.104518,0.028415,0.079905,0.241435,0.144652,0.122107,0.056742,-0.001708,0.074306,0.102744
12 Years a Slave (2013),0.009277,0.135486,-0.024275,0.091964,-0.025882,0.072399,0.017264,0.132979,1.000000,0.249931,...,0.024045,0.038127,0.013786,0.190366,0.104150,0.017351,0.063325,0.002528,0.037469,0.004213
127 Hours (2010),0.008331,0.200135,0.272943,0.043383,0.089328,0.029967,-0.046277,0.058862,0.249931,1.000000,...,0.223135,0.154299,0.012907,0.364841,0.198926,0.091416,0.225747,0.128638,0.153335,0.002912


In [18]:
def get_similar2(movie_name,rating):
    similar_ratings = corrMatrix[movie_name]*(rating-2.5)
    similar_ratings = similar_ratings.sort_values(ascending=False)
    #print(type(similar_ratings))
    return similar_ratings

In [19]:
romantic_lover = [("(500) Days of Summer (2009)",5),("Alice in Wonderland (2010)",3),("Aliens (1986)",1),("2001: A Space Odyssey (1968)",2)]
similar_movies = pd.DataFrame()
for movie,rating in romantic_lover:
    similar_movies = similar_movies.append(get_similar2(movie,rating))

similar_movies.head()

Unnamed: 0,"'burbs, The (1989)",(500) Days of Summer (2009),10 Cloverfield Lane (2016),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),...,Zack and Miri Make a Porno (2008),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
(500) Days of Summer (2009),0.157792,2.5,0.356179,0.684973,0.4849,0.372257,0.355353,0.399389,0.338715,0.500338,...,0.936288,0.446637,0.171018,1.036463,0.889309,0.630565,0.540017,0.134034,0.60273,0.314763
Alice in Wonderland (2010),-0.016276,0.203998,0.126834,0.113241,0.092218,0.08579,0.072825,0.097794,0.083822,0.084897,...,0.159907,0.085502,0.011564,0.176888,0.168302,0.12171,0.07259,0.025695,0.081764,0.02754
Aliens (1986),-0.304722,-0.062634,-0.2147,-0.118754,-0.037059,-0.063992,-0.170195,-0.28009,-0.016283,-0.102493,...,-0.147339,-0.162387,-0.368712,-0.281119,-0.263686,-0.228562,-0.144292,-0.410899,-0.242935,-0.23841
2001: A Space Odyssey (1968),-0.102988,-0.056808,-0.049655,-0.042987,-0.021729,-0.055422,-0.051115,-0.097954,-0.061595,-0.070398,...,-0.075325,-0.048607,-0.128795,-0.175166,-0.098088,-0.074205,-0.049626,-0.153017,-0.082048,-0.091432


In [20]:
similar_movies.sum().sort_values(ascending=False).head(20)

(500) Days of Summer (2009)                      2.584556
Alice in Wonderland (2010)                       1.395229
Silver Linings Playbook (2012)                   1.254800
Yes Man (2008)                                   1.116264
Adventureland (2009)                             1.112235
Marley & Me (2008)                               1.108381
About Time (2013)                                1.102192
Crazy, Stupid, Love. (2011)                      1.088757
50/50 (2011)                                     1.086517
Help, The (2011)                                 1.075963
Up in the Air (2009)                             1.053037
Holiday, The (2006)                              1.034470
Friends with Benefits (2011)                     1.030875
Notebook, The (2004)                             1.025880
Easy A (2010)                                    1.015771
Secret Life of Walter Mitty, The (2013)          0.997979
Perks of Being a Wallflower, The (2012)          0.967425
Toy Story 3 (2

In [22]:
action_lover = [("Amazing Spider-Man, The (2012)",5),("Mission: Impossible III (2006)",4),("Toy Story 3 (2010)",2),("2 Fast 2 Furious (Fast and the Furious 2, The) (2003)",4)]
similar_movies = pd.DataFrame()
for movie,rating in action_lover:
    similar_movies = similar_movies.append(get_similar2(movie,rating))

similar_movies.head(10)


Unnamed: 0,"'burbs, The (1989)",(500) Days of Summer (2009),10 Cloverfield Lane (2016),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),...,Zack and Miri Make a Porno (2008),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
"Amazing Spider-Man, The (2012)",0.208546,0.562402,0.374759,0.259505,0.695633,0.240343,0.381986,0.176284,0.30373,0.420922,...,0.876848,0.585878,0.223005,0.657071,1.023717,0.379366,0.932932,0.058779,0.480094,0.359144
Mission: Impossible III (2006),0.134413,0.226793,0.159314,0.305357,0.312814,0.257647,0.360759,0.154621,0.273341,0.293849,...,0.483694,0.419053,0.060353,0.427869,0.495979,0.467937,0.255335,0.170812,0.555271,0.166005
Toy Story 3 (2010),-0.02472,-0.210077,-0.043119,-0.050315,-0.091487,-0.068457,-0.084566,-0.102953,-0.102485,-0.111497,...,-0.157158,-0.09266,-0.004604,-0.149674,-0.18719,-0.13314,-0.186164,0.004871,-0.162235,-0.053808
"2 Fast 2 Furious (Fast and the Furious 2, The) (2003)",0.126098,0.362859,0.146045,0.362353,0.575936,0.339292,0.417303,0.057454,0.064218,0.309156,...,0.487745,0.352235,0.04293,0.368328,0.387424,0.588792,0.342994,0.09938,0.805254,0.101007


In [23]:
similar_movies.sum().sort_values(ascending=False).head(20)

Amazing Spider-Man, The (2012)                           3.233134
Mission: Impossible III (2006)                           2.874798
2 Fast 2 Furious (Fast and the Furious 2, The) (2003)    2.701477
Over the Hedge (2006)                                    2.229721
Crank (2006)                                             2.176259
Mission: Impossible - Ghost Protocol (2011)              2.159666
Hancock (2008)                                           2.156098
The Amazing Spider-Man 2 (2014)                          2.153677
Hellboy (2004)                                           2.137518
Snakes on a Plane (2006)                                 2.137396
Jumper (2008)                                            2.129716
Chronicles of Riddick, The (2004)                        2.121689
Tron: Legacy (2010)                                      2.111843
Fantastic Four (2005)                                    2.083022
X-Men: The Last Stand (2006)                             2.077530
Wreck-It R