In [2]:
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics.pairwise import cosine_similarity
import math
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.pyplot import imshow
from matplotlib.pyplot import figure
from sklearn.decomposition import TruncatedSVD
from scipy.linalg import svd
from scipy.linalg import sqrtm
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import StandardScaler
from scipy.sparse import csr_matrix
import itertools
import warnings
warnings.simplefilter("ignore")

In [3]:
rating = pd.read_csv('u.data', sep = '\t', header = None, names = ['user_id', 'movie_id', 'rating', 'timestamp'], encoding = 'utf-8')
user = pd.read_csv('u.user', sep = '|', header = None, names = ['user_id', 'age', 'gender', 'occupation', 'zip_code'], encoding = 'utf-8')
movie = pd.read_csv('u.item', sep = '|', encoding="iso-8859-1", header = None, names = ["movie_id", "movie_title", "release_date", "video_releasedate", "IMDbURL", "unknown", "Action", "Adventure", "Animation", "Children's", "Comedy", "Crime", "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror", "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western"])

In [4]:
rating.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [5]:
movie.head()

Unnamed: 0,movie_id,movie_title,release_date,video_releasedate,IMDbURL,unknown,Action,Adventure,Animation,Children's,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [6]:
user.head()

Unnamed: 0,user_id,age,gender,occupation,zip_code
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213


In [7]:
data = rating.merge(movie, on = 'movie_id')
data.head()

Unnamed: 0,user_id,movie_id,rating,timestamp,movie_title,release_date,video_releasedate,IMDbURL,unknown,Action,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,196,242,3,881250949,Kolya (1996),24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,...,0,0,0,0,0,0,0,0,0,0
1,63,242,3,875747190,Kolya (1996),24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,...,0,0,0,0,0,0,0,0,0,0
2,226,242,5,883888671,Kolya (1996),24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,...,0,0,0,0,0,0,0,0,0,0
3,154,242,3,879138235,Kolya (1996),24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,...,0,0,0,0,0,0,0,0,0,0
4,306,242,5,876503793,Kolya (1996),24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
num_ratings = data.groupby('movie_id').agg({'rating':'count'}).reset_index()
num_ratings = num_ratings.rename(columns = {'rating':'num_ratings'})
num_ratings.head()

Unnamed: 0,movie_id,num_ratings
0,1,452
1,2,131
2,3,90
3,4,209
4,5,86


In [9]:
data = data.merge(num_ratings, on = 'movie_id')
data.head()

Unnamed: 0,user_id,movie_id,rating,timestamp,movie_title,release_date,video_releasedate,IMDbURL,unknown,Action,...,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,num_ratings
0,196,242,3,881250949,Kolya (1996),24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,...,0,0,0,0,0,0,0,0,0,117
1,63,242,3,875747190,Kolya (1996),24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,...,0,0,0,0,0,0,0,0,0,117
2,226,242,5,883888671,Kolya (1996),24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,...,0,0,0,0,0,0,0,0,0,117
3,154,242,3,879138235,Kolya (1996),24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,...,0,0,0,0,0,0,0,0,0,117
4,306,242,5,876503793,Kolya (1996),24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,...,0,0,0,0,0,0,0,0,0,117


# Non-Personalized / Content Based Recommender Systems 

In [10]:
matrix = data.pivot(index = 'user_id', columns = 'movie_id', values = 'rating')
matrix.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,,,,,,,,,,
2,4.0,,,,,,,,,2.0,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,3.0,,,,,,,,,...,,,,,,,,,,


## Using Correlation Values (Similar Movies/More Like This Section)

In [11]:
def corr_recommendations(title, num_ratings = 50, num_recom = 6):
    movie_id = data[data['movie_title'] == title]['movie_id'].iloc[0]
    correlation = matrix.corrwith(matrix.loc[:, movie_id])
    df_corr = pd.DataFrame(correlation, columns=['correlation']).reset_index()
    df_corr.dropna(inplace=True)
    
    similar_movie = pd.merge(left = df_corr, right = data, on='movie_id')[['movie_title', 'correlation', 'num_ratings']].drop_duplicates().reset_index(drop=True)
    final_list = similar_movie[similar_movie['num_ratings'] > num_ratings].sort_values(by='correlation', ascending=False)
    return final_list['movie_title'].head(num_recom)[1:num_recom]

In [12]:
corr_recommendations('Alice in Wonderland (1951)')

886    Beautician and the Beast, The (1997)
820                        Soul Food (1997)
826                          Flubber (1997)
388                   Close Shave, A (1995)
600               Christmas Carol, A (1938)
Name: movie_title, dtype: object

## Using Cosine Similarity (Similar Movies/More Like This Section)

In [13]:
movie_updated = movie.drop(columns = ['movie_title', 'release_date', 'video_releasedate', 'IMDbURL']).set_index('movie_id')
movie_updated.head()

Unnamed: 0_level_0,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
4,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0


In [14]:
similarity_array = cosine_similarity(movie_updated)
similarity_dataframe = pd.DataFrame(similarity_array)
similarity_dataframe.index = movie['movie_title']
similarity_dataframe.columns = movie['movie_title']
similarity_dataframe.head()

movie_title,Toy Story (1995),GoldenEye (1995),Four Rooms (1995),Get Shorty (1995),Copycat (1995),Shanghai Triad (Yao a yao yao dao waipo qiao) (1995),Twelve Monkeys (1995),Babe (1995),Dead Man Walking (1995),Richard III (1995),...,Mirage (1995),Mamma Roma (1962),"Sunchaser, The (1996)","War at Home, The (1996)",Sweet Nothing (1995),Mat' i syn (1997),B. Monkey (1998),Sliding Doors (1998),You So Crazy (1994),Scream of Stone (Schrei aus Stein) (1991)
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Toy Story (1995),1.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.666667,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.57735,0.0
GoldenEye (1995),0.0,1.0,0.57735,0.333333,0.333333,0.0,0.0,0.0,0.0,0.0,...,0.816497,0.0,0.0,0.0,0.0,0.0,0.408248,0.0,0.0,0.0
Four Rooms (1995),0.0,0.57735,1.0,0.0,0.57735,0.0,0.0,0.0,0.0,0.0,...,0.707107,0.0,0.0,0.0,0.0,0.0,0.707107,0.0,0.0,0.0
Get Shorty (1995),0.333333,0.333333,0.0,1.0,0.333333,0.57735,0.408248,0.666667,0.57735,0.408248,...,0.408248,0.57735,0.57735,0.57735,0.57735,0.57735,0.0,0.408248,0.57735,0.57735
Copycat (1995),0.0,0.333333,0.57735,0.333333,1.0,0.57735,0.408248,0.333333,0.57735,0.408248,...,0.408248,0.57735,0.57735,0.57735,0.57735,0.57735,0.408248,0.408248,0.0,0.57735


In [15]:
def sim_recommendations(title, num_ratings = 50, num_recom = 6):
    df_similar = similarity_dataframe[[title]]
    similar_movie = df_similar.merge(data, on = 'movie_title')[['movie_title', title, 'movie_id', 'num_ratings']].sort_values(by = title, ascending = False).drop_duplicates()
    final_list = similar_movie[similar_movie['num_ratings'] >= num_ratings]
    return final_list['movie_title'].head(num_recom)[1:num_recom]

In [16]:
sim_recommendations('Muppet Treasure Island (1996)')

57118    Three Musketeers, The (1993)
27640      Blues Brothers, The (1980)
16218                Rock, The (1996)
87656                 Daylight (1996)
16573                  Twister (1996)
Name: movie_title, dtype: object

## Using TFIDF Approach (Recommended For You Section)

In [17]:
movie_updated.head()

Unnamed: 0_level_0,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
4,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0


In [18]:
movie_normalized = movie_updated.div(np.sqrt(movie_updated.sum(axis = 1)), axis = 0)
movie_normalized.head()

Unnamed: 0_level_0,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,0.0,0.0,0.0,0.57735,0.57735,0.57735,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.57735,0.57735,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.57735,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,0.0,0.57735,0.0,0.0,0.0,0.57735,0.0,0.0,0.57735,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.57735,0.0,0.57735,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.57735,0.0,0.0


In [19]:
matrix_updated = matrix.fillna(0)
matrix_normalized = matrix_updated.div(np.sqrt(matrix_updated.sum(axis = 1)), axis = 0)
matrix_normalized.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.159556,0.095734,0.127645,0.095734,0.095734,0.159556,0.127645,0.031911,0.159556,0.095734,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.263752,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.131876,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.178351,0.133763,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
user_genre_score = matrix_normalized.dot(movie_normalized)
user_genre_score.head()

Unnamed: 0_level_0,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,0.127645,4.688259,2.145085,0.830173,1.056559,7.489825,1.715348,0.765871,10.475527,0.121563,0.112823,0.90872,0.697981,0.373119,3.481871,3.284213,3.826497,1.683116,0.470107
2,0.0,1.486,0.465819,0.152277,0.499612,3.293707,1.300347,0.0,6.907221,0.114208,0.317123,0.238783,0.139876,0.502535,2.871799,0.533402,1.750185,0.39297,0.0
3,0.0,1.813782,0.63877,0.0,0.0,2.01436,1.441406,0.406894,3.894338,0.0,0.222331,0.669228,0.209055,1.720142,0.788251,0.989464,2.628045,0.632475,0.0
4,0.0,1.694411,0.721451,0.0,0.0,1.610338,1.190176,0.49029,1.96519,0.0,0.0,0.27735,0.283069,1.286404,0.713039,1.264255,2.84055,0.415381,0.0
5,0.178351,4.683448,2.60134,1.385723,1.93841,8.21626,0.869889,0.0,2.232764,0.111469,0.157642,2.560316,1.055043,0.217889,1.06379,2.972995,1.421418,1.070362,0.128714


In [21]:
inver_doc_freq = 1/np.log(movie_updated.sum(axis = 0))
inver_doc_freq

unknown        1.442695
Action         0.180981
Adventure      0.203862
Animation      0.267546
Children's     0.208159
Comedy         0.160654
Crime          0.213158
Documentary    0.255622
Drama          0.151833
Fantasy        0.323515
Film-Noir      0.314658
Horror         0.221151
Musical        0.248425
Mystery        0.243257
Romance        0.181508
Sci-Fi         0.216679
Thriller       0.180981
War            0.234594
Western        0.303413
dtype: float64

In [22]:
user_genre_score.head()

Unnamed: 0_level_0,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,0.127645,4.688259,2.145085,0.830173,1.056559,7.489825,1.715348,0.765871,10.475527,0.121563,0.112823,0.90872,0.697981,0.373119,3.481871,3.284213,3.826497,1.683116,0.470107
2,0.0,1.486,0.465819,0.152277,0.499612,3.293707,1.300347,0.0,6.907221,0.114208,0.317123,0.238783,0.139876,0.502535,2.871799,0.533402,1.750185,0.39297,0.0
3,0.0,1.813782,0.63877,0.0,0.0,2.01436,1.441406,0.406894,3.894338,0.0,0.222331,0.669228,0.209055,1.720142,0.788251,0.989464,2.628045,0.632475,0.0
4,0.0,1.694411,0.721451,0.0,0.0,1.610338,1.190176,0.49029,1.96519,0.0,0.0,0.27735,0.283069,1.286404,0.713039,1.264255,2.84055,0.415381,0.0
5,0.178351,4.683448,2.60134,1.385723,1.93841,8.21626,0.869889,0.0,2.232764,0.111469,0.157642,2.560316,1.055043,0.217889,1.06379,2.972995,1.421418,1.070362,0.128714


In [23]:
movie_normalized.head()

Unnamed: 0_level_0,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,0.0,0.0,0.0,0.57735,0.57735,0.57735,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.57735,0.57735,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.57735,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,0.0,0.57735,0.0,0.0,0.0,0.57735,0.0,0.0,0.57735,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.57735,0.0,0.57735,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.57735,0.0,0.0


In [24]:
final_user_movie_matrix = ((user_genre_score*inver_doc_freq)[movie_normalized.columns]).dot(movie_normalized.T)
final_user_movie_matrix.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.949921,1.142177,0.692522,2.102876,1.529226,1.590534,1.627869,1.739981,1.590534,1.403878,...,1.089656,1.590534,1.590534,1.590534,1.590534,1.590534,0.93657,1.571561,1.20327,1.590534
2,0.389069,0.392973,0.31675,1.066268,0.948399,1.048746,0.823301,0.971041,1.048746,0.806762,...,0.414143,1.048746,1.048746,1.048746,1.048746,1.048746,0.592559,1.110159,0.529147,1.048746
3,0.186839,0.539306,0.475625,0.717741,0.793373,0.59129,0.569706,0.528221,0.59129,0.523022,...,0.568432,0.59129,0.59129,0.59129,0.59129,0.59129,0.437487,0.519274,0.323615,0.59129
4,0.149365,0.558769,0.514085,0.498683,0.615549,0.298381,0.404691,0.321635,0.298381,0.279892,...,0.580351,0.298381,0.298381,0.298381,0.298381,0.298381,0.455028,0.302503,0.258707,0.298381
5,1.209097,0.94407,0.257249,1.447184,0.451304,0.339008,0.695223,1.190773,0.339008,0.41727,...,0.781256,0.339008,0.339008,0.339008,0.339008,0.339008,0.318436,0.376248,1.319975,0.339008


In [25]:
def tfidf_recommendations(user_id, num_ratings = 50, num_recom = 6):
    df_tfidf = final_user_movie_matrix.loc[user_id]
    df_tfidf_score = df_tfidf.to_frame(name = 'score').reset_index()
    tfidf_movie = df_tfidf_score.merge(data, on = 'movie_id')[['movie_title', 'score', 'num_ratings']].sort_values(by = 'score', ascending = False).drop_duplicates()
    final_list = tfidf_movie[tfidf_movie['num_ratings'] >= num_ratings]
    return final_list['movie_title'].head(num_recom)[1:num_recom]

In [26]:
tfidf_recommendations(9)

79475    American President, The (1995)
84776           Don Juan DeMarco (1995)
67716                  Manhattan (1979)
67192            Wings of Desire (1987)
23769            Cinema Paradiso (1988)
Name: movie_title, dtype: object