# **Movie Recommender System**


In [1]:
# imports
import pandas as pd


In [2]:
df_movies = pd.read_csv('data/ml-25m/movies.csv')
df_ratings = pd.read_csv('data/ml-25m/ratings.csv')
df_tags = pd.read_csv('data/ml-25m/tags.csv')
df_links = pd.read_csv('data/ml-25m/links.csv')
df_movies_metadata = pd.read_csv('data/movies-metadata.csv')
df_credits = pd.read_csv('data/credits.csv')


In [3]:
df_movies_metadata.head()


Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,...,status,tagline,title,video,vote_average,vote_count,belongs_to_collection.id,belongs_to_collection.name,belongs_to_collection.poster_path,belongs_to_collection.backdrop_path
0,False,/cXQH2u7wUIX1eoIdEj51kHXoWhX.jpg,,1350000,"[{'id': 35, 'name': 'Comedy'}, {'id': 80, 'nam...",http://www.universalstudiosentertainment.com/l...,100,tt0120735,en,"Lock, Stock and Two Smoking Barrels",...,Released,A Disgrace to Criminals Everywhere.,"Lock, Stock and Two Smoking Barrels",False,8.1,5798,,,,
1,False,/cbTvuGya7E1PnL8t95AWzumjqHg.jpg,,0,"[{'id': 18, 'name': 'Drama'}]",,100017,tt0488903,de,Verfolgt,...,Released,,Punish Me,False,4.6,16,,,,
2,False,,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 28, 'name...",,100032,tt0099137,en,The Great Los Angeles Earthquake,...,Released,"There is no safe harbor, there is no escape......",The Great Los Angeles Earthquake,False,6.9,13,,,,
3,False,,,0,"[{'id': 27, 'name': 'Horror'}, {'id': 35, 'nam...",,100034,tt0462634,en,The Worst Horror Movie Ever Made,...,Released,,The Worst Horror Movie Ever Made,False,3.0,8,,,,
4,False,,,0,"[{'id': 10402, 'name': 'Music'}]",,100038,,en,Meshuggah - Nothing,...,Released,,Meshuggah - Nothing,False,4.0,2,,,,


In [4]:
df_credits.head()


Unnamed: 0,id,cast,crew
0,100,"[{'adult': False, 'gender': 2, 'id': 973, 'kno...","[{'adult': False, 'gender': 2, 'id': 960, 'kno..."
1,100017,"[{'adult': False, 'gender': 2, 'id': 5202, 'kn...","[{'adult': False, 'gender': 1, 'id': 2338, 'kn..."
2,100032,"[{'adult': False, 'gender': 1, 'id': 87038, 'k...","[{'adult': False, 'gender': 2, 'id': 36116, 'k..."
3,100034,"[{'adult': False, 'gender': 0, 'id': 1022808, ...","[{'adult': False, 'gender': 2, 'id': 99005, 'k..."
4,100038,[],[]


#### Three types of recommender systems

1. Demographic Filtering
2. Content Based Filtering
3. Collaborative Filtering


In [5]:
# join movies metadata and credits
df_movies_info = df_movies_metadata.merge(df_credits, on='id')


In [6]:
df_movies_info.head()


Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,...,title,video,vote_average,vote_count,belongs_to_collection.id,belongs_to_collection.name,belongs_to_collection.poster_path,belongs_to_collection.backdrop_path,cast,crew
0,False,/cXQH2u7wUIX1eoIdEj51kHXoWhX.jpg,,1350000,"[{'id': 35, 'name': 'Comedy'}, {'id': 80, 'nam...",http://www.universalstudiosentertainment.com/l...,100,tt0120735,en,"Lock, Stock and Two Smoking Barrels",...,"Lock, Stock and Two Smoking Barrels",False,8.1,5798,,,,,"[{'adult': False, 'gender': 2, 'id': 973, 'kno...","[{'adult': False, 'gender': 2, 'id': 960, 'kno..."
1,False,/cbTvuGya7E1PnL8t95AWzumjqHg.jpg,,0,"[{'id': 18, 'name': 'Drama'}]",,100017,tt0488903,de,Verfolgt,...,Punish Me,False,4.6,16,,,,,"[{'adult': False, 'gender': 2, 'id': 5202, 'kn...","[{'adult': False, 'gender': 1, 'id': 2338, 'kn..."
2,False,,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 28, 'name...",,100032,tt0099137,en,The Great Los Angeles Earthquake,...,The Great Los Angeles Earthquake,False,6.9,13,,,,,"[{'adult': False, 'gender': 1, 'id': 87038, 'k...","[{'adult': False, 'gender': 2, 'id': 36116, 'k..."
3,False,,,0,"[{'id': 27, 'name': 'Horror'}, {'id': 35, 'nam...",,100034,tt0462634,en,The Worst Horror Movie Ever Made,...,The Worst Horror Movie Ever Made,False,3.0,8,,,,,"[{'adult': False, 'gender': 0, 'id': 1022808, ...","[{'adult': False, 'gender': 2, 'id': 99005, 'k..."
4,False,,,0,"[{'id': 10402, 'name': 'Music'}]",,100038,,en,Meshuggah - Nothing,...,Meshuggah - Nothing,False,4.0,2,,,,,[],[]


## Demographic Filtering


Steps:

1. We require a metric to score or rate a movie.
2. Calculate the score for every movie.
3. Sort the scores and recommend the best rated movie to the users.


The average rating would be an obvious choice but this is not a fair score to use since a movie with a 9.3 average rating with 3 voters should not be considered better than a a movie with 8.8 average rating with 40 voters. I will use IMDB's weighted rating which is given as:


Weighted Rating (WR) = $(\frac{v}{v + m} \cdot R) + (\frac{m}{v + m} \cdot C)$ where,

- v is the number of votes for the movie,
- m is the minimum votes required to be listen the chart,
- R is the average rating of the movie, and
- C is the mean vote across the whole report


In [7]:
df_movies_info.head()


Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,...,title,video,vote_average,vote_count,belongs_to_collection.id,belongs_to_collection.name,belongs_to_collection.poster_path,belongs_to_collection.backdrop_path,cast,crew
0,False,/cXQH2u7wUIX1eoIdEj51kHXoWhX.jpg,,1350000,"[{'id': 35, 'name': 'Comedy'}, {'id': 80, 'nam...",http://www.universalstudiosentertainment.com/l...,100,tt0120735,en,"Lock, Stock and Two Smoking Barrels",...,"Lock, Stock and Two Smoking Barrels",False,8.1,5798,,,,,"[{'adult': False, 'gender': 2, 'id': 973, 'kno...","[{'adult': False, 'gender': 2, 'id': 960, 'kno..."
1,False,/cbTvuGya7E1PnL8t95AWzumjqHg.jpg,,0,"[{'id': 18, 'name': 'Drama'}]",,100017,tt0488903,de,Verfolgt,...,Punish Me,False,4.6,16,,,,,"[{'adult': False, 'gender': 2, 'id': 5202, 'kn...","[{'adult': False, 'gender': 1, 'id': 2338, 'kn..."
2,False,,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 28, 'name...",,100032,tt0099137,en,The Great Los Angeles Earthquake,...,The Great Los Angeles Earthquake,False,6.9,13,,,,,"[{'adult': False, 'gender': 1, 'id': 87038, 'k...","[{'adult': False, 'gender': 2, 'id': 36116, 'k..."
3,False,,,0,"[{'id': 27, 'name': 'Horror'}, {'id': 35, 'nam...",,100034,tt0462634,en,The Worst Horror Movie Ever Made,...,The Worst Horror Movie Ever Made,False,3.0,8,,,,,"[{'adult': False, 'gender': 0, 'id': 1022808, ...","[{'adult': False, 'gender': 2, 'id': 99005, 'k..."
4,False,,,0,"[{'id': 10402, 'name': 'Music'}]",,100038,,en,Meshuggah - Nothing,...,Meshuggah - Nothing,False,4.0,2,,,,,[],[]


In [8]:
# determine m
m = df_movies_info['vote_count'].quantile(0.9)
m


75.0

In [9]:
# calculate C
C = df_movies_info['vote_average'].mean()
C


4.7209208757213394

In [10]:
# filter dataset to exclude movies with less than m votes
df_movies_info_filtered = df_movies_info[df_movies_info['vote_count'] >= m]


In [11]:
len(df_movies_info_filtered) / len(df_movies_info)


0.10054137664346481

In [12]:
# function to calculate weighted rating
def calculate_weigted_rating(x, m=m, C=C):
    R = x['vote_average']
    v = x['vote_count']
    result = ((v/(v+m)*R)) + ((m/(v+m))*C)
    return result


In [13]:
# create new column for weighted rating
df_movies_info_filtered['weighted_rating'] = df_movies_info_filtered.apply(
    calculate_weigted_rating, axis=1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_movies_info_filtered['weighted_rating'] = df_movies_info_filtered.apply(


In [14]:
# manuall check some values to ensure function worked correctly
df_movies_info_filtered[['vote_average',
                         'vote_count', 'weighted_rating']].head()


Unnamed: 0,vote_average,vote_count,weighted_rating
0,8.1,5798,8.056848
6,5.538,2795,5.516648
8,6.6,124,5.891804
28,5.1,90,4.927691
38,6.6,123,5.888228


In [15]:
# look at top 25 movies
df_movies_info_filtered[['title', 'vote_average',
                         'vote_count', 'weighted_rating']].head(25)


Unnamed: 0,title,vote_average,vote_count,weighted_rating
0,"Lock, Stock and Two Smoking Barrels",8.1,5798,8.056848
6,Dumb and Dumber To,5.538,2795,5.516648
8,The Giant Mechanical Man,6.6,124,5.891804
28,Outpost: Black Sun,5.1,90,4.927691
38,Gertie the Dinosaur,6.6,123,5.888228
83,Lola Versus,4.9,116,4.829681
101,Léon: The Professional,8.319,12924,8.29824
114,The Butterfly Room,6.0,108,5.475787
132,The Hunger Games: Catching Fire,7.422,15559,7.409042
182,Mulholland Drive,7.8,5276,7.756843
