# Content Based Movie Recommender

### Import libraries

In [3]:
import numpy as np
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import WhitespaceTokenizer
import pickle
import math

nltk.download('wordnet')
nltk.download("stopwords")
nltk.download("omw-1.4")

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\tanka\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\tanka\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\tanka\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

### Read in dataset

In [1]:
metadata_path = r"./movie_lens_dataset/movies_metadata.csv"

In [4]:
df = pd.read_csv(metadata_path, low_memory=False)
df.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


# Preprocessing

In [5]:
print("Number of movies:", len(df))

Number of movies: 45466


In [6]:
print("Number of null overviews:", df['overview'].isnull().sum())

Number of null overviews: 954


### Removing rows with null or empty overviews

In [7]:
df = df[df['overview'].notna()]
df = df[df['overview'] != ""]
print("Number of non-null overviews", len(df))

Number of non-null overviews 44512


### Removing punctuations

In [8]:
df['overview'] = df['overview'].str.replace(r'[^\w\s]+', '')
df.head()

  df['overview'] = df['overview'].str.replace(r'[^\w\s]+', '')


Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,Led by Woody Andys toys live happily in his ro...,...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,Cheated on mistreated and stepped on the women...,...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


### Tokenizing words
Don't run this if using sklearn lib

In [73]:
# w_tokenizer = nltk.tokenize.WhitespaceTokenizer()
# df['overview'] = df['overview'].apply(lambda x: w_tokenizer.tokenize(x))
# df.head()

### Making all words lower case

In [9]:
# df['overview'] = df['overview'].apply(lambda x: [word.lower() for word in x])
df['overview'] = df['overview'].apply(lambda x: " ".join([word.lower() for word in x.split()]))
df.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,led by woody andys toys live happily in his ro...,...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,when siblings judy and peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,a family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,cheated on mistreated and stepped on the women...,...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,just when george banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


### Removing stopwords from overview

In [10]:
stop = stopwords.words ('english')
df['overview'] = df['overview'].apply(lambda x: " ".join([word for word in x.split() if word not in (stop)]))
df.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,led woody andys toys live happily room andys b...,...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,siblings judy peter discover enchanted board g...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,family wedding reignites ancient feud nextdoor...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,cheated mistreated stepped women holding breat...,...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,george banks recovered daughters wedding recei...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


### Lemmatization


In [11]:
lemmatizer = nltk.stem.WordNetLemmatizer()

def lemmatize_text(text):
    return " ".join([lemmatizer.lemmatize(w) for w in text.split()])

df['overview'] = df['overview'].apply(lemmatize_text)

df.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,led woody andys toy live happily room andys bi...,...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,sibling judy peter discover enchanted board ga...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,family wedding reignites ancient feud nextdoor...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,cheated mistreated stepped woman holding breat...,...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,george bank recovered daughter wedding receive...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


In [12]:
df = df[df['overview'].notna()]
print("Number of non-null overviews", len(df))

Number of non-null overviews 44512


In [13]:
print("Number of null overviews:", df['overview'].isnull().sum())

Number of null overviews: 0


Saving the dataframe to a preprocessed csv

In [14]:
df.to_csv('./movie_lens_dataset/movies_metadata_processed.csv')
print("Processed csv saved!")

Processed csv saved!


# Class to create TF IDF matrix

In [15]:
from tqdm import tqdm

class TFIDF:
    def __init__(self, data: pd.DataFrame) -> None:
        self.total_docs = len(data)
        self.word_set = set()
        self.index_dict = {} # Dictionary to store index for each word
        self.word_count = {} # Dictionary to store the count of the number of documents containing the given word
        self.processing_data(data)
        
    def processing_data(self, data) -> None:
        
        # Creating the word_set
        for _, movie_row in data.iterrows():
            overview = movie_row["overview"]
            for word in overview:
                if word not in self.word_set:
                    self.word_set.add(word)
                    
        # Creating the index_dict
        i = 0
        for word in self.word_set:
            self.index_dict[word] = i
            i += 1
        
        # Creating the word_count
        for word in self.word_set:
            self.word_count[word] = 0
            for _, movie_row in data.iterrows():
                overview = overview = movie_row["overview"]
                if word in overview:
                    self.word_count[word] += 1
    
    def term_frequency(self, word: str, overview: str) -> float:
        return document.count(word) / len(overview)

    def inverse_document_frequency(self, word: str) -> float:
        return 1.0 + math.log(self.total_docs / word_count[word])

    def tf_idf(self, overview: str) -> np.array:
        tf_idf_vec = np.zeros((len(word_set),))
        for word in overview:
            # Add the tf_idf value to the 
            tf_idf_vec[self.index_dict[word]] = self.term_frequency(word, overview) * self.inverse_document_frequency(word)
        return tf_idf_vec

    def generate_tf_idf_matrix(self) -> list:
        tf_idf_matrix = []
                                                                                                                      
        for _, movie_row in tqdm(data.iterrows()):
            vec = self.tf_idf(movie_row['overview'])
            tf_idf_matrix.append(vec)
                                                                                                                    
        return tf_idf_matrix

# Generating TFIDF from dataset and serializing using pickle
Serializing allows us to save time for re-generation of the index.

In [48]:
# tfidf = TFIDF(df)
# tfidf_matrix = tfidf.generate_tf_idf_matrix()
# # Takes too long to run

# tfidf_matrix[:5]

# with open('./pickle/tfidf_matrix.pickle', 'wb') as handle:
#     pickle.dump(tfidf_matrix, handle, protocol=pickle.HIGHEST_PROTOCOL)
#     print("Index saved!")

KeyboardInterrupt: 

Using sklearn library which runs way faster

In [20]:
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(df['overview'])
tfidf_matrix = tfidf_matrix.astype(np.float32)

with open('./pickle/tfidf_matrix.pickle', 'wb') as handle:
    pickle.dump(tfidf_matrix, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print("Index saved!")

Index saved!


# Creating cosine similarity model

The cosine similarity model is a (44512, 44512) matrix consisting of cosine similarity scores of all movies against each other.

In [35]:
from sklearn.metrics.pairwise import cosine_similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

cosine_sim.shape

(44512, 44512)

In [36]:
with open('./pickle/cosine_sim_.pickle', 'wb') as handle:
    pickle.dump(cosine_sim, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print("Cosine Similarity model saved!")

Cosine Similarity model saved!


# Getting recomendations
Using test movie

In [20]:
cosine_sim_file = open('./pickle/cosine_sim_.pickle', 'rb')
cos_sim = pickle.load(cosine_sim_file)
print("Consine Similiarity Model loaded")

Consine Similiarity Model loaded


In [54]:
test_movie = "The Dark Knight"

Create movie title to index series and save it to pickle

In [40]:
indices = pd.Series(df.index, index=df['title'])
with open('./pickle/movie_indices.pickle', 'wb') as handle:
    pickle.dump(indices, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print("Indices saved!")

Indices saved!


Get the movie index

In [60]:
movie_index = indices[test_movie]
if len(movie_index) > 1:
    movie_index = indices[test_movie][0]

Getting top 10 scores of the movie against all other movies

In [61]:
similarity_scores = pd.DataFrame(cos_sim[movie_index], columns=['score'])

Obtaining the top 10 indices sorted by descending similarity scores

In [62]:
movie_indices = similarity_scores.sort_values(by="score", ascending=False)[:10].index

Getting the list of movie titles

In [63]:
df[['title','imdb_id']].iloc[movie_indices]

Unnamed: 0,title,imdb_id
12515,Leatherheads,tt0379865
566,Foreign Student,tt0109828
4375,Everybody's All-American,tt0095119
25113,Devil's Doorway,tt0042395
35664,See You in Montevideo,tt1801071
5092,All the Right Moves,tt0085154
11209,Invincible,tt0445990
36132,Saturday's Hero,tt0043994
7632,Twice Upon a Time,tt0086489
4388,Johnny Be Good,tt0095409


In [37]:
import pickle
import pandas as pd
def getTfIdfRecommendations(name: str):
    print("Getting recommendations for:", name)
    
    # Load processed dataset
    df = pd.read_csv('./movie_lens_dataset/movies_metadata_processed.csv')
    
    # Load cosine similarity matrix (Might want to load this in server init)
    consine_sim_file = open('./pickle/cosine_sim_.pickle','rb')
    cosine_sim = pickle.load(consine_sim_file)
    
    # Load movie indices
    movie_indices_file = open('./pickle/movie_indices.pickle','rb')
    movie_indices = pickle.load(movie_indices_file)
    
    # Get index of query movie
    movie_index = movie_indices[name]
    
    similarity_scores = pd.DataFrame(cosine_sim[movie_index], columns=['score'])
    
    # Top 10
    top_indices = similarity_scores.sort_values(by="score", ascending=False)[:10].index
    
    # Get movie title and imdb_id
    output = df[['title','imdb_id']].iloc[top_indices]
    
    return output.to_json()

In [38]:
getTfIdfRecommendations('The Dark Knight Rises')

Getting recommendations for: The Dark Knight Rises


'{"title":{"18252":"Elena","30763":"Deadly Daycare","19164":"The One Percent","33909":"Hum Saath Saath Hain","43989":"Once More","44165":"Nicostratos the Pelican","25524":"Drishyam","32542":"White Cannibal Queen","33825":"Maine Pyar Kiya","4113":"The Luzhin Defence"},"imdb_id":{"18252":"tt1925421","30763":"tt3455826","19164":"tt0819791","33909":"tt0216817","43989":"tt0095097","44165":"tt1891942","25524":"tt3417422","32542":"tt0078936","33825":"tt0100095","4113":"tt0211492"}}'

# Knowledge Based Recommender System

In [21]:
df.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,led woody andys toy live happily room andys bi...,...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,sibling judy peter discover enchanted board ga...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,family wedding reignites ancient feud nextdoor...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,cheated mistreated stepped woman holding breat...,...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,george bank recovered daughter wedding receive...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


### Recommendations based on popularity
Let popular movies be movies with more than 5000 votes

In [37]:
popular_df = df[df["vote_count"] > 5000]
popular_df.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,led woody andys toy live happily room andys bi...,...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
46,False,,33000000,"[{'id': 80, 'name': 'Crime'}, {'id': 9648, 'na...",http://www.sevenmovie.com/,807,tt0114369,en,Se7en,two homicide detective desperate hunt serial k...,...,1995-09-22,327311859.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Seven deadly sins. Seven ways to die.,Se7en,False,8.1,5915.0
256,False,"{'id': 10, 'name': 'Star Wars Collection', 'po...",11000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",http://www.starwars.com/films/star-wars-episod...,11,tt0076759,en,Star Wars,princess leia captured held hostage evil imper...,...,1977-05-25,775398007.0,121.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,"A long time ago in a galaxy far, far away...",Star Wars,False,8.1,6778.0
292,False,,8000000,"[{'id': 53, 'name': 'Thriller'}, {'id': 80, 'n...",,680,tt0110912,en,Pulp Fiction,burgerloving hit man philosophical partner dru...,...,1994-09-10,213928762.0,154.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Just because you are a character doesn't mean ...,Pulp Fiction,False,8.3,8670.0
314,False,,25000000,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",,278,tt0111161,en,The Shawshank Redemption,framed 1940s double murder wife lover upstandi...,...,1994-09-23,28341469.0,142.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Fear can hold you prisoner. Hope can set you f...,The Shawshank Redemption,False,8.5,8358.0


### Recommendations based on high ratings
Let a high rating be a rating > 8/10

In [43]:
high_rated_df = df[df["vote_average"] >= 8]
high_rated_df.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
46,False,,33000000,"[{'id': 80, 'name': 'Crime'}, {'id': 9648, 'na...",http://www.sevenmovie.com/,807,tt0114369,en,Se7en,two homicide detective desperate hunt serial k...,...,1995-09-22,327311859.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Seven deadly sins. Seven ways to die.,Se7en,False,8.1,5915.0
49,False,,6000000,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",http://www.mgm.com/#/our-titles/2083/The-Usual...,629,tt0114814,en,The Usual Suspects,held la interrogation room verbal kint attempt...,...,1995-07-19,23341568.0,106.0,"[{'iso_639_1': 'es', 'name': 'Español'}, {'iso...",Released,Five Criminals. One Line Up. No Coincidence.,The Usual Suspects,False,8.1,3334.0
109,False,,1300000,"[{'id': 80, 'name': 'Crime'}, {'id': 18, 'name...",,103,tt0075314,en,Taxi Driver,mentally unstable vietnam war veteran work nig...,...,1976-02-07,28262574.0,114.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,"On every street in every city, there's a nobod...",Taxi Driver,False,8.1,2632.0
186,False,,0,"[{'id': 14, 'name': 'Fantasy'}, {'id': 35, 'na...",,58372,tt0114241,en,Reckless,christmas eve relentlessly cheerful woman esca...,...,1995-11-17,0.0,91.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The most twisted Christmas ever.,Reckless,False,10.0,1.0
256,False,"{'id': 10, 'name': 'Star Wars Collection', 'po...",11000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",http://www.starwars.com/films/star-wars-episod...,11,tt0076759,en,Star Wars,princess leia captured held hostage evil imper...,...,1977-05-25,775398007.0,121.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,"A long time ago in a galaxy far, far away...",Star Wars,False,8.1,6778.0


### Recommendations based on popularity and high ratings

In [45]:
knowledge_based_recommended_df = df[(df["vote_average"] >= 8) & (df["vote_count"] > 5000)]
knowledge_based_recommended_df

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
46,False,,33000000,"[{'id': 80, 'name': 'Crime'}, {'id': 9648, 'na...",http://www.sevenmovie.com/,807,tt0114369,en,Se7en,two homicide detective desperate hunt serial k...,...,1995-09-22,327311900.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Seven deadly sins. Seven ways to die.,Se7en,False,8.1,5915.0
256,False,"{'id': 10, 'name': 'Star Wars Collection', 'po...",11000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",http://www.starwars.com/films/star-wars-episod...,11,tt0076759,en,Star Wars,princess leia captured held hostage evil imper...,...,1977-05-25,775398000.0,121.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,"A long time ago in a galaxy far, far away...",Star Wars,False,8.1,6778.0
292,False,,8000000,"[{'id': 53, 'name': 'Thriller'}, {'id': 80, 'n...",,680,tt0110912,en,Pulp Fiction,burgerloving hit man philosophical partner dru...,...,1994-09-10,213928800.0,154.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Just because you are a character doesn't mean ...,Pulp Fiction,False,8.3,8670.0
314,False,,25000000,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",,278,tt0111161,en,The Shawshank Redemption,framed 1940s double murder wife lover upstandi...,...,1994-09-23,28341470.0,142.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Fear can hold you prisoner. Hope can set you f...,The Shawshank Redemption,False,8.5,8358.0
351,False,,55000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,13,tt0109830,en,Forrest Gump,man low iq accomplished great thing life prese...,...,1994-07-06,677945400.0,142.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,"The world will never be the same, once you've ...",Forrest Gump,False,8.2,8147.0
359,False,"{'id': 94032, 'name': 'The Lion King Collectio...",45000000,"[{'id': 10751, 'name': 'Family'}, {'id': 16, '...",http://movies.disney.com/the-lion-king,8587,tt0110357,en,The Lion King,young lion cub named simba cant wait king uncl...,...,1994-06-23,788241800.0,89.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Life's greatest adventure is finding your plac...,The Lion King,False,8.0,5520.0
834,False,"{'id': 230, 'name': 'The Godfather Collection'...",6000000,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",http://www.thegodfather.com/,238,tt0068646,en,The Godfather,spanning year 1945 1955 chronicle fictional it...,...,1972-03-14,245066400.0,175.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,An offer you can't refuse.,The Godfather,False,8.5,6024.0
1154,False,"{'id': 10, 'name': 'Star Wars Collection', 'po...",18000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",http://www.starwars.com/films/star-wars-episod...,1891,tt0080684,en,The Empire Strikes Back,epic saga continues luke skywalker hope defeat...,...,1980-05-17,538400000.0,124.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The Adventure Continues...,The Empire Strikes Back,False,8.2,5998.0
1225,False,"{'id': 264, 'name': 'Back to the Future Collec...",19000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 35, '...",http://www.backtothefuture.com/movies/backtoth...,105,tt0088763,en,Back to the Future,eighty teenager marty mcfly accidentally sent ...,...,1985-07-03,381109800.0,116.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,He's the only kid ever to get into trouble bef...,Back to the Future,False,8.0,6239.0
2843,False,,63000000,"[{'id': 18, 'name': 'Drama'}]",http://www.foxmovies.com/movies/fight-club,550,tt0137523,en,Fight Club,tickingtimebomb insomniac slippery soap salesm...,...,1999-10-15,100853800.0,139.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Mischief. Mayhem. Soap.,Fight Club,False,8.3,9678.0


# Collaborative Filterring

In [46]:
df.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,led woody andys toy live happily room andys bi...,...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,sibling judy peter discover enchanted board ga...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,family wedding reignites ancient feud nextdoor...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,cheated mistreated stepped woman holding breat...,...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,george bank recovered daughter wedding receive...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0
