## Import libraries

In [2]:
from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

import pandas as pd
import numpy as np

## Import datasets

In [3]:
# Load the movie metadata from 'tmdb_5000_movies.csv'. The path is relative, so the
# file must be in the notebook's working directory.
movies = pd.read_csv('tmdb_5000_movies.csv')
# Preview the first rows to see which columns are available.
movies.head()

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...","[{""iso_3166_1"": ""GB"", ""name"": ""United Kingdom""...",2015-10-26,880674609,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466
3,250000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",http://www.thedarkknightrises.com/,49026,"[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...",en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-07-16,1084939099,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106
4,260000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://movies.disney.com/john-carter,49529,"[{""id"": 818, ""name"": ""based on novel""}, {""id"":...",en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-03-07,284139100,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124


In [4]:
# Show each column's dtype and non-null count to spot columns with missing values.
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4803 entries, 0 to 4802
Data columns (total 20 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   budget                4803 non-null   int64  
 1   genres                4803 non-null   object 
 2   homepage              1712 non-null   object 
 3   id                    4803 non-null   int64  
 4   keywords              4803 non-null   object 
 5   original_language     4803 non-null   object 
 6   original_title        4803 non-null   object 
 7   overview              4800 non-null   object 
 8   popularity            4803 non-null   float64
 9   production_companies  4803 non-null   object 
 10  production_countries  4803 non-null   object 
 11  release_date          4802 non-null   object 
 12  revenue               4803 non-null   int64  
 13  runtime               4801 non-null   float64
 14  spoken_languages      4803 non-null   object 
 15  status               

In [5]:
# Count the missing values in every column.
movies.isnull().sum()

budget                     0
genres                     0
homepage                3091
id                         0
keywords                   0
original_language          0
original_title             0
overview                   3
popularity                 0
production_companies       0
production_countries       0
release_date               1
revenue                    0
runtime                    2
spoken_languages           0
status                     0
tagline                  844
title                      0
vote_average               0
vote_count                 0
dtype: int64

In [6]:
# Keep only the columns used in the rest of the notebook.
# `.copy()` makes the result an independent DataFrame: the later column
# assignments (`movies['genres'] = ...`, `movies['overview'] = ...`) then modify
# this frame directly instead of a slice of the loaded one, which avoids
# pandas' SettingWithCopyWarning.
movies = movies[['title','genres','runtime','vote_average','vote_count','overview','id']].copy()
movies.head()

Unnamed: 0,title,genres,runtime,vote_average,vote_count,overview,id
0,Avatar,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",162.0,7.2,11800,"In the 22nd century, a paraplegic Marine is di...",19995
1,Pirates of the Caribbean: At World's End,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",169.0,6.9,4500,"Captain Barbossa, long believed to be dead, ha...",285
2,Spectre,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",148.0,6.3,4466,A cryptic message from Bond’s past sends him o...,206647
3,The Dark Knight Rises,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",165.0,7.6,9106,Following the death of District Attorney Harve...,49026
4,John Carter,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",132.0,6.1,2124,"John Carter is a war-weary, former military ca...",49529


In [7]:
import ast

def convert(obj):
    """Extract the ``name`` field from a stringified list of dicts.

    Parameters
    ----------
    obj : str or list
        Either the raw cell value, i.e. a string holding a Python/JSON-style
        list literal such as ``'[{"id": 28, "name": "Action"}]'``, or a list
        that has already been parsed/converted.

    Returns
    -------
    list
        The ``name`` value of every dict entry, in the original order.
        Entries that are not dicts (names extracted by a previous call) are
        passed through unchanged, so applying the function twice is harmless
        and the cell that uses it can be re-run.

    Raises
    ------
    ValueError, SyntaxError
        If ``obj`` is a string that is not a valid Python literal.
    KeyError
        If a dict entry has no ``'name'`` key.
    """
    # Only strings need parsing; literal_eval rejects objects that are already lists.
    if isinstance(obj, str):
        obj = ast.literal_eval(obj)
    return [item['name'] if isinstance(item, dict) else item for item in obj]

# Replace each raw genre string with the list of genre names extracted by convert().
movies['genres'] = movies['genres'].apply(convert)

# Preview the frame to check the converted genres column.
movies.head()

Unnamed: 0,title,genres,runtime,vote_average,vote_count,overview,id
0,Avatar,"[Action, Adventure, Fantasy, Science Fiction]",162.0,7.2,11800,"In the 22nd century, a paraplegic Marine is di...",19995
1,Pirates of the Caribbean: At World's End,"[Adventure, Fantasy, Action]",169.0,6.9,4500,"Captain Barbossa, long believed to be dead, ha...",285
2,Spectre,"[Action, Adventure, Crime]",148.0,6.3,4466,A cryptic message from Bond’s past sends him o...,206647
3,The Dark Knight Rises,"[Action, Crime, Drama, Thriller]",165.0,7.6,9106,Following the death of District Attorney Harve...,49026
4,John Carter,"[Action, Adventure, Science Fiction]",132.0,6.1,2124,"John Carter is a war-weary, former military ca...",49529


In [8]:
# Define a TF-IDF vectorizer that ignores English stop words

tfidf = TfidfVectorizer(stop_words = 'english')

In [9]:
# Replace missing overviews (NaN) with an empty string so that every row passed
# to the vectorizer is a string

movies['overview'] = movies['overview'].fillna('')

In [11]:
# Construct the TF-IDF matrix: fit the vectorizer on the overview text and
# transform that same text in one step

tfidf_matrix = tfidf.fit_transform(movies['overview'])

In [13]:
# Compute the cosine similarity matrix.
# TfidfVectorizer L2-normalises every row by default (norm='l2'), so the plain
# dot product computed by linear_kernel equals the cosine similarity.

cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Reverse lookup: movie title -> row index in `movies` / `cosine_sim`.
# Duplicates must be removed on the *index* (the titles). Series.drop_duplicates()
# compares the values, which are the already-unique row indices, so it would leave
# repeated titles in place and make `indices[title]` return a Series instead of a
# single index. For a repeated title the first occurrence is kept.
indices = pd.Series(movies.index, index=movies['title'])
indices = indices[~indices.index.duplicated(keep='first')]

In [14]:
# Create a function to provide recommendations

def recommender(title, cosine_sim=cosine_sim, movies=movies, indices=indices, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Title of the reference movie; it is looked up in ``indices``.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row ``i`` holds the scores of movie ``i``
        against every movie.
    movies : pandas.DataFrame
        Frame with a ``title`` column. Rows are selected by position, so they
        must be in the same order as the rows of ``cosine_sim``.
    indices : pandas.Series
        Maps a title (index) to its row position (value).
    top_n : int, default 10
        Number of recommendations to return.

    Returns
    -------
    pandas.Series
        Titles of the ``top_n`` most similar movies, most similar first. The
        reference movie itself is never included.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    ValueError
        If ``top_n`` is negative.

    Notes
    -----
    The default values of ``cosine_sim``, ``movies`` and ``indices`` are bound
    when this cell is executed. Re-run the cell after rebuilding any of them,
    or pass the new objects explicitly.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")
    if title not in indices.index:
        raise KeyError(f"No movie titled {title!r} in the dataset")

    # obtain the index of the movie that matches the title
    idx = indices[title]
    # a title that occurs more than once yields a Series; use its first occurrence
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # pair every other movie with its similarity score; the reference movie is
    # excluded explicitly, because it is not guaranteed to sort first (ties with
    # identical overviews, or an all-zero row for an empty overview)
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    # sort the movies by similarity, highest first
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)
    # keep the row positions of the top_n most similar movies
    movie_indices = [position for position, _ in sim_scores[:top_n]]
    # return their titles
    return movies['title'].iloc[movie_indices]

In [15]:
# Example: movies with overviews most similar to 'The Shawshank Redemption'.
recommender('The Shawshank Redemption')

4531               Civil Brand
3785                    Prison
609                Escape Plan
2868                  Fortress
4727              Penitentiary
1779    The 40 Year Old Virgin
2667          Fatal Attraction
3871         A Christmas Story
434           The Longest Yard
42                 Toy Story 3
Name: title, dtype: object

In [16]:
# Example: movies with overviews most similar to 'Spectre'.
recommender('Spectre')

1343    Never Say Never Again
4071    From Russia with Love
3162              Thunderball
1717               Safe Haven
11          Quantum of Solace
4339                   Dr. No
29                    Skyfall
1880              Dance Flick
3336     Diamonds Are Forever
1743                Octopussy
Name: title, dtype: object

In [17]:
# Example: movies with overviews most similar to 'Avatar'.
recommender('Avatar')

3604                       Apollo 18
2130                    The American
634                       The Matrix
1341            The Inhabited Island
529                 Tears of the Sun
1610                           Hanna
311     The Adventures of Pluto Nash
847                         Semi-Pro
775                        Supernova
2628             Blood and Chocolate
Name: title, dtype: object