### Importing dependencies

In [1]:
import pandas as pd
import numpy as np
import ast
import nltk
from nltk.corpus import stopwords
import difflib
import pickle
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
# NOTE(review): this is a global switch, so it also hides genuine chained-assignment mistakes.
pd.options.mode.chained_assignment = None

In [3]:
# Fetch the NLTK stop-word corpus that removeStopwords reads.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

### Exploring the dataset

In [4]:
# Load the two TMDB 5000 CSV files (paths are relative to the notebook's working directory).
movies_df = pd.read_csv('datasets/tmdb_5000_movies.csv')
credits_df = pd.read_csv("datasets/tmdb_5000_credits.csv")

In [5]:
# (rows, columns) of the raw movies table.
movies_df.shape

(4803, 20)

In [6]:
# Preview the first rows; genres, keywords, etc. arrive as stringified lists of dicts.
movies_df.head()

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...","[{""iso_3166_1"": ""GB"", ""name"": ""United Kingdom""...",2015-10-26,880674609,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466
3,250000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",http://www.thedarkknightrises.com/,49026,"[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...",en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-07-16,1084939099,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106
4,260000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://movies.disney.com/john-carter,49529,"[{""id"": 818, ""name"": ""based on novel""}, {""id"":...",en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-03-07,284139100,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124


In [7]:
# Column dtypes and non-null counts of the raw movies table.
movies_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4803 entries, 0 to 4802
Data columns (total 20 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   budget                4803 non-null   int64  
 1   genres                4803 non-null   object 
 2   homepage              1712 non-null   object 
 3   id                    4803 non-null   int64  
 4   keywords              4803 non-null   object 
 5   original_language     4803 non-null   object 
 6   original_title        4803 non-null   object 
 7   overview              4800 non-null   object 
 8   popularity            4803 non-null   float64
 9   production_companies  4803 non-null   object 
 10  production_countries  4803 non-null   object 
 11  release_date          4802 non-null   object 
 12  revenue               4803 non-null   int64  
 13  runtime               4801 non-null   float64
 14  spoken_languages      4803 non-null   object 
 15  status               

### Preprocessing the data

In [8]:
# Preview the credits table: movie_id, title and stringified cast/crew lists.
credits_df.head()

Unnamed: 0,movie_id,title,cast,crew
0,19995,Avatar,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,285,Pirates of the Caribbean: At World's End,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,206647,Spectre,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,49026,The Dark Knight Rises,"[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,49529,John Carter,"[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."


In [9]:
# Give the credits key the same name as in movies_df so both tables can be joined on 'id'.
credits_df = credits_df.rename(columns={'movie_id': 'id'})

In [10]:
# movies_df already has a 'title' column, so drop the credits copy before merging.
# errors='ignore' keeps this cell re-runnable: a second run no longer raises KeyError
# once the column is already gone.
credits_df = credits_df.drop(columns='title', errors='ignore')

In [11]:
# Inner-join movie metadata with its cast/crew on the shared 'id' key.
movies = pd.merge(movies_df, credits_df, on='id')

In [12]:
# Spot-check the merged frame: cast and crew now sit next to the movie columns.
movies.head(3)

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,...,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...",...,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...",...,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...",...,880674609,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."


In [13]:
# Look at one raw crew value: a string holding a list of dicts with 'job' and 'name' keys,
# which is the structure getCrew filters on.
movies.iloc[96].crew

'[{"credit_id": "56e8462cc3a368408400354c", "department": "Sound", "gender": 2, "id": 947, "job": "Original Music Composer", "name": "Hans Zimmer"}, {"credit_id": "52fe4534c3a368484e04de55", "department": "Writing", "gender": 2, "id": 525, "job": "Screenplay", "name": "Christopher Nolan"}, {"credit_id": "52fe4534c3a368484e04de4b", "department": "Directing", "gender": 2, "id": 525, "job": "Director", "name": "Christopher Nolan"}, {"credit_id": "52fe4534c3a368484e04de2d", "department": "Production", "gender": 2, "id": 525, "job": "Producer", "name": "Christopher Nolan"}, {"credit_id": "52fe4534c3a368484e04de33", "department": "Production", "gender": 1, "id": 556, "job": "Producer", "name": "Emma Thomas"}, {"credit_id": "52fe4534c3a368484e04de3f", "department": "Camera", "gender": 2, "id": 559, "job": "Director of Photography", "name": "Wally Pfister"}, {"credit_id": "52fe4534c3a368484e04de6b", "department": "Production", "gender": 0, "id": 561, "job": "Casting", "name": "John Papsidera"}

In [14]:
# Number of distinct titles; a value below the row count means some titles repeat.
movies['title'].nunique()

4800

In [15]:
# Missing values per column before any filling.
movies.isnull().sum()

budget                     0
genres                     0
homepage                3091
id                         0
keywords                   0
original_language          0
original_title             0
overview                   3
popularity                 0
production_companies       0
production_countries       0
release_date               1
revenue                    0
runtime                    2
spoken_languages           0
status                     0
tagline                  844
title                      0
vote_average               0
vote_count                 0
cast                       0
crew                       0
dtype: int64

In [16]:
# Replace every missing value with an empty string so text columns can be handled as str.
# NOTE(review): this is a blanket fill, so numeric columns with gaps are affected too --
# `runtime` has missing values and shows up as object dtype in the later movies.info().
movies = movies.fillna('')

### Preprocessing Functions

In [17]:
def change_duplicate_titles(df=None):
    """Return the titles with a release-year suffix added to every repeated title.

    Each title that occurs more than once (all occurrences, including the
    first) becomes ``"<title> (<year>)"``, where ``<year>`` is the part of
    ``release_date`` before the first ``-``. Titles that occur once are
    returned unchanged. A repeated title whose release date is empty is also
    left unchanged rather than being given an empty ``" ()"`` suffix.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Frame with ``title`` and ``release_date`` columns. Defaults to the
        notebook-level ``movies`` frame, so existing no-argument calls keep
        working.

    Returns
    -------
    pandas.Series
        A new Series of titles aligned to ``df``'s index; ``df`` is not modified.
    """
    if df is None:
        df = movies

    titles = df['title']
    # keep=False flags every occurrence of a repeated title, not only the later ones.
    is_repeated = titles.duplicated(keep=False)
    # Work on whole columns instead of series[i] lookups: those are label-based
    # and break on any index that is not exactly 0..n-1.
    years = df['release_date'].fillna('').astype(str).str.split('-', n=1).str[0]

    needs_suffix = is_repeated & years.ne('')
    return titles.where(~needs_suffix, titles + " (" + years + ")")

In [18]:
def getWord(dict_list):
    """Parse a stringified list of dicts and return each entry's 'name', in order.

    dict_list: string containing a Python-literal list of dicts that each
    have a 'name' key (parsed with ast.literal_eval).
    """
    return [entry['name'] for entry in ast.literal_eval(dict_list)]

In [19]:
def getCast(dict_list, limit=10):
    """Return the names of the first ``limit`` cast members.

    Parameters
    ----------
    dict_list : str
        String containing a Python-literal list of dicts, each with a 'name'
        key (parsed with ast.literal_eval).
    limit : int or None, optional
        Maximum number of names to return, taken from the start of the list.
        The default of 10 matches the previously hard-coded cut-off; ``None``
        returns every name.

    Returns
    -------
    list of str
        Names in their original order.
    """
    cast_members = ast.literal_eval(dict_list)
    # Slice first so entries past the limit are never visited.
    return [member['name'] for member in cast_members[:limit]]

In [20]:
def getCrew(dict_list, job):
    """Return the names of all crew entries whose 'job' equals ``job``.

    dict_list: string containing a Python-literal list of dicts with 'job'
    and 'name' keys (parsed with ast.literal_eval).
    job: exact job label to match, e.g. 'Director'.
    """
    crew_members = ast.literal_eval(dict_list)
    return [member['name'] for member in crew_members if member['job'] == job]

In [21]:
def removeStopwords(sentences, stopwords_english=None):
    """Return the tokens of ``sentences`` that are not stop words.

    Parameters
    ----------
    sentences : iterable of str
        Tokens to filter (despite the name, each item is compared as a single
        word). Matching is exact, so case and attached punctuation matter.
    stopwords_english : collection of str, optional
        Words to drop. When omitted, NLTK's English stop-word list is used.

    Returns
    -------
    list of str
        The surviving tokens, in their original order.
    """
    if stopwords_english is None:
        # This function is applied once per row; load the NLTK list only on the
        # first call and reuse it afterwards instead of rebuilding it every time.
        cached = getattr(removeStopwords, '_english_stopwords', None)
        if cached is None:
            cached = frozenset(stopwords.words('english'))
            removeStopwords._english_stopwords = cached
        stopwords_english = cached
    return [token for token in sentences if token not in stopwords_english]

In [22]:
# Turn each stringified list of dicts into a plain list of its 'name' values.
for name_column in ('genres', 'keywords', 'production_companies', 'spoken_languages'):
    movies[name_column] = movies[name_column].apply(getWord)

In [23]:
# Show the frame after genres/keywords/companies/languages were parsed into lists of names.
movies

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,...,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew
0,237000000,"[Action, Adventure, Fantasy, Science Fiction]",http://www.avatarmovie.com/,19995,"[culture clash, future, space war, space colon...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[Ingenious Film Partners, Twentieth Century Fo...",...,2787965087,162.0,"[English, Español]",Released,Enter the World of Pandora.,Avatar,7.2,11800,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,300000000,"[Adventure, Fantasy, Action]",http://disney.go.com/disneypictures/pirates/,285,"[ocean, drug abuse, exotic island, east india ...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[Walt Disney Pictures, Jerry Bruckheimer Films...",...,961000000,169.0,[English],Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,245000000,"[Action, Adventure, Crime]",http://www.sonypictures.com/movies/spectre/,206647,"[spy, based on novel, secret agent, sequel, mi...",en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[Columbia Pictures, Danjaq, B24]",...,880674609,148.0,"[Français, English, Español, Italiano, Deutsch]",Released,A Plan No One Escapes,Spectre,6.3,4466,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,250000000,"[Action, Crime, Drama, Thriller]",http://www.thedarkknightrises.com/,49026,"[dc comics, crime fighter, terrorist, secret i...",en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.312950,"[Legendary Pictures, Warner Bros., DC Entertai...",...,1084939099,165.0,[English],Released,The Legend Ends,The Dark Knight Rises,7.6,9106,"[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,260000000,"[Action, Adventure, Science Fiction]",http://movies.disney.com/john-carter,49529,"[based on novel, mars, medallion, space travel...",en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,[Walt Disney Pictures],...,284139100,132.0,[English],Released,"Lost in our world, found in another.",John Carter,6.1,2124,"[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4798,220000,"[Action, Crime, Thriller]",,9367,"[united states–mexico barrier, legs, arms, pap...",es,El Mariachi,El Mariachi just wants to play his guitar and ...,14.269792,[Columbia Pictures],...,2040920,81.0,[Español],Released,"He didn't come looking for trouble, but troubl...",El Mariachi,6.6,238,"[{""cast_id"": 1, ""character"": ""El Mariachi"", ""c...","[{""credit_id"": ""52fe44eec3a36847f80b280b"", ""de..."
4799,9000,"[Comedy, Romance]",,72766,[],en,Newlyweds,A newlywed couple's honeymoon is upended by th...,0.642552,[],...,0,85.0,[],Released,A newlywed couple's honeymoon is upended by th...,Newlyweds,5.9,5,"[{""cast_id"": 1, ""character"": ""Buzzy"", ""credit_...","[{""credit_id"": ""52fe487dc3a368484e0fb013"", ""de..."
4800,0,"[Comedy, Drama, Romance, TV Movie]",http://www.hallmarkchannel.com/signedsealeddel...,231617,"[date, love at first sight, narration, investi...",en,"Signed, Sealed, Delivered","""Signed, Sealed, Delivered"" introduces a dedic...",1.444476,"[Front Street Pictures, Muse Entertainment Ent...",...,0,120.0,[English],Released,,"Signed, Sealed, Delivered",7.0,6,"[{""cast_id"": 8, ""character"": ""Oliver O\u2019To...","[{""credit_id"": ""52fe4df3c3a36847f8275ecf"", ""de..."
4801,0,[],http://shanghaicalling.com/,126186,[],en,Shanghai Calling,When ambitious New York attorney Sam is sent t...,0.857008,[],...,0,98.0,[English],Released,A New Yorker in Shanghai,Shanghai Calling,5.7,7,"[{""cast_id"": 3, ""character"": ""Sam"", ""credit_id...","[{""credit_id"": ""52fe4ad9c3a368484e16a36b"", ""de..."


In [24]:
# Replace the raw cast string with the list of names returned by getCast.
movies['cast'] = movies['cast'].map(getCast)

In [25]:
# (new column, exact 'job' label in the crew data) pairs, in the order the columns are added.
crew_roles = (
    ('director', 'Director'),
    ('DOP', 'Director of Photography'),
    ('producer', 'Producer'),
    ('editor', 'Editor'),
    ('screenplay', 'Screenplay'),
    ('original_music_composer', 'Original Music Composer'),
)
for role_column, job_label in crew_roles:
    movies[role_column] = movies['crew'].apply(getCrew, job=job_label)

In [26]:
# Show the frame with the parsed cast and the new per-role crew columns.
movies

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,...,vote_average,vote_count,cast,crew,director,DOP,producer,editor,screenplay,original_music_composer
0,237000000,"[Action, Adventure, Fantasy, Science Fiction]",http://www.avatarmovie.com/,19995,"[culture clash, future, space war, space colon...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[Ingenious Film Partners, Twentieth Century Fo...",...,7.2,11800,"[Sam Worthington, Zoe Saldana, Sigourney Weave...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de...",[James Cameron],"[Mauro Fiore, Chiling Lin]","[James Cameron, Jon Landau]","[Stephen E. Rivkin, James Cameron, John Refoua]",[James Cameron],[James Horner]
1,300000000,"[Adventure, Fantasy, Action]",http://disney.go.com/disneypictures/pirates/,285,"[ocean, drug abuse, exotic island, east india ...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[Walt Disney Pictures, Jerry Bruckheimer Films...",...,6.9,4500,"[Johnny Depp, Orlando Bloom, Keira Knightley, ...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de...",[Gore Verbinski],[Dariusz Wolski],"[Jerry Bruckheimer, Eric McLeod, Chad Oman, Pe...","[Stephen E. Rivkin, Craig Wood]","[Ted Elliott, Terry Rossio]",[Hans Zimmer]
2,245000000,"[Action, Adventure, Crime]",http://www.sonypictures.com/movies/spectre/,206647,"[spy, based on novel, secret agent, sequel, mi...",en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[Columbia Pictures, Danjaq, B24]",...,6.3,4466,"[Daniel Craig, Christoph Waltz, Léa Seydoux, R...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de...",[Sam Mendes],[Hoyte van Hoytema],"[Barbara Broccoli, Michael G. Wilson]",[Lee Smith],"[John Logan, Robert Wade, Neal Purvis, Jez But...",[Thomas Newman]
3,250000000,"[Action, Crime, Drama, Thriller]",http://www.thedarkknightrises.com/,49026,"[dc comics, crime fighter, terrorist, secret i...",en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.312950,"[Legendary Pictures, Warner Bros., DC Entertai...",...,7.6,9106,"[Christian Bale, Michael Caine, Gary Oldman, A...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de...",[Christopher Nolan],[Wally Pfister],"[Charles Roven, Christopher Nolan, Emma Thomas]",[Lee Smith],"[Christopher Nolan, Jonathan Nolan]",[Hans Zimmer]
4,260000000,"[Action, Adventure, Science Fiction]",http://movies.disney.com/john-carter,49529,"[based on novel, mars, medallion, space travel...",en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,[Walt Disney Pictures],...,6.1,2124,"[Taylor Kitsch, Lynn Collins, Samantha Morton,...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de...",[Andrew Stanton],[Daniel Mindel],"[Colin Wilson, Jim Morris, Lindsey Collins]",[Eric Zumbrunnen],"[Andrew Stanton, Michael Chabon, Mark Andrews]",[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4798,220000,"[Action, Crime, Thriller]",,9367,"[united states–mexico barrier, legs, arms, pap...",es,El Mariachi,El Mariachi just wants to play his guitar and ...,14.269792,[Columbia Pictures],...,6.6,238,"[Carlos Gallardo, Jaime de Hoyos, Peter Marqua...","[{""credit_id"": ""52fe44eec3a36847f80b280b"", ""de...",[Robert Rodriguez],[Robert Rodriguez],"[Robert Rodriguez, Carlos Gallardo]",[Robert Rodriguez],[],[]
4799,9000,"[Comedy, Romance]",,72766,[],en,Newlyweds,A newlywed couple's honeymoon is upended by th...,0.642552,[],...,5.9,5,"[Edward Burns, Kerry Bishé, Marsha Dietlein, C...","[{""credit_id"": ""52fe487dc3a368484e0fb013"", ""de...",[Edward Burns],[],"[Edward Burns, William Rexer, Aaron Lubin]",[Janet Gaynor],[],[]
4800,0,"[Comedy, Drama, Romance, TV Movie]",http://www.hallmarkchannel.com/signedsealeddel...,231617,"[date, love at first sight, narration, investi...",en,"Signed, Sealed, Delivered","""Signed, Sealed, Delivered"" introduces a dedic...",1.444476,"[Front Street Pictures, Muse Entertainment Ent...",...,7.0,6,"[Eric Mabius, Kristin Booth, Crystal Lowe, Geo...","[{""credit_id"": ""52fe4df3c3a36847f8275ecf"", ""de...",[Scott Smith],[Adam Sliwinski],[Harvey Kahn],[Lisa Binkley],[],[Hal Beckett]
4801,0,[],http://shanghaicalling.com/,126186,[],en,Shanghai Calling,When ambitious New York attorney Sam is sent t...,0.857008,[],...,5.7,7,"[Daniel Henney, Eliza Coupe, Bill Paxton, Alan...","[{""credit_id"": ""52fe4ad9c3a368484e16a36b"", ""de...",[Daniel Hsia],[],[],[],[],[]


In [27]:
# Overwrite the titles with the disambiguated ones (repeated titles get a release-year suffix).
movies['title'] = change_duplicate_titles()

In [28]:
# Re-check dtypes and non-null counts after preprocessing.
movies.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4803 entries, 0 to 4802
Data columns (total 28 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   budget                   4803 non-null   int64  
 1   genres                   4803 non-null   object 
 2   homepage                 4803 non-null   object 
 3   id                       4803 non-null   int64  
 4   keywords                 4803 non-null   object 
 5   original_language        4803 non-null   object 
 6   original_title           4803 non-null   object 
 7   overview                 4803 non-null   object 
 8   popularity               4803 non-null   float64
 9   production_companies     4803 non-null   object 
 10  production_countries     4803 non-null   object 
 11  release_date             4803 non-null   object 
 12  revenue                  4803 non-null   int64  
 13  runtime                  4803 non-null   object 
 14  spoken_languages        

In [29]:
# Confirm that no missing values remain in any column.
movies.isnull().sum()

budget                     0
genres                     0
homepage                   0
id                         0
keywords                   0
original_language          0
original_title             0
overview                   0
popularity                 0
production_companies       0
production_countries       0
release_date               0
revenue                    0
runtime                    0
spoken_languages           0
status                     0
tagline                    0
title                      0
vote_average               0
vote_count                 0
cast                       0
crew                       0
director                   0
DOP                        0
producer                   0
editor                     0
screenplay                 0
original_music_composer    0
dtype: int64

In [30]:
# Columns kept in the display-oriented table that recommend() reads from.
# The selection happens before overview/tagline are lowercased and tokenised below.
export_columns = [
    'id', 'title', 'overview', 'genres', 'keywords', 'tagline',
    'release_date', 'runtime', 'budget', 'revenue', 'spoken_languages',
    'vote_count', 'vote_average', 'cast', 'director', 'producer',
    'screenplay', 'DOP', 'editor', 'original_music_composer',
    'production_companies',
]
export_movies = movies[export_columns]

In [31]:
# Lowercase the free-text columns so the later stop-word comparison is case-insensitive.
for text_column in ('overview', 'tagline'):
    movies[text_column] = movies[text_column].map(str.lower)

In [32]:
# Split each overview on whitespace and drop stop words; the column now holds token lists.
movies['overview'] = movies['overview'].apply(
    lambda text: removeStopwords(str(text).split())
)

In [33]:
# Split each tagline on whitespace and drop stop words; the column now holds token lists.
movies['tagline'] = movies['tagline'].apply(
    lambda text: removeStopwords(str(text).split())
)

In [34]:
# Spot-check: overview (and tagline) are now lowercase token lists with stop words removed.
movies.head(3)

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,...,vote_average,vote_count,cast,crew,director,DOP,producer,editor,screenplay,original_music_composer
0,237000000,"[Action, Adventure, Fantasy, Science Fiction]",http://www.avatarmovie.com/,19995,"[culture clash, future, space war, space colon...",en,Avatar,"[22nd, century,, paraplegic, marine, dispatche...",150.437577,"[Ingenious Film Partners, Twentieth Century Fo...",...,7.2,11800,"[Sam Worthington, Zoe Saldana, Sigourney Weave...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de...",[James Cameron],"[Mauro Fiore, Chiling Lin]","[James Cameron, Jon Landau]","[Stephen E. Rivkin, James Cameron, John Refoua]",[James Cameron],[James Horner]
1,300000000,"[Adventure, Fantasy, Action]",http://disney.go.com/disneypictures/pirates/,285,"[ocean, drug abuse, exotic island, east india ...",en,Pirates of the Caribbean: At World's End,"[captain, barbossa,, long, believed, dead,, co...",139.082615,"[Walt Disney Pictures, Jerry Bruckheimer Films...",...,6.9,4500,"[Johnny Depp, Orlando Bloom, Keira Knightley, ...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de...",[Gore Verbinski],[Dariusz Wolski],"[Jerry Bruckheimer, Eric McLeod, Chad Oman, Pe...","[Stephen E. Rivkin, Craig Wood]","[Ted Elliott, Terry Rossio]",[Hans Zimmer]
2,245000000,"[Action, Adventure, Crime]",http://www.sonypictures.com/movies/spectre/,206647,"[spy, based on novel, secret agent, sequel, mi...",en,Spectre,"[cryptic, message, bond’s, past, sends, trail,...",107.376788,"[Columbia Pictures, Danjaq, B24]",...,6.3,4466,"[Daniel Craig, Christoph Waltz, Léa Seydoux, R...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de...",[Sam Mendes],[Hoyte van Hoytema],"[Barbara Broccoli, Michael G. Wilson]",[Lee Smith],"[John Logan, Robert Wade, Neal Purvis, Jez But...",[Thomas Newman]


In [35]:
# Take an explicit copy: adding a column to a bare column-selection of `movies` is exactly
# the pattern pandas' SettingWithCopyWarning reports.
movies_tags = movies[['id', 'title']].copy()

# Every source column holds a list per row, so `+` concatenates them into one token list.
movies_tags['tags'] = (
    movies['overview']
    + movies['genres']
    + movies['keywords']
    + movies['tagline']
    + movies['director']
    + movies['cast']
    + movies['producer']
    + movies['production_companies']
    + movies['screenplay']
    + movies['DOP']
)

In [36]:
# Collapse each token list into one lowercase, space-separated string.
# The isinstance guard makes the cell safe to re-run: joining a value that is already a
# string would otherwise insert a space between every character.
movies_tags['tags'] = movies_tags['tags'].apply(
    lambda tokens: (tokens if isinstance(tokens, str) else " ".join(tokens)).lower()
)

In [37]:
# Spot-check the combined tag string built for each movie.
movies_tags.head(3)

Unnamed: 0,id,title,tags
0,19995,Avatar,"22nd century, paraplegic marine dispatched moo..."
1,285,Pirates of the Caribbean: At World's End,"captain barbossa, long believed dead, come bac..."
2,206647,Spectre,cryptic message bond’s past sends trail uncove...


### Building the model

In [38]:
# Bag-of-words vectorizer with scikit-learn's default settings.
vectorizer = CountVectorizer()

In [39]:
# Learn the vocabulary from the tag strings and build the sparse term-count matrix
# (one row per movie, one column per vocabulary term).
feature_vectors = vectorizer.fit_transform(movies_tags['tags'])

In [40]:
# Prints the sparse matrix as "(row, column)  count" entries.
print(feature_vectors)

  (0, 251)	1
  (0, 6812)	2
  (0, 29036)	1
  (0, 24403)	2
  (0, 10915)	1
  (0, 26331)	1
  (0, 28935)	2
  (0, 40465)	1
  (0, 25994)	1
  (0, 3681)	1
  (0, 39407)	1
  (0, 14370)	1
  (0, 28403)	1
  (0, 30996)	1
  (0, 1275)	3
  (0, 7578)	1
  (0, 651)	1
  (0, 828)	1
  (0, 13482)	1
  (0, 34438)	1
  (0, 13869)	1
  (0, 9306)	1
  (0, 7623)	1
  (0, 14955)	1
  (0, 36453)	4
  :	:
  (4802, 30826)	1
  (4802, 16328)	1
  (4802, 23523)	1
  (4802, 8676)	1
  (4802, 20003)	1
  (4802, 5467)	1
  (4802, 3403)	3
  (4802, 22467)	1
  (4802, 12213)	1
  (4802, 13989)	1
  (4802, 14881)	1
  (4802, 2482)	1
  (4802, 9903)	1
  (4802, 34126)	1
  (4802, 13706)	1
  (4802, 2461)	1
  (4802, 11084)	1
  (4802, 11710)	1
  (4802, 262)	1
  (4802, 33574)	1
  (4802, 16105)	1
  (4802, 6104)	1
  (4802, 9177)	1
  (4802, 42370)	1
  (4802, 17649)	4


### Calculating the similarity score

In [41]:
# Pairwise cosine similarity between all movie vectors: entry [i, j] compares movie i with movie j.
similarity = cosine_similarity(feature_vectors)

In [42]:
# Prints the (truncated) similarity matrix; the diagonal is each movie compared with itself.
print(similarity)

[[1.         0.04913743 0.02569458 ... 0.04374535 0.02946562 0.02317587]
 [0.04913743 1.         0.05976143 ... 0.05087231 0.02284409 0.04491944]
 [0.02569458 0.05976143 1.         ... 0.03192212 0.04778185 0.00939558]
 ...
 [0.04374535 0.05087231 0.03192212 ... 1.         0.04880953 0.04798827]
 [0.02946562 0.02284409 0.04778185 ... 0.04880953 1.         0.05387255]
 [0.02317587 0.04491944 0.00939558 ... 0.04798827 0.05387255 1.        ]]


### Making recommendations

In [43]:
def recommend(movie_title, top_n=10, catalog=None, scores=None):
    """Return details of the movies most similar to the closest title match.

    ``movie_title`` is fuzzy-matched against the catalog titles with
    ``difflib.get_close_matches``; the best match is the query movie. All other
    movies are ranked by their similarity score to it, highest first.

    Parameters
    ----------
    movie_title : str
        Free-text title to look up.
    top_n : int, optional
        Maximum number of recommendations (default 10, the previous fixed value).
    catalog : pandas.DataFrame, optional
        Movie table with 'title', 'vote_average', 'overview', 'release_date',
        'genres', 'director' and 'cast' columns. Defaults to the notebook-level
        ``export_movies``.
    scores : 2-D array-like, optional
        Similarity matrix whose row/column ``i`` refers to the ``i``-th row of
        ``catalog`` by position. Defaults to the notebook-level ``similarity``.

    Returns
    -------
    list of list
        Seven parallel lists, in this order: titles, ratings (vote_average),
        plots (overview), release dates, genres, directors, casts.

    Raises
    ------
    IndexError
        If no catalog title is close enough to ``movie_title``.
    """
    if catalog is None:
        catalog = export_movies
    if scores is None:
        scores = similarity

    titles = catalog['title'].tolist()
    matches = difflib.get_close_matches(movie_title, titles)
    if not matches:
        # Same exception type as indexing the empty match list, which callers already catch.
        raise IndexError("no title close to {!r}".format(movie_title))

    # Position in the title list, not the DataFrame index label: the similarity
    # matrix is addressed by position.
    query_pos = titles.index(matches[0])

    ranked = sorted(enumerate(scores[query_pos]), key=lambda pair: pair[1], reverse=True)
    # Exclude the query explicitly instead of assuming it is ranked first; with
    # tied scores it may not be, and would then be recommended to itself.
    picks = [pos for pos, _ in ranked if pos != query_pos][:top_n]

    rows = [catalog.iloc[pos] for pos in picks]
    fields = ('title', 'vote_average', 'overview', 'release_date',
              'genres', 'director', 'cast')
    return [[row[field] for row in rows] for field in fields]

In [44]:
# Ask for a title, then show only the recommended titles (first list returned by recommend).
search_movie = input("Search for movies similar to >> ")
recommendations_details = []
try:
    recommendations_details = recommend(search_movie)
except IndexError:
    # recommend() raises IndexError when no title is close enough to the query.
    print("No results found")
else:
    print(recommendations_details[0])

Search for movies similar to >> Interstellar
['Moonraker', 'Armageddon', 'Silent Running', 'Planet of the Apes', 'Zathura: A Space Adventure', 'Event Horizon', 'Lost in Space', 'Inception', 'Mission to Mars', 'Star Trek']


### Saving model

In [None]:
# vote_count_df.to_pickle("model/vote_count_df.pkl")
# movies.to_pickle("model/movies.pkl")

In [None]:
# pickle.dump(similarity, open("model/similarity.pkl", "wb"))