In [1]:
import pandas as pd     #for data manipulation
import numpy as np      #for numerical calculation
from ast import literal_eval #for inspecting the datatype
from sklearn.feature_extraction.text import TfidfVectorizer #feature extraction 
from sklearn.metrics.pairwise import linear_kernel #to build cosine distance
import warnings      #ignoring warnings
warnings.filterwarnings('ignore')


## load dataset

In [2]:
movie= pd.read_csv(r'movies_metadata.csv')

In [3]:
movie.head() #printing first 5 rows

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


## preprocessing

In [4]:
#filling null values
movie['genres']=movie['genres'].fillna('[]')

In [8]:
#converting values into list
movie['genres']=movie['genres'].apply(literal_eval)


ValueError: malformed node or string: [{'id': 16, 'name': 'Animation'}, {'id': 35, 'name': 'Comedy'}, {'id': 10751, 'name': 'Family'}]

In [9]:
#extracting only genres name
movie['genres']=movie['genres'].apply(lambda x: [i['name'] for i in x] if isinstance(x,list) else [])
movie['genres']

0         [Animation, Comedy, Family]
1        [Adventure, Fantasy, Family]
2                   [Romance, Comedy]
3            [Comedy, Drama, Romance]
4                            [Comedy]
                     ...             
45461                 [Drama, Family]
45462                         [Drama]
45463       [Action, Drama, Thriller]
45464                              []
45465                              []
Name: genres, Length: 45466, dtype: object

In [15]:
movie[movie['vote_count'].notnull()]['vote_count']

0        5415.0
1        2413.0
2          92.0
3          34.0
4         173.0
          ...  
45461       1.0
45462       3.0
45463       6.0
45464       0.0
45465       0.0
Name: vote_count, Length: 45460, dtype: float64

In [16]:
#changing the votecount into int type
votecount= movie[movie['vote_count'].notnull()]['vote_count'].astype(int)

In [17]:
votecount.head()

0    5415
1    2413
2      92
3      34
4     173
Name: vote_count, dtype: int32

In [18]:
#changing vote avg into int type

vote_average=movie[movie['vote_average'].notnull()]['vote_average'].astype(int)
vote_average.head()

0    7
1    6
2    6
3    6
4    5
Name: vote_average, dtype: int32

In [19]:
#copying the original dataset
top_movies=movie.copy()

In [20]:
#sorting on the basis of top 250 vote_average
top_movies1=top_movies.sort_values('vote_average',ascending=False).head(250)
top_movies1.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
21642,False,,0,[Documentary],,320849,tt0886500,en,Ice Age Columbus: Who Were the First Americans?,Firmly rooted in the latest scientific discove...,...,2005-01-01,0.0,0.0,[],Released,,Ice Age Columbus: Who Were the First Americans?,False,10.0,1.0
15710,False,,0,[Documentary],,96451,tt1587373,en,If God Is Willing and da Creek Don't Rise,"In 2006, director Spike Lee created an astonis...",...,2010-08-23,0.0,255.0,[],Released,,If God Is Willing and da Creek Don't Rise,False,10.0,1.0
22396,False,,0,[Documentary],,72123,tt1341746,en,Meat the Truth,Meat the Truth is a high-profile documentary w...,...,2008-10-03,0.0,74.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Meat the Truth,False,10.0,1.0
22395,False,,0,[Documentary],http://www.marvinhamlischmovie.com/,230864,tt3011874,en,Marvin Hamlisch: What He Did For Love,When Marvin Hamlisch passed away in August 201...,...,2013-10-12,0.0,82.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Marvin Hamlisch: What He Did For Love,False,10.0,1.0
35343,False,,300000,"[Comedy, Documentary, Music, TV Movie]",,140595,tt0308213,en,Elaine Stritch: At Liberty,Judy at the Palace. Sinatra at Carnegie Hall. ...,...,2002-01-01,0.0,140.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Legendary performances come along so rarely.,Elaine Stritch: At Liberty,False,10.0,1.0


### the above dataset has no minimum vote requirements

In [22]:
#min requirement vote_count>1000

top_movies2= top_movies[top_movies['vote_count']>1000]

In [23]:
#sorting values
top_movies2=top_movies2.sort_values('vote_average',ascending=False)

In [24]:
top_movies2.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
314,False,,25000000,"[Drama, Crime]",,278,tt0111161,en,The Shawshank Redemption,Framed in the 1940s for the double murder of h...,...,1994-09-23,28341469.0,142.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Fear can hold you prisoner. Hope can set you f...,The Shawshank Redemption,False,8.5,8358.0
40251,False,,0,"[Romance, Animation, Drama]",https://www.funimationfilms.com/movie/yourname/,372058,tt5311514,ja,君の名は。,High schoolers Mitsuha and Taki are complete s...,...,2016-08-26,355298270.0,106.0,"[{'iso_639_1': 'ja', 'name': '日本語'}]",Released,,Your Name.,False,8.5,1030.0
834,False,"{'id': 230, 'name': 'The Godfather Collection'...",6000000,"[Drama, Crime]",http://www.thegodfather.com/,238,tt0068646,en,The Godfather,"Spanning the years 1945 to 1955, a chronicle o...",...,1972-03-14,245066411.0,175.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,An offer you can't refuse.,The Godfather,False,8.5,6024.0
1152,False,,3000000,[Drama],,510,tt0073486,en,One Flew Over the Cuckoo's Nest,While serving time for insanity at a state men...,...,1975-11-18,108981275.0,133.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,"If he's crazy, what does that make you?",One Flew Over the Cuckoo's Nest,False,8.3,3001.0
1176,False,"{'id': 119674, 'name': 'Psycho Collection', 'p...",806948,"[Drama, Horror, Thriller]",,539,tt0054215,en,Psycho,When larcenous real estate clerk Marion Crane ...,...,1960-06-16,32000000.0,109.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The master of suspense moves his cameras into ...,Psycho,False,8.3,2405.0


In [25]:
# mean of the vote_average
C=vote_average.mean()
C


5.244896612406511

In [27]:
#taking 95 quantile of vote_count

m=votecount.quantile(0.95)
m

434.0

In [29]:
#extracting the yea column

top_movies['release_date'].head()

0    1995-10-30
1    1995-12-15
2    1995-12-22
3    1995-12-22
4    1995-02-10
Name: release_date, dtype: object

In [31]:
top_movies['year']=pd.to_datetime(top_movies['release_date'],errors='coerce').apply(lambda x: str(x).split('-')[0] if x!=np.nan else np.nan)

In [32]:
top_movies.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,year
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[Animation, Comedy, Family]",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,1995
1,False,,65000000,"[Adventure, Fantasy, Family]",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,1995
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[Romance, Comedy]",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,1995
3,False,,16000000,"[Comedy, Drama, Romance]",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0,1995
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,[Comedy],,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0,1995


## Top Movies

In [33]:
# building new dataset
top_movies3=top_movies[(top_movies['vote_count']>=m) & (top_movies['vote_count'].notnull()) & (top_movies['vote_average'].notnull())][['title','year','vote_count','vote_average','popularity','genres']]

In [35]:
top_movies3.head()

Unnamed: 0,title,year,vote_count,vote_average,popularity,genres
0,Toy Story,1995,5415.0,7.7,21.9469,"[Animation, Comedy, Family]"
1,Jumanji,1995,2413.0,6.9,17.0155,"[Adventure, Fantasy, Family]"
5,Heat,1995,1886.0,7.7,17.9249,"[Action, Crime, Drama, Thriller]"
9,GoldenEye,1995,1194.0,6.6,14.686,"[Adventure, Action, Thriller]"
15,Casino,1995,1343.0,7.8,10.1374,"[Drama, Crime]"


In [36]:

#changing datatypes
top_movies3['vote_count']=top_movies3['vote_count'].astype('int')
top_movies3['vote_average']=top_movies3['vote_average'].astype('int')

In [37]:
top_movies3.shape

(2274, 6)

In [38]:
#defining function or weightrate

def weighrate(x):
    v=x['vote_count']
    R=x['vote_average']
    return (v/(v+m)*R)+(m/(m+v)*C)

In [40]:
#applying above function
top_movies3['weighted_rate']=top_movies3.apply(weighrate,axis=1)

In [41]:
#Printing the dataset
top_movies3.head()

Unnamed: 0,title,year,vote_count,vote_average,popularity,genres,weighted_rate
0,Toy Story,1995,5415,7,21.9469,"[Animation, Comedy, Family]",6.86977
1,Jumanji,1995,2413,6,17.0155,"[Adventure, Fantasy, Family]",5.884891
5,Heat,1995,1886,7,17.9249,"[Action, Crime, Drama, Thriller]",6.671675
9,GoldenEye,1995,1194,6,14.686,"[Adventure, Action, Thriller]",5.798701
15,Casino,1995,1343,7,10.1374,"[Drama, Crime]",6.571348


In [42]:
#sorting values
top_movies3=top_movies3.sort_values('weighted_rate',ascending=False)

#printing again
top_movies3.head()

Unnamed: 0,title,year,vote_count,vote_average,popularity,genres,weighted_rate
15480,Inception,2010,14075,8,29.1081,"[Action, Thriller, Science Fiction, Mystery, A...",7.917588
12481,The Dark Knight,2008,12269,8,123.167,"[Drama, Action, Crime, Thriller]",7.905871
22879,Interstellar,2014,11187,8,32.2135,"[Adventure, Drama, Science Fiction]",7.897107
2843,Fight Club,1999,9678,8,63.8696,[Drama],7.881753
4863,The Lord of the Rings: The Fellowship of the Ring,2001,8892,8,32.0707,"[Adventure, Fantasy, Action]",7.871787


In [45]:
#building level based on various genres

genre_tm=top_movies.apply(lambda x: pd.Series(x['genres']),axis=1).stack().reset_index(level=1,drop=True)
genre_tm.name='genre'

In [46]:
genre_top_movies=top_movies.drop('genres',axis=1).join(genre_tm)
genre_top_movies.head()

Unnamed: 0,adult,belongs_to_collection,budget,homepage,id,imdb_id,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,year,genre
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.9469,...,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,1995,Animation
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.9469,...,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,1995,Comedy
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.9469,...,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,1995,Family
1,False,,65000000,,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.0155,...,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,1995,Adventure
1,False,,65000000,,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.0155,...,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,1995,Fantasy


In [47]:
# defining functions for various genre
def build_chart(genre,percentile=0.85):
    df=genre_top_movies[genre_top_movies['genre']==genre]
    vote_count=df[df['vote_count'].notnull()]['vote_count'].astype('int')
    vote_average=df[df['vote_average'].notnull()]['vote_average'].astype('int')
    C=vote_average.mean()
    m=vote_count.quantile(percentile)
    qualified=df[(df['vote_count']>=m) & (df['vote_count'].notnull()) & (df['vote_average'].notnull())][['title','year','vote_count','vote_average','popularity']]
    qualified['vote_count']=qualified['vote_count'].astype('int')
    qualified['vote_average']=qualified['vote_average'].astype('int')
    qualified['wr']=qualified.apply(weighrate,axis=1)
    qualified=qualified.sort_values('wr',ascending=False)
    return qualified

## This function will recommend top 10 movies based on genre

In [48]:
#top 10 romance movies
build_chart('Romance').head(10)

Unnamed: 0,title,year,vote_count,vote_average,popularity,wr
351,Forrest Gump,1994,8147,8,48.3072,7.860656
10309,Dilwale Dulhania Le Jayenge,1995,661,9,34.457,7.511676
876,Vertigo,1958,1162,8,18.2082,7.250805
40251,Your Name.,2016,1030,8,34.461252,7.183255
883,Some Like It Hot,1959,835,8,11.8451,7.05775
1132,Cinema Paradiso,1988,834,8,14.177,7.057007
19901,Paperman,2012,734,8,7.19863,6.976272
37863,Sing Street,2016,669,8,10.672862,6.915943
1639,Titanic,1997,7770,7,26.8891,6.907153
19731,Silver Linings Playbook,2012,4840,7,14.4881,6.855572


In [50]:
#top 10 action movies
build_chart('Action').head(10)

Unnamed: 0,title,year,vote_count,vote_average,popularity,wr
15480,Inception,2010,14075,8,29.1081,7.917588
12481,The Dark Knight,2008,12269,8,123.167,7.905871
4863,The Lord of the Rings: The Fellowship of the Ring,2001,8892,8,32.0707,7.871787
7000,The Lord of the Rings: The Return of the King,2003,8226,8,29.3244,7.861927
5814,The Lord of the Rings: The Two Towers,2002,7641,8,29.4235,7.851924
256,Star Wars,1977,6778,8,42.1497,7.834205
1154,The Empire Strikes Back,1980,5998,8,19.471,7.814099
4135,Scarface,1983,3017,8,11.2997,7.653516
9430,Oldboy,2003,2000,8,10.6169,7.508745
1910,Seven Samurai,1954,892,8,15.0178,7.098254


In [51]:
#top 10 comedy movies
build_chart('Comedy').head(10)

Unnamed: 0,title,year,vote_count,vote_average,popularity,wr
351,Forrest Gump,1994,8147,8,48.3072,7.860656
1225,Back to the Future,1985,6239,8,25.7785,7.820813
18465,The Intouchables,2011,5410,8,16.0869,7.795394
22841,The Grand Budapest Hotel,2014,4644,8,14.442,7.76453
2211,Life Is Beautiful,1997,3643,8,39.395,7.706717
10309,Dilwale Dulhania Le Jayenge,1995,661,9,34.457,7.511676
732,Dr. Strangelove or: How I Learned to Stop Worr...,1964,1472,8,9.80398,7.372657
3342,Modern Times,1936,881,8,8.15956,7.090711
883,Some Like It Hot,1959,835,8,11.8451,7.05775
1236,The Great Dictator,1940,756,8,9.24175,6.995198


## Movies based on Description

In [52]:
#loading another dataset 
link_small=pd.read_csv(r'links_small.csv')

#printing first five rows
link_small.head()

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0


In [53]:
#checking the info of the dataset
link_small.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9125 entries, 0 to 9124
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   movieId  9125 non-null   int64  
 1   imdbId   9125 non-null   int64  
 2   tmdbId   9112 non-null   float64
dtypes: float64(1), int64(2)
memory usage: 214.0 KB


In [54]:
# changing the data type of tmdbId
link_small=link_small[link_small['tmdbId'].notnull()]['tmdbId'].astype('int')

In [55]:
#checking the shape of top movies
len(top_movies)

45466

In [64]:
incorrect=[]

for i,j in enumerate(top_movies['id']):
    try:
        s=int(j)
    except:
        incorrect.append(i)
        print(i)

19730
29503
35587


In [65]:
top_movies['id'][19730]

'1997-08-20'

#These 3 indices have incorrect format,dropping it 

In [66]:
#dropping the incorrect rows
top_movies.drop(labels=incorrect,inplace=True)

In [67]:
#changing the type of id to int
top_movies['id']=top_movies['id'].astype('int')

In [68]:
#creating new column using id available in link_small and top_movies['id']
top_movies4=top_movies[top_movies['id'].isin(link_small)]

In [69]:
#checking the shape of new dataset formed
top_movies4.shape

(9099, 25)

In [70]:
#checking the dataset
top_movies4.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,year
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[Animation, Comedy, Family]",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,1995
1,False,,65000000,"[Adventure, Fantasy, Family]",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,1995
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[Romance, Comedy]",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,1995
3,False,,16000000,"[Comedy, Drama, Romance]",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0,1995
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,[Comedy],,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0,1995


In [71]:
top_movies4.columns

Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id',
       'imdb_id', 'original_language', 'original_title', 'overview',
       'popularity', 'poster_path', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'video',
       'vote_average', 'vote_count', 'year'],
      dtype='object')

In [72]:
#filling the null values
top_movies4['overview']=top_movies4['overview'].fillna('')
top_movies4['tagline']=top_movies4['tagline'].fillna('')

#creating a new column using overview and tagline
top_movies4['description']=top_movies4['overview']+top_movies4['tagline']

In [73]:
#extracting the feature using Tfidvectorization
tf=TfidfVectorizer(ngram_range=(1,2),analyzer='word',stop_words='english')

#creating the tFid matrix
tfidf_matrix=tf.fit_transform(top_movies4['description'])

In [74]:
#checking the shape of the matrix
tfidf_matrix.shape

(9099, 268124)

In [75]:

#creating the cosine distance similarity variable
cosine_sim=linear_kernel(tfidf_matrix,tfidf_matrix)

In [76]:
#checking on the variable of cosime similarity
cosine_sim[0]

array([1.        , 0.00680476, 0.        , ..., 0.        , 0.00344913,
       0.        ])

In [77]:
#reseting the index of the dataset
top_movies4=top_movies4.reset_index()

#creating new variable names title and indices for the below function (get_recommendation)
titles=top_movies4['title']
indices=pd.Series(top_movies4.index,index=top_movies4['title'])

In [78]:
#creating the get_recommendation function
def get_recommendation(title):
    idx=indices[title]
    sim_scores=list(enumerate(cosine_sim[idx]))
    sim_scores=sorted(sim_scores,key=lambda x:x[1],reverse=True)
    sim_scores=sim_scores[1:31]
    movie_indices=[i[0] for i in sim_scores]
    return titles.iloc[movie_indices]

In [79]:
#top 10 movies based on GoldenEye
get_recommendation('GoldenEye').head(10)

5172                          Octopussy
2398                   Live and Let Die
4309                      Casino Royale
5175              Never Say Never Again
2396                 For Your Eyes Only
1883                   A View to a Kill
2901        The Man with the Golden Gun
5171                You Only Live Twice
2397                    Licence to Kill
2896    On Her Majesty's Secret Service
Name: title, dtype: object

In [80]:
#top 10 movies based on Apartment
get_recommendation('The Apartment').head(10)

1925                          Nothing in Common
5242                     Shadow of the Thin Man
5769                          The Holy Mountain
3904                                    48 Hrs.
3221                         The House of Mirth
5358                               Safety Last!
1590                     The Barefoot Executive
1961                             Running Scared
1003              The Day the Earth Stood Still
75      Things to Do in Denver When You're Dead
Name: title, dtype: object

In [81]:

#top 10 movies based on GodFather
get_recommendation('The Godfather').head(10)

973      The Godfather: Part II
8387                 The Family
3509                       Made
4196         Johnny Dangerously
29               Shanghai Triad
5667                       Fury
2412             American Movie
1582    The Godfather: Part III
4221                    8 Women
2159              Summer of Sam
Name: title, dtype: object

In [82]:
#top 10 movies based on Dark Knight
get_recommendation('The Dark Knight').head(10)

7931                      The Dark Knight Rises
132                              Batman Forever
1113                             Batman Returns
8227    Batman: The Dark Knight Returns, Part 2
7565                 Batman: Under the Red Hood
524                                      Batman
7901                           Batman: Year One
2579               Batman: Mask of the Phantasm
2696                                        JFK
8165    Batman: The Dark Knight Returns, Part 1
Name: title, dtype: object

In [83]:
#top 10 movies based on Toy Story
get_recommendation('Toy Story').head(10)

2502               Toy Story 2
7535               Toy Story 3
6193    The 40 Year Old Virgin
2547           Man on the Moon
6627              Factory Girl
4702    What's Up, Tiger Lily?
889      Rebel Without a Cause
6554    For Your Consideration
4988          Rivers and Tides
1599                 Condorman
Name: title, dtype: object