# **Movie Recommendation**

**Importing Dependencies**

In [1]:
import pandas as pd
import difflib
import ast
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

**Loading Dataset and Description**

In [2]:
df1 = pd.read_csv("/content/drive/MyDrive/Datasets/movies_metadata.csv", low_memory=False)
print(df1.shape)

(45466, 24)


In [3]:
df1.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


In [5]:
df2 = pd.read_csv("/content/drive/MyDrive/Datasets/movies_credits.csv")
print(df2.shape)

(45476, 3)


In [6]:
df2.head()

Unnamed: 0,cast,crew,id
0,"[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de...",862
1,"[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de...",8844
2,"[{'cast_id': 2, 'character': 'Max Goldman', 'c...","[{'credit_id': '52fe466a9251416c75077a89', 'de...",15602
3,"[{'cast_id': 1, 'character': ""Savannah 'Vannah...","[{'credit_id': '52fe44779251416c91011acb', 'de...",31357
4,"[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039ed7', 'de...",11862


**Combining both DataFrames**

In [7]:
df = pd.concat([df1, df2['cast']], axis=1)
df.shape

(45476, 25)

In [8]:
df.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,cast
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,"[{'cast_id': 14, 'character': 'Woody (voice)',..."
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,"[{'cast_id': 1, 'character': 'Alan Parrish', '..."
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,"[{'cast_id': 2, 'character': 'Max Goldman', 'c..."
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0,"[{'cast_id': 1, 'character': ""Savannah 'Vannah..."
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0,"[{'cast_id': 1, 'character': 'George Banks', '..."


In [9]:
df.columns

Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id',
       'imdb_id', 'original_language', 'original_title', 'overview',
       'popularity', 'poster_path', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'video',
       'vote_average', 'vote_count', 'cast'],
      dtype='object')

In [10]:
drop_columns = ['adult', 'belongs_to_collection', 'budget', 'homepage', 'imdb_id', 'original_language', 'overview',
                'popularity', 'poster_path', 'production_countries', 'release_date', 'revenue', 'runtime', 'spoken_languages', 'status',
                'video', 'vote_average', 'vote_count']

In [11]:
df = df.drop(drop_columns, axis=1)
df.shape

(45476, 7)

In [12]:
df.columns

Index(['genres', 'id', 'original_title', 'production_companies', 'tagline',
       'title', 'cast'],
      dtype='object')

In [13]:
df.head()

Unnamed: 0,genres,id,original_title,production_companies,tagline,title,cast
0,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",862,Toy Story,"[{'name': 'Pixar Animation Studios', 'id': 3}]",,Toy Story,"[{'cast_id': 14, 'character': 'Woody (voice)',..."
1,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",8844,Jumanji,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...",Roll the dice and unleash the excitement!,Jumanji,"[{'cast_id': 1, 'character': 'Alan Parrish', '..."
2,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",15602,Grumpier Old Men,"[{'name': 'Warner Bros.', 'id': 6194}, {'name'...",Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,"[{'cast_id': 2, 'character': 'Max Goldman', 'c..."
3,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",31357,Waiting to Exhale,[{'name': 'Twentieth Century Fox Film Corporat...,Friends are the people who let you be yourself...,Waiting to Exhale,"[{'cast_id': 1, 'character': ""Savannah 'Vannah..."
4,"[{'id': 35, 'name': 'Comedy'}]",11862,Father of the Bride Part II,"[{'name': 'Sandollar Productions', 'id': 5842}...",Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,"[{'cast_id': 1, 'character': 'George Banks', '..."


**Handling Missing Values**

In [14]:
features = ['genres', 'original_title', 'production_companies', 'tagline', 'cast']
features

['genres', 'original_title', 'production_companies', 'tagline', 'cast']

In [15]:
df.isnull().sum()

Unnamed: 0,0
genres,10
id,10
original_title,10
production_companies,13
tagline,25064
title,16
cast,0


In [16]:
for feature in features:
    df[feature] = df[feature].fillna('')

In [17]:
df.isnull().sum()

Unnamed: 0,0
genres,0
id,10
original_title,0
production_companies,0
tagline,0
title,16
cast,0


In [18]:
df = df.dropna()
df.isnull().sum()

Unnamed: 0,0
genres,0
id,0
original_title,0
production_companies,0
tagline,0
title,0
cast,0


In [19]:
df.shape

(45460, 7)

**Dataset Cleaning and Preprocessing**

In [20]:
df.insert(0, "index", df.index)

In [21]:
df.head()

Unnamed: 0,index,genres,id,original_title,production_companies,tagline,title,cast
0,0,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",862,Toy Story,"[{'name': 'Pixar Animation Studios', 'id': 3}]",,Toy Story,"[{'cast_id': 14, 'character': 'Woody (voice)',..."
1,1,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",8844,Jumanji,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...",Roll the dice and unleash the excitement!,Jumanji,"[{'cast_id': 1, 'character': 'Alan Parrish', '..."
2,2,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",15602,Grumpier Old Men,"[{'name': 'Warner Bros.', 'id': 6194}, {'name'...",Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,"[{'cast_id': 2, 'character': 'Max Goldman', 'c..."
3,3,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",31357,Waiting to Exhale,[{'name': 'Twentieth Century Fox Film Corporat...,Friends are the people who let you be yourself...,Waiting to Exhale,"[{'cast_id': 1, 'character': ""Savannah 'Vannah..."
4,4,"[{'id': 35, 'name': 'Comedy'}]",11862,Father of the Bride Part II,"[{'name': 'Sandollar Productions', 'id': 5842}...",Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,"[{'cast_id': 1, 'character': 'George Banks', '..."


In [22]:
df["genres"] = df["genres"].apply(
    lambda x: " ".join([d["name"] for d in ast.literal_eval(x)])
)

In [23]:
df["production_companies"] = df["production_companies"].apply(
    lambda x: " ".join([d["name"] for d in ast.literal_eval(str(x))]) if str(x).startswith("[") else ""
)

In [24]:
df["cast"] = df["cast"].apply(
    lambda x: " ".join([d["character"] for d in ast.literal_eval(x)])
)

In [25]:
df.head()

Unnamed: 0,index,genres,id,original_title,production_companies,tagline,title,cast
0,0,Animation Comedy Family,862,Toy Story,Pixar Animation Studios,,Toy Story,Woody (voice) Buzz Lightyear (voice) Mr. Potat...
1,1,Adventure Fantasy Family,8844,Jumanji,TriStar Pictures Teitler Film Interscope Commu...,Roll the dice and unleash the excitement!,Jumanji,Alan Parrish Samuel Alan Parrish / Van Pelt Ju...
2,2,Romance Comedy,15602,Grumpier Old Men,Warner Bros. Lancaster Gate,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,Max Goldman John Gustafson Ariel Gustafson Mar...
3,3,Comedy Drama Romance,31357,Waiting to Exhale,Twentieth Century Fox Film Corporation,Friends are the people who let you be yourself...,Waiting to Exhale,Savannah 'Vannah' Jackson Bernadine 'Bernie' H...
4,4,Comedy,11862,Father of the Bride Part II,Sandollar Productions Touchstone Pictures,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,George Banks Nina Banks Franck Eggelhoffer Ann...


**Combining features**

In [26]:
combined_features = ''
for feature in features:
    combined_features += df[feature] + ' '

combined_features

Unnamed: 0,genres
0,Animation Comedy Family Toy Story Pixar Animat...
1,Adventure Fantasy Family Jumanji TriStar Pictu...
2,Romance Comedy Grumpier Old Men Warner Bros. L...
3,Comedy Drama Romance Waiting to Exhale Twentie...
4,Comedy Father of the Bride Part II Sandollar P...
...,...
45461,Drama Family رگ خواب Rising and falling betwe...
45462,Drama Siglo ng Pagluluwal Sine Olivia
45463,Action Drama Thriller Betrayal American World ...
45464,Satana likuyushchiy Yermoliev Lauren Diego C...


**Converting Textual Data into Numerical features**

In [27]:
vectorizer = TfidfVectorizer()

In [28]:
features_vector = vectorizer.fit_transform(combined_features)

**Cosine Similarity Score**

In [29]:
similarity = cosine_similarity(features_vector)

In [30]:
print(similarity.shape)

(45460, 45460)


In [31]:
movie_name = input("Enter Movie name: ")

Enter Movie name: Krrish 3


In [32]:
list_of_all_movie_titles = list(df['title'])

In [33]:
close_match = difflib.get_close_matches(movie_name, list_of_all_movie_titles)[0]
close_match

'Krrish 3'

In [34]:
index_of_movie = df[df.title == close_match]['index'].values[0]
print(index_of_movie)

22080


In [35]:
similarity_score = list(enumerate(similarity[index_of_movie]))

In [36]:
sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True)

In [37]:
print("Top 10 movies recommended for you: \n")
i = 0;
for movie in sorted_similar_movies:
    if (i >= 10):
        break
    index = movie[0]
    title_of_movie = df[df.index == index]['title'].values[0]
    print(f"{i+1}.) {title_of_movie}")
    i += 1

Top 10 movies recommended for you: 

1.) Krrish 3
2.) Ethel & Ernest
3.) Earth
4.) Convict
5.) Deep Blue
6.) Freaks of Nature
7.) The War
8.) Why We Fight: The Battle of Britain
9.) Kaakha Kaakha
10.) The Manzanar Fishing Club


**Making a Recommendation System**

In [40]:
def recommend_movies(movie_name, cosine_sim=similarity, n_recommendations=5):
    movie_index = df[df['title'].str.lower() == movie_name.lower()].index

    if len(movie_index) == 0:
        return "Movie not found in the dataset!"
    movie_index = movie_index[0]

    similarity_scores = list(enumerate(cosine_sim[movie_index]))
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    similarity_scores = similarity_scores[1:n_recommendations+1]

    movie_indices = [i[0] for i in similarity_scores]
    recommended_movies = df[['title']].iloc[movie_indices].reset_index(drop=True)
    return recommended_movies

In [41]:
movie_name = input("Enter Movie name: ")
n_recommendations = int(input("Enter number of recommendations: "))
movies = recommend_movies(movie_name, n_recommendations)
print(movies)

Enter Movie name: Toy Story 2
                               title
0                          Toy Story
1                        Toy Story 3
2                      The Red Beret
3                       Flying Padre
4  Cloudy with a Chance of Meatballs
