# Movie Recommendation Engine

#### Import the required libraries

In [1]:
import os
from ast import literal_eval
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity

#### Load the dataset

In [2]:
# Directory that holds the two TMDB CSV files. Set the TMDB_DATA_DIR
# environment variable to read the dataset from somewhere else; when it is
# unset the original location is used, so existing behaviour is unchanged.
DATA_DIR = Path(os.environ.get(
    "TMDB_DATA_DIR",
    r"C:\Users\krish\OneDrive\Desktop\Projects\Movie Recommendation Engine\Dataset",
))

movies_df = pd.read_csv(DATA_DIR / "tmdb_5000_movies.csv")
credits_df = pd.read_csv(DATA_DIR / "tmdb_5000_credits.csv")

In [3]:
# Preview the first rows of the movies table to check that it loaded as expected.
movies_df.head()

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...","[{""iso_3166_1"": ""GB"", ""name"": ""United Kingdom""...",2015-10-26,880674609,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466
3,250000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",http://www.thedarkknightrises.com/,49026,"[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...",en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-07-16,1084939099,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106
4,260000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://movies.disney.com/john-carter,49529,"[{""id"": 818, ""name"": ""based on novel""}, {""id"":...",en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-03-07,284139100,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124


In [4]:
# Preview the first rows of the credits table to check that it loaded as expected.
credits_df.head()

Unnamed: 0,movie_id,title,cast,crew
0,19995,Avatar,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,285,Pirates of the Caribbean: At World's End,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,206647,Spectre,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,49026,The Dark Knight Rises,"[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,49529,John Carter,"[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."


In [5]:
# Rename by column *name* rather than by position, so a change in the CSV's
# column order cannot silently mislabel the data.
#   movie_id -> id      : the join key used by the movies table
#   title    -> tittle  : kept under a different name so the merge does not
#                         collide with movies_df["title"] (which would produce
#                         title_x / title_y and break later title lookups)
credits_df = credits_df.rename(columns={"movie_id": "id", "title": "tittle"})

# Merge only once: re-running this cell would otherwise join the credits a
# second time and leave suffixed cast_x / cast_y columns behind.
if "cast" not in movies_df.columns:
    movies_df = movies_df.merge(credits_df, on="id")

# Missing plot summaries become empty strings so text vectorisers accept them.
movies_df["overview"] = movies_df["overview"].fillna("")

In [6]:
# C: mean of vote_average over every movie in the table.
C = movies_df["vote_average"].mean()
# m: 90th percentile of vote_count, used below as the minimum-votes cut-off.
m = movies_df["vote_count"].quantile(0.9)

print("C:", C)
print("m:", m)

# Keep only the movies whose vote count reaches the cut-off, as an independent copy.
has_enough_votes = movies_df["vote_count"] >= m
new_movies_df = movies_df.loc[has_enough_votes].copy()
print(new_movies_df.shape)

# Weighted rating formula
def weighted_rating(x, C=C, m=m):
    """Blend a movie's own average rating with the overall mean rating.

    The result is ``v/(v+m) * R + m/(v+m) * C``, where ``v`` and ``R`` are the
    movie's vote count and vote average.

    Parameters
    ----------
    x : mapping-like
        Must support ``x["vote_count"]`` and ``x["vote_average"]``.
    C : number
        Mean rating; defaults to the module-level ``C`` as it was when this
        function was defined.
    m : number
        Vote-count threshold; defaults to the module-level ``m`` as it was
        when this function was defined.

    Returns
    -------
    The weighted rating for ``x``.
    """
    votes = x["vote_count"]
    rating = x["vote_average"]
    total = votes + m
    own_weight = votes / total
    mean_weight = m / total
    return (own_weight * rating) + (mean_weight * C)

C: 6.092171559442016
m: 1838.4000000000015
(481, 23)


#### Top 10 movies by popularity

In [7]:
# Rank every movie from most to least popular.
popularity_columns = ["title", "popularity", "vote_count", "vote_average"]
top_popular_movies_df = movies_df.sort_values(by="popularity", ascending=False).copy()

print("Top 10 Movies by Popularity:")
top_ten_popular = top_popular_movies_df[popularity_columns].head(10)
print(top_ten_popular)

Top 10 Movies by Popularity:
                                                 title  popularity  \
546                                            Minions  875.581305   
95                                        Interstellar  724.247784   
788                                           Deadpool  514.569956   
94                             Guardians of the Galaxy  481.098624   
127                                 Mad Max: Fury Road  434.278564   
28                                      Jurassic World  418.708552   
199  Pirates of the Caribbean: The Curse of the Bla...  271.972889   
82                      Dawn of the Planet of the Apes  243.791743   
200              The Hunger Games: Mockingjay - Part 1  206.227151   
88                                          Big Hero 6  203.734590   

     vote_count  vote_average  
546        4571           6.4  
95        10867           8.1  
788       10995           7.4  
94         9742           7.9  
127        9427           7.2  
28      

#### Content-based recommendation by plot

In [8]:
# Vectorise the plot summaries with TF-IDF, ignoring common English stop words.
tfidf = TfidfVectorizer(stop_words="english")
movies_df["overview"] = movies_df["overview"].fillna("")
tfidf_matrix = tfidf.fit_transform(movies_df["overview"])

# Pairwise similarity between every pair of overviews (row i vs. row j).
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Reverse lookup: movie title -> row index in movies_df.
# Series.drop_duplicates() compares the *values* (the row indices, which are
# all unique), so it never removed repeated titles. De-duplicate on the index
# labels instead, keeping the first movie for each title, so that
# indices[title] is always a single integer.
indices = pd.Series(movies_df.index, index=movies_df["title"])
indices = indices[~indices.index.duplicated(keep="first")]

In [9]:
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact movie title, looked up in the module-level ``indices`` Series.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix whose rows and columns follow the row order
        of the module-level ``movies_df``.
    top_n : int, default 10
        Number of recommendations to return.

    Returns
    -------
    pandas.Series
        Up to ``top_n`` titles from ``movies_df["title"]``, most similar first.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    if title not in indices.index:
        raise KeyError(f"Movie title not found in the dataset: {title!r}")

    idx = indices[title]
    # When several movies share a title the lookup yields a Series of row
    # positions instead of a scalar; fall back to the first match.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Drop the query movie explicitly instead of assuming it sorts first: a
    # movie with an empty overview has similarity 0 with itself, and a movie
    # with an identical overview can tie with it.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    # list.sort is stable, so ties keep their original row order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    movies_indices = [position for position, _ in sim_scores[:top_n]]
    return movies_df["title"].iloc[movies_indices]

In [10]:
# Demo: recommendations from plot-overview similarity.
user_movie = "The Avengers"
print("Content Based Filtering (Plot)")
print(f"Recommendations for '{user_movie}'")
plot_recommendations = get_recommendations(user_movie)
print(plot_recommendations)

Content Based Filtering (Plot)
Recommendations for 'The Avengers'
7               Avengers: Age of Ultron
3144                            Plastic
1715                            Timecop
4124                 This Thing of Ours
3311              Thank You for Smoking
3033                      The Corruptor
588     Wall Street: Money Never Sleeps
2136         Team America: World Police
1468                       The Fountain
1286                        Snowpiercer
Name: title, dtype: object


#### Content-based recommendation using metadata

In [11]:
# These columns are read from the CSV as strings holding Python-style list
# literals; turn each string into the actual list object it describes.
features = ["cast", "crew", "keywords", "genres"]
for feature in features:
    parsed_column = movies_df[feature].map(literal_eval)
    movies_df[feature] = parsed_column

def get_director(crew):
    """Return the name of the first crew entry whose job is "Director".

    ``crew`` is an iterable of dict-like records. If no record has
    ``job == "Director"``, or the matching record has no ``"name"`` key,
    ``np.nan`` is returned.
    """
    director_names = (
        person.get("name", np.nan)
        for person in crew
        if person.get("job") == "Director"
    )
    # The generator is lazy, so only the first matching record is inspected.
    return next(director_names, np.nan)

def get_list(data):
    """Return the ``"name"`` values of the first three entries in ``data``.

    Anything that is not a list yields an empty list. An entry without a
    ``"name"`` key contributes an empty string.
    """
    if not isinstance(data, list):
        return []

    names = []
    for entry in data[:3]:
        names.append(entry.get("name", ""))
    return names

# Derive the director from the crew records before reducing the other
# metadata columns to short lists of names.
director_column = movies_df["crew"].map(get_director)
movies_df["director"] = director_column
for feature in ("cast", "keywords", "genres"):
    movies_df[feature] = movies_df[feature].map(get_list)

def clean_data(data):
    """Lower-case ``data`` and remove every space from it.

    A string is returned as a cleaned string, a list as a list of cleaned
    strings, and any other value (for example a missing director stored as
    NaN) as the empty string.
    """
    if isinstance(data, str):
        return data.replace(" ", "").lower()
    if isinstance(data, list):
        return [entry.replace(" ", "").lower() for entry in data]
    return ""

# Normalise every metadata column that will be fed into the soup string.
features_to_clean = ["cast", "keywords", "director", "genres"]
for feature in features_to_clean:
    cleaned_column = movies_df[feature].map(clean_data)
    movies_df[feature] = cleaned_column

def create_soup(row):
    """Join a row's keywords, cast, director and genres into one string.

    ``row["keywords"]``, ``row["cast"]`` and ``row["genres"]`` are sequences of
    strings and ``row["director"]`` is a single string. The four parts are
    separated by single spaces, in that order.
    """
    parts = [
        " ".join(row["keywords"]),
        " ".join(row["cast"]),
        row["director"],
        " ".join(row["genres"]),
    ]
    return " ".join(parts)

# Build the combined metadata string for every movie by applying create_soup row by row (axis=1).
movies_df["soup"] = movies_df.apply(create_soup, axis=1)

In [12]:
# Bag-of-words counts over the metadata soup, ignoring common English stop words.
count_vectorizer = CountVectorizer(stop_words="english")

# Renumber rows 0..n-1 so row labels match matrix positions. drop=True
# discards the old index; without it every run of this cell would insert the
# previous index as an extra column, so the cell could not be re-run safely.
movies_df = movies_df.reset_index(drop=True)

count_matrix = count_vectorizer.fit_transform(movies_df["soup"])
cosine_sim2 = cosine_similarity(count_matrix, count_matrix)

# Reverse lookup: movie title -> row index. Keep only the first movie for each
# title so that indices[title] is always a single integer, never a Series.
indices = pd.Series(movies_df.index, index=movies_df["title"])
indices = indices[~indices.index.duplicated(keep="first")]

In [13]:
# Demo: recommendations from the cast / director / keyword / genre similarity matrix.
user_movie = "The Avengers"
print("Content Based System (Metadata)")
print(f"Recommendations for '{user_movie}'")
metadata_recommendations = get_recommendations(user_movie, cosine_sim2)
print(metadata_recommendations)

Content Based System (Metadata)
Recommendations for 'The Avengers'
7                  Avengers: Age of Ultron
26              Captain America: Civil War
79                              Iron Man 2
169     Captain America: The First Avenger
174                    The Incredible Hulk
85     Captain America: The Winter Soldier
31                              Iron Man 3
33                   X-Men: The Last Stand
68                                Iron Man
94                 Guardians of the Galaxy
Name: title, dtype: object
