### Movie Recommendation (2024) using TF-IDF

### Importing libraries :

In [1]:
import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt
import os 
from zipfile import ZipFile 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
import pickle

In [2]:
# Let pandas render up to 100 rows and 100 columns before truncating output.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 100)

### Downloading the dataset (re-run to download latest dataset) :

### Extract the dataset files :

In [3]:
# Open the dataset archive and unpack every member into the current
# working directory (extractall() is called without a target path).
with ZipFile("madhurima.zip", mode='r') as archive:
    archive.extractall()

### Loading the dataset :

In [4]:
# Read the TMDB movie CSV into a DataFrame.
moviedata = pd.read_csv(filepath_or_buffer='TMDB_movie_dataset_v11.csv')

### Checking the first three rows of the dataset :

In [5]:
# Preview the first three rows of the dataset.
moviedata.head(n=3)

Unnamed: 0,id,title,vote_average,vote_count,status,release_date,revenue,runtime,adult,backdrop_path,budget,homepage,imdb_id,original_language,original_title,overview,popularity,poster_path,tagline,genres,production_companies,production_countries,spoken_languages,keywords
0,27205,Inception,8.364,34495,Released,2010-07-15,825532764,148,False,/8ZTVqvKDQ8emSGUEMjsS4yHAwrp.jpg,160000000,https://www.warnerbros.com/movies/inception,tt1375666,en,Inception,"Cobb, a skilled thief who commits corporate es...",83.952,/oYuLEt3zVCKq57qu2F8dT7NIa6f.jpg,Your mind is the scene of the crime.,"Action, Science Fiction, Adventure","Legendary Pictures, Syncopy, Warner Bros. Pict...","United Kingdom, United States of America","English, French, Japanese, Swahili","rescue, mission, dream, airplane, paris, franc..."
1,157336,Interstellar,8.417,32571,Released,2014-11-05,701729206,169,False,/pbrkL804c8yAv3zBZR4QPEafpAR.jpg,165000000,http://www.interstellarmovie.net/,tt0816692,en,Interstellar,The adventures of a group of explorers who mak...,140.241,/gEU2QniE6E77NI6lCU6MxlNBvIx.jpg,Mankind was born on Earth. It was never meant ...,"Adventure, Drama, Science Fiction","Legendary Pictures, Syncopy, Lynda Obst Produc...","United Kingdom, United States of America",English,"rescue, future, spacecraft, race against time,..."
2,155,The Dark Knight,8.512,30619,Released,2008-07-16,1004558444,152,False,/nMKdUUepR0i5zn0y1T4CsSB5chy.jpg,185000000,https://www.warnerbros.com/movies/dark-knight/,tt0468569,en,The Dark Knight,Batman raises the stakes in his war on crime. ...,130.643,/qJ2tW6WMUDux911r6m7haRef0WH.jpg,Welcome to a world without rules.,"Drama, Action, Crime, Thriller","DC Comics, Legendary Pictures, Syncopy, Isobel...","United Kingdom, United States of America","English, Mandarin","joker, sadism, chaos, secret identity, crime f..."


### Data types :

In [6]:
# Show the dtype of each column in the dataset.
moviedata.dtypes

id                        int64
title                    object
vote_average            float64
vote_count                int64
status                   object
release_date             object
revenue                   int64
runtime                   int64
adult                      bool
backdrop_path            object
budget                    int64
homepage                 object
imdb_id                  object
original_language        object
original_title           object
overview                 object
popularity              float64
poster_path              object
tagline                  object
genres                   object
production_companies     object
production_countries     object
spoken_languages         object
keywords                 object
dtype: object

### Checking one single row of dataset :

In [7]:
# Take the row at position 0 as a Series and display it as the cell output.
row1 = moviedata.iloc[0]
row1

id                                                                  27205
title                                                           Inception
vote_average                                                        8.364
vote_count                                                          34495
status                                                           Released
release_date                                                   2010-07-15
revenue                                                         825532764
runtime                                                               148
adult                                                               False
backdrop_path                            /8ZTVqvKDQ8emSGUEMjsS4yHAwrp.jpg
budget                                                          160000000
homepage                      https://www.warnerbros.com/movies/inception
imdb_id                                                         tt1375666
original_language                     

### Making a single column 'tags' combining all essential text columns :

In [8]:
# Build one free-text document per movie from the descriptive columns.
# Two fixes over plain `+` concatenation:
#   * every piece is joined with a space, so the last word of one column and
#     the first word of the next are no longer fused into a single junk token;
#   * missing values are replaced by '' first, so one empty column no longer
#     turns the whole document into NaN.
tag_columns = ['title', 'genres', 'overview', 'original_title', 'keywords']
tag_parts = moviedata[tag_columns].fillna('').astype(str)
moviedata['tags'] = tag_parts['title'].str.cat(tag_parts[tag_columns[1:]], sep=' ')

### Generate a mapping from each movie title to its index in the dataframe :

In [9]:
# Series indexed by title whose values are the DataFrame's index labels;
# a title that occurs more than once maps to several entries.
movie2idx = pd.Series(data=moviedata.index, index=moviedata['title'])

In [10]:
def find_index(name):
    """Look up ``name`` in the module-level ``movie2idx`` mapping.

    Returns whatever ``movie2idx[name]`` yields: a single index for a unique
    title, or a Series of indices when the title appears more than once.
    An unknown title propagates the lookup's ``KeyError``.
    """
    matched = movie2idx[name]
    return matched

### Instantiating TF-IDF Vectorizer :

In [11]:
# Upper bound on the vocabulary size kept by the vectorizer.
MAX_TFIDF_FEATURES = 6000
tfvectorizer = TfidfVectorizer(max_features=MAX_TFIDF_FEATURES)

### Fitting TF-IDF Vectorizer on dataset :

In [12]:
# Fit the vectorizer on the combined text and build the TF-IDF matrix
# (one row per movie).
# `.values.astype('U')` was replaced because it allocates a fixed-width unicode
# array sized to the longest document for every row, and it turns missing
# values into the literal token "nan". Filling with '' and casting each value
# to str keeps memory proportional to the actual text and leaves missing rows
# empty.
tag_documents = moviedata['tags'].fillna('').astype(str)
M1 = tfvectorizer.fit_transform(tag_documents)

### Storing the TF-IDF matrix (the output of the fitted vectorizer) to a file using pickle

In [13]:
# Serialize the TF-IDF matrix M1 to disk so it can be reloaded later.
with open('M1.pkl', mode='wb') as matrix_file:
    pickle.dump(M1, matrix_file)

### Loading the saved TF-IDF matrix :

In [14]:
# Read back the object pickled in M1.pkl. Despite its name, `loaded_vectorizer`
# holds whatever was dumped to that file (the TF-IDF matrix M1 in this notebook).
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open('M1.pkl', mode='rb') as matrix_file:
    loaded_vectorizer = pickle.load(matrix_file)

### Function to get top 5 recommendations for a given movie :

In [15]:
def get_recommendations(model,movie):
    """Print the titles of the five movies most similar to ``movie``.

    Parameters
    ----------
    model : matrix with one feature row per movie, indexable by row position
        and accepted by ``cosine_similarity`` (the TF-IDF matrix in this
        notebook).
    movie : str
        Title to look up through ``find_index``.

    Raises
    ------
    KeyError
        Propagated from ``find_index`` when the title is unknown.

    Notes
    -----
    The index returned by ``find_index`` is used as a row position in both
    ``model`` and ``moviedata`` - assumes ``moviedata`` keeps its default
    0..n-1 index.
    """
    idX = find_index(movie)
    # A title shared by several movies yields a Series of indices; use the first.
    if isinstance(idX, pd.Series):
        idX = idX.iloc[0]
    similarity = cosine_similarity(model[idX], model).flatten()
    ranked = (-similarity).argsort()
    # Remove the query row explicitly instead of assuming it sorts first:
    # when another movie ties with it (e.g. identical tags), slicing [1:6]
    # could drop the other movie and recommend the query itself.
    ranked = ranked[ranked != idX]
    result = ranked[:5]
    print(moviedata['title'].iloc[result])

### Example :

In [16]:
# Demo: print the five closest matches for 'Toy Story'.
get_recommendations(model=loaded_vectorizer, movie='Toy Story')

333       Toy Story 4
159       Toy Story 2
364478    Toy Story 5
138       Toy Story 3
322191       Toy Time
Name: title, dtype: object
