<a href="https://colab.research.google.com/github/talikagupta/Movie-Recommendation-System/blob/main/Movie_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [50]:
import pandas as pd
import numpy as np
import difflib #if a user makes an error in input---to give close suggestions
from sklearn.feature_extraction.text import TfidfVectorizer #to convert textual data to numerical data
from sklearn.metrics.pairwise import cosine_similarity

## Data Collection and Preprocessing

In [51]:
# Read the movie metadata from the CSV file into a pandas DataFrame.
MOVIES_CSV_PATH = '/content/movies.csv'
movies = pd.read_csv(MOVIES_CSV_PATH)

In [52]:
# Preview the first rows to see which columns the dataset provides.
movies.head()

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...","[{""iso_3166_1"": ""GB"", ""name"": ""United Kingdom""...",2015-10-26,880674609,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-07-16,1084939099,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-03-07,284139100,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton


In [53]:
# Size of the dataset as a (number of rows, number of columns) tuple.
movies.shape

(4803, 24)

In [54]:
# Text columns that are combined to describe each movie for similarity scoring.
selected_features = [
    'genres',
    'keywords',
    'tagline',
    'cast',
    'director',
]

In [55]:
# Replace missing values with empty strings in every selected text column,
# handling all of the columns in a single assignment.
movies[selected_features] = movies[selected_features].fillna('')

In [56]:
# Build one space-separated string per movie by appending the selected
# columns from left to right (genres, keywords, tagline, cast, director).
combined_features = movies[selected_features[0]]
for feature in selected_features[1:]:
    combined_features = combined_features + ' ' + movies[feature]

In [57]:
# Convert the combined text into numerical feature vectors using TF-IDF weighting.

# Create a TfidfVectorizer with its default settings.
vectorizer = TfidfVectorizer()

In [58]:
# Fit the vectorizer on the combined text and transform it into feature vectors
# (one vector per entry of combined_features).
feature_vectors = vectorizer.fit_transform(combined_features)

## Cosine Similarity

In [59]:
# Compute the pairwise cosine similarity score between every pair of movie feature vectors.

similarity = cosine_similarity(feature_vectors)

In [60]:
# The matrix is square: entry [i, j] holds the similarity score between movies i and j.
similarity.shape

(4803, 4803)

In [61]:
#print(similarity) #similarity is a 4803x4803 matrix

In [62]:
# Gather every movie title into a plain Python list.
title_list = list(movies['title'])

In [63]:
# Ask the user which movie they would like recommendations for.
prompt = "Enter a movie name to get recommendations: "
movie_title = input(prompt)

Enter a movie name to get recommendations: interstell


In [64]:
# Tolerate typos in the user's input: difflib returns the best-matching known
# titles, most similar first. The values below are difflib's documented
# defaults (at most 3 results, minimum similarity ratio of 0.6).
close_match_list = difflib.get_close_matches(
    movie_title,
    title_list,
    n=3,
    cutoff=0.6,
)
print(close_match_list)

['Interstellar', 'Cinderella', "Winter's Tale"]


In [65]:
# difflib returns an empty list when no title is similar enough to the input.
# Fail with an explanatory message instead of a bare IndexError.
if not close_match_list:
    raise ValueError(
        f"No movie title resembling {movie_title!r} was found; "
        "please check the spelling and try again."
    )

# The first entry is the best match, because difflib orders results by similarity.
close_match = close_match_list[0]
print(close_match)

Interstellar


In [66]:
# Look up the 'index' column value of the first row whose title equals the
# matched title; it is used to select that movie's row of the similarity matrix.
is_matched_title = movies['title'] == close_match
index = movies.loc[is_matched_title, 'index'].iloc[0]
print(index)

95


In [67]:
# Take the chosen movie's row of the similarity matrix and pair every score
# with its column position, giving a list of (position, score) tuples.
scores_for_match = similarity[index]
similarity_list = [(position, score) for position, score in enumerate(scores_for_match)]

In [68]:
# Order the (position, score) pairs from the highest score to the lowest,
# leaving similarity_list itself untouched.
sorted_similarity_list = list(similarity_list)
sorted_similarity_list.sort(key=lambda pair: pair[1], reverse=True)

In [69]:
# Print the ten highest-scoring titles.
# Only the first ten entries are visited, so the per-entry DataFrame lookup
# runs ten times rather than once for every movie in the dataset.
# Note: the matched movie itself normally appears first, since it is
# maximally similar to itself.
print("Top 10 movies suggested for you: \n")
for rank, (idx, _score) in enumerate(sorted_similarity_list[:10], start=1):
    title_of_idx = movies[movies['index'] == idx]['title'].values[0]
    print(rank, '.', title_of_idx)

Top 10 movies suggested for you: 

1 . Interstellar
2 . The Dark Knight Rises
3 . The Matrix
4 . The Martian
5 . Dear Frankie
6 . Argo
7 . The Matrix Revolutions
8 . The Matrix Reloaded
9 . The Terminator
10 . Armageddon


## Movie Recommendation System

In [71]:
def recommend_movies(movie_title, movies, similarity, top_n=10):
    """Return the titles of the movies most similar to ``movie_title``.

    The user's input is first matched against the known titles with difflib,
    so small typos are tolerated. The best match's row of the similarity
    matrix is then ranked from the highest score to the lowest.

    Parameters
    ----------
    movie_title : str
        Movie name as typed by the user (may be misspelled).
    movies : pandas.DataFrame
        Must provide a 'title' column and an 'index' column. The 'index'
        values are used directly as row/column positions in ``similarity``.
    similarity : 2-D array-like
        Pairwise similarity scores; ``similarity[i]`` holds the scores of
        movie ``i`` against every movie.
    top_n : int, default 10
        Maximum number of titles to return (expected to be non-negative).

    Returns
    -------
    list
        Up to ``top_n`` titles, most similar first. The matched movie itself
        is normally the first entry, since it is maximally similar to itself.

    Raises
    ------
    ValueError
        If no known title is close enough to ``movie_title``.
    """
    title_list = movies['title'].tolist()
    close_match_list = difflib.get_close_matches(movie_title, title_list)

    # difflib returns an empty list when nothing is similar enough; report
    # that clearly instead of failing with a bare IndexError.
    if not close_match_list:
        raise ValueError(
            f"No movie title resembling {movie_title!r} was found; "
            "please check the spelling and try again."
        )
    close_match = close_match_list[0]

    index = movies[movies['title'] == close_match]['index'].values[0]

    similarity_list = list(enumerate(similarity[index]))
    sorted_similarity_list = sorted(similarity_list, key=lambda x: x[1], reverse=True)

    # Resolve titles only for the entries that are actually returned, rather
    # than filtering the DataFrame once for every movie in the dataset.
    recommended_titles = []
    for idx, _score in sorted_similarity_list[:top_n]:
        recommended_titles.append(movies[movies['index'] == idx]['title'].values[0])
    return recommended_titles


# In a notebook __name__ is "__main__", so this interactive part runs as usual;
# the guard only keeps the prompt from firing if this code is imported elsewhere.
if __name__ == "__main__":
    movie_title = input("Enter a movie name to get recommendations: ")
    recommended_titles = recommend_movies(movie_title, movies, similarity)

    print("Top 10 movies suggested for you: \n")
    for rank, title in enumerate(recommended_titles, start=1):
        print(rank, '.', title)

Enter a movie name to get recommendations: spiderman
Top 10 movies suggested for you: 

1 . Spider-Man
2 . Spider-Man 3
3 . Spider-Man 2
4 . The Notebook
5 . Seabiscuit
6 . Clerks II
7 . The Ice Storm
8 . Oz: The Great and Powerful
9 . Horrible Bosses
10 . The Count of Monte Cristo
