In [1]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

Data Pre-processing

In [2]:
# Load the movie metadata CSV into a DataFrame.
# The location is kept in one named constant so it is easy to change when the
# notebook runs somewhere other than where this absolute path exists.
MOVIES_CSV_PATH = '/content/movies.csv'
movies_data = pd.read_csv(MOVIES_CSV_PATH)

In [3]:
# Display (rows, columns) as a quick sanity check that the CSV loaded.
movies_data.shape

(4803, 24)

In [4]:
# Text columns used to describe each movie.
selected_features = [
  'genres',
  'keywords',
  'tagline',
  'cast',
  'director',
]

In [5]:
# Swap missing values in the selected text columns for empty strings,
# handling all of the columns in a single vectorised assignment.
movies_data[selected_features] = movies_data[selected_features].fillna('')

In [6]:
# Build one text document per movie by joining the selected columns with spaces.
# The column list comes from selected_features rather than being repeated here,
# so the columns that were null-filled are exactly the columns that get combined.
first_feature, *other_features = selected_features
combined_features = movies_data[first_feature].str.cat(movies_data[other_features], sep=' ')

Feature Extraction

In [7]:
# Create a TF-IDF vectorizer with default settings; it converts text into numeric feature vectors.
vectorizer = TfidfVectorizer()

In [8]:
# Fit the vectorizer on the combined text and transform that text into feature vectors in one step.
feature_vectors = vectorizer.fit_transform(combined_features)

Cosine Similarity

In [9]:
# Cosine similarity of the feature vectors against themselves: called with a
# single argument, every row is compared with every other row.
# NOTE(review): the result is presumably a dense movies x movies array — confirm memory is acceptable for larger datasets.
similarity = cosine_similarity(feature_vectors)

User Input

In [10]:
# Prompt for a movie title; the raw text is fuzzy-matched against the dataset titles further down.
movie_name = input('Enter your favourite movie name:')

Enter your favourite movie name:iron man


In [11]:
# Plain Python list of every title in the dataset, in row order.
title_list = list(movies_data['title'])

In [12]:
# Fuzzy-match the entered name against the known titles with difflib.
# n and cutoff are difflib's defaults, written out so the limits are visible.
find_close_match = difflib.get_close_matches(
  movie_name,
  title_list,
  n=3,
  cutoff=0.6,
)

In [13]:
# Take the best fuzzy match. Fail with a clear message instead of a bare
# IndexError when nothing in the dataset resembles the entered title.
if not find_close_match:
  raise ValueError('No close match for the entered movie name was found in the dataset.')
close_match = find_close_match[0]

In [14]:
# Row label of the matched title (the first one if the title occurs more than once).
title_matches = movies_data['title'] == close_match
index_of_movie = movies_data.index[title_matches.to_numpy()][0]

Output

In [15]:
# Pair each movie's position with its similarity to the chosen movie.
similarity_score = [
  (position, score) for position, score in enumerate(similarity[index_of_movie])
]

In [16]:
# Order the (position, score) pairs from most to least similar.
sorted_similar_movies = list(similarity_score)
sorted_similar_movies.sort(key=lambda pair: pair[1], reverse=True)

In [18]:
# Print the titles of the most similar movies, best match first.
NUM_RECOMMENDATIONS = 30  # how many titles to display

print('Movies suggested for you: \n')

# Slice before looping so only the entries that are displayed get visited.
for i, (index, _score) in enumerate(sorted_similar_movies[:NUM_RECOMMENDATIONS], start=1):
  # Row `index` of the similarity matrix corresponds to the row at that position
  # in movies_data, so a direct positional lookup avoids scanning the whole frame
  # with a boolean mask for every title.
  title_from_index = movies_data['title'].iloc[index]
  print(i, '.', title_from_index)

Movies suggested for you: 

1 . Iron Man
2 . Iron Man 2
3 . Iron Man 3
4 . Avengers: Age of Ultron
5 . The Avengers
6 . Captain America: Civil War
7 . Captain America: The Winter Soldier
8 . Ant-Man
9 . X-Men
10 . Made
11 . X-Men: Apocalypse
12 . X2
13 . The Incredible Hulk
14 . The Helix... Loaded
15 . X-Men: First Class
16 . X-Men: Days of Future Past
17 . Captain America: The First Avenger
18 . Kick-Ass 2
19 . Guardians of the Galaxy
20 . Deadpool
21 . Thor: The Dark World
22 . G-Force
23 . X-Men: The Last Stand
24 . Duets
25 . Mortdecai
26 . The Last Airbender
27 . Southland Tales
28 . Zathura: A Space Adventure
29 . Sky Captain and the World of Tomorrow
30 . The Amazing Spider-Man 2
