In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline 

#Display all the columns in a dataframe
pd.pandas.set_option('display.max_columns', None)

In [2]:
data = pd.read_csv('main_data.csv')
print(data.shape)
data.head()

(36987, 7)


Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,comb
0,John Lasseter,Tom Hanks,Tim Allen,Don Rickles,Animation Comedy Family,toy story,Tom Hanks Tim Allen Don Rickles John Lasseter ...
1,Joe Johnston,Robin Williams,Jonathan Hyde,Kirsten Dunst,Adventure Fantasy Family,jumanji,Robin Williams Jonathan Hyde Kirsten Dunst Joe...
2,Howard Deutch,Walter Matthau,Jack Lemmon,Ann-Margret,Romance Comedy,grumpier old men,Walter Matthau Jack Lemmon Ann-Margret Howard ...
3,Forest Whitaker,Whitney Houston,Angela Bassett,Loretta Devine,Comedy Drama Romance,waiting to exhale,Whitney Houston Angela Bassett Loretta Devine ...
4,Charles Shyer,Steve Martin,Diane Keaton,Martin Short,Comedy,father of the bride part ii,Steve Martin Diane Keaton Martin Short Charles...


In [3]:
# Checking any missing Values 
data.isnull().sum()

director_name    0
actor_1_name     0
actor_2_name     0
actor_3_name     0
genres           0
movie_title      0
comb             0
dtype: int64

In [4]:
# Here we are using Cosine_similarity for our recommender systems 
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
# Feeding comb column strings into countvectorizer
cv = CountVectorizer()

In [6]:
count_matrix = cv.fit_transform(data['comb'])

In [7]:
# Getting Cosine Similarity Matrix from the count matrix
cosine_sim = cosine_similarity(count_matrix)

In [8]:
# Defining two helper functions to get movie title from movie index and vice-versa.
def get_title_from_index(index):
    return data[data.index == index]["movie_title"].values[0]
def get_index_from_title(title):
    return data[data.movie_title == title].index.values[0]

In [20]:
try:
    movie_user_likes = input('Enter the Movie Name : ')
    movie_index = get_index_from_title(movie_user_likes)
except:
    print("Movie Name is not in our Database ")


Enter the Movie Name : jumanji


In [21]:
similar_movies = list(enumerate(cosine_sim[movie_index]))

In [22]:
sorted_similar_movies = sorted(similar_movies,key=lambda x:x[1],reverse=True)[1:]

In [25]:
i=0
print("Top 10 similar movies to "+movie_user_likes+" are:\n")
for element in sorted_similar_movies:
    print(get_title_from_index(element[0]))
    i=i+1
    if i>10:
        break

Top 10 similar movies to jumanji are:

hook
the nutcracker and the four realms
small soldiers
aladdin and the king of thieves
spider-man 2
spider-man 3
the adventurer: the curse of the midas box
pan
the neverending story ii: the next chapter
captain sindbad
jason and the argonauts
