# Recommendation Engine : Movies

In [1]:
import numpy as np
import pandas as pd

In [2]:
movies_data=pd.read_csv("MoviesRE.csv")
movies_data.head()

Unnamed: 0,index,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [3]:
movies_data.shape

(9742, 3)

In [4]:
movies_data.isnull().sum()

index     0
title     0
genres    0
dtype: int64

In [5]:
#If Null values present then the code will
#movies_data["genres"]=movies_data["genres"].fillna('')

In [6]:
#Convert the text data into feature vectors

from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer=TfidfVectorizer()
feature_vectors=vectorizer.fit_transform(movies_data["genres"])
print(feature_vectors)


  (0, 8)	0.482990142708577
  (0, 4)	0.26758647689140014
  (0, 3)	0.5048454681396087
  (0, 2)	0.5162254711770092
  (0, 1)	0.41684567364693936
  (1, 8)	0.5936619434123594
  (1, 3)	0.620525172745643
  (1, 1)	0.5123612074824268
  (2, 19)	0.8210088907493954
  (2, 4)	0.5709154064399099
  (3, 7)	0.46640480307738325
  (3, 19)	0.726240982959826
  (3, 4)	0.5050154397005037
  (4, 4)	1.0
  (5, 21)	0.5420423542868653
  (5, 5)	0.6359470441562756
  (5, 0)	0.5493281743985542
  (6, 19)	0.8210088907493954
  (6, 4)	0.5709154064399099
  (7, 3)	0.7711121633813997
  (7, 1)	0.6366993258087036
  (8, 0)	1.0
  (9, 21)	0.5457299419583338
  (9, 0)	0.5530653284926609
  (9, 1)	0.6295217016667962
  :	:
  (9731, 0)	0.4127296517002463
  (9731, 4)	0.30157046584177355
  (9731, 1)	0.4697858630458896
  (9732, 9)	0.48136743695078876
  (9732, 20)	0.48136743695078876
  (9732, 0)	0.39038039438445316
  (9732, 4)	0.28524046407869114
  (9732, 2)	0.5502833875552382
  (9733, 7)	0.43179372819853856
  (9733, 2)	0.9019723811117537
  

In [7]:
#Cosine Similarity

In [8]:
#Getting the similarity score using cosine similarity

from sklearn.metrics.pairwise import cosine_similarity
similarity=cosine_similarity(feature_vectors)
print(similarity)

[[1.         0.81357774 0.15276924 ... 0.         0.4210373  0.26758648]
 [0.81357774 1.         0.         ... 0.         0.         0.        ]
 [0.15276924 0.         1.         ... 0.         0.         0.57091541]
 ...
 [0.         0.         0.         ... 1.         0.         0.        ]
 [0.4210373  0.         0.         ... 0.         1.         0.        ]
 [0.26758648 0.         0.57091541 ... 0.         0.         1.        ]]


In [9]:
print(similarity.shape)

(9742, 9742)


# Creating a list with all the movie names given in the dataset

In [10]:
list_of_all_titles=movies_data['title'].tolist()
print(list_of_all_titles)



# Getting the movie name from the user

In [11]:
#Getting the movie name from the user
movie_name=input(' Enter your favourite movie name: ')

 Enter your favourite movie name: Iron man


In [18]:
#finding the close match for the movie name given by user
import difflib
find_close_match=difflib.get_close_matches(movie_name, list_of_all_titles)
print(find_close_match)

['Iron Man (2008)', 'Iron Man (1931)']


In [19]:
close_match=find_close_match[0]
print(close_match)

Iron Man (2008)


In [20]:
#Finding  the index of the movie with title
index_of_the_movie = movies_data[movies_data.title == close_match]['index'].values[0]
print(index_of_the_movie)

6744


In [15]:
#Getting a list of similar movies

similarity_score = list(enumerate(similarity[index_of_the_movie]))
print(similarity_score)

[(0, 0.1527692422122773), (1, 0.0), (2, 1.0000000000000002), (3, 0.8845713988516402), (4, 0.5709154064399099), (5, 0.0), (6, 1.0000000000000002), (7, 0.0), (8, 0.0), (9, 0.0), (10, 0.8845713988516402), (11, 0.2909098936874118), (12, 0.0), (13, 0.0), (14, 0.46787395109561647), (15, 0.0), (16, 0.6908158755079289), (17, 0.5709154064399099), (18, 0.5709154064399099), (19, 0.20109569849295683), (20, 0.24719704018593303), (21, 0.0), (22, 0.0), (23, 0.0), (24, 0.6908158755079289), (25, 0.0), (26, 0.0), (27, 0.6908158755079289), (28, 0.0), (29, 0.0), (30, 0.0), (31, 0.0), (32, 0.0), (33, 0.0), (34, 0.2673703887281183), (35, 1.0000000000000002), (36, 0.0), (37, 0.0), (38, 0.0), (39, 0.0), (40, 0.0), (41, 0.29774581281476226), (42, 0.6908158755079289), (43, 0.0), (44, 0.302735056594768), (45, 0.6908158755079289), (46, 0.0), (47, 0.8845713988516402), (48, 0.0), (49, 0.2673703887281183), (50, 0.0), (51, 0.0), (52, 0.8845713988516402), (53, 0.0), (54, 0.0), (55, 0.0), (56, 0.3047175075668095), (57,

In [16]:
len(similarity_score)

9742

In [24]:
#Sorting the movie based on their similarity score

sorted_similarity_movies=sorted(similarity_score, key=lambda x:x[1],reverse=True)
print(sorted_similarity_movies)

[(2, 1.0000000000000002), (6, 1.0000000000000002), (35, 1.0000000000000002), (57, 1.0000000000000002), (60, 1.0000000000000002), (103, 1.0000000000000002), (106, 1.0000000000000002), (111, 1.0000000000000002), (152, 1.0000000000000002), (157, 1.0000000000000002), (203, 1.0000000000000002), (216, 1.0000000000000002), (238, 1.0000000000000002), (248, 1.0000000000000002), (250, 1.0000000000000002), (255, 1.0000000000000002), (256, 1.0000000000000002), (297, 1.0000000000000002), (315, 1.0000000000000002), (335, 1.0000000000000002), (361, 1.0000000000000002), (383, 1.0000000000000002), (386, 1.0000000000000002), (394, 1.0000000000000002), (406, 1.0000000000000002), (434, 1.0000000000000002), (435, 1.0000000000000002), (482, 1.0000000000000002), (514, 1.0000000000000002), (529, 1.0000000000000002), (566, 1.0000000000000002), (577, 1.0000000000000002), (632, 1.0000000000000002), (687, 1.0000000000000002), (697, 1.0000000000000002), (717, 1.0000000000000002), (718, 1.0000000000000002), (727, 1

In [None]:
#Print the name of similary movies based on the index

In [28]:
print('Movies suggested for you: \n')
i=1
for movie in sorted_similar_movies:
    index = movie[0]
    title_from_index=movies_data[movies_data.index==index]['title'].values[0]
    if(i<=10):
        print(i, '.',title_from_index)
    i+=1
    

Movies suggested for you: 

1 . Grumpier Old Men (1995)
2 . Sabrina (1995)
3 . Clueless (1995)
4 . Two if by Sea (1996)
5 . French Twist (Gazon maudit) (1995)
6 . If Lucy Fell (1996)
7 . Boomerang (1992)
8 . Pie in the Sky (1996)
9 . Mallrats (1995)
10 . Nine Months (1995)
