In [29]:
#Import the necessary library

import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load datasets
movie_data = pd.read_csv('/content/movie.csv')
movie_data.head()

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,comb
0,James Cameron,CCH Pounder,Joel David Moore,Wes Studi,Action Adventure Fantasy Sci-Fi,avatar,CCH Pounder Joel David Moore Wes Studi James C...
1,Gore Verbinski,Johnny Depp,Orlando Bloom,Jack Davenport,Action Adventure Fantasy,pirates of the caribbean: at world's end,Johnny Depp Orlando Bloom Jack Davenport Gore ...
2,Sam Mendes,Christoph Waltz,Rory Kinnear,Stephanie Sigman,Action Adventure Thriller,spectre,Christoph Waltz Rory Kinnear Stephanie Sigman ...
3,Christopher Nolan,Tom Hardy,Christian Bale,Joseph Gordon-Levitt,Action Thriller,the dark knight rises,Tom Hardy Christian Bale Joseph Gordon-Levitt ...
4,Doug Walker,Doug Walker,Rob Walker,unknown,Documentary,star wars: episode vii - the force awakens ...,Doug Walker Rob Walker unknown Doug Walker Doc...


In [30]:
# Add an index column
movie_data.reset_index(inplace=True)
movie_data.rename(columns={'index': 'movie_index'}, inplace=True)

# Inspect the dataset to verify the new index column
print(movie_data.head())

   movie_index      director_name     actor_1_name      actor_2_name  \
0            0      James Cameron      CCH Pounder  Joel David Moore   
1            1     Gore Verbinski      Johnny Depp     Orlando Bloom   
2            2         Sam Mendes  Christoph Waltz      Rory Kinnear   
3            3  Christopher Nolan        Tom Hardy    Christian Bale   
4            4        Doug Walker      Doug Walker        Rob Walker   

           actor_3_name                           genres  \
0             Wes Studi  Action Adventure Fantasy Sci-Fi   
1        Jack Davenport         Action Adventure Fantasy   
2      Stephanie Sigman        Action Adventure Thriller   
3  Joseph Gordon-Levitt                  Action Thriller   
4               unknown                      Documentary   

                                         movie_title  \
0                                             avatar   
1           pirates of the caribbean: at world's end   
2                                     

**Content Based Filtering**

In [31]:
#Select the requied columns in the dataset
selected_features =['director_name','genres','actor_1_name','actor_2_name','actor_3_name']

# replacing the null valuess with null string

for feature in selected_features:
  movie_data[feature] = movie_data[feature].fillna('')

combined_features = movie_data['director_name']+' '+movie_data['genres']+' '+movie_data['actor_1_name']+' '+movie_data['actor_2_name']+' '+movie_data['actor_3_name']

In [32]:
print(combined_features)

0       James Cameron Action Adventure Fantasy Sci-Fi ...
1       Gore Verbinski Action Adventure Fantasy Johnny...
2       Sam Mendes Action Adventure Thriller Christoph...
3       Christopher Nolan Action Thriller Tom Hardy Ch...
4       Doug Walker Documentary Doug Walker Rob Walker...
                              ...                        
5864    Greta Gerwig Drama Romance Saoirse Ronan Emma ...
5865    Sam Mendes War Drama Action History George Mac...
5866    Destin Daniel Cretton Drama Crime Michael B. J...
5867    Chinonye Chukwu Drama Alfre Woodard Wendell Pi...
5868    Waymon Boone Horror Thriller Mena Suvari Kevin...
Length: 5869, dtype: object


In [33]:
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(combined_features)

In [34]:
# getting the similarity scores using cosine similarity

similarity = cosine_similarity(feature_vectors)
print(similarity)

[[1.         0.06158515 0.03308819 ... 0.         0.         0.        ]
 [0.06158515 1.         0.03350404 ... 0.         0.         0.        ]
 [0.03308819 0.03350404 1.         ... 0.         0.         0.01230844]
 ...
 [0.         0.         0.         ... 1.         0.00592608 0.        ]
 [0.         0.         0.         ... 0.00592608 1.         0.        ]
 [0.         0.         0.01230844 ... 0.         0.         1.        ]]


In [36]:
# creating a list with all the movie names given in the dataset

list_of_all_titles = movie_data['movie_title'].tolist()
print(list_of_all_titles)

['avatar', "pirates of the caribbean: at world's end", 'spectre', 'the dark knight rises', 'star wars: episode vii - the force awakens\xa0           ', 'john carter', 'tangled', 'avengers: age of ultron', 'harry potter and the half-blood prince', 'batman v superman: dawn of justice', 'superman returns', 'quantum of solace', "pirates of the caribbean: dead man's chest", 'the lone ranger', 'man of steel', 'the chronicles of narnia: prince caspian', 'pirates of the caribbean: on stranger tides', 'men in black 3', 'the hobbit: the battle of the five armies', 'the amazing spider-man', 'robin hood', 'the hobbit: the desolation of smaug', 'the golden compass', 'titanic', 'captain america: civil war', 'battleship', 'jurassic world', 'spider-man 2', 'iron man 3', 'x-men: the last stand', 'monsters university', 'transformers: revenge of the fallen', 'transformers: age of extinction', 'the amazing spider-man 2', 'cars 2', 'green lantern', 'toy story 3', 'terminator salvation', 'furious 7', 'world

In [37]:
#Get the movie name from the user
movie_name = input("Enter the movie name: ")

#find the close match from the user input name
find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
print(find_close_match)

Enter the movie name: iron man
['iron man', 'iron man 3', 'iron man 2']


In [38]:
close_match = find_close_match[0]
print(close_match)
index_of_the_movie = movie_data[movie_data.movie_title == close_match]['movie_index'].values[0]
print(index_of_the_movie)

iron man
60


In [39]:
# getting a list of similar movies

similarity_score = list(enumerate(similarity[index_of_the_movie]))
print(similarity_score)

[(0, 0.08234386196293741), (1, 0.03367159425462882), (2, 0.031260590013387594), (3, 0.016514979336007356), (4, 0.0), (5, 0.07805954788720945), (6, 0.017010438610850397), (7, 0.27205271082511623), (8, 0.018804605139734778), (9, 0.07834357074702308), (10, 0.08481371706252674), (11, 0.03146967882062774), (12, 0.03367159425462882), (13, 0.035039384938915785), (14, 0.08097704182073878), (15, 0.0296741176405413), (16, 0.038034289937282556), (17, 0.08271132712923729), (18, 0.02127668391665402), (19, 0.034742674526102926), (20, 0.03498318376244958), (21, 0.02127668391665402), (22, 0.0204030389824996), (23, 0.0), (24, 0.30057648419695815), (25, 0.08012190331273993), (26, 0.07643451468718986), (27, 0.039979500425565646), (28, 0.616467153311151), (29, 0.07744563947568733), (30, 0.01820554059126931), (31, 0.086920917071093), (32, 0.07903533444551993), (33, 0.08741232720767793), (34, 0.018693962572788148), (35, 0.07993269997596729), (36, 0.018734393406896845), (37, 0.08844008229257569), (38, 0.0174

In [40]:
# sorting the movies based on their similarity score

sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True)
print(sorted_similar_movies)

[(60, 1.0000000000000004), (70, 0.841088875855786), (28, 0.616467153311151), (3638, 0.5854610629010251), (404, 0.4496843662608912), (1347, 0.3984622168760632), (412, 0.39118602460252205), (639, 0.38995458445018655), (1725, 0.3572564511165462), (684, 0.35576074623697107), (278, 0.354621223355661), (1158, 0.3544342711992765), (1468, 0.35223190363937706), (3175, 0.34623491131296963), (410, 0.3338693092869495), (928, 0.333859610851702), (1411, 0.333315538565749), (3040, 0.3316960801851576), (865, 0.32852129762277404), (373, 0.32657540146814534), (5750, 0.31481275380959195), (4761, 0.3123431930165106), (2873, 0.3079072396501277), (24, 0.30057648419695815), (7, 0.27205271082511623), (749, 0.27205271082511623), (296, 0.2554626997071773), (331, 0.254504623891547), (2396, 0.2534882565857574), (193, 0.2380377871291091), (2213, 0.23562957659044942), (934, 0.23233459568331055), (328, 0.22832999689068187), (184, 0.22686015424304154), (1370, 0.2224972979900451), (620, 0.21855393691948963), (1837, 0.

In [42]:
#get recommendation from the user preferance
def get_recommendation(movie_name):
  list_of_all_titles = movie_data['movie_title'].tolist()
  find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
  close_match = find_close_match[0]
  index_of_the_movie = movie_data[movie_data.movie_title == close_match]['movie_index'].values[0]
  similarity_score = list(enumerate(similarity[index_of_the_movie]))
  sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True)
  print('Movies suggested for you : \n')

  i = 1
  for movie in sorted_similar_movies:
    index = movie[0]
    title_from_index = movie_data[movie_data.movie_index==index]['movie_title'].values[0]
    if (i<11):
      print(i, '.',title_from_index)
      i+=1


movie_name = input(' Enter your favourite movie name : ')
while(True):
  if(movie_name == 'stop' or movie_name == 'exit'):
    print('Thanks for Ask Movie Recommendation ')
    break
  else:
      get_recommendation(movie_name)
      movie_name = input(' Enter your favourite movie name : ')

 Enter your favourite movie name : iron man
Movies suggested for you : 

1 . iron man
2 . iron man 2
3 . iron man 3
4 . made
5 . deep impact
6 . identity thief
7 . four christmases
8 . zathura: a space adventure
9 . the jungle book
10 . couples retreat
 Enter your favourite movie name : bat man
Movies suggested for you : 

1 . batman
2 . batman returns
3 . young guns
4 . raiders of the lost ark
5 . dear wendy
6 . tango & cash
7 . flash gordon
8 . batman & robin
9 . sphinx
10 . charlie and the chocolate factory
 Enter your favourite movie name : avengers
Movies suggested for you : 

1 . avengers: age of ultron
2 . the avengers
3 . captain america: civil war
4 . iron man 2
5 . serenity
6 . captain america: the winter soldier
7 . avengers: infinity war
8 . the island
9 . iron man 3
10 . star trek
 Enter your favourite movie name : x-men
Movies suggested for you : 

1 . x-men
2 . x-men 2
3 . x-men: days of future past
4 . x-men: apocalypse
5 . superman returns
6 . x-men origins: wolverine
