Importing the dependencies

In [94]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle


Data Collection and Pre-Processing

In [95]:
# loading the data from the csv file into a pandas dataframe
movies_data = pd.read_csv('Datasets/movies.csv')

In [96]:
# previewing the first 5 rows of the dataframe to check that the file loaded as expected
movies_data.head()

In [97]:
# number of rows and columns in the dataframe, as a (rows, columns) tuple

movies_data.shape

(4803, 24)

In [98]:
# Persist the movies_data dataframe to disk as a pickle file.
# Note: this runs before the null values are replaced further down, so the
# pickled frame still contains the missing values as loaded from the csv.
with open('movies_data.pkl', mode='wb') as pickle_out:
    pickle.dump(movies_data, pickle_out)

In [99]:
# Columns of movies_data that the recommendation is based on
selected_features = [
    'genres',
    'keywords',
    'tagline',
    'cast',
    'director',
]
print(selected_features)

In [100]:
# Replace the null values in every selected feature column with an empty
# string, so the columns can be concatenated as text afterwards.
movies_data[selected_features] = movies_data[selected_features].fillna('')

In [101]:
# Combine the selected feature columns into one space-separated text per movie.
# The columns are taken from selected_features (in that order) instead of being
# repeated by hand, so the combined text always covers exactly the columns that
# were chosen and null-filled earlier.
combined_features = movies_data[selected_features[0]]
for feature in selected_features[1:]:
    # same left-to-right concatenation as writing col1 + ' ' + col2 + ' ' + ...
    combined_features = combined_features + ' ' + movies_data[feature]

In [102]:
# previewing the first rows of the combined feature text
combined_features.head()

In [103]:
# number of entries in the combined feature text
combined_features.shape

In [104]:
# creating the TF-IDF vectorizer (default settings) used to convert the text data to feature vectors

vectorizer = TfidfVectorizer()

In [105]:
# fitting the vectorizer on the combined text and converting it to feature vectors in one step
feature_vectors = vectorizer.fit_transform(combined_features)

In [106]:
# printing the feature vectors returned by the TF-IDF vectorizer
print(feature_vectors)

Cosine Similarity

In [107]:
# getting the similarity scores between the movies' feature vectors using cosine similarity;
# row i of the result is later read as the scores of movie i against every movie

similarity = cosine_similarity(feature_vectors)

In [108]:
# printing the similarity scores computed by cosine_similarity
print(similarity)

In [109]:
# Persist the cosine similarity scores to disk as a pickle file
with open('similarity_scores.pkl', mode='wb') as pickle_out:
    pickle.dump(similarity, pickle_out)

Getting the movie name from the user

Movie Recommendation System

In [113]:
# Ask the user for a movie, fuzzy-match it against the known titles and print
# the 10 movies whose similarity score to the matched movie is highest.
movie_name = input(' Enter your favourite movie name : ')

list_of_all_titles = movies_data['title'].tolist()

# get_close_matches returns the best matches first, or an empty list when no
# title is close enough to what was typed
find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
if len(find_close_match) == 0:
    print('Movie not found in the list. Please try with another movie name.')
else:
    close_match = find_close_match[0]

    # The rows of `similarity` follow the row order of movies_data, so the
    # movie is located by its row position (first occurrence of the title)
    # rather than through a separate 'index' column of the csv.
    index_of_the_movie = list_of_all_titles.index(close_match)

    similarity_score = list(enumerate(similarity[index_of_the_movie]))

    # highest score first; sorted() is stable, so ties keep their row order
    sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)

    print('Movies suggested for you : \n')

    # Only the 10 best matches are shown, so slice before looking up titles
    # instead of doing a dataframe lookup for every movie in the dataset.
    for i, (index, _score) in enumerate(sorted_similar_movies[:10], start=1):
        title_from_index = movies_data['title'].iloc[index]
        print(i, '.', title_from_index)

Movies suggested for you : 

1 . The Avengers
2 . Avengers: Age of Ultron
3 . Captain America: The Winter Soldier
4 . Captain America: Civil War
5 . Iron Man 2
6 . Thor: The Dark World
7 . X-Men
8 . The Incredible Hulk
9 . X-Men: Apocalypse
10 . Ant-Man
