### Importing dependencies

In [12]:
import numpy as np
import pandas as pd
import difflib # used to find closest match
from sklearn.feature_extraction.text import TfidfVectorizer # used to convert text to feature vectors
from sklearn.metrics.pairwise import cosine_similarity # gets similarity score

### Data Collection and Pre-Processing

In [20]:
# Load the movie metadata into a DataFrame.
# 'movies.csv' is a relative path, so it is resolved against the current working directory.
movies_df = pd.read_csv('movies.csv')

In [21]:
# Feature selection: the dataset columns whose text is used to describe each movie
selected_features = [
    'genres',
    'keywords',
    'tagline',
    'cast',
    'director',
]


In [22]:
# Replace missing values in every selected feature column with an empty string,
# so the columns can later be concatenated as plain text.
movies_df[selected_features] = movies_df[selected_features].fillna('')

In [23]:
# Build one text document per movie by joining the selected feature columns
# with single spaces, in the order given by selected_features.
# Driving this from selected_features (instead of repeating the column names by hand)
# keeps this cell in sync with the feature-selection and null-filling cells.
combined_features = movies_df[selected_features[0]]
for feature in selected_features[1:]:
    # element-wise string concatenation: "<text so far> <next column>"
    combined_features = combined_features + ' ' + movies_df[feature]




In [25]:
# Create the TF-IDF vectorizer with its default settings.
# Nothing is converted yet: the vectorizer is only fitted and applied when fit_transform is called on it.
vectorizer = TfidfVectorizer()

In [26]:
# Fit the vectorizer on the combined text (learns vocabulary and IDF weights) and
# transform it in one step: the result has one TF-IDF row per entry of combined_features.
feature_vectors = vectorizer.fit_transform(combined_features)

### Cosine Similarity

In [89]:
# Pairwise cosine similarity between all rows of feature_vectors.
# Called with a single argument, every row is compared with every other row (and itself),
# so similarity[i][j] is the score between row i and row j.
similarity = cosine_similarity(feature_vectors)

In [38]:
# Ask the user for a movie title (free text; it does not have to match a dataset title exactly,
# because the closest title is looked up with difflib afterwards).
movie_name = input('Enter the name of a movie: ')

Enter the name of a movie:  Interstellar


In [42]:
# Plain Python list of every value in the 'title' column;
# used as the candidate pool when fuzzy-matching the user's input.
list_of_all_titles = movies_df['title'].tolist()

In [46]:
# Fuzzy-match the user's input against the dataset titles.
# n and cutoff are difflib's defaults written out: at most 3 candidates,
# each with a similarity ratio of at least 0.6, best match first.
find_close_match = difflib.get_close_matches(
    movie_name,
    list_of_all_titles,
    n=3,
    cutoff=0.6,
)

In [49]:
# Closest match to the user input.
# get_close_matches returns an empty list when no title is similar enough, so guard
# against that case explicitly instead of failing with a bare IndexError on [0].
if not find_close_match:
    raise ValueError(
        'No movie title close to {!r} was found in the dataset; '
        'check the spelling or try another title.'.format(movie_name)
    )
# matches are ordered best first, so the first entry is the closest title
closest_match = find_close_match[0]

In [59]:
# Look up the value of the 'index' column for the row whose title equals the matched title.
# If several rows share that title, the first one is used.
title_matches = movies_df['title'] == closest_match
index_of_movie = movies_df.loc[title_matches, 'index'].values[0]

In [93]:
# Take the similarity row of the chosen movie and pair every score with its position,
# giving a list of (position, similarity score) tuples - one per movie.
similarity_score = [
    (position, score)
    for position, score in enumerate(similarity[index_of_movie])
]

In [99]:
# Rank the (position, score) pairs from most to least similar.
# Each pair is (position, similarity score); ordering uses the score (element 1), not the position.
# The sort is stable, so pairs with equal scores keep their original relative order.
sorted_similar_movies = list(similarity_score)
sorted_similar_movies.sort(key=lambda pair: pair[1], reverse=True)

In [76]:
# Print the titles of the most similar movies, best match first.
# The ranked list is sliced before the loop, so the DataFrame is filtered only for the
# titles that are actually printed instead of once for every movie in the dataset.
# The first entry is normally the chosen movie itself, since it is most similar to itself.
max_suggestions = 29  # number of titles to print
print('Movies suggested for you: \n')
for i, (index, _score) in enumerate(sorted_similar_movies[:max_suggestions], start=1):
    # map the ranked position back to a title through the 'index' column
    title_from_index = movies_df[movies_df['index'] == index]['title'].values[0]
    print(i, '.',title_from_index)

Movies suggested for you: 

1 . Interstellar
2 . The Dark Knight Rises
3 . The Matrix
4 . The Martian
5 . Dear Frankie
6 . Argo
7 . The Matrix Revolutions
8 . The Matrix Reloaded
9 . The Terminator
10 . Armageddon
11 . Terminator Genisys
12 . Contact
13 . Terminator Salvation
14 . The Killer Inside Me
15 . Gandhi, My Father
16 . The Tree of Life
17 . Get Smart
18 . Back to the Future
19 . Terminator 3: Rise of the Machines
20 . The Prestige
21 . Batman Begins
22 . Dragonslayer
23 . WarGames
24 . Little Nicky
25 . Superman III
26 . The Other Side of Heaven
27 . House at the End of the Street
28 . Good Deeds
29 . Mortal Kombat: Annihilation


### Movie Recommendation System - Combined

In [101]:
# End-to-end recommendation in one cell: read a title, fuzzy-match it against the dataset,
# rank all movies by cosine similarity to the match, and print the top suggestions.
movie_name = input('Enter the name of a movie: ')

list_of_all_titles = movies_df['title'].tolist()

# best matches first; an empty list means no title was similar enough
find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
if not find_close_match:
    # fail with a clear message instead of a bare IndexError on [0]
    raise ValueError(
        'No movie title close to {!r} was found in the dataset; '
        'check the spelling or try another title.'.format(movie_name)
    )

closest_match = find_close_match[0]

# value of the 'index' column for the matched title (first row if the title is duplicated)
index_of_movie = movies_df[movies_df['title'] == closest_match]['index'].values[0]

# (position, similarity score) for every movie, relative to the matched movie
similarity_score = list(enumerate(similarity[index_of_movie]))

# most similar first; x[1] is the similarity score
sorted_similar_movies = sorted(similarity_score,key = lambda x:x[1],reverse = True)

# Slice before looping so the DataFrame is filtered only for the titles that get printed,
# not once for every movie in the dataset.
max_suggestions = 29  # number of titles to print
print('Movies suggested for you: \n')
for i, (index, _score) in enumerate(sorted_similar_movies[:max_suggestions], start=1):
    title_from_index = movies_df[movies_df['index'] == index]['title'].values[0]
    print(i, '.',title_from_index)

Enter the name of a movie:  Wolf of Wall Street


Movies suggested for you: 

1 . The Wolf of Wall Street
2 . The Departed
3 . The Story of Us
4 . Shutter Island
5 . Gangs of New York
6 . The Aviator
7 . Focus
8 . Super 8
9 . Suicide Squad
10 . 21 Jump Street
11 . Sausage Party
12 . The Last Waltz
13 . Catch Me If You Can
14 . The Legend of Tarzan
15 . This Is the End
16 . Alex & Emma
17 . J. Edgar
18 . Kinsey
19 . Foolish
20 . 22 Jump Street
21 . A Few Good Men
22 . I Heart Huckabees
23 . Def Jam's How to Be a Player
24 . The American President
25 . Inception
26 . Strange Wilderness
27 . Moneyball
28 . Down Terrace
29 . Cyrus
