# Generating top 10 recommendations based on current movie, using the movie summary(content).

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/tmdb-movie-metadata/tmdb_5000_credits.csv
/kaggle/input/tmdb-movie-metadata/tmdb_5000_movies.csv


In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import sigmoid_kernel

In [3]:
movies_data = pd.read_csv('/kaggle/input/tmdb-movie-metadata/tmdb_5000_movies.csv')
movies_data.head()

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...","[{""iso_3166_1"": ""GB"", ""name"": ""United Kingdom""...",2015-10-26,880674609,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466
3,250000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",http://www.thedarkknightrises.com/,49026,"[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...",en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-07-16,1084939099,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106
4,260000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://movies.disney.com/john-carter,49529,"[{""id"": 818, ""name"": ""based on novel""}, {""id"":...",en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-03-07,284139100,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124


In [4]:
#eliminating missing values.
print(pd.isnull(movies_data['overview']).sum())
movies_data['overview'] = movies_data['overview'].fillna('')
print(pd.isnull(movies_data['overview']).sum())

3
0


In [5]:
#generating tf-idf vectors for text document.
tfidf = TfidfVectorizer(min_df=5, max_df=0.9, ngram_range=(1,3), stop_words='english')
tfidf_data = tfidf.fit_transform(movies_data['overview'])
tfidf_data

<4803x5601 sparse matrix of type '<class 'numpy.float64'>'
	with 107032 stored elements in Compressed Sparse Row format>

In [6]:
#getting similarity scores between all pairs of movie summaries.
sim_matrix = sigmoid_kernel(tfidf_data, tfidf_data)
#sample: for movie at index 0, similarity scores wrt every other movie.
sim_matrix[0]

array([0.76166913, 0.76159416, 0.76159416, ..., 0.76159416, 0.76159416,
       0.76159416])

In [7]:
#dict of movie titles and indices.
index_list = np.arange(0,movies_data.shape[0])
title_list = movies_data['original_title']
title2idx = dict(zip(title_list,index_list))

In [8]:
#function to generate recommendations.
def recommend_movie(current_title):
    for title,idx in title2idx.items():
        if title==current_title:
            current_idx = idx
    sim_scores = sim_matrix[current_idx]
    sim_scores = list(enumerate(sim_scores))
    sim_scores_sorted = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    #getting top 10 recommendations based on similarity scores.
    top_similar_movies = sim_scores_sorted[1:11]
    
    print('Top Recommendations based on current movie: ',current_title)
    for i in top_similar_movies:
        for title, idx in title2idx.items():
            if i[0]==idx:
                print(i[0],title,'\n')
    

In [9]:
recommend_movie('Avatar')

Top Recommendations based on current movie:  Avatar
3604 Apollo 18 

2130 The American 

151 Beowulf 

529 Tears of the Sun 

311 The Adventures of Pluto Nash 

847 Semi-Pro 

1213 Aliens vs Predator: Requiem 

942 The Book of Life 

570 Ransom 

36 Transformers: Age of Extinction 



In [10]:
recommend_movie('Spectre')

Top Recommendations based on current movie:  Spectre
3162 Thunderball 

1343 Never Say Never Again 

4071 From Russia with Love 

11 Quantum of Solace 

3351 The Man with the Golden Gun 

1717 Safe Haven 

29 Skyfall 

4339 Dr. No 

1200 The Living Daylights 

1837 Romeo Must Die 



In [11]:
recommend_movie('The Dark Knight Rises')

Top Recommendations based on current movie:  The Dark Knight Rises
299 Batman Forever 

65 The Dark Knight 

428 Batman Returns 

2507 Slow Burn 

119 Batman Begins 

210 Batman & Robin 

9 Batman v Superman: Dawn of Justice 

1181 JFK 

3854 Batman: The Dark Knight Returns, Part 2 

