In [8]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances


In [9]:
# Load the movie metadata CSV into a DataFrame. The path is relative, so the
# file must be in the notebook's working directory.
data = pd.read_csv('tmdb_5000_movies.csv')

In [10]:
def genre_keywords_to_string(row):
  """Build one space-separated text string from a row's genres and keywords.

  Both ``row['genres']`` and ``row['keywords']`` are parsed as JSON lists of
  objects carrying a ``'name'`` key. All whitespace inside each name is
  removed (so a multi-word name becomes a single token), the names of each
  column are joined with single spaces, and the genre text is followed by a
  space and then the keyword text.
  """
  def _names_as_tokens(column):
    # Decode the JSON list stored in this column and squeeze every entry's
    # name down to a whitespace-free token.
    tokens = []
    for entry in json.loads(row[column]):
      tokens.append(''.join(entry['name'].split()))
    return ' '.join(tokens)

  genre_text = _names_as_tokens('genres')
  keyword_text = _names_as_tokens('keywords')
  return genre_text + ' ' + keyword_text

In [20]:
# Build the combined genre/keyword text for every row (axis=1 passes each row
# to the function). The TF-IDF cell reads this column, so on a fresh kernel
# this cell has to run before it.
data['genre_keywords'] = data.apply(genre_keywords_to_string, axis=1)

In [12]:
# Vectorize the combined genre/keyword text; max_features caps the vocabulary
# at 2000 terms.
tfidf = TfidfVectorizer(max_features=2000)
tfidf_matrix = tfidf.fit_transform(data['genre_keywords'])
# Display (number of movies, number of retained terms) as a sanity check.
tfidf_matrix.shape

(4803, 2000)

#Generate a mapping from movie title -> index in the dataframe (titles may repeat, so one title can map to several indices)

In [21]:
# Series keyed by title whose values are the DataFrame's index labels.
# A title that appears more than once yields a Series (not a scalar) on lookup.
mov2idx = pd.Series(data.index, index=data['title'])

#Create a function that generates recommendations from movie title as input


In [14]:
def recommend(title, top_n=5):
  """Return the titles of the movies most similar to ``title``.

  Similarity is the cosine similarity between the query movie's row of the
  notebook-level ``tfidf_matrix`` and every row of that matrix.

  Args:
    title: Movie title to look up in the notebook-level ``mov2idx`` Series.
    top_n: Maximum number of recommendations to return (default 5, which
      matches the previous hard-coded behaviour).

  Returns:
    The ``data['title']`` entries of the highest-scoring rows, best match
    first. The query row itself is never included.

  Raises:
    KeyError: if ``title`` is not present in ``mov2idx``.
  """
  idx = mov2idx[title]
  # A title shared by several movies makes the lookup return a Series instead
  # of a scalar. The check must be isinstance(): comparing the value with the
  # pd.Series class is never a valid test and raises on a Series.
  if isinstance(idx, pd.Series):
    idx = idx.iloc[0]  # use the first movie carrying this title
  q = tfidf_matrix[idx]
  scores = cosine_similarity(q, tfidf_matrix).flatten()
  # Rank best-first, then drop the query row explicitly. Skipping position 0
  # would be wrong whenever another row ties with the query (identical or
  # all-zero feature vectors), because the query is then not guaranteed to
  # sort first.
  ranked_idx = (-scores).argsort()
  recommended_idx = ranked_idx[ranked_idx != idx][:top_n]
  return data['title'].iloc[recommended_idx]

In [15]:
# Spot-check: print the titles recommend() returns for 'Scream 3'.
print("Recommendations similar to Scream 3: ")
print(recommend('Scream 3'))


Recommendations similar to Scream 3: 
3902    Friday the 13th Part VI: Jason Lives
4628                          Graduation Day
4053        Friday the 13th: A New Beginning
4048                             The Calling
1084                         The Glimmer Man
Name: title, dtype: object


In [16]:
# Spot-check: print the titles recommend() returns for 'The Godfather'.
print("Recommendations similar to The Godfather: ")
print(recommend('The Godfather'))


Recommendations similar to The Godfather: 
3293                        10th & Wolf
4135    Gangster's Paradise: Jerusalema
4772                       Down Terrace
877                          Black Mass
4392                           Safe Men
Name: title, dtype: object


In [17]:
# Spot-check: print the titles recommend() returns for 'The Dark Knight'.
print("Recommendations similar to The Dark Knight: ")
print(recommend('The Dark Knight'))


Recommendations similar to The Dark Knight: 
3                   The Dark Knight Rises
119                         Batman Begins
428                        Batman Returns
9      Batman v Superman: Dawn of Justice
210                        Batman & Robin
Name: title, dtype: object


In [18]:
# Spot-check: print the titles recommend() returns for 'Runaway Bride'.
print("Recommendations similar to Runaway Bride: ")
print(recommend('Runaway Bride'))


Recommendations similar to Runaway Bride: 
4115                    House of D
2325    My Big Fat Greek Wedding 2
4604         It Happened One Night
3313                  An Education
2689            Our Family Wedding
Name: title, dtype: object


In [19]:
# Spot-check: print the titles recommend() returns for 'Mortal Kombat'.
print("Recommendations similar to Mortal Kombat: ")
print(recommend('Mortal Kombat'))


Recommendations similar to Mortal Kombat: 
1611              Mortal Kombat: Annihilation
1670                       DOA: Dead or Alive
3856              In the Name of the King III
1001    Street Fighter: The Legend of Chun-Li
2237                        Alone in the Dark
Name: title, dtype: object
