In [12]:
import pandas as pd

In [39]:
# Sample user ratings used to exercise the recommender. In the app these
# records are expected to come from its database; each one carries the rated
# movie's id, the cluster that movie belongs to, and the rating the user gave.
ratings = [
  {'movie_id': 207, 'cluster': 0, 'rating': 4},
  {'movie_id': 637, 'cluster': 6, 'rating': 3},
]

In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [37]:
def load_cluster_movies(cluster, movies_df=None):
  '''Return the movies that belong to `cluster`, re-indexed from 0.

  Parameters
  ----------
  cluster : value compared against the 'cluster' column.
  movies_df : pandas.DataFrame, optional
      Frame to filter; must have a 'cluster' column. When omitted, the
      notebook-level global `movies` is used (legacy behaviour), which
      raises NameError if no such global exists in the kernel.

  Returns
  -------
  pandas.DataFrame
      The matching rows with a fresh 0..n-1 index (empty if no row matches).
  '''
  if movies_df is None:
    # Legacy fallback: callers that pass nothing rely on a global `movies`.
    movies_df = movies
  in_cluster = movies_df['cluster'] == cluster
  # Reset the index so row labels equal row positions; downstream code
  # indexes a similarity matrix by position.
  return movies_df.loc[in_cluster].reset_index(drop=True)

def get_similarities(data):
  '''Compute pairwise cosine similarity between the movies' overviews.

  Each row's 'overview' text is turned into a TF-IDF vector (uni- to
  tri-grams, English stop words removed, terms kept only if they occur in
  at least 2 documents and in at most 70% of them), then every row is
  compared against every other row.

  Parameters
  ----------
  data : pandas.DataFrame
      Must have an 'overview' column.

  Returns
  -------
  Square matrix where entry [i, j] is the cosine similarity between the
  overviews at row positions i and j of `data`.
  '''
  # Missing overviews (e.g. blank CSV cells read as NaN) would make the
  # vectorizer raise, so treat them as empty documents instead.
  overviews = data['overview'].fillna('').astype(str)

  tfidf = TfidfVectorizer(
    min_df=2,
    max_df=0.7,
    ngram_range=(1,3),
    stop_words='english'
  )
  tfidf_matrix = tfidf.fit_transform(overviews)
  cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
  return cosine_sim

def get_movie_recommendations(movie_id, data, similarities, top_n=20):
  '''Return the rows of `data` most similar to the movie `movie_id`.

  Parameters
  ----------
  movie_id : value looked up in the 'tmdb_id' column.
  data : pandas.DataFrame
      Must have a 'tmdb_id' column; row positions must line up with the
      rows/columns of `similarities`.
  similarities : square matrix (indexable as similarities[i])
      similarities[i][j] is the similarity between rows i and j of `data`.
  top_n : int, default 20
      Maximum number of recommendations to return.

  Returns
  -------
  pandas.DataFrame
      Up to `top_n` rows of `data`, most similar first, never including the
      query movie itself.

  Raises
  ------
  KeyError
      If `movie_id` is not present in data['tmdb_id'].
  '''
  # Locate the movie by row *position* so the lookup stays aligned with the
  # similarity matrix and with `iloc`, whatever index labels `data` carries.
  matches = (data['tmdb_id'] == movie_id).to_numpy().nonzero()[0]
  if len(matches) == 0:
    raise KeyError(movie_id)
  # If the id is duplicated, use its first occurrence.
  movie_pos = int(matches[0])

  # Drop the query movie explicitly rather than assuming it sorts first:
  # with tied scores or an all-zero vector it is not guaranteed to be on top.
  candidates = [
    (pos, score)
    for pos, score in enumerate(similarities[movie_pos])
    if pos != movie_pos
  ]
  # sorted() is stable, so equal scores keep their original row order.
  candidates = sorted(candidates, key=lambda pair: pair[1], reverse=True)
  recommendations_indices = [pos for pos, _ in candidates[:top_n]]

  return data.iloc[recommendations_indices]

In [43]:
def get_user_recommendations(inputs, movies_path='/content/drive/MyDrive/movies.csv', min_rating=3):
  '''Recommend movies similar to the ones the user rated well.

  For every rating of at least `min_rating`, the rated movie's cluster is
  selected from the catalogue, overview similarities are computed inside
  that cluster, and the movies most similar to the rated one are collected.
  The combined result is de-duplicated and sorted by 'score', highest first.
  The result is not truncated; callers can take `.head(n)`.

  Parameters
  ----------
  inputs : iterable of dict
      Each dict has the keys 'movie_id', 'cluster' and 'rating'.
  movies_path : str, optional
      CSV file with the movie catalogue. It must provide the columns
      'cluster', 'tmdb_id', 'overview' and 'score'.
  min_rating : number, default 3
      Ratings below this value are ignored.

  Returns
  -------
  pandas.DataFrame
      Recommended catalogue rows sorted by 'score' descending; an empty
      frame with the catalogue's columns when nothing qualifies.
  '''
  movies = pd.read_csv(movies_path)

  # cluster -> [cluster frame, similarity matrix or None]; lets several
  # ratings in one cluster share a single (expensive) similarity computation.
  cluster_cache = {}
  collected = []

  for user_rating in inputs:
    if user_rating['rating'] < min_rating:
      continue

    cluster = user_rating['cluster']
    if cluster not in cluster_cache:
      # Filter the freshly loaded catalogue directly: load_cluster_movies()
      # by default reads a notebook-global `movies`, which the local frame
      # loaded above does not populate.
      cluster_frame = movies.loc[movies['cluster'] == cluster].reset_index(drop=True)
      cluster_cache[cluster] = [cluster_frame, None]
    entry = cluster_cache[cluster]
    cluster_movies = entry[0]

    movie_id = user_rating['movie_id']
    # Check membership before the TF-IDF work so unknown movies cost nothing.
    if movie_id not in cluster_movies['tmdb_id'].values:
      continue

    if entry[1] is None:
      entry[1] = get_similarities(cluster_movies)
    collected.append(
      get_movie_recommendations(movie_id, cluster_movies, entry[1])
    )

  if not collected:
    # Nothing qualified: return an empty frame that still has a 'score'
    # column instead of failing in sort_values().
    return movies.iloc[0:0]

  # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
  recommendations = pd.concat(collected).drop_duplicates()
  recommendations = recommendations.sort_values(by='score', ascending=False)
  return recommendations

In [50]:
# Preview the five highest-scored recommendations for the sample ratings.
get_user_recommendations(inputs=ratings).head(n=5)

Unnamed: 0.1,Unnamed: 0,tmdb_id,imdb_id,title,release_date,score,overview,genres,keywords,cluster
57,18776,8290,tt0060474,Don't Look Now: We're Being Shot At,1966-12-07,6.902806,"During World War II, two French civilians and ...",comedy|war,independent film,6
62,18781,304357,tt2404425,Woman in Gold,2015-04-10,6.85519,"Maria Altmann, an octogenarian Jewish refugee,...",drama,,6
78,78,7508,tt0986264,Taare Zameen Par,2007-12-21,6.750078,Ishaan Awasthi is an eight-year-old whose worl...,drama,japan|swordplay|treasure|samurai|sword|big fam...,0
135,135,9769,tt0119558,Lolita,1997-09-27,6.50379,Urbane professor Humbert Humbert marries a New...,drama|romance,berlin|neo-nazi|israel|nazi background|chauffe...,0
252,252,14794,tt0063850,If....,1968-12-19,6.15213,The film is a caustic portrait of a traditiona...,drama,,0
