In [3]:
import pickle
import pandas as pd
import numpy as np
from ast import literal_eval
from surprise import Reader, Dataset, SVD
from surprise.model_selection.validation import cross_validate
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity

In [4]:
filename = 'svd.pickle'
# Load the pickled `svd` object (used later through `svd.predict`).
# The context manager closes the file handle once the load finishes.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load files produced by a trusted source.
with open(filename, 'rb') as svd_file:
    svd = pickle.load(svd_file)

In [5]:
dataname = 'dataset.pickle'
# Load the pickled `smd` object (indexed below by 'soup', 'title' and 'id').
# The context manager closes the file handle once the load finishes.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load files produced by a trusted source.
with open(dataname, 'rb') as dataset_file:
    smd = pickle.load(dataset_file)

In [6]:
# Count unigrams and bigrams in the 'soup' column, ignoring English stop words.
# min_df=1 keeps every term (any vocabulary term occurs in at least one
# document), which matches the previous min_df=0; an integer min_df below 1 is
# rejected by parameter validation in recent scikit-learn releases.
count = CountVectorizer(analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english')
count_matrix = count.fit_transform(smd['soup'])

# Pairwise cosine similarity between every pair of rows of the count matrix.
cosine_sim = cosine_similarity(count_matrix, count_matrix)

# Renumber rows so the index values line up with row positions in `cosine_sim`.
# NOTE(review): reset_index() keeps the old index as a column, so re-running
# this cell without reloading `smd` changes the frame again.
smd = smd.reset_index()
titles = smd['title']

# Title -> row index lookup; a title that occurs more than once maps to
# several index values.
indices = pd.Series(smd.index, index=smd['title'])

In [7]:
def convert_int(x):
    """Convert ``x`` to ``int``, returning NaN when the conversion fails.

    Parameters
    ----------
    x : object
        Value to convert (e.g. a number, a numeric string, ``None`` or NaN).

    Returns
    -------
    int or float
        ``int(x)`` on success, otherwise ``np.nan``.
    """
    try:
        return int(x)
    # Only conversion failures map to NaN: TypeError (e.g. None), ValueError
    # (e.g. non-numeric text or NaN) and OverflowError (infinity). Anything
    # else, such as KeyboardInterrupt, is allowed to propagate.
    except (TypeError, ValueError, OverflowError):
        return np.nan

# Build the title-indexed table of movieId / id pairs in one chain:
# read the two link columns, coerce tmdbId through convert_int, rename it to
# 'id', join against the titles in `smd`, and index the result by title.
id_map = (
    pd.read_csv('dataset/links_small.csv')[['movieId', 'tmdbId']]
    .assign(tmdbId=lambda links: links['tmdbId'].apply(convert_int))
    .rename(columns={'tmdbId': 'id'})
    .merge(smd[['title', 'id']], on='id')
    .set_index('title')
)

# Same rows keyed by 'id' instead of title, for id -> movieId lookups.
indices_map = id_map.set_index('id')

In [8]:
def hybrid(userId, title, n_candidates=25, top_n=10):
    """Recommend movies similar to ``title``, re-ranked for ``userId``.

    The rows most similar to ``title`` according to ``cosine_sim`` are taken
    as candidates, each candidate gets an estimate from ``svd.predict``, and
    the candidates with the highest estimates are returned.

    Parameters
    ----------
    userId : object
        User identifier passed unchanged to ``svd.predict``.
    title : str
        Title to look up in ``indices``. When several rows share the title,
        the first one is used.
    n_candidates : int, optional
        Number of most-similar rows to score (default 25).
    top_n : int, optional
        Number of records to return (default 10).

    Returns
    -------
    str
        JSON array of ``{"title": ..., "id": ...}`` records ordered by
        descending estimate.

    Raises
    ------
    KeyError
        If ``title`` is not in ``indices`` or a candidate ``id`` is not in
        ``indices_map``.
    """
    idx = indices[title]
    if isinstance(idx, pd.Series):
        # Duplicate titles yield a Series of row indices; use the first match.
        idx = idx.iloc[0]
    idx = int(idx)

    ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)
    # Drop the queried row by position rather than assuming it sorts first:
    # another row with an equal top score could otherwise push it into the
    # candidate list.
    movie_indices = [row for row, _ in ranked if row != idx][:n_candidates]

    # Copy so that adding the 'est' column never writes into a view of `smd`.
    movies = smd.iloc[movie_indices][['title', 'id']].copy()
    movies['est'] = movies['id'].apply(
        lambda movie_id: svd.predict(userId, indices_map.loc[movie_id]['movieId']).est
    )
    movies = movies.sort_values('est', ascending=False).drop(columns='est')
    return movies.head(top_n).to_json(orient='records')

In [9]:
# Example: recommendations for user 16 seeded with 'The Martian'.
hybrid(userId=16, title='The Martian')

'[{"title":"Blade Runner","id":78},{"title":"Alien","id":348},{"title":"Interstellar","id":157336},{"title":"Matchstick Men","id":7270},{"title":"Jurassic Park","id":329},{"title":"Gladiator","id":98},{"title":"White Squall","id":10534},{"title":"Thelma & Louise","id":1541},{"title":"Someone to Watch Over Me","id":31650},{"title":"Body of Lies","id":12113}]'