In [116]:
import pickle
import pandas as pd
import numpy as np
from ast import literal_eval
from surprise import Reader, Dataset, SVD
from surprise.model_selection.validation import cross_validate
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity

In [117]:
# Load the pre-built artifacts this notebook depends on.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load artifacts that were produced by a trusted pipeline.
# Each file is opened in a `with` block so the handle is closed right after the
# load instead of being left open until garbage collection.
filename = 'svd.pickle'
with open(filename, 'rb') as fh:
    svd = pickle.load(fh)  # presumably a fitted surprise SVD model -- confirm

dataname = 'dataset.pickle'
with open(dataname, 'rb') as fh:
    smd = pickle.load(fh)  # presumably the movie metadata DataFrame -- confirm

with open('linkMap.pickle', 'rb') as fh:
    indices = pickle.load(fh)  # presumably the id -> movieId lookup -- confirm

In [118]:
class ContentsBased(object):
    """Content-based recommender using bag-of-words cosine similarity.

    A ``CountVectorizer`` (unigrams and bigrams, English stop words removed) is
    fitted on the ``'soup'`` column of ``data`` and the full pairwise cosine
    similarity matrix between rows is kept in memory.

    Attributes:
        smd: ``data`` after ``reset_index()``.
        count: the fitted ``CountVectorizer``.
        count_matrix: term-count matrix produced from ``smd['soup']``.
        cosine_sim: pairwise cosine similarity between rows of ``count_matrix``.
        titles: the ``'title'`` column of ``smd``.
        indices: Series mapping a title to its row position in ``smd``.
    """

    def __init__(self, data):
        """Fit the vectorizer on ``data['soup']`` and precompute similarities.

        Args:
            data: DataFrame providing at least the ``'soup'`` and ``'title'``
                columns.
        """
        self.smd = data
        # min_df=1 selects the same vocabulary as the former integer 0 (every
        # vocabulary term occurs in at least one document) while staying inside
        # the range that scikit-learn's parameter validation accepts.
        self.count = CountVectorizer(analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english')
        self.count_matrix = self.count.fit_transform(self.smd['soup'])
        self.cosine_sim = cosine_similarity(self.count_matrix, self.count_matrix)
        # After reset_index the index is 0..n-1, so the labels stored in
        # `indices` are also valid positions into `cosine_sim` and `titles`.
        self.smd = self.smd.reset_index()
        self.titles = self.smd['title']
        self.indices = pd.Series(self.smd.index, index=self.smd['title'])

    def _row_of(self, title):
        """Return the row position of ``title`` (first match when duplicated).

        Raises:
            KeyError: if ``title`` is not present in ``self.indices``.
        """
        idx = self.indices[title]
        # A title that occurs several times yields a Series of positions; using
        # it directly would select several similarity rows at once and make the
        # later sort fail, so settle on the first occurrence.
        if isinstance(idx, pd.Series):
            idx = idx.iloc[0]
        return int(idx)

    def calc_sim(self, title, top_n=29):
        """Rank the other rows by cosine similarity to ``title``.

        Args:
            title: title to look up in ``self.indices``.
            top_n: maximum number of neighbours to return; ``None`` returns all
                of them. The default of 29 matches the previous fixed slice.

        Returns:
            List of ``(row_position, similarity)`` tuples, most similar first.
            The row of ``title`` itself is never included.

        Raises:
            KeyError: if ``title`` is unknown.
        """
        idx = self._row_of(title)
        # Drop the query row explicitly rather than assuming it sorts first: a
        # row with zero self-similarity, or an identical row at a lower
        # position, would otherwise leave the query movie in its own results.
        candidates = [
            (pos, score)
            for pos, score in enumerate(self.cosine_sim[idx])
            if pos != idx
        ]
        # sorted() is stable, so ties keep their original row order.
        candidates = sorted(candidates, key=lambda x: x[1], reverse=True)
        return candidates[:top_n]

    def predict(self, title):
        """Return the titles of the rows most similar to ``title``.

        Returns:
            Series of titles selected from ``self.titles`` in ranking order.

        Raises:
            KeyError: if ``title`` is unknown.
        """
        sim_scores = self.calc_sim(title)
        movie_indices = [pos for pos, _ in sim_scores]
        return self.titles.iloc[movie_indices]

In [131]:
class Hybrid(object):
    """Re-rank content-based candidates with a rating model's estimates.

    Attributes:
        smd: DataFrame providing at least the ``'title'`` and ``'id'`` columns;
            rows are selected by position, so its order must match the data the
            content model was built from.
        svd: model exposing ``predict(user, movie_id)`` whose result has an
            ``est`` attribute.
        contentbase: object exposing ``calc_sim(title)`` and ``predict(title)``.
        indices_map: lookup indexed by the values of ``smd['id']`` with a
            ``'movieId'`` field.
    """

    def __init__(self, data, model, indices, contentbase):
        """Store the collaborators; no computation happens here."""
        self.smd = data
        self.svd = model
        self.contentbase = contentbase
        self.indices_map = indices

    def hybrid(self, user, title, top_n=20):
        """Return content-similar movies ordered by the user's estimated rating.

        Args:
            user: user identifier passed to ``self.svd.predict``.
            title: seed title passed to ``self.contentbase.calc_sim``.
            top_n: number of rows to return (default 20, the previous fixed
                value).

        Returns:
            DataFrame with the ``'title'`` and ``'id'`` columns, highest
            estimate first.

        Raises:
            KeyError: if ``title`` is unknown to the content model or a
                candidate ``id`` is missing from ``self.indices_map``.
        """
        sim_scores = self.contentbase.calc_sim(title)
        movie_indices = [pos for pos, _ in sim_scores]

        # Take an explicit copy: adding a column to the result of a chained
        # selection can trigger pandas' SettingWithCopyWarning.
        movies = self.smd.iloc[movie_indices][['title', 'id']].copy()
        movies['est'] = movies['id'].apply(
            lambda x: self.svd.predict(user, self.indices_map.loc[x]['movieId']).est
        )
        movies = movies.sort_values('est', ascending=False)
        # The estimate is only a sort key; it is not part of the result.
        return movies.drop(columns='est').head(top_n)

    def content_predict(self, title):
        """Delegate to the content model's ``predict`` for ``title``."""
        return self.contentbase.predict(title)

In [132]:
class Recommender:
    """Facade that loads the pickled artifacts and serves recommendations.

    Attributes:
        dataset: object unpickled from ``dataset_path``; used as a DataFrame
            with a ``'title'`` column.
        indices: object unpickled from ``indices_path``; handed to ``Hybrid``.
        svd: object unpickled from ``svd_path``; handed to ``Hybrid``.
        content: ``ContentsBased`` built from ``dataset``.
        hybrid: ``Hybrid`` combining ``dataset``, ``svd``, ``indices`` and
            ``content``.
    """

    def __init__(self, dataset_path='dataset.pickle', indices_path='linkMap.pickle',
                 svd_path='svd.pickle'):
        """Load the three artifacts and build both recommenders.

        Args:
            dataset_path: pickle holding the movie dataset.
            indices_path: pickle holding the id lookup passed to ``Hybrid``.
            svd_path: pickle holding the rating model passed to ``Hybrid``.

        The defaults are the file names that were previously hard-coded.
        """
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # these paths at trusted files.
        with open(dataset_path, 'rb') as f:
            self.dataset = pickle.load(f)
        with open(indices_path, 'rb') as f:
            self.indices = pickle.load(f)
        with open(svd_path, 'rb') as f:
            self.svd = pickle.load(f)

        self.content = ContentsBased(self.dataset)
        self.hybrid = Hybrid(self.dataset, self.svd, self.indices, self.content)

    def _has_title(self, title):
        """Return True when ``title`` appears in the dataset's title column.

        Only the ``'title'`` column is consulted. Testing membership against
        every cell of the frame would let a string from some other column pass
        the guard and then fail with ``KeyError`` in the content model.
        """
        return bool((self.dataset['title'] == title).any())

    def get_content_recommend(self, title):
        """Content-based recommendations for ``title``.

        Returns:
            ``{'content': [titles...]}`` or ``None`` when ``title`` is unknown.
        """
        if not self._has_title(title):
            return None
        return {'content': list(self.content.predict(title))}

    def get_user_recommend(self, title, user):
        """Hybrid recommendations for ``user`` seeded with ``title``.

        Returns:
            ``{'hybrid': <result of Hybrid.hybrid>}`` or ``None`` when ``title``
            is unknown.
        """
        if not self._has_title(title):
            return None
        return {'hybrid': self.hybrid.hybrid(user, title)}

In [133]:
# Build the recommender once; construction unpickles the dataset, the id
# lookup and the rating model, then fits the content-based model.
a = Recommender()

In [122]:
# Content-only recommendations seeded with a single title; the call returns a
# dict with a 'content' list, or None when the title is not in the dataset.
result = a.get_content_recommend('The Martian')
print(result)

{'content': ['Blade Runner', 'Prometheus', 'Gladiator', 'The Duellists', 'American Gangster', 'Thelma & Louise', 'Exodus: Gods and Kings', 'Kingdom of Heaven', 'Alien', 'White Squall', 'G.I. Jane', 'Legend', 'Robin Hood', 'Body of Lies', 'The Counselor', 'Someone to Watch Over Me', 'Hannibal', 'Matchstick Men', 'Black Rain', 'Black Hawk Down', 'The Fountain', 'Interstellar', 'Jurassic Park', 'Spacehunter: Adventures in the Forbidden Zone', 'Stargate: Continuum', 'Stargate', 'Independence Day', '20,000 Leagues Under the Sea', 'Six-String Samurai']}


In [136]:
# Hybrid recommendations for user 16 seeded with 'The Martian'. The value is
# only bound to `result`; this statement does not display anything itself.
# NOTE(review): the saved output under this cell is a Series of titles, which
# this assignment would not print -- it looks stale; confirm by re-running.
result = a.get_user_recommend('The Martian', 16)

8613                    Interstellar
485                     Blade Runner
3912                 Black Hawk Down
2876                       Gladiator
987                            Alien
427                    Jurassic Park
4812                  Matchstick Men
7119                    Body of Lies
7068             Stargate: Continuum
2389        Someone to Watch Over Me
4447                   The Duellists
1816              Six-String Samurai
841     20,000 Leagues Under the Sea
2741                 Thelma & Louise
80                      White Squall
6881               American Gangster
8805          Exodus: Gods and Kings
648                 Independence Day
3630                      Black Rain
6190               Kingdom of Heaven
Name: title, dtype: object
