In [3]:
import pandas as pd
import numpy as np
import scipy.sparse as sp
import pickle

from implicit.als import AlternatingLeastSquares as ALS
from implicit.gpu.als import AlternatingLeastSquares as ALSgpu
from implicit.evaluation import precision_at_k, mean_average_precision_at_k, ndcg_at_k

from surprise import accuracy
from surprise.prediction_algorithms.matrix_factorization import SVD, SVDpp, NMF
import surprise.dump as sdump

# Implicit

In [176]:
user_items = sp.load_npz('../data/X_1500k.npz')

top_movies = pd.read_csv('../data/movies_top_100.csv')

uid_to_idx = dict()
with open('../data/uid_to_idx_1500k.pkl', 'rb') as f:
    uid_to_idx = pickle.load(f)

idx_to_mid = dict()
with open('../data/idx_to_mid_1500k.pkl', 'rb') as f:
    idx_to_mid = pickle.load(f)

In [9]:
model = ALS()
model = model.load('als.npz')



In [177]:
def get_recommendations(user_id, N=20, exclude_watched=True):
    # if user already exist in our matrix
    # recommend based on model
    if user_id in uid_to_idx:
        uid = uid_to_idx[user_id]
        recs = recommend(uid, N, exclude_watched)
    
    # new user
    # recommend top 100 movies randomly
    else:
        ids = top_movies.sample(n=N)
        recs = pd.DataFrame({"movies": ids['movie_id']})
        
    return ','.join(recs['movies'])

In [178]:
def recommend(uid, N=20, exclude_watched=True):
    ids, scores = model.recommend(uid, user_items[uid], N=N, filter_already_liked_items=exclude_watched)
    movies = [idx_to_mid[mid] for mid in ids]
    df = pd.DataFrame({"movies": movies, "score": scores, "already_liked": np.in1d(ids, user_items[uid].indices)})
    return df

In [179]:
recommend(0)

Unnamed: 0,movies,score,already_liked
0,british+intelligence+1940,0.386924,False
1,marie+antoinette+1938,0.330448,False
2,the+i+inside+2004,0.306735,False
3,the+deep+blue+sea+2011,0.195668,False
4,train+of+life+1998,0.149616,False
5,joan+rivers+a+piece+of+work+2010,0.14293,False
6,requiem+for+a+heavyweight+1962,0.125145,False
7,the+legend+of+drunken+master+1994,0.120198,False
8,silent+fall+1994,0.116761,False
9,evening+2007,0.111301,False


In [180]:
get_recommendations(54948)

'british+intelligence+1940,marie+antoinette+1938,the+i+inside+2004,the+deep+blue+sea+2011,train+of+life+1998,joan+rivers+a+piece+of+work+2010,requiem+for+a+heavyweight+1962,the+legend+of+drunken+master+1994,silent+fall+1994,evening+2007,texas+killing+fields+2011,head+over+heels+2001,your+highness+2011,the+godfather+part+iii+1990,very+bad+things+1998,swing+kids+1993,wild+oranges+1924,looker+1981,head+above+water+1996,friends+with+benefits+2011'

In [181]:
get_recommendations(1000000)

'val+lewton+the+man+in+the+shadows+2007,platoon+1986,the+family+stone+2005,secret+sunshine+2007,breaking+point+1994,bells+from+the+deep+1993,wheres+poppa+1970,loft+2010,melvin+goes+to+dinner+2003,the+forgotten+one+1989,jekyll+and+hyde+...+together+again+1982,jet+lag+2002,indiana+jones+and+the+kingdom+of+the+crystal+skull+2008,a+better+life+2011,the+rock+1996,the+impostors+1998,taxi+driver+1976,desperate+living+1977,the+outlaw+1943,trapeze+1956'

In [189]:
%%timeit
get_recommendations(54948)

498 µs ± 7.23 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


# Explicit

In [182]:
_, nmf = sdump.load("nmf.pkl")

uids = set(np.load('../data/uid_35k.npy').flatten())

idx_to_mid = dict()
with open('../data/idx_to_mid_1500k.pkl', 'rb') as f:
    idx_to_mid = pickle.load(f)

In [183]:
def get_recommendations(user_id, N=20):
    # if user already exist in our matrix
    # recommend based on model
    if user_id in uids:
        recs = recommend(user_id, N)
    
    # new user
    # recommend top 100 movies randomly
    else:
        ids = top_movies.sample(n=N)
        recs = pd.DataFrame({"movies": ids['movie_id']})
        
    return ','.join(recs['movies'])

In [184]:
def recommend(uid, N=20):
    ratings = []
    for mid in range(len(idx_to_mid)):
        ratings.append(nmf.predict(str(uid), str(mid))[3])

    ratings = np.array(ratings)
    ids = np.argsort(ratings)[-N:]
    
    movies = [idx_to_mid[mid] for mid in ids]
    scores = [ratings[mid] for mid in ids]
    
    df = pd.DataFrame({"movies": movies, "score": scores})
    return df

In [185]:
recommend(36583)

Unnamed: 0,movies,score
0,scrooge+1951,5.0
1,tsotsi+2005,5.0
2,kikis+delivery+service+1989,5.0
3,cabaret+1972,5.0
4,spider-man+2002,5.0
5,doctor+zhivago+1965,5.0
6,edward+scissorhands+1990,5.0
7,pieces+of+april+2003,5.0
8,manhattan+1979,5.0
9,rebel+without+a+cause+1955,5.0


In [186]:
get_recommendations(36583)

'scrooge+1951,tsotsi+2005,kikis+delivery+service+1989,cabaret+1972,spider-man+2002,doctor+zhivago+1965,edward+scissorhands+1990,pieces+of+april+2003,manhattan+1979,rebel+without+a+cause+1955,persona+1966,to+catch+a+thief+1955,a+simple+plan+1998,breakfast+at+tiffanys+1961,baran+2001,hoop+dreams+1994,terms+of+endearment+1983,broken+flowers+2005,miss+congeniality+2000,forrest+gump+1994'

In [188]:
get_recommendations(1000000)

'the+affair+of+the+necklace+2001,tuck+everlasting+2002,elf+2003,ghosts...+of+the+civil+dead+1989,my+fair+lady+1964,men+in+black+1997,veronica+mars+2014,turn+me+on_+dammit+2011,tidal+wave+2009,five+and+ten+1931,the+champ+1931,la+cage+aux+folles+ii+1980,the+spiders+-+the+golden+sea+1919,sabotage+2014,the+commitments+1991,open+water+2004,m.+butterfly+1993,amour+2012,diamond+men+2000,the+endless+summer+2+1994'

In [190]:
%%timeit
get_recommendations(36583)

53.9 ms ± 829 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
