In [15]:
import numpy as np
import pandas as pd

from spotlight.datasets.movielens import get_movielens_dataset

dataset = get_movielens_dataset(variant='100K')

In [16]:
import torch

from spotlight.factorization.explicit import ExplicitFactorizationModel

model = ExplicitFactorizationModel(loss='regression',
                                   embedding_dim=128,  # latent dimensionality
                                   n_iter=10,  # number of epochs of training
                                   batch_size=1024,  # minibatch size
                                   l2=1e-9,  # strength of L2 regularization
                                   learning_rate=1e-3,
                                   use_cuda=torch.cuda.is_available())

In [17]:
from spotlight.cross_validation import random_train_test_split

train, test = random_train_test_split(dataset, random_state=np.random.RandomState(42))

print('Split into \n {} and \n {}.'.format(train, test))

Split into 
 <Interactions dataset (944 users x 1683 items x 80000 interactions)> and 
 <Interactions dataset (944 users x 1683 items x 20000 interactions)>.


In [18]:
model.fit(train, verbose=True)

Epoch 0: loss 13.114402420913116
Epoch 1: loss 7.324166165122503
Epoch 2: loss 1.7500920838947538
Epoch 3: loss 1.069020982784561
Epoch 4: loss 0.9418559557274927
Epoch 5: loss 0.8980849988852875
Epoch 6: loss 0.8718508972397333
Epoch 7: loss 0.8592295925828475
Epoch 8: loss 0.8483344278758085
Epoch 9: loss 0.8386729186094259


In [19]:
from spotlight.evaluation import rmse_score

train_rmse = rmse_score(model, train)
test_rmse = rmse_score(model, test)

print('Train RMSE {:.3f}, test RMSE {:.3f}'.format(train_rmse, test_rmse))

Train RMSE 0.902, test RMSE 0.943


In [72]:
dataset_path="/Users/hannah/Desktop/Agile Data Science/ml-latest/movies.csv"

outdict={}
with open(dataset_path,'r') as fh:
    for line in fh:
        if line.startswith('mov'): continue
        pline=line.strip().split(',')
        movie_id, title =pline[0], pline[1]
        if movie_id in outdict:continue
        outdict[movie_id]=title
            

def recommend_movies(user_id, dataset, model, n_movies=5):
    
    ratings=model.predict(user_ids=user_id)
    indices=np.argpartition(ratings,-n_movies)[-n_movies:]
    best_movie_ids=indices[np.argsort(ratings[indices])]
    movie_id= [dataset[i] for i in best_movie_ids]

    return [outdict[str(v)] for v in list(movie_id)]

In [76]:
recommend_movies(6, dataset.item_ids, model)

['Mary Poppins (1964)',
 'Lord of Illusions (1995)',
 'Striptease (1996)',
 'Bhaji on the Beach (1993)',
 '"Day the Sun Turned Cold']