In [124]:
import numpy as np

from spotlight.cross_validation import random_train_test_split
from spotlight.datasets.movielens import get_movielens_dataset
from spotlight.evaluation import rmse_score
from spotlight.factorization.explicit import ExplicitFactorizationModel

# Quick-start smoke test: load MovieLens 100K, hold out a test split,
# train for a single epoch and score the held-out interactions.
dataset = get_movielens_dataset(variant='100K')

# Seed the split so the same interactions land in train/test on every run
# (the later split in this notebook uses the same RandomState(42) convention).
train, test = random_train_test_split(dataset, random_state=np.random.RandomState(42))

# n_iter=1 trains for one epoch only; all other hyperparameters are library defaults.
# NOTE(review): the model itself is not seeded here, so rmse may presumably
# still vary between runs — confirm whether that matters for this cell.
model = ExplicitFactorizationModel(n_iter=1)
model.fit(train)

# Root-mean-squared error of the model's predictions on the held-out split.
rmse = rmse_score(model, test)

In [125]:
from spotlight.datasets.movielens import get_movielens_dataset

# Load the MovieLens 100K variant and bind it to the notebook-wide `dataset`.
dataset = get_movielens_dataset(variant='100K')
# Printing the object shows its summary line (users x items x interactions).
print(dataset)

<Interactions dataset (944 users x 1683 items x 100000 interactions)>


In [126]:
import torch

from spotlight.factorization.explicit import ExplicitFactorizationModel

# Configure the explicit-feedback factorization model.
# Nothing is trained here; fitting happens in a later cell.
model = ExplicitFactorizationModel(
    loss='regression',
    embedding_dim=128,    # latent dimensionality of the embeddings
    n_iter=10,            # number of training epochs
    batch_size=1024,      # minibatch size
    l2=1e-9,              # strength of L2 regularization
    learning_rate=1e-3,
    use_cuda=torch.cuda.is_available(),  # True only when torch reports CUDA as available
)

In [127]:
from spotlight.cross_validation import random_train_test_split

# Split the interactions into train and test sets. The explicit
# RandomState(42) fixes the seed so the split is repeatable across runs.
train, test = random_train_test_split(dataset, random_state=np.random.RandomState(42))

# Show the summary of each split (the recorded output reports 80000 / 20000 interactions).
print('Split into \n {} and \n {}.'.format(train, test))

Split into 
 <Interactions dataset (944 users x 1683 items x 80000 interactions)> and 
 <Interactions dataset (944 users x 1683 items x 20000 interactions)>.


In [128]:
# Train on the training split. verbose=True prints the loss after each epoch;
# in the recorded output it falls every epoch as the model fits the data.
model.fit(train, verbose=True)

Epoch 0: loss 13.115769977811016
Epoch 1: loss 7.343487673167941
Epoch 2: loss 1.7594345792939392
Epoch 3: loss 1.0705936551094055
Epoch 4: loss 0.9445638196377815
Epoch 5: loss 0.8954034757010544
Epoch 6: loss 0.8691431676285176
Epoch 7: loss 0.8525760332240334
Epoch 8: loss 0.8426664230189745
Epoch 9: loss 0.8289334638209283


In [131]:
from spotlight.evaluation import rmse_score

# Root-mean-squared error on the data the model was fitted on and on the
# held-out split; lower is better.
train_rmse = rmse_score(model, train)
test_rmse = rmse_score(model, test)

# Report both scores to three decimals. Comparing them shows how well the
# model generalises beyond the training interactions.
print('Train RMSE {:.3f}, test RMSE {:.3f}'.format(train_rmse, test_rmse))

Train RMSE 0.895, test RMSE 0.939


In [132]:
# Predict scores for user id 1. The recorded output is a float32 array,
# presumably one predicted rating per item — confirm against the Spotlight docs.
model.predict(user_ids=1)

array([0.13233915, 3.9321253 , 3.2043025 , ..., 1.6875831 , 1.8348737 ,
       1.9538184 ], dtype=float32)

In [136]:
def get_movielens_dataset(movieId, test, movies=None):
    """Return the title record(s) of the movie with the given id.

    NOTE(review): this function reuses the name of the Spotlight loader
    imported in earlier cells. Once this cell has run, a call such as
    ``get_movielens_dataset(variant='100K')`` fails until the import is
    executed again. Consider renaming this helper.

    Parameters
    ----------
    movieId
        Value compared with ``==`` against the ``'movieId'`` column.
    test
        Unused. Kept so that existing two-argument calls keep working.
    movies : optional
        Table to search. It must support boolean-mask row selection,
        ``[['title']]`` column selection and ``to_dict(orient='records')``
        (e.g. a pandas DataFrame with ``'movieId'`` and ``'title'`` columns).
        When omitted, the notebook-level ``dataset`` is used, as before.
        NOTE(review): in this notebook ``dataset`` prints as a Spotlight
        ``Interactions`` object, which presumably does not offer that
        interface — confirm, and pass a movie metadata table explicitly.

    Returns
    -------
    list of dict
        One ``{'title': ...}`` record per matching row; an empty list when
        no row matches.
    """
    # Fall back to the global only when no table is supplied, so the original
    # two-argument call behaves exactly as it did.
    table = dataset if movies is None else movies
    matches = table[table['movieId'] == movieId]
    return matches[['title']].to_dict(orient='records')
