# Explicit-feedback matrix factorization on MovieLens 100K with Spotlight (https://github.com/maciejkula/spotlight)

In [27]:
import torch
import numpy as np
from spotlight.cross_validation import random_train_test_split
from spotlight.datasets.movielens import get_movielens_dataset
from spotlight.evaluation import rmse_score
from spotlight.factorization.explicit import ExplicitFactorizationModel

In [24]:
# Load the MovieLens ratings through Spotlight's dataset helper; the variant
# string selects which MovieLens release is used.
MOVIELENS_VARIANT = '100K'
dataset = get_movielens_dataset(variant=MOVIELENS_VARIANT)

# Show the summary repr (users x items x interactions).
print(dataset)

<Interactions dataset (944 users x 1683 items x 100000 interactions)>


In [25]:
# Explicit-feedback factorization model fitted with a regression loss on the ratings.
# A seeded RandomState is passed so that model initialisation and minibatch
# shuffling are repeatable across "Restart & Run All", matching the seeded
# train/test split used later in this notebook.
# NOTE(review): GPU runs may still differ slightly between executions — confirm
# if bit-for-bit reproducibility is required.
model = ExplicitFactorizationModel(loss='regression',
                                   embedding_dim=128,  # latent dimensionality
                                   n_iter=10,  # number of epochs of training
                                   batch_size=1024,  # minibatch size
                                   l2=1e-9,  # strength of L2 regularization
                                   learning_rate=1e-3,
                                   use_cuda=torch.cuda.is_available(),  # use the GPU only when one is present
                                   random_state=np.random.RandomState(42))

In [28]:
# Fixed-seed generator so the same interactions land in each split on every run.
split_rng = np.random.RandomState(42)
train, test = random_train_test_split(dataset, random_state=split_rng)

# Report the size of both splits.
print('Split into \n {} and \n {}.'.format(train, test))

Split into 
 <Interactions dataset (944 users x 1683 items x 80000 interactions)> and 
 <Interactions dataset (944 users x 1683 items x 20000 interactions)>.


In [29]:
# Fit on the training split only; verbose=True prints the loss after every epoch.
model.fit(train, verbose=True)

  epoch_loss += loss.data[0]


Epoch 0: loss 13.098525047302246
Epoch 1: loss 7.324594020843506
Epoch 2: loss 1.7548301219940186
Epoch 3: loss 1.0716947317123413
Epoch 4: loss 0.9445939660072327
Epoch 5: loss 0.8987710475921631
Epoch 6: loss 0.8789787888526917
Epoch 7: loss 0.8615714907646179
Epoch 8: loss 0.8535652756690979
Epoch 9: loss 0.8428651690483093


In [32]:
# Score the fitted model on both splits (train first, then test) so the gap
# between the two numbers gives a rough view of over-fitting.
train_rmse, test_rmse = (rmse_score(model, split) for split in (train, test))

print('Train RMSE {:.3f}, test RMSE {:.3f}'.format(train_rmse, test_rmse))

Train RMSE 0.905, test RMSE 0.944


In [61]:
print(train)
# Take the user id attached to the third held-out interaction.
uid = test.user_ids[2]
print(uid)
# Called with a user id only, predict() returns one score per item, so the
# position in the array is the item id.
preds = model.predict(uid)
print(preds)

# Highest predicted rating for this user and the item id it belongs to.
top_item_id = int(np.argmax(preds))
top_score = preds[top_item_id]
print((top_score, top_item_id))
print(top_item_id)
# top_item_id already *is* the recommended item's id. dataset.item_ids holds the
# item id of each individual interaction, so indexing it with an item id returns
# the item of an unrelated rating rather than a title, and the Interactions
# object exposes no title metadata. To show a movie name, map top_item_id
# through the MovieLens item file (u.item) instead.
# NOTE(review): the dataset reports one more user/item than MovieLens 100K
# contains, so id 0 looks like an unused padding slot — confirm and skip it
# (preds[1:]) if it can ever win the argmax.


<Interactions dataset (944 users x 1683 items x 80000 interactions)>
218
[0.12084319 3.5884275  2.970261   ... 1.5114372  1.7241297  1.7488763 ]
(4.25579, 408)
408


In [63]:
# Exploratory: list every attribute and method available on the Interactions object.
dir(dataset)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_check',
 'item_ids',
 'num_items',
 'num_users',
 'ratings',
 'timestamps',
 'to_sequence',
 'tocoo',
 'tocsr',
 'user_ids',
 'weights']