In [157]:
import warnings
warnings.filterwarnings('ignore')

In [158]:
import torch

In [159]:
import spotlight

In [160]:
import numpy as np
import pandas as pd

In [161]:
# Build shallow and deep recommender models

In [162]:
from spotlight.datasets.movielens import get_movielens_dataset

dataset = get_movielens_dataset(variant='100K')

In [163]:
from spotlight.cross_validation import random_train_test_split

train, test = random_train_test_split(dataset)

In [164]:
print(train)

<Interactions dataset (944 users x 1683 items x 80000 interactions)>


In [165]:
print(test)

<Interactions dataset (944 users x 1683 items x 20000 interactions)>


In [166]:
# both train and test are Interactions datasets:

In [167]:
# Explicit factorization model

In [168]:
from spotlight.factorization.explicit import ExplicitFactorizationModel

model = ExplicitFactorizationModel( 
                                  loss='regression',
                                   embedding_dim=128,  # latent dimensionality
                                   n_iter=10,  # number of epochs of training
                                   batch_size=1024,  # minibatch size
                                   l2=1e-9,  # strength of L2 regularization
                                   learning_rate=1e-3,
                                   use_cuda=torch.cuda.is_available()
                                   )


In [169]:
model.fit(train, verbose = True)

Epoch 0: loss 13.102626993686338
Epoch 1: loss 7.343984546540659
Epoch 2: loss 1.760753965076012
Epoch 3: loss 1.0663905166372467
Epoch 4: loss 0.9374866161165358
Epoch 5: loss 0.8874881184553798
Epoch 6: loss 0.8622164001947716
Epoch 7: loss 0.844689649871633
Epoch 8: loss 0.8269687310049806
Epoch 9: loss 0.814429962936836


In [170]:
from spotlight.evaluation import rmse_score

train_rmse = rmse_score(model, train)
test_rmse = rmse_score(model, test)


In [171]:
print("The Train Root mean squared error is : " + str(train_rmse))
print("The Test Root mean squared error is : " + str(test_rmse))

The Train Root mean squared error is : 0.8882106
The Test Root mean squared error is : 0.9452395


In [172]:
# Implicit Factorization model

In [173]:
from spotlight.datasets.movielens import get_movielens_dataset

dataset = get_movielens_dataset(variant='100K')

In [174]:
from spotlight.cross_validation import random_train_test_split

train_2, test_2 = random_train_test_split(dataset)

In [175]:
from spotlight.factorization.implicit import ImplicitFactorizationModel

model_2 = ImplicitFactorizationModel(loss='pointwise',
                                       embedding_dim=32,
                                       n_iter=10,
                                       batch_size=256, 
                                       l2=0.0, 
                                       learning_rate=0.01)

In [176]:
model_2.fit(train_2, verbose = True)

Epoch 0: loss 0.7051847720869814
Epoch 1: loss 0.49396898352300017
Epoch 2: loss 0.45015634277377264
Epoch 3: loss 0.42219457468285726
Epoch 4: loss 0.40291141149715876
Epoch 5: loss 0.38861463302240584
Epoch 6: loss 0.37210562225347893
Epoch 7: loss 0.359981769285263
Epoch 8: loss 0.3469677418946458
Epoch 9: loss 0.3376966994791366


In [177]:
model_2.predict( user_ids = 5)

array([-8.477442 , 10.948789 , 14.368172 , ..., -6.5751863, -7.5927672,
       -9.1422825], dtype=float32)

In [178]:
model_2.predict( user_ids = 15)

array([-11.263647,  13.174006, -15.468727, ..., -10.127995,  -7.677381,
        -8.717113], dtype=float32)

In [179]:
# mean reciprocal rank 
from spotlight.evaluation import mrr_score

mrr = mrr_score(model_2, test_2)

In [180]:
# numpy array of shape num_users:
print(mrr)

[0.01358905 0.06459099 0.02376197 0.05453671 0.02463989 0.01652867
 0.01618702 0.03447073 0.00508511 0.01835612 0.00827755 0.02134035
 0.00915235 0.09351695 0.07196375 0.02005673 0.01659988 0.02774767
 0.15434551 0.02008614 0.04317795 0.05720446 0.00984205 0.01590831
 0.01577337 0.03938553 0.02091556 0.00914351 0.10471442 0.04615974
 0.02636593 0.02006841 0.04617265 0.01599524 0.05130753 0.02792756
 0.00854116 0.01135634 0.04146671 0.13709949 0.0467006  0.02977948
 0.00834788 0.03124541 0.08706658 0.00999641 0.02592908 0.11596851
 0.00979898 0.08743709 0.0220855  0.02729337 0.09623827 0.01731166
 0.10293222 0.03293043 0.01058861 0.01672781 0.00941641 0.01776872
 0.11148878 0.0124746  0.0438643  0.01734961 0.01391666 0.02563872
 0.01885218 0.032784   0.07769622 0.04349517 0.01443981 0.02373638
 0.01790698 0.01517111 0.01604054 0.00934572 0.06420635 0.02062058
 0.00666129 0.00940779 0.01776626 0.01542079 0.01753664 0.02701173
 0.00773667 0.01714286 0.04164801 0.08946923 0.00975567 0.0098

In [181]:
from spotlight.evaluation import precision_recall_score

prs = precision_recall_score(model_2, test_2, k  = 10)

In [182]:
print("The precision recall score is : " + str( prs))

The precision recall score is : (array([0.2, 0.3, 0. , 0.1, 0.2, 0.1, 0.2, 0.1, 0. , 0.1, 0. , 0.1, 0.3,
       0.1, 0.1, 0.1, 0. , 0.4, 0.1, 0. , 0.3, 0.1, 0. , 0. , 0. , 0.2,
       0. , 0. , 0.1, 0.1, 0. , 0. , 0.1, 0. , 0.2, 0. , 0. , 0. , 0. ,
       0.2, 0.2, 0.2, 0. , 0.3, 0.2, 0. , 0. , 0.2, 0.1, 0.2, 0. , 0.1,
       0.1, 0. , 0.1, 0.3, 0. , 0.1, 0.1, 0.2, 0.4, 0. , 0.2, 0.1, 0. ,
       0. , 0. , 0. , 0.1, 0.1, 0. , 0.2, 0.1, 0. , 0. , 0. , 0.3, 0. ,
       0. , 0. , 0. , 0.1, 0.1, 0. , 0. , 0. , 0.2, 0.1, 0. , 0.1, 0. ,
       0. , 0. , 0. , 0.2, 0.1, 0. , 0.1, 0.1, 0.1, 0. , 0.2, 0.2, 0.4,
       0.1, 0.1, 0. , 0.1, 0.1, 0.1, 0.1, 0. , 0. , 0. , 0.2, 0.2, 0.1,
       0.1, 0.2, 0.1, 0. , 0.1, 0.2, 0.1, 0. , 0.2, 0.2, 0.1, 0. , 0.5,
       0. , 0.1, 0.1, 0.1, 0. , 0.3, 0. , 0. , 0.1, 0.1, 0.1, 0. , 0.2,
       0. , 0.2, 0. , 0. , 0.2, 0.1, 0. , 0.3, 0.1, 0.1, 0.2, 0. , 0. ,
       0. , 0.3, 0.3, 0.1, 0.2, 0.1, 0.2, 0. , 0.1, 0.1, 0. , 0.3, 0. ,
       0. , 0.2, 0.1, 0.1, 0.1,

In [183]:
# Sequential Model

In [184]:
# CNNNet representation sequential model:

In [185]:
# generating synthetic interactions dataset

from spotlight.datasets.synthetic import generate_sequential

synthetic_dataset = generate_sequential(num_users=100,
                              num_items=1000,
                              num_interactions=10000,
                              concentration_parameter=0.01,
                              order=3)

In [186]:
from spotlight.cross_validation import user_based_train_test_split

train_3, test_3 = user_based_train_test_split(synthetic_dataset)


In [187]:
# Convert into sequence representations

train_3 = train_3.to_sequence()
test_3 = test_3.to_sequence()

In [188]:
from spotlight.sequence.implicit import ImplicitSequenceModel

model_implicit_sequence = ImplicitSequenceModel(n_iter=10,
                              representation='cnn',
                              loss='bpr',
                              embedding_dim=32,
                              batch_size=256,
                              l2=0.0, 
                              learning_rate=0.01)

In [189]:
model_implicit_sequence.fit(train_3, verbose = True)


Epoch 0: loss 0.4997377023100853
Epoch 1: loss 0.4939677119255066
Epoch 2: loss 0.4866984486579895
Epoch 3: loss 0.4726169556379318
Epoch 4: loss 0.45454241335392
Epoch 5: loss 0.4323286861181259
Epoch 6: loss 0.40687383711338043
Epoch 7: loss 0.39838360249996185
Epoch 8: loss 0.38531702756881714
Epoch 9: loss 0.3658946305513382


In [191]:
from spotlight.evaluation import sequence_mrr_score

mrr_sequence = sequence_mrr_score(model_implicit_sequence, test_3)

In [192]:
mrr_sequence


array([0.00148368, 0.00228311, 0.00934579, 0.00632911, 0.00540541,
       0.00104058, 0.01052632, 0.00111359, 0.00136986, 0.0014881 ,
       0.00109649, 0.00190476, 0.05263158, 0.00355872, 0.00117096,
       0.03225806, 0.01470588, 0.00151976, 0.0010142 , 0.0015083 ,
       0.01265823, 0.00104603, 0.00146628, 0.00149477, 0.00121359,
       0.003663  , 0.05263158, 0.00123762, 0.00296736, 0.00144092,
       0.0010395 , 0.01351351, 0.002331  , 0.00826446, 0.00206186,
       0.0022779 , 0.5       , 0.00185529, 0.00112613, 0.00367647,
       0.00126103, 0.00124844, 0.02777778, 0.02439024, 0.00135135,
       0.00309598, 0.00166667, 0.0013369 , 0.00431034, 0.00125156,
       0.00263158, 0.00221239, 0.00304878, 0.00307692, 0.00546448,
       0.00151745, 0.06666667, 0.00287356, 0.01111111, 0.00719424,
       0.00806452, 0.00124378, 0.00452489, 0.01639344, 0.0078125 ,
       0.00564972, 0.00298507, 0.00145349, 0.00115207, 0.002079  ,
       0.00408163, 0.00440529, 0.0019685 , 0.00185529, 0.00110

In [193]:
mrr_sequence.shape

(186,)

In [194]:
# Sequential Model on MovieLens dataset:

In [195]:
from spotlight.datasets.movielens import get_movielens_dataset

dataset = get_movielens_dataset(variant='100K')

In [196]:
from spotlight.cross_validation import random_train_test_split

train_4, test_4 = random_train_test_split(dataset)

In [197]:
from spotlight.sequence.representations import CNNNet

cnn_net = CNNNet(train_4.num_items,
             embedding_dim=128,
             kernel_width=5,
             dilation=2,
             num_layers=5,
             nonlinearity='relu')

In [198]:
from spotlight.sequence.implicit import ImplicitSequenceModel

model_4 = ImplicitSequenceModel(loss='bpr',
                              representation=cnn_net,
                              batch_size=256,
                              learning_rate=0.01,
                              l2=0.0,
                              n_iter=10,
                              use_cuda=torch.cuda.is_available())

In [199]:
model_4.fit(train_4.to_sequence(), verbose = True)

Epoch 0: loss 0.2707342929912336
Epoch 1: loss 0.2598790955362898
Epoch 2: loss 0.26290226208441186
Epoch 3: loss 0.2626385928103418
Epoch 4: loss 0.2602262077006427
Epoch 5: loss 0.26640368727120484
Epoch 6: loss 0.2642526233738119
Epoch 7: loss 0.264411213723096
Epoch 8: loss 0.26808473467826843
Epoch 9: loss 0.27185893555482227


In [200]:
from spotlight.evaluation import sequence_mrr_score

test_mrr = sequence_mrr_score(model_4, test_4.to_sequence())


In [201]:
print("The Test mrr is : " + str(test_mrr))

The Test mrr is : [0.0007722  0.00127714 0.00066007 ... 0.01587302 0.025      0.5       ]


In [202]:
# Sequential Model :

In [203]:
# LSTMNet representation Sequential Model

In [204]:
from spotlight.datasets.movielens import get_movielens_dataset

dataset = get_movielens_dataset(variant='100K')

In [205]:
from spotlight.cross_validation import random_train_test_split

train_5, test_5 = random_train_test_split(dataset)

In [206]:
from spotlight.sequence.implicit import ImplicitSequenceModel

model_5 = ImplicitSequenceModel(loss='bpr',
                                  representation='lstm',
                                  batch_size=256,
                                  learning_rate=0.01,
                                  l2=0.0,
                                  n_iter=10,
                                  use_cuda=torch.cuda.is_available()
                                  )

In [207]:
model_5.fit(train_5.to_sequence(), verbose=True)

Epoch 0: loss 0.2957745907884656
Epoch 1: loss 0.20644947118831403
Epoch 2: loss 0.20198661088943481
Epoch 3: loss 0.19775428148833188
Epoch 4: loss 0.19755354704278888
Epoch 5: loss 0.19572606998862643
Epoch 6: loss 0.19390447889313553
Epoch 7: loss 0.1961326924237338
Epoch 8: loss 0.19201841860106497
Epoch 9: loss 0.19421355561776596


In [208]:
from spotlight.evaluation import sequence_mrr_score

test_mrr_lstm_representaion = sequence_mrr_score(model_5, test_5.to_sequence())

In [209]:
print("The LSTM representation mrr is : " + str(test_mrr_lstm_representaion))

The LSTM representation mrr is : [0.00076161 0.00101729 0.0019084  ... 0.00980392 0.01351351 0.01136364]


In [210]:
# Sequential Model :

In [211]:
# Pooling representation Sequential Model

In [212]:
from spotlight.datasets.movielens import get_movielens_dataset

dataset = get_movielens_dataset(variant='100K')

In [213]:
from spotlight.cross_validation import random_train_test_split

train_6, test_6 = random_train_test_split(dataset)

In [214]:
from spotlight.sequence.implicit import ImplicitSequenceModel

model_6 = ImplicitSequenceModel(loss='bpr',
                                  representation='pooling',
                                  batch_size=256,
                                  learning_rate=0.01,
                                  l2=0.0,
                                  n_iter=10,
                                  use_cuda=torch.cuda.is_available()
                                  )

In [215]:
model_6.fit(train_6.to_sequence(), verbose=True)


Epoch 0: loss 0.42880066235860187
Epoch 1: loss 0.22821564204765088
Epoch 2: loss 0.17431367256424643
Epoch 3: loss 0.15865221348675815
Epoch 4: loss 0.14969184904387503
Epoch 5: loss 0.14296503590814996
Epoch 6: loss 0.13879458561088098
Epoch 7: loss 0.13592730643171252
Epoch 8: loss 0.1320925969066042
Epoch 9: loss 0.13007174229080026


In [216]:
from spotlight.evaluation import sequence_mrr_score

test_mrr_pooling_representation = sequence_mrr_score(model_6, test_6.to_sequence())

In [217]:
print("The Pooling representation mrr is : " + str(test_mrr_pooling_representation))

The Pooling representation mrr is : [0.00083542 0.00537634 0.0027027  ... 0.01639344 0.00311526 0.03448276]
