In [2]:
import numpy as np
import pandas as pd
from lightfm.datasets import fetch_movielens

In [3]:
# Fetch the MovieLens dataset through lightfm's helper; the result is a dict-like
# object whose entries are listed by the keys() call below.
movielens = fetch_movielens()
# Last expression of the cell: display which pieces the dataset exposes.
movielens.keys()

dict_keys(['train', 'test', 'item_features', 'item_feature_labels', 'item_labels'])

The lightfm library provides a train/test split out of the box, which can be accessed as follows:

In [30]:
# Interaction matrices: the ready-made train/test split shipped with the dataset.
train, test = movielens['train'], movielens['test']

# Item-side metadata: feature matrix, names of the features, and names of the items.
item_features = movielens['item_features']
item_feature_labels, item_labels = (
    movielens['item_feature_labels'],
    movielens['item_labels'],
)

There are 943 users and 1682 movies.

The train set has 90570 ratings and the test set has 9430.

These elements are stored as sparse matrices because most entries are zero (user/movie combinations without a rating).

# Model

In [34]:
from lightfm import LightFM
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score

# Fixed seed so that Restart & Run All reproduces the same metrics: LightFM uses its
# random state when shuffling the data and initialising the parameters.
SEED = 42
N_EPOCHS = 10   # number of passes over the training interactions
TOP_K = 10      # cut-off used for precision@k

# Matrix-factorisation model trained with the BPR (Bayesian Personalised Ranking) loss.
model = LightFM(learning_rate=0.05, loss='bpr', random_state=SEED)
model.fit(train, epochs=N_EPOCHS)

# Per-user precision@k, averaged over users. For the test metrics the training
# interactions are passed in so that already-known positives are left out of the ranking.
train_precision = precision_at_k(model, train, k=TOP_K).mean()
test_precision = precision_at_k(model, test, k=TOP_K, train_interactions=train).mean()

# Per-user ROC AUC, averaged over users, with the same exclusion of training positives.
train_auc = auc_score(model, train).mean()
test_auc = auc_score(model, test, train_interactions=train).mean()

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

Precision: train 0.58, test 0.20.
AUC: train 0.89, test 0.88.


In [8]:
# Print the sparse test matrix as "(row, col) value" triples: (user index, item index) rating.
# NOTE(review): this prints a long listing; test.shape / test.nnz would be a more compact summary.
print(test)

  (0, 19)	4
  (0, 32)	4
  (0, 60)	4
  (0, 116)	3
  (0, 154)	2
  (0, 159)	4
  (0, 170)	5
  (0, 188)	3
  (0, 201)	5
  (0, 264)	4
  (1, 12)	4
  (1, 49)	5
  (1, 250)	5
  (1, 279)	3
  (1, 280)	3
  (1, 289)	3
  (1, 291)	4
  (1, 296)	4
  (1, 311)	3
  (1, 313)	1
  (2, 244)	1
  (2, 293)	2
  (2, 322)	2
  (2, 327)	5
  (2, 330)	4
  :	:
  (940, 180)	5
  (940, 256)	4
  (940, 257)	4
  (940, 474)	4
  (940, 992)	4
  (941, 116)	4
  (941, 199)	4
  (941, 260)	4
  (941, 322)	3
  (941, 422)	5
  (941, 426)	5
  (941, 486)	4
  (941, 583)	4
  (941, 603)	4
  (941, 614)	3
  (942, 10)	4
  (942, 57)	4
  (942, 110)	4
  (942, 185)	5
  (942, 214)	5
  (942, 231)	4
  (942, 355)	4
  (942, 569)	1
  (942, 807)	4
  (942, 1066)	2


In [16]:
# Column (item) index of every stored test rating, in the same order as test.row.
test.col

array([  19,   32,   60, ...,  569,  807, 1066])

In [38]:
# Score exactly the (user, item) pairs that appear in the test set:
# test.row holds the user index and test.col the item index of each stored rating.
prediction = model.predict(
    user_ids=test.row,
    item_ids=test.col,
)

In [42]:
# The scores are not predicted ratings: they only express a relative ordering
# (a higher score means the item is ranked higher).
prediction[:10]

array([-4.901043 , -4.1578956, -4.6789007, -3.5722992, -4.2807946,
       -4.269999 , -4.2337465, -4.028219 , -3.1322682, -3.8462079],
      dtype=float32)

In [39]:
# Sanity check: one score per scored (user, item) pair, i.e. one per test rating.
len(prediction)

9430