In [2]:
import numpy as np
import helpers
import pandas as pd
from lightfm import LightFM
from lightfm.datasets import fetch_movielens
from lightfm.evaluation import precision_at_k

# Load the raw interaction log through the project-local `helpers` module.
# Later cells read its `event_type`, `user_id` and `item_id` columns.
# NOTE(review): presumably a pandas DataFrame (it is boolean-masked and
# column-indexed below) -- confirm against helpers.load_interactions_df.
interactions_train = helpers.load_interactions_df()

In [3]:
# Load item data through the project-local `helpers` module.
# NOTE(review): `items_dict` is not referenced by any cell visible in this
# part of the notebook; its structure is defined by helpers.load_items --
# confirm it is still needed.
items_dict = helpers.load_items()

In [46]:
# Working subset: every event whose type is not 'search', capped at the first
# 10,000 rows (presumably to keep the experiment quick -- TODO confirm).
#
# `.copy()` detaches the subset from `interactions_train`. The next cell
# overwrites/adds columns on `interactions`; doing that on a frame obtained by
# slicing raises pandas' SettingWithCopyWarning and leaves it ambiguous whether
# the parent frame is modified as well.
interactions = interactions_train[interactions_train['event_type'] != 'search'][:10000].copy()

In [47]:
from scipy.sparse import coo_matrix

# Re-encode the raw user/item ids as pandas categoricals so that their integer
# category codes can be used as dense row/column indices, and give every
# interaction a weight of 1.
# `assign` builds a new frame instead of writing columns onto a frame that was
# obtained by slicing `interactions_train`, which avoids SettingWithCopyWarning
# and makes the cell safe to re-run.
interactions = interactions.assign(
    user_id=interactions['user_id'].astype("category"),
    item_id=interactions['item_id'].astype("category"),
    weight=1,
)

user_codes = interactions['user_id'].cat.codes
item_codes = interactions['item_id'].cat.codes

# Pin the shape to the number of categories so the matrix dimensions always
# agree with the code <-> raw-id lookup tables built from `.cat.categories`
# (and so that an empty selection does not fail shape inference).
# NOTE(review): a missing user_id/item_id would get category code -1, which
# coo_matrix rejects -- assumes ids are never null; confirm.
train = coo_matrix(
    (interactions['weight'].astype(float), (user_codes, item_codes)),
    shape=(
        len(interactions['user_id'].cat.categories),
        len(interactions['item_id'].cat.categories),
    ),
)
train

<646x5537 sparse matrix of type '<class 'numpy.float64'>'
	with 10000 stored elements in COOrdinate format>

In [48]:
# Lookup tables between the matrix column index (pandas category code) and the
# raw item id.
#   category_to_item: code -> raw item id (as stored in the categories index)
#   item_to_category: int(raw item id) -> code
item_categories = interactions['item_id'].cat.categories
category_to_item = {code: raw_item_id for code, raw_item_id in enumerate(item_categories)}
item_to_category = {int(raw_item_id): code for code, raw_item_id in category_to_item.items()}

In [70]:
%%time

model = LightFM(no_components=100, loss='warp')
model.fit(train, epochs=20, , num_threads=2)

CPU times: user 1 s, sys: 0 ns, total: 1 s
Wall time: 1.02 s


<lightfm.lightfm.LightFM at 0x7fb5d34355c0>

In [69]:
# Candidate items to score: the first 25 distinct raw item ids (in order of
# appearance) among the non-search events, translated to the model's internal
# item indices via `item_to_category`.
# NOTE(review): the lookup raises KeyError for any item that is not part of the
# `interactions` subset the mapping was built from.
non_search_events = interactions_train[interactions_train['event_type'] != 'search']
first_raw_item_ids = non_search_events.item_id.unique()[:25]
item_ids = [item_to_category[int(raw_item_id)] for raw_item_id in first_raw_item_ids]

In [75]:
# Score every candidate in `item_ids` for the user with internal index 1
# (a category code, not a raw user_id), using 4 threads.
predictions = model.predict(
    user_ids=[1],
    item_ids=item_ids,
    num_threads=4,
)
predictions

array([-0.70547247,  0.56811035,  1.57110441,  1.3926332 ,  1.58641517,
        2.01251626,  1.65573072,  1.64194977, -1.02389526, -1.03885615,
       -1.28123689, -0.39475459, -0.58415085, -1.23554349, -0.60138637,
       -0.55006695, -1.64301479, -1.79190564, -0.89585686, -0.91704494,
       -0.42013872, -0.63583434, -0.73371774, -1.13565505, -0.63652289])

In [76]:
# Rank the candidates from highest to lowest predicted score and translate the
# internal item indices back to raw item ids.
descending_order = np.argsort(predictions)[::-1]
[category_to_item[item_ids[position]] for position in descending_order]

[643652.0,
 1943604.0,
 1156086.0,
 206667.0,
 228737.0,
 1282813.0,
 1615991.0,
 505541.0,
 130440.0,
 1447583.0,
 1313192.0,
 2038385.0,
 2049207.0,
 2082794.0,
 1786148.0,
 1019280.0,
 1587422.0,
 277311.0,
 248595.0,
 1230082.0,
 1476099.0,
 1755169.0,
 937557.0,
 1614943.0,
 86082.0]

In [68]:
 # Show the raw interaction rows recorded for user_id 2.
 # NOTE(review): the prediction cell scores internal user index 1; confirm that
 # index corresponds to raw user_id 2 via interactions['user_id'].cat.categories.
 interactions_train.loc[interactions_train['user_id'].eq(2)]

Unnamed: 0,user_id,item_id,event_type,event_timestamp,target
32,2,248595.0,view,2019-10-01T12:46:03.145-0400,1909110
33,2,248595.0,view,2019-10-01T13:21:50.697-0400,1909110


In [30]:
# Peek at the first ten item ids of the working subset. Once `item_id` has been
# converted to a categorical, the repr also lists the categories.
interactions['item_id'].values[:10]

[1786148.0, 1615991.0, 1615991.0, 1615991.0, 1615991.0, 1615991.0, 1615991.0, 1615991.0, 1615991.0, 1615991.0]
Categories (508, float64): [8343.0, 26038.0, 27205.0, 29678.0, ..., 2083291.0, 2087594.0, 2095678.0, 2095987.0]

In [40]:
# Display the complete category code -> raw item id table for `item_id`.
{code: raw_item_id for code, raw_item_id in enumerate(interactions['item_id'].cat.categories)}

{0: 8343.0,
 1: 26038.0,
 2: 27205.0,
 3: 29678.0,
 4: 30781.0,
 5: 31537.0,
 6: 32970.0,
 7: 47291.0,
 8: 47721.0,
 9: 50725.0,
 10: 57364.0,
 11: 58972.0,
 12: 61994.0,
 13: 62917.0,
 14: 65077.0,
 15: 76301.0,
 16: 76713.0,
 17: 80197.0,
 18: 86082.0,
 19: 87338.0,
 20: 87559.0,
 21: 91328.0,
 22: 95173.0,
 23: 96683.0,
 24: 103027.0,
 25: 107425.0,
 26: 109879.0,
 27: 130440.0,
 28: 133209.0,
 29: 139329.0,
 30: 144569.0,
 31: 145450.0,
 32: 150910.0,
 33: 151327.0,
 34: 152433.0,
 35: 159865.0,
 36: 162855.0,
 37: 167007.0,
 38: 167581.0,
 39: 171278.0,
 40: 171492.0,
 41: 173571.0,
 42: 177427.0,
 43: 181998.0,
 44: 184242.0,
 45: 185505.0,
 46: 185736.0,
 47: 188396.0,
 48: 188563.0,
 49: 204132.0,
 50: 206620.0,
 51: 206667.0,
 52: 210415.0,
 53: 212865.0,
 54: 218241.0,
 55: 221588.0,
 56: 222535.0,
 57: 223156.0,
 58: 223503.0,
 59: 225218.0,
 60: 228737.0,
 61: 231254.0,
 62: 243621.0,
 63: 245469.0,
 64: 246461.0,
 65: 248595.0,
 66: 256337.0,
 67: 260699.0,
 68: 261598.0,
