In [1]:
import numpy as np
import pandas as pd
import scipy

from lightfm import LightFM
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import recall_at_k
from lightfm.evaluation import auc_score

from sklearn.model_selection import train_test_split

%load_ext autoreload
%autoreload 2



In [2]:
data = pd.read_csv("data/Sobazaar-bprh-cleaned.csv")

In [3]:
# Here we only consider data with View
data = data[~(data.Action == "L")]
data.reset_index(inplace=True, drop=True)
data.drop_duplicates(inplace=True)

In [4]:
original_item_list = sorted(set(data.ItemID))
original_user_list = sorted(set(data.UserID))

data_without_target = data[data.Action != 'P']
data_only_with_target = data[data.Action == 'P']

In [5]:
train_data, test_data = train_test_split(data_only_with_target,
                                 test_size=0.2,
                                 random_state=20200715
                                 )

In [6]:
# prepare data

lfm_train_data = np.zeros(shape=(4712,7015))
lfm_test_data = np.zeros(shape=(4712,7015))
for u in original_user_list:
    train_item_pool = set(train_data[(train_data.UserID == u) & (train_data.Action == 'P')].ItemID)
    test_item_pool = set(test_data[(test_data.UserID == u) & (test_data.Action == 'P')].ItemID)
    u_idx = original_user_list.index(u)
    for i in train_item_pool:
        i_idx = original_item_list.index(i)
        lfm_train_data[u_idx,i_idx] = 1
    for i in test_item_pool:
        i_idx = original_item_list.index(i)
        lfm_test_data[u_idx,i_idx] = 1

lfm_train_data = scipy.sparse.coo_matrix(lfm_train_data)
lfm_test_data = scipy.sparse.coo_matrix(lfm_test_data)

In [17]:
bpr_model = LightFM(no_components=50,
                    learning_rate=0.001,
                    item_alpha=0.00001,
                    user_alpha=0.00001,
                    loss='bpr',
                    random_state=20200716)

bpr_model.fit(lfm_train_data,
              epochs=5000)

<lightfm.lightfm.LightFM at 0x182560faac8>

In [16]:
train_recall = recall_at_k(bpr_model, lfm_train_data, k=10).mean()
test_recall = recall_at_k(bpr_model, lfm_test_data, k=10).mean()

train_precision = precision_at_k(bpr_model, lfm_train_data, k=10).mean()
test_precision = precision_at_k(bpr_model, lfm_test_data, k=10).mean()

train_auc = auc_score(bpr_model, lfm_train_data).mean()
test_auc = auc_score(bpr_model, lfm_test_data).mean()

print('Recall: train %.4f, test %.4f.' % (train_recall, test_recall))
print('Precision: train %.4f, test %.4f.' % (train_precision, test_precision))
print('AUC: train %.4f, test %.4f.' % (train_auc, test_auc))

"""
100
Recall: train 0.0246, test 0.0122.
Precision: train 0.0053, test 0.0017.
AUC: train 0.5462, test 0.5223.

500
Recall: train 0.0597, test 0.0227.
Precision: train 0.0132, test 0.0036.
AUC: train 0.6751, test 0.5473.

1000
Recall: train 0.1084, test 0.0263.
Precision: train 0.0229, test 0.0040.
AUC: train 0.8675, test 0.5878.

1500
Recall: train 0.1129, test 0.0253.
Precision: train 0.0246, test 0.0039.
AUC: train 0.9149, test 0.5751.


2000
Recall: train 0.1183, test 0.0239.
Precision: train 0.0262, test 0.0038.
AUC: train 0.9338, test 0.5628.
"""

Recall: train 0.0597, test 0.0227.
Precision: train 0.0132, test 0.0036.
AUC: train 0.6751, test 0.5473.


'\n100\nRecall: train 0.0246, test 0.0122.\nPrecision: train 0.0053, test 0.0017.\nAUC: train 0.5462, test 0.5223.\n\n1000\nRecall: train 0.1084, test 0.0263.\nPrecision: train 0.0229, test 0.0040.\nAUC: train 0.8675, test 0.5878.\n\n1500\nRecall: train 0.1129, test 0.0253.\nPrecision: train 0.0246, test 0.0039.\nAUC: train 0.9149, test 0.5751.\n\n\n2000\nRecall: train 0.1183, test 0.0239.\nPrecision: train 0.0262, test 0.0038.\nAUC: train 0.9338, test 0.5628.\n'