In [29]:
import numpy as np
import pandas as pd
import scipy
from sklearn.model_selection import train_test_split

from lightfm import LightFM
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import recall_at_k
from lightfm.evaluation import auc_score

from bprH_gpu import bprH

# Enable IPython's autoreload extension in mode 2: imported modules
# (such as bprH_gpu) are re-imported before each cell runs, so edits to their
# source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2



In [8]:
# Load the cleaned Sobazaar interaction log; the following cells read its
# Action, ItemID and UserID columns.
data = pd.read_csv(filepath_or_buffer="data/Sobazaar-bprh-cleaned.csv")

In [22]:
# Drop every row whose Action is "L"; all other rows are kept.
# NOTE(review): the earlier note here said only "View" data is considered, so
# the rows left besides the "P" ones are presumably views — confirm against
# the dataset's action codes.
#
# Duplicates are removed *before* the index is reset so the final index is a
# gap-free 0..n-1 range (resetting first left holes wherever a duplicate was
# dropped). The set of rows kept is the same either way.
data = (
    data[data["Action"] != "L"]
    .drop_duplicates()
    .reset_index(drop=True)
)

In [27]:
# Sorted, de-duplicated ID vocabularies taken from the filtered log.
original_item_list = sorted(set(data["ItemID"]))
original_user_list = sorted(set(data["UserID"]))

# Partition the log on the Action column: "P" rows are the target feedback,
# every other row is non-target feedback.
data_only_with_target = data.loc[data["Action"].eq("P")]
data_without_target = data.loc[data["Action"].ne("P")]

In [28]:
# Hold out 20% of the target rows for evaluation; the seed is fixed so the
# split is reproducible.
d1, test_data = train_test_split(
    data_only_with_target,
    test_size=0.2,
    random_state=20200713,
)

# Training data = the retained target rows followed by all non-target rows.
# Both frames are re-indexed from 0.
train_data = pd.concat([d1, data_without_target]).reset_index(drop=True)
test_data = test_data.reset_index(drop=True)

In [41]:
# Build the bprH model from scratch (no existing model file is loaded).
# NOTE(review): the meaning of omega / rho / gamma and of the lambda_* weights
# is defined inside bprH_gpu — confirm there before tuning.
bprh_model = bprH(
    dim=50,
    omega=1000,
    rho=1,
    lambda_u=1e-6,
    lambda_v=1e-6,
    lambda_b=1e-6,
    gamma=1,
    random_state=20200715,
    num_iter=180000,
    existed_model_path=None,
)

# Fit on the training frame, passing the held-out frame as eval_X and the full
# ID vocabularies so the model sees every original user and item.
# NOTE(review): the saved output for this cell shows a 90000-step progress bar
# ending in KeyboardInterrupt, so it may not correspond to num_iter=180000 —
# re-run to confirm.
bprh_model.fit(
    X=train_data,
    eval_X=test_data,
    original_item_list=original_item_list,
    original_user_list=original_user_list,
    coselection_saved_path="data/item-set-coselection-test.pkl",
    model_saved_path="bprh_soba.pkl",
    correlation=True,
    coselection=True,
    plot_metric=False,
    log_metric=False,
    iter_to_log=10000,
    iter_to_save=10000,
)

Registering Model Parameters
Build I_u_t, I_u_a
Calculate auxiliary-target correlation
Generate Itemset Coselection - Build U_i
Generate Itemset Coselection - Build S
Initializing User and Item Matrices
Start Training


100%|██████████| 4712/4712 [01:47<00:00, 43.85it/s]
100%|██████████| 4712/4712 [00:00<00:00, 336762.15it/s]
100%|██████████| 7015/7015 [01:32<00:00, 76.06it/s] 
100%|██████████| 7015/7015 [00:29<00:00, 241.43it/s]
  4%|▍         | 3834/90000 [34:26<12:54:06,  1.86it/s, est_changed=82.92516138683224, len_I=1, len_J=1, len_K=1]    


KeyboardInterrupt: 

In [38]:
# Score the model against the test split stored on the model object, with K=10.
# NOTE(review): train_data_as_reference_flag=True presumably makes scoring use
# the training interactions as a reference when ranking — confirm in
# bprH.scoring.
# NOTE(review): binding `_` here shadows IPython's last-output variable.
scoring_list, p, r, _ = bprh_model.scoring(
    ground_truth=bprh_model.test_data,
    K=10,
    train_data_as_reference_flag=True,
)
print("Recall: test %.4f." % r, "Precision: test %.4f." % p, sep="\n")

Recall: test 0.0012.
Precision: test 0.0003.


In [39]:
# Score the model against the training split stored on the model object, with
# K=10 and the train-data reference flag switched off.
# NOTE(review): this rebinds scoring_list / p / r from any earlier scoring
# cell, and binding `_` shadows IPython's last-output variable.
scoring_list, p, r, _ = bprh_model.scoring(
    ground_truth=bprh_model.train_data,
    K=10,
    train_data_as_reference_flag=False,
)
print("Recall: train %.4f." % r, "Precision: train %.4f." % p, sep="\n")

Recall: train 0.0023.
Precision: train 0.0004.
