In [1]:
from bprH import bprH, adv_index
import pandas as pd
from sklearn.model_selection import KFold

In [2]:
# Toy interaction log: 5 users x 4 items, one (item, action) pair per event.
# Action codes are 'V' and 'P'; every 'P' event has a matching 'V' event
# for the same user and item.
toy_events_by_user = {
    'U1': [('I1', 'V'), ('I1', 'P'), ('I3', 'V'), ('I4', 'V')],
    'U2': [('I2', 'V'), ('I2', 'P'), ('I1', 'V')],
    'U3': [('I1', 'V'), ('I1', 'P'), ('I2', 'V'), ('I4', 'V'), ('I4', 'P')],
    'U4': [('I2', 'V'), ('I3', 'V'), ('I3', 'P'), ('I4', 'V')],
    'U5': [('I1', 'V'), ('I3', 'V'), ('I2', 'V'), ('I2', 'P')],
}
# Flatten to one row per event, preserving the per-user event order.
data = pd.DataFrame(
    [
        (user, item, action)
        for user, events in toy_events_by_user.items()
        for item, action in events
    ],
    columns=['UserID', 'ItemID', 'Action'],
)

In [3]:
# Instantiate a BPRH model for the toy data.
# NOTE(review): dim is presumably the latent-factor size, the lambda_* values
# regularisation weights and gamma the step size -- confirm against bprH.
toy_hyperparams = dict(
    dim=3,
    omega=1,
    rho=1,
    lambda_u=0.1,
    lambda_v=0.1,
    lambda_b=0.1,
    gamma=0.008,
    random_state=20200704,
)
model = bprH(**toy_hyperparams)

In [5]:
# Fit the toy model for 1000 iterations. The complete item and user ID lists
# are handed to fit() explicitly alongside the interaction frame.
toy_item_universe = ['I1', 'I2', 'I3', 'I4']
toy_user_universe = ['U1', 'U2', 'U3', 'U4', 'U5']
model.fit(
    X=data,
    original_item_list=toy_item_universe,
    original_user_list=toy_user_universe,
    num_iter=1000,
)

Calculate auxiliary-target correlation
Generate Itemset Coselection


100%|██████████| 5/5 [00:00<00:00, 250.13it/s]
100%|██████████| 4/4 [00:00<00:00, 235.43it/s]
ITER 999: 100%|██████████| 1000/1000 [00:02<00:00, 323.19it/s, loss=-1.17]


In [9]:
# Request estimates for user U1 on items I1 and I2. The call is left as the
# cell's final bare expression so its return value is rendered as the output.
users_to_estimate = ['U1']
items_to_estimate = ['I1', 'I2']
model.predict_estimation(
    user_to_predict=users_to_estimate,
    item_to_predict=items_to_estimate,
)

array([[ 0.27948036, -1.13437315]])

In [2]:
# Column labels applied, by position, to the Sobazaar action log.
sobazaar_columns = [
    'ItemID',
    'UserID',
    'Action',
    'Action_Date',
    'Action_Time',
    'SessionID',
]

# Load the Sobazaar action log from disk.
data = pd.read_csv('data/sb_unique_actions_20.csv')
# Positional relabel: pandas raises if the file does not have exactly as many
# columns as there are labels.
data.columns = sobazaar_columns

In [3]:
# A time-ordered split (e.g. sklearn.model_selection.TimeSeriesSplit) would be
# the better choice; a simple split is used here as a first pass.

# Sorted, de-duplicated item and user IDs found in the log.
original_item_list = sorted(set(data['ItemID']))
original_user_list = sorted(set(data['UserID']))

# Partition the rows on whether the action is 'P' (the target action).
is_target_action = data['Action'] == 'P'
data_without_target = data[~is_target_action]
data_only_with_target = data[is_target_action]

In [4]:
# Shuffled 5-fold splitter with a fixed seed, to be applied to the
# target-action rows only.
kfold_options = {'n_splits': 5, 'shuffle': True, 'random_state': 0}
kf = KFold(**kfold_options)
# Cell value: the number of splits the splitter reports.
kf.get_n_splits(X=data_only_with_target)

5

In [None]:
# Cross-validate over the target-action rows: each fold trains on the other
# folds' target rows plus every non-target row, and holds out its own target
# rows as the test set.
#
# Every fold's fitted model is kept together with its held-out rows so the
# folds can be evaluated afterwards; without this, each iteration overwrites
# `model` and `test_data` and only the last fold survives the loop.
fold_results = []
for fold_idx, (train_index, test_index) in enumerate(kf.split(X=data_only_with_target)):
    # build train and test data (indices from kf.split are positional, hence iloc)
    train_data = pd.concat([data_only_with_target.iloc[train_index], data_without_target])
    test_data = data_only_with_target.iloc[test_index]

    # start a fresh BPRH model for this fold
    model = bprH(dim=50, omega=1000, rho=1, lambda_u=0.1, lambda_v=0.1, lambda_b=0.1, gamma=0.001, random_state=20200704)

    model.fit(
        X=train_data,
        original_item_list=original_item_list,
        original_user_list=original_user_list,
        num_iter=10000,
        # one path per fold, distinguished by the fold index
        saved_path=f'data/item-set-coselection-{fold_idx}.pkl',
    )

    fold_results.append({'fold': fold_idx, 'model': model, 'test_data': test_data})

Calculate auxiliary-target correlation
Generate Itemset Coselection


100%|██████████| 4514/4514 [01:34<00:00, 47.87it/s]
 21%|██        | 494/2334 [14:23<1:02:52,  2.05s/it]