## Application of Bayesian Personalized Ranking (BPR) to Zazzle Data

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell executes and edits to the project source are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
import itertools
import numpy as np
import pandas as pd
from subprocess import call
from pybpr import *
import scipy.sparse as sp
from functools import reduce
import matplotlib.pyplot as plt
from functools import partial

The history saving thread hit an unexpected error (OperationalError('database is locked')).History will not be written to the database.


## Load Zazzle data into `UserItemInteractions` objects

In [3]:
# Directory holding the pre-processed Zazzle parquet files. The default is the
# original cluster location; set the ZAZZLE_DATA_DIR environment variable to
# run the notebook against a copy of the data stored elsewhere.
DATA_DIR = os.environ.get(
    'ZAZZLE_DATA_DIR',
    '/projects/zazzle/rsandhu/pybpr/examples/output/zazzle_data'
)

# One dataframe per interaction type. Later cells read the 'user_idx' and
# 'product_idx' columns from each of them.
vdf = pd.read_parquet(os.path.join(DATA_DIR, 'view_data.parquet'))    # views
cdf = pd.read_parquet(os.path.join(DATA_DIR, 'click_data.parquet'))   # clicks
odf = pd.read_parquet(os.path.join(DATA_DIR, 'order_data.parquet'))   # orders

In [4]:
%%time
num_users = vdf['user_idx'].max()+1
num_items = vdf['product_idx'].max()+1
view_data = UserItemInteractions(
    name='ZAZZLE VIEW DATA',
    users_index=vdf['user_idx'],
    items_index=vdf['product_idx']
)
view_data.generate_train_test(user_test_ratio=0.0)


----UserItemInteractions--ZAZZLE VIEW DATA
# of users (active/total): 16462/16462
# of items (active/total): 779192/779192
# of interactions: 28926971
Sparsity in the UI mat: 0.002255
Memory used by sparse UI mat: 27.59 MB
CPU times: user 2.07 s, sys: 142 ms, total: 2.21 s
Wall time: 2.19 s


In [5]:
%%time
click_data = UserItemInteractions(
    name='ZAZZLE CLICK DATA',
    users_index=cdf['user_idx'],
    items_index=cdf['product_idx'],
    num_users = num_users,
    num_items = num_items
)
click_data.generate_train_test(user_test_ratio=0.0)


----UserItemInteractions--ZAZZLE CLICK DATA
# of users (active/total): 16375/16462
# of items (active/total): 313420/779192
# of interactions: 833645
Sparsity in the UI mat: 6.5e-05
Memory used by sparse UI mat: 0.8 MB
CPU times: user 88.2 ms, sys: 1.2 ms, total: 89.4 ms
Wall time: 89.3 ms


In [6]:
%%time
order_data = UserItemInteractions(
    name='ZAZZLE ORDER DATA',
    users_index=odf['user_idx'],
    items_index=odf['product_idx'],
    num_users = num_users,
    num_items = num_items
)
order_data.generate_train_test(user_test_ratio=0.0)


----UserItemInteractions--ZAZZLE ORDER DATA
# of users (active/total): 16278/16462
# of items (active/total): 139623/779192
# of interactions: 266598
Sparsity in the UI mat: 2.1e-05
Memory used by sparse UI mat: 0.25 MB
CPU times: user 29.4 ms, sys: 0 ns, total: 29.4 ms
Wall time: 29.3 ms


## BPR model setup and training

In [7]:
# Configure the BPR model that the training cell fits.
# NOTE(review): num_iters * batch_size = 50 * 10000 = 500000, which matches the
# total shown on the BPR-Fit progress bar -- confirm that is how pybpr counts.
bpr1 = BPR(
    num_features=50,
    num_iters=50,
    batch_size=10000,
    learning_rate=0.02,
    reg_lambda=0.0,
    initial_std=0.0001,
)

# Dimension the model using the user/item counts of the view data.
bpr1.initiate(
    num_users=view_data.num_users,
    num_items=view_data.num_items,
)

In [None]:
# Fit bpr1 on the click interactions for 10 rounds, recording a metric on the
# training matrix after each round.
# NOTE(review): bpr1 appears to be updated in place by bpr_fit (its return
# value is never fed back into the model), so re-running this cell would
# continue training from the current state -- confirm against pybpr.
training_data = click_data.mat_train

# Negative sampler bound to the training matrix.
neg_sampler = partial(
    uniform_negative_sampler,
    uimat=training_data
)

# The metric settings do not change between rounds, so bind them once here
# instead of rebuilding the partial on every pass through the loop.
mfunc = partial(
    bpr1.get_metric_v1,
    perc_active_users=0.5,
    perc_active_items=0.5,
    num_recs=60
)

metric_log_train = []
for _ in range(10):
    # NOTE(review): ncores=104 is hard-coded and looks machine-specific;
    # adjust when running on a different host.
    results = bpr_fit(
        bpr_obj=bpr1,
        neg_sampler=neg_sampler,
        ncores=104
    )
    metric_log_train.append(mfunc(uimat=training_data))

# Convert the per-round metric log from a list to a NumPy array.
metric_log_train = np.asarray(metric_log_train)

BPR-Fit: 100%|██████████| 500000/500000 [00:02<00:00, 167088.13it/s]
