## Application of BPR on Zazzle Data

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
import itertools
import numpy as np
import pandas as pd
from subprocess import call
from pybpr import *
import scipy.sparse as sp
from functools import reduce
import matplotlib.pyplot as plt
from functools import partial

## Put Zazzle data in UserItemInteractions class objects

In [3]:
DATA_DIR = '/projects/zazzle/rsandhu/pybpr/examples/output/zazzle_data'

# Each interaction type is stored in its own parquet file under DATA_DIR.
# The files are read in the order listed and unpacked into one frame apiece.
df_v, df_c, df_o, df_v_not_c, df_c_not_o = (
    pd.read_parquet(os.path.join(DATA_DIR, parquet_name))
    for parquet_name in (
        'view_data.parquet',
        'click_data.parquet',
        'order_data.parquet',
        'viewed_not_clicked_data.parquet',
        'clicked_not_ordered_data.parquet',
    )
)

In [13]:
# NOTE(review): `num_users` / `num_items` were referenced here without being
# defined in any earlier cell, so this cell raised NameError on a fresh kernel.
# They are derived below as (largest index + 1) over every loaded frame, which
# assumes `user_idx` / `product_idx` are zero-based integer indices -- confirm
# against the preprocessing step that wrote the parquet files.
interaction_frames = (df_v, df_c, df_o, df_v_not_c, df_c_not_o)
num_users = int(max(df['user_idx'].max() for df in interaction_frames)) + 1
num_items = int(max(df['product_idx'].max() for df in interaction_frames)) + 1

# Fraction passed to generate_train_test as `user_test_ratio`.
test_ratio = 0.0


def build_interactions(df, num_users, num_items, test_ratio):
    """Wrap one interaction frame in a UserItemInteractions object.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with `user_idx` and `product_idx` columns.
    num_users, num_items : int
        Totals forwarded to the UserItemInteractions constructor.
    test_ratio : float
        Forwarded to `generate_train_test` as `user_test_ratio`.

    Returns
    -------
    UserItemInteractions
        The object after `generate_train_test` has been called on it.
    """
    interactions = UserItemInteractions(
        users_index=df['user_idx'],
        items_index=df['product_idx'],
        num_users=num_users,
        num_items=num_items
    )
    interactions.generate_train_test(user_test_ratio=test_ratio)
    return interactions


# Same construction order as before: viewed-not-clicked, clicked,
# clicked-not-ordered, ordered.
data_viewed_not_clicked = build_interactions(
    df_v_not_c, num_users, num_items, test_ratio)
data_clicked = build_interactions(
    df_c, num_users, num_items, test_ratio)
data_clicked_not_ordered = build_interactions(
    df_c_not_o, num_users, num_items, test_ratio)
data_ordered = build_interactions(
    df_o, num_users, num_items, test_ratio)


----UserItemInteractions--Sample
# of users (active/total): 16455/16462
# of items (active/total): 779192/779192
# of interactions: 28093326
Sparsity in the UI mat: 0.00219
Memory used by sparse UI mat: 26.79 MB

----UserItemInteractions--Sample
# of users (active/total): 16375/16462
# of items (active/total): 313420/779192
# of interactions: 833645
Sparsity in the UI mat: 6.5e-05
Memory used by sparse UI mat: 0.8 MB

----UserItemInteractions--Sample
# of users (active/total): 15446/16462
# of items (active/total): 250741/779192
# of interactions: 567047
Sparsity in the UI mat: 4.4e-05
Memory used by sparse UI mat: 0.54 MB

----UserItemInteractions--Sample
# of users (active/total): 16278/16462
# of items (active/total): 139623/779192
# of interactions: 266598
Sparsity in the UI mat: 2.1e-05
Memory used by sparse UI mat: 0.25 MB


## BPR 

In [18]:
# BPR model that the cells below train, evaluate and save.
bpr1 = BPR(
    num_features=200,
    reg_lambda=0.0,
    num_iters=500,
    learning_rate=0.1,
    batch_size=15000,
    initial_std=0.0001,
)
# Size the model from an interaction object that this notebook actually
# builds. The previous `view_data` name is not defined anywhere in the
# notebook, so the call failed on a fresh kernel.
# NOTE(review): assumes UserItemInteractions exposes `num_users` / `num_items`
# attributes, as the original `view_data.num_users` access implied -- confirm.
bpr1.initiate(
    num_users=data_clicked.num_users,
    num_items=data_clicked.num_items
)

In [22]:
# Positive interactions are clicks; explicit negatives are items that were
# clicked but not ordered.
pos_data = data_clicked
neg_data = data_clicked_not_ordered

# Number of bpr_fit calls; one metric row is logged after each call.
NUM_FIT_ROUNDS = 10

# Only the explicit negative sampler is configured. The earlier
# `uniform_negative_sampler` partial was overwritten before use and referenced
# an undefined `training_data`, which raised NameError on a fresh kernel.
neg_sampler = partial(
    explicit_negative_sampler,
    pos_uimat=pos_data.mat,
    neg_uimat=neg_data.mat
)

# The metric settings do not change between rounds, so bind them once.
mfunc = partial(
    bpr1.get_metric_v1,
    perc_active_users=0.5,
    perc_active_items=0.5,
    num_recs=60
)

metric_log_train = []
for _ in range(NUM_FIT_ROUNDS):
    # NOTE(review): ncores=104 is specific to the machine this was run on.
    results = bpr_fit(
        bpr_obj=bpr1,
        neg_sampler=neg_sampler,
        ncores=104
    )
    # Evaluate on the same positive matrix used for training.
    metric_log_train.append(mfunc(uimat=pos_data.mat))
metric_log_train = np.asarray(metric_log_train)

BPR-Fit: 100%|██████████| 7500000/7500000 [00:12<00:00, 624548.28it/s]
BPR-Fit: 100%|██████████| 7500000/7500000 [00:11<00:00, 626326.06it/s]
BPR-Fit: 100%|██████████| 7500000/7500000 [00:12<00:00, 620160.32it/s]
BPR-Fit: 100%|██████████| 7500000/7500000 [00:11<00:00, 632093.99it/s]
BPR-Fit: 100%|██████████| 7500000/7500000 [00:11<00:00, 630580.84it/s]
BPR-Fit: 100%|██████████| 7500000/7500000 [00:11<00:00, 627442.22it/s]
BPR-Fit: 100%|██████████| 7500000/7500000 [00:11<00:00, 627930.58it/s]
BPR-Fit: 100%|██████████| 7500000/7500000 [00:11<00:00, 625361.69it/s]
BPR-Fit: 100%|██████████| 7500000/7500000 [00:11<00:00, 629537.46it/s]
BPR-Fit: 100%|██████████| 7500000/7500000 [00:12<00:00, 624987.23it/s]


In [24]:
# Training metrics collected above: one row per fit round, with the columns
# being whatever bpr1.get_metric_v1 returns for that round.
metric_log_train

array([[0.11666667, 0.21666667, 0.31666667],
       [0.13333333, 0.225     , 0.325     ],
       [0.14670014, 0.23333333, 0.33333333],
       [0.15      , 0.23333333, 0.33333333],
       [0.15      , 0.23529412, 0.33333333],
       [0.15      , 0.23913043, 0.34482759],
       [0.15      , 0.24074074, 0.34375   ],
       [0.15686275, 0.24137931, 0.34693878],
       [0.15909091, 0.24324324, 0.35      ],
       [0.15686275, 0.24390244, 0.34514107]])

In [41]:
# Directory the trained model is written to (and later reloaded from).
OUT_DIR = '/projects/zazzle/rsandhu/pybpr/examples/output'

# Persist bpr1 into OUT_DIR.
bpr1.save_model(
    dir_name=OUT_DIR
)

Saving the model in /projects/zazzle/rsandhu/pybpr/examples/output


In [39]:
# Load the model stored in OUT_DIR back into the existing bpr1 object.
bpr1.load_model(OUT_DIR)

Loading the model from /projects/zazzle/rsandhu/pybpr/examples/output
NpzFile '/projects/zazzle/rsandhu/pybpr/examples/output/bpr_model.npz' with keys: umat, imat


In [43]:
# Build a second model with the constructor's default arguments and load the
# saved model from OUT_DIR into it.
# NOTE(review): the load output lists only `umat` and `imat` as archive keys,
# so training hyperparameters are presumably not restored -- confirm.
bpr2 = BPR()
bpr2.load_model(OUT_DIR)

Loading the model from /projects/zazzle/rsandhu/pybpr/examples/output
NpzFile '/projects/zazzle/rsandhu/pybpr/examples/output/bpr_model.npz' with keys: umat, imat (16462, 200)


In [47]:
# Both factor matrices must match for the save/load round trip to count as
# successful. A cell displays only its last expression, so the two checks are
# combined rather than computing the `umat` comparison and discarding it.
# np.array_equal also reports False on a shape mismatch instead of
# broadcasting.
np.array_equal(bpr2.umat, bpr1.umat) and np.array_equal(bpr2.imat, bpr1.imat)

True

In [48]:
# Display bpr2's repr (built with BPR() and then load_model).
bpr2

BPR(num_features=200, num_iters=100, batch_size=32, initial_std=0.0001, reg_lambda=0.0, learning_rate=0.001, verbose=False)

In [49]:
# Display bpr1's repr for comparison with bpr2.
bpr1

BPR(mname='bpr_model', num_features=200, num_iters=500, batch_size=15000, initial_std=0.0001, reg_lambda=0.0, learning_rate=0.1, verbose=False)