## Application of Bayesian Personalized Ranking (BPR) to Zazzle data

In [None]:
# The watermark extension is intentionally left disabled.
#%load_ext watermark
# Enable IPython's autoreload extension. Mode 2 reloads all modules before
# each cell runs, so edits to imported packages are picked up without a
# kernel restart.
%load_ext autoreload 
%autoreload 2

In [None]:
import os
import sys
import itertools
import numpy as np
import pandas as pd
from subprocess import call
from pybpr import *
import matplotlib.pyplot as plt
from functools import partial

In [None]:
%%time
data_dir = os.path.join(os.path.curdir, 'data', 'zazzle')
df = pd.read_parquet(os.path.join(os.path.join(data_dir,'BPR_0005_part_00.parquet')))
df.columns
#print(df.memory_usage().sum() / 1024 / 1024)

In [None]:
# Report the DataFrame's memory footprint in MiB (bytes / 1024 / 1024).
# NOTE: memory_usage() is called with its defaults, so object-dtype columns
# are measured shallowly (pandas' deep=False default).
total_bytes = df.memory_usage().sum()
print(total_bytes / 1024 / 1024)

In [None]:
# Number of distinct users and distinct products in the loaded data,
# shown as a (users, products) tuple.
df['user_id'].nunique(), df['product_id'].nunique()

In [None]:
%%time
cf = UserItemInteractions(
    name='Zazzle',
    users=df['user_id'],
    items=df['product_id'],
    min_num_rating_per_user=200,
    min_num_rating_per_item=50,
    num_cores = 1
)
cf.print_memory_usage()


In [None]:
# Summary of the interaction container: (user count, item count, sparsity).
(
    cf.num_users,
    cf.num_items,
    cf.sparsity,
)

In [None]:
# Side-by-side histograms of interaction counts: users on the left, products
# on the right. plt.subplots(1, 2) already returns a 1-D array of axes, so no
# flattening is needed.
fig, ax = plt.subplots(1, 2, figsize=(10, 3))

# Each panel: (axes, per-entity table, x-axis label, y-axis label).
# The binned quantity (NumRatings) runs along x; y is the number of
# users/products falling in each bin.
panels = [
    (ax[0], cf.df_user, '# of interactions per user', '# of users'),
    (ax[1], cf.df_item, '# of interactions per product', '# of products'),
]
for panel_ax, counts_df, x_label, y_label in panels:
    # Restrict to 0-1000 interactions so a long tail does not flatten the plot.
    counts_df.NumRatings.hist(bins=100, ax=panel_ax, range=(0, 1000))
    panel_ax.set_xlabel(x_label)
    panel_ax.set_ylabel(y_label)

# Alternative (normalised) view: divide NumRatings by cf.num_items for users
# or cf.num_users for products and use range=(0, 0.01).
fig.tight_layout()

In [None]:
%%time
cf.generate_train_test(user_test_ratio=0.1)

In [None]:
%%time
bpr = BPR(
    num_features=20,
    reg_lambda=0.,
    num_iters=200,
    learning_rate = 0.4,
    batch_size=10,
    initial_std=0.001,
    seed=None
)
bpr_ndcg_func = partial(
    cf.get_ndcg_metric,
    num_items=10,
    truncate=False
)
bpr.fit(cf.R_train, ndcg_func=bpr_ndcg_func)

In [None]:
# Tabulate the NDCG values recorded during fitting; the 'train' and 'test'
# columns are plotted side by side.
ndcg_df = pd.DataFrame(bpr.ndcg_metric)
fig, ax = plt.subplots(1, 2, figsize=(10, 3))

# (column in ndcg_df, label suffix) for the left and right panels.
for panel_ax, (column, split_name) in zip(
    ax, (('train', 'Training'), ('test', 'Testing'))
):
    panel_ax.plot(ndcg_df[column])
    panel_ax.set_ylabel(f'NDCG@10 {split_name}')

# Keep the right panel's y label clear of the left panel, as the earlier
# histogram figure does. Ending on this call also avoids echoing a stray
# Text(...) repr as the cell output.
fig.tight_layout()

In [None]:
# Display the NDCG table built from bpr.ndcg_metric.
ndcg_df

In [None]:
# NDCG from the pre-bound evaluator, called with test=True first and then
# test=False, shown as a 2-tuple in that order.
tuple(bpr_ndcg_func(test=use_test) for use_test in (True, False))