In [1]:
# import packages
import pandas as pd
import numpy as np
from random import sample
from tqdm import tqdm, trange

In [2]:
# define function

def adv_index(list_to_index, list_to_match):
    return [ind for ind, match in enumerate(list_to_index) if match in list_to_match]

def sigmoid(z):
    return 1/(1 + np.exp(-z))

In [3]:
# generate a small data

data = pd.read_csv('data/sb_unique_actions_20.csv')
# change column name
data.columns = ['ItemID', 'UserID', 'Action', 'Action_Date', 'Action_Time',
       'SessionId']
data_temp = data[['ItemID', 'UserID', 'Action']]
# drop duplicates
data_temp.drop_duplicates(inplace=True, ignore_index=True)
data = data_temp
del data_temp
# rename user and item
user_original_id_list = sorted(set(data.UserID))
item_original_id_list = sorted(set(data.ItemID))

data.UserID = data.UserID.apply(lambda x: user_original_id_list.index(x))
data.ItemID = data.ItemID.apply(lambda x: item_original_id_list.index(x))

item_list = sorted(set(data.ItemID))
user_list = sorted(set(data.UserID))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [4]:
# parameter setting
dim = 20
num_iter = 5000
omega = 100
rho = 1
lambda_u = 0.1
lambda_v = 0.1
lambda_b = 0.1
gamma = 0.008
num_u = len(user_list)
num_i = len(item_list)

In [6]:
# Calculate auxiliary-target correlation C for every user and each types of auxiliary action
# Here we only have one auxiliary action 'V' for 'View'

# TODO: what if a user has no purchase, no auxiliary action, or no no-action

target_action = 'P'
auxiliary_action = ['V']
C_u = dict()
user_set_bar = tqdm(user_list)
for u in user_set_bar:
    C_u[u] = dict()
    I_t_u = set(data[(data.UserID == u) & (data.Action == target_action)].ItemID)
    # TODO filtered item set
    for X in auxiliary_action:
        I_a_u = set(data[(data.UserID == u) & (data.Action == X)].ItemID)

        C_u_at = len(I_t_u.intersection(I_a_u)) / len(I_t_u) if len(I_t_u) != 0 else 0
        C_u_ta = len(I_t_u.intersection(I_a_u)) / len(I_a_u) if len(I_a_u) != 0 else 0

        C_u_X = 2 * C_u_at * C_u_ta / (C_u_ta + C_u_at) if C_u_ta + C_u_at != 0 else 1
        C_u[u][X] = C_u_X

temp = pd.DataFrame.from_dict(C_u, orient='index')
# We have only one auxiliary action 'V'
temp['alpha'] = omega * rho * temp.V
alpha_u = temp
del temp
alpha_u.reset_index(inplace=True)
alpha_u.columns = ['UserID', 'V', 'alpha']

100%|██████████| 4514/4514 [01:26<00:00, 52.24it/s]


In [7]:
# generate item-set based on co-selection
S = dict()
item_set_bar = tqdm(item_list)
for i in item_set_bar:
    S[i] = set()
    U_i = set(data[data.ItemID == i].UserID)
    for j in item_list:
        U_j = set(data[data.ItemID == j].UserID)
        if len(U_i.intersection(U_j)) >= 2: S[i].add(j)


100%|██████████| 2334/2334 [1:11:50<00:00,  2.19s/it]


In [8]:
# initialization
# we include item bias term in the last row of V
# and set the last column in U to all-1 vector
np.random.seed(20200701)
U = np.random.normal(size=(num_u, dim + 1))
V = np.random.normal(size=(dim + 1, num_i))
U[:, -1] = 1
# estimation is U dot V
estimation = np.dot(U, V)

In [None]:
# begin iteration
with trange(num_iter) as t:
    for index in t:
        # Description will be displayed on the left
        t.set_description('ITER %i' % index)

        # Build u, I, J, K
        # uniformly sample a user from U
        u = sample(set(data.UserID), 1)[0]

        # build I
        # uniformly sample a item i from I_u_t
        I_u_t = set(data[(data.UserID == u) & (data.Action == 'P')].ItemID)
        if len(I_u_t) != 0:
            i = sample(I_u_t, 1)[0]
            # build I = I_u_t cap S_i
            I = I_u_t.intersection(S[i])
        else: # if no item in I_u_t, then set I to empty set
            i = None
            I = set()

        # build J, since we only have one auxiliary action, we follow the uniform sampling
        I_u_oa = set(data[(data.UserID == u) & (data.Action == 'V')].ItemID) - I_u_t# TODO: optimize this
        if len(I_u_oa) != 0:
            j = sample(I_u_oa, 1)[0]
            J = I_u_oa.intersection(S[j])
        else: # if no item in I_u_oa, then set J to empty set
            j = None
            J = set()

        # build K
        I_u_n = set(data.ItemID) - I_u_t - I_u_oa
        if len(I_u_n) != 0:
            k = sample(I_u_n, 1)[0]
            # build K
            K = I_u_n.intersection(S[k])
        else: # if no item in I_u_n, then set K to empty set
            k = None
            K = set()

        # calculate intermediate variables
        # get specific alpha_u
        spec_alpha_u = alpha_u[alpha_u.UserID == u].alpha.values[0]
        u_index = user_list.index(u)
        U_u = U[u_index, :-1]
        # get r_hat_uIJ and r_hat_uJK
        r_hat_uI = np.average(estimation[u_index, adv_index(item_list, sorted(I))]) if len(I) != 0 else 0
        r_hat_uJ = np.average(estimation[u_index, adv_index(item_list, sorted(J))]) if len(J) != 0 else 0
        r_hat_uK = np.average(estimation[u_index, adv_index(item_list, sorted(K))]) if len(K) != 0 else 0

        r_hat_uIJ = r_hat_uI - r_hat_uJ
        r_hat_uJK = r_hat_uJ - r_hat_uK
        # get V_bar_I, V_bar_J, V_bar_K
        V_bar_I = np.average(V[:-1, adv_index(item_list, sorted(I))], axis=1) if len(I) != 0 else np.zeros(shape=V[:-1, 0].shape)
        V_bar_J = np.average(V[:-1, adv_index(item_list, sorted(J))], axis=1) if len(J) != 0 else np.zeros(shape=V[:-1, 0].shape)
        V_bar_K = np.average(V[:-1, adv_index(item_list, sorted(K))], axis=1) if len(K) != 0 else np.zeros(shape=V[:-1, 0].shape)
        # get b_I, b_J, b_K
        b_I = np.average(V[-1, adv_index(item_list, sorted(I))]) if len(I) != 0 else 0
        b_J = np.average(V[-1, adv_index(item_list, sorted(J))]) if len(J) != 0 else 0
        b_K = np.average(V[-1, adv_index(item_list, sorted(K))]) if len(K) != 0 else 0

        # calculate loss
        f_Theta = np.log(sigmoid(r_hat_uIJ / spec_alpha_u)) + np.log(sigmoid(r_hat_uJK))
        regula = lambda_u * np.linalg.norm(U_u, ord=2) + lambda_v * ((np.linalg.norm(V_bar_I, ord=2) if len(I) != 0 else 0) + (np.linalg.norm((V_bar_J), ord=2) if len(J) != 0 else 0) + (np.linalg.norm((V_bar_K), ord=2)) if len(K) != 0 else 0) + lambda_b * ((b_I if len(I) != 0 else 0) ** 2 + (b_J if len(J) != 0 else 0) ** 2 + (b_K if len(K) != 0 else 0) ** 2)
        bprh_loss = f_Theta - regula

        # get derivatives and update

        # NABULA U_u
        df_dUu = sigmoid(- r_hat_uIJ / spec_alpha_u) / spec_alpha_u * (V_bar_I - V_bar_J) + sigmoid(- r_hat_uJK) * (V_bar_J - V_bar_K)
        dR_dUu = 2 * lambda_u * U_u
        # update U_u = U_u + gamma * (df_dUu - dR_dUu)
        U[u_index, :-1] += gamma * (df_dUu - dR_dUu)

        if len(I) != 0:
            # NABULA V_i
            df_dbi = sigmoid(- r_hat_uIJ / spec_alpha_u) / (len(I) * spec_alpha_u)
            dR_dbi = 2 * lambda_b * b_I / len(I)
            df_dVi = df_dbi * U_u
            dR_dVi = 2 * lambda_v * V_bar_I / len(I)
            # update V_i = V_i + gamma * (df_dVi - dR_dVi)
            V[:-1, adv_index(item_list, sorted(I))] += gamma * (df_dVi - dR_dVi)[:,None] # trick: transpose here
            # update b_i = b_i + gamma * (df_dbi - dR_dbi)
            V[-1, adv_index(item_list, sorted(I))] += gamma * (df_dbi - dR_dbi)

        if len(J) != 0:
            # NABULA V_j
            df_dbj = (- sigmoid(- spec_alpha_u * r_hat_uIJ) / spec_alpha_u + sigmoid(- r_hat_uJK)) / len(J)
            dR_dbj = 2 * lambda_b * b_J / len(J)
            df_dVj = df_dbj * U_u
            dR_dVj = 2 * lambda_v * V_bar_J / len(J)
            # update V_j = V_j + gamma * (df_dVj - dR_dVj)
            V[:-1, adv_index(item_list, sorted(J))] += gamma * (df_dVj - dR_dVj)[:,None] # trick: transpose here
            # update b_j = b_j + gamma * (df_dbj - dR_dbj)
            V[-1, adv_index(item_list, sorted(J))] += gamma * (df_dbj - dR_dbj)

        if len(K) != 0:
            # NABULA V_k
            df_dbk = - sigmoid(- r_hat_uJK) / len(K)
            dR_dbk = 2 * lambda_b * b_K / len(K)
            df_dVk = df_dbk * U_u
            dR_dVk = 2 * lambda_v * V_bar_K / len(K)
            # update V_k = V_k + gamma * (df_dVk - dR_dVk)
            V[:-1, adv_index(item_list, sorted(K))] += gamma * (df_dVk - dR_dVk)[:,None] # trick: transpose here
            # update b_k = b_k + gamma * (df_dbk - dR_dbk)
            V[-1, adv_index(item_list, sorted(K))] += gamma * (df_dbk - dR_dbk)

        # update estimation
        estimation = np.dot(U, V)
        # Postfix will be displayed on the right,
        # formatted automatically based on argument's datatype
        t.set_postfix(loss=bprh_loss)

  import sys
ITER 1160:  23%|██▎       | 1161/5000 [05:39<15:45,  4.06it/s, loss=-2.29]