# Robustness Checks

In [1]:
import pandas as pd
import numpy as np
import os

import matplotlib.pyplot as plt
import matplotlib as mpl 

mpl.rcParams['figure.dpi']= 200
mpl.rcParams['axes.spines.right'] = False
mpl.rcParams['axes.spines.top'] = False

from rpy2.robjects import pandas2ri
pandas2ri.activate()

%reload_ext rpy2.ipython

In [2]:
%%R

library('lme4')
library('margins')
library("performance")
library('tidyverse')

getICCs <- function(m, type = 'poisson'){
  var_s <- as.numeric(getME(m, "theta")[1]^2) # seller level variance
  var_w <- as.numeric(getME(m, "theta")[2]^2) # week level variance
  
  if(type == 'poisson'){
    lambda = .139
    alpha <- log(1 + 1/lambda)
  }
  if(type == 'binomial'){
    alpha <- (pi^2) / 3
  }
  
  icc <- list(
    s = (var_s)/ (var_s + var_w + alpha),
    w = (var_w)/ (var_s + var_w + alpha),
    t = (var_s + var_w)/ (var_s + var_w + alpha),
    a = (alpha)/ (var_s + var_w + alpha)
  )
  
  return(icc)
}

sessionInfo()

R[write to console]: Loading required package: Matrix

R[write to console]: ── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.0 ──

R[write to console]: [32m✔[39m [34mggplot2[39m 3.3.2     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.0.4     [32m✔[39m [34mdplyr  [39m 1.0.2
[32m✔[39m [34mtidyr  [39m 1.1.2     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.0

R[write to console]: ── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mtidyr[39m::[32mexpand()[39m masks [34mMatrix[39m::expand()
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[31m✖[39m [34mtidyr[39m::[32mpack()[39m   masks [34mMatrix[39m::pack()
[31m✖[39m [34mtidyr[39m::[32munpack()[39m masks [34mMatrix[39m::unpack(

R version 4.0.3 (2020-10-10)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Big Sur 10.16

Matrix products: default
LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib

locale:
[1] C/UTF-8/C/C/C/C

attached base packages:
[1] tools     stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] forcats_0.5.0     stringr_1.4.0     dplyr_1.0.2       purrr_0.3.4      
 [5] readr_1.4.0       tidyr_1.1.2       tibble_3.0.4      ggplot2_3.3.2    
 [9] tidyverse_1.3.0   performance_0.5.1 margins_0.3.23    lme4_1.1-25      
[13] Matrix_1.2-18    

loaded via a namespace (and not attached):
 [1] statmod_1.4.35    tidyselect_1.1.0  splines_4.0.3     haven_2.3.1      
 [5] lattice_0.20-41   colorspace_1.4-1  vctrs_0.3.4       generics_0.0.2   
 [9] blob_1.2.1        rlang_0.4.8       nloptr_1.2.2.2    pillar_1.4.6     
[13] withr_2.3.0       glue_1.4.2        DBI_1.1.0         dbplyr_1.4.4     
[17] mode

## Load Data

In [3]:
MAINDIR = os.getcwd().rsplit('/', 1)[0]
file = os.path.join(MAINDIR, 'data/analysis', 'vendor_item_period.csv')

In [4]:
df = pd.read_csv(file, index_col = False)
df = df.drop(df.columns[0], axis=1)

In [5]:
me = pd.read_pickle(os.path.join(MAINDIR, 'data/analysis', 'market_entry.pickle'))

## Robustness of ARF

In our paper we used liberal cut of values, here we provide robustness checks for stricter cut of criteria

In [6]:
# setting cut of values
t_cut = me['delta_t'].quantile(1/3)
p_cut = me['price_usd'].quantile(1/3)
s_cut = me.groupby('vendor')['delta_t'].std().quantile(1/3)


# ARF 1
arf1 = me\
    .groupby('vendor')\
    .apply(lambda x: (x['delta_t'].mean() < t_cut\
                      and (x['price_usd'].mean() < p_cut)\
                      and (x['delta_t'].std() < s_cut)))\
    .rename('arf_025')

# setting cut of values
t_cut = me['delta_t'].quantile(2/5)
p_cut = me['price_usd'].quantile(2/5)
s_cut = me.groupby('vendor')['delta_t'].std().quantile(2/5)


# ARF 1
arf2 = me\
    .groupby('vendor')\
    .apply(lambda x: (x['delta_t'].mean() < t_cut\
                      and (x['price_usd'].mean() < p_cut)\
                      and (x['delta_t'].std() < s_cut)))\
    .rename('arf_041')

arf = pd.concat([arf1, arf2], axis = 1).reset_index()\
    .assign(vendor = lambda x: x['vendor'].factorize()[0] + 1)

## Robustness of ARM

In our paper we used global cut of values to determine cases of artificial growth. However, some sellers might aswell have show fast, static and cheap growth by definition. Therefore we perform robustness checks were we includes a stricter definition of ARM, including internally defined cut-off values. This assumes that ARM is a strategy that is used at some time-point but not all the time during market activity. However, it assures that the growth of reputation is relatively fast, static and cheap for sellers themselves. 

In [7]:
# make ARM variable
t_cut_per_seller = df.groupby('vendor')['delta_t_m'].quantile(1/2).to_dict()
s_cut_per_seller = df.groupby('vendor')['delta_t_s'].quantile(1/2).to_dict()
p_cut_per_seller = df.groupby('vendor')['price_usd_m'].quantile(1/2).to_dict()
t_cut = df['delta_t_m'].quantile(1/2)
p_cut = df['price_usd_m'].quantile(1/2)
s_cut = df['delta_t_s'].quantile(1/2)


arm = df\
    .assign(t_low   = lambda x: x['delta_t_m'] < t_cut,
            p_low   = lambda x: x['price_usd_m'] < p_cut,
            s_low   = lambda x: x['delta_t_s'] < s_cut)\
    .assign(arm = lambda x: (x[['t_low', 'p_low', 's_low']].all(axis = 1)))\
    .groupby(['vendor', 'w'])['arm'].max().reset_index()

arm['arm_t__tps'] = df\
    .assign(t_low_s = lambda x: x['delta_t_m'] < x['vendor'].map(t_cut_per_seller),
            p_low_s = lambda x: x['price_usd_m'] < x['vendor'].map(p_cut_per_seller),
            s_low_s = lambda x: x['delta_t_s'] < x['vendor'].map(s_cut_per_seller),
            t_low   = lambda x: x['delta_t_m'] < t_cut)\
    .assign(arm = lambda x: (x[['t_low_s', 'p_low_s', 's_low_s', 't_low']].all(axis = 1)))\
    .groupby(['vendor', 'w'])['arm'].max().reset_index()['arm']


arm['arm_p__tps'] = df\
    .assign(p_low_s = lambda x: x['price_usd_m'] < x['vendor'].map(p_cut_per_seller),
            t_low   = lambda x: x['delta_t_m'] < t_cut,
            p_low   = lambda x: x['price_usd_m'] < p_cut,
            s_low   = lambda x: x['delta_t_s'] < s_cut)\
    .assign(arm = lambda x: (x[['p_low_s', 't_low', 'p_low', 's_low']].all(axis = 1)))\
    .groupby(['vendor', 'w'])['arm'].max().reset_index()['arm']

arm['arm_s__tps'] =  df\
    .assign(s_low_s = lambda x: x['delta_t_s'] < x['vendor'].map(s_cut_per_seller),
            t_low   = lambda x: x['delta_t_m'] < t_cut,
            p_low   = lambda x: x['price_usd_m'] < p_cut,
            s_low   = lambda x: x['delta_t_s'] < s_cut)\
    .assign(arm = lambda x: (x[['s_low_s', 't_low', 'p_low', 's_low']].all(axis = 1)))\
    .groupby(['vendor', 'w'])['arm'].max().reset_index()['arm']

arm['arm_tp__tps'] = df\
    .assign(t_low_s = lambda x: x['delta_t_m'] < x['vendor'].map(t_cut_per_seller),
            p_low_s = lambda x: x['price_usd_m'] < x['vendor'].map(p_cut_per_seller),
            t_low   = lambda x: x['delta_t_m'] < t_cut,
            p_low   = lambda x: x['price_usd_m'] < p_cut,
            s_low   = lambda x: x['delta_t_s'] < s_cut)\
    .assign(arm = lambda x: (x[['t_low_s', 'p_low_s', 't_low', 'p_low', 's_low']].all(axis = 1)))\
    .groupby(['vendor', 'w'])['arm'].max().reset_index()['arm']

arm['arm_ts__tps'] = df\
    .assign(t_low_s = lambda x: x['delta_t_m'] < x['vendor'].map(t_cut_per_seller),
            s_low_s = lambda x: x['delta_t_s'] < x['vendor'].map(s_cut_per_seller),
            t_low   = lambda x: x['delta_t_m'] < t_cut,
            p_low   = lambda x: x['price_usd_m'] < p_cut,
            s_low   = lambda x: x['delta_t_s'] < s_cut)\
    .assign(arm = lambda x: (x[['t_low_s', 's_low_s', 't_low', 'p_low', 's_low']].all(axis = 1)))\
    .groupby(['vendor', 'w'])['arm'].max().reset_index()['arm']

arm['arm_ps__tps'] = df\
    .assign(p_low_s = lambda x: x['price_usd_m'] < x['vendor'].map(p_cut_per_seller),
            s_low_s = lambda x: x['delta_t_s'] < x['vendor'].map(s_cut_per_seller),
            t_low   = lambda x: x['delta_t_m'] < t_cut,
            p_low   = lambda x: x['price_usd_m'] < p_cut,
            s_low   = lambda x: x['delta_t_s'] < s_cut)\
    .assign(arm = lambda x: (x[['p_low_s', 's_low_s', 't_low', 'p_low', 's_low']].all(axis = 1)))\
    .groupby(['vendor', 'w'])['arm'].max().reset_index()['arm']

arm['arm_a'] = df\
    .assign(t_low_s = lambda x: x['delta_t_m'] < x['vendor'].map(t_cut_per_seller),
            p_low_s = lambda x: x['price_usd_m'] < x['vendor'].map(p_cut_per_seller),
            s_low_s = lambda x: x['delta_t_s'] < x['vendor'].map(s_cut_per_seller),
            t_low   = lambda x: x['delta_t_m'] < t_cut,
            p_low   = lambda x: x['price_usd_m'] < p_cut,
            s_low   = lambda x: x['delta_t_s'] < s_cut)\
    .assign(arm = lambda x: (x[['t_low_s', 'p_low_s', 's_low_s', 't_low', 'p_low', 's_low']].all(axis = 1)))\
    .groupby(['vendor', 'w'])['arm'].max().reset_index()['arm']

arm['arm_tps__tp'] = df\
    .assign(t_low_s = lambda x: x['delta_t_m'] < x['vendor'].map(t_cut_per_seller),
            p_low_s = lambda x: x['price_usd_m'] < x['vendor'].map(p_cut_per_seller),
            s_low_s = lambda x: x['delta_t_s'] < x['vendor'].map(s_cut_per_seller),
            t_low   = lambda x: x['delta_t_m'] < t_cut,
            p_low   = lambda x: x['price_usd_m'] < p_cut)\
    .assign(arm = lambda x: (x[['t_low_s', 'p_low_s', 's_low_s', 't_low', 'p_low', ]].all(axis = 1)))\
    .groupby(['vendor', 'w'])['arm'].max().reset_index()['arm']

arm['arm_tps__ts'] = df\
    .assign(t_low_s = lambda x: x['delta_t_m'] < x['vendor'].map(t_cut_per_seller),
            p_low_s = lambda x: x['price_usd_m'] < x['vendor'].map(p_cut_per_seller),
            s_low_s = lambda x: x['delta_t_s'] < x['vendor'].map(s_cut_per_seller),
            t_low   = lambda x: x['delta_t_m'] < t_cut,
            s_low   = lambda x: x['delta_t_s'] < s_cut)\
    .assign(arm = lambda x: (x[['t_low_s', 'p_low_s', 's_low_s', 't_low', 's_low']].all(axis = 1)))\
    .groupby(['vendor', 'w'])['arm'].max().reset_index()['arm']

arm['arm_tps__ps'] = df\
    .assign(t_low_s = lambda x: x['delta_t_m'] < x['vendor'].map(t_cut_per_seller),
            p_low_s = lambda x: x['price_usd_m'] < x['vendor'].map(p_cut_per_seller),
            s_low_s = lambda x: x['delta_t_s'] < x['vendor'].map(s_cut_per_seller),
            p_low   = lambda x: x['price_usd_m'] < p_cut,
            s_low   = lambda x: x['delta_t_s'] < s_cut)\
    .assign(arm = lambda x: (x[['t_low_s', 'p_low_s', 's_low_s', 'p_low', 's_low']].all(axis = 1)))\
    .groupby(['vendor', 'w'])['arm'].max().reset_index()['arm']

arm['arm_tps__p'] = df\
    .assign(t_low_s = lambda x: x['delta_t_m'] < x['vendor'].map(t_cut_per_seller),
            p_low_s = lambda x: x['price_usd_m'] < x['vendor'].map(p_cut_per_seller),
            s_low_s = lambda x: x['delta_t_s'] < x['vendor'].map(s_cut_per_seller),
            p_low   = lambda x: x['price_usd_m'] < p_cut)\
    .assign(arm = lambda x: (x[['t_low_s', 'p_low_s', 's_low_s', 'p_low']].all(axis = 1)))\
    .groupby(['vendor', 'w'])['arm'].max().reset_index()['arm']

arm['arm_tps__t'] = df\
    .assign(t_low_s = lambda x: x['delta_t_m'] < x['vendor'].map(t_cut_per_seller),
            p_low_s = lambda x: x['price_usd_m'] < x['vendor'].map(p_cut_per_seller),
            s_low_s = lambda x: x['delta_t_s'] < x['vendor'].map(s_cut_per_seller),
            t_low   = lambda x: x['delta_t_m'] < t_cut)\
    .assign(arm = lambda x: (x[['t_low_s', 'p_low_s', 's_low_s', 't_low']].all(axis = 1)))\
    .groupby(['vendor', 'w'])['arm'].max().reset_index()['arm']

arm['arm_tps__s'] = df\
    .assign(t_low_s = lambda x: x['delta_t_m'] < x['vendor'].map(t_cut_per_seller),
            p_low_s = lambda x: x['price_usd_m'] < x['vendor'].map(p_cut_per_seller),
            s_low_s = lambda x: x['delta_t_s'] < x['vendor'].map(s_cut_per_seller),
            s_low   = lambda x: x['delta_t_s'] < s_cut)\
    .assign(arm = lambda x: (x[['t_low_s', 'p_low_s', 's_low_s', 's_low']].all(axis = 1)))\
    .groupby(['vendor', 'w'])['arm'].max().reset_index()['arm']

# Shift Dependent variables with one week 
for col in ['arm', 'arm_t__tps', 'arm_p__tps', 'arm_s__tps',
            'arm_tp__tps', 'arm_ts__tps', 'arm_ps__tps', 
            'arm_a', 'arm_tps__tp', 'arm_tps__ts', 
            'arm_tps__ps', 'arm_tps__p', 'arm_tps__t', 'arm_tps__s']: 
    arm['{0}_shift'.format(col)] = arm\
        .groupby('vendor')\
        .apply(lambda x: x[col].shift(1))\
        .reset_index(drop = True)

In [8]:
# import person week file
df = pd.read_pickle(os.path.join(MAINDIR, 'data/analysis', 'vendor_week.pickle'))
df = df.merge(df.groupby('vendor')['me_min'].min().rename('me'), on = 'vendor')
df = df.rename(columns= {
    'international_shipment_count_w': 'int_shipment_count_w', 
    'international_shipment': 'int_shipment'})

In [9]:
df = df.merge(arm, on = ['vendor', 'w']).merge(arf, on = ['vendor'])

In [11]:
from sklearn.preprocessing import PowerTransformer
pt = PowerTransformer()

# normalize variables
skewed_variables = [
    'neg_count_min', 'pos_count_min', 'neg_count_w', 
    'neg_count_w_shift', 'pos_count_w', 'pos_count_w_shift', 'sales_volume_w', 
    'item_count_w', 'int_shipment_count_w']

bc_vars = pd.DataFrame(
    pt.fit_transform(df[skewed_variables]),
    columns = ['bc_' + var for var in skewed_variables])

c_vars = df[skewed_variables].add_prefix('c_')

dummy_vars = df.assign(
        items   = pd.qcut(df['item_count'], 3,  labels=["low", "medium", "high"]),
        items_w = pd.qcut(df['item_count_w'], 3,  labels=["low", "medium", "high"]),
        sales   = pd.qcut(df['cum_count'], 2,  labels=["low", "high"]),
        sales_w = pd.qcut(df['sales_volume_w'], 2,  labels=["low", "high"]))\
    .loc[:,['items', 'items_w', 'sales', 'sales_w']]

n_vars = df[['cum_count', 'sales_volume_w', 'item_count', 
             'item_count_w', 'me', 'int_shipment_count_w',
             'empty_stock_last_week_count_w', 'empty_stock_last_week_maxw']].rename({'cum_count': 'sales_volume'})

bool_vars = df[
    ['arf', 'arf_025', 'arf_041', 'arm_maxw', 'arm_maxw_shift', 
     'arm', 'arm_t__tps', 'arm_p__tps', 'arm_s__tps',
     'arm_tp__tps', 'arm_ts__tps', 'arm_ps__tps', 
     'arm_a', 'arm_tps__tp', 'arm_tps__ts', 
     'arm_tps__ps', 'arm_tps__p', 'arm_tps__t', 'arm_tps__s', 
     'int_shipment', 'has_price_drop', 'has_price_drop_shift']].astype('int')

lev_vars =  df[['vendor','w', 'me_min']]

data = pd.concat([lev_vars, bool_vars, bc_vars, c_vars, dummy_vars, n_vars], axis = 1)

In [12]:
%%R -i data

data['me_'] <- scale(data$me)[,1]
data['w_'] <- scale(data$w)[,1]
data['me_2'] <- scale(data$me^2)[,1]
data['w_2'] <- scale(data$w^2)[,1]

variables = c(
    'bc_pos_count_w', 'bc_item_count_w', 'bc_int_shipment_count_w',
    'bc_neg_count_w', 'c_neg_count_w', 'c_pos_count_w')

for (var in variables){
  for (vendor in unique(data$vendor)){
    # calculate mean and deviances
    x = data[data$vendor == vendor, var]
    m = mean(x)
    dev = x - m
    
    # concat information to dataframe
    data[data$vendor == vendor, paste(var, "m", sep=".")] <- m
    data[data$vendor == vendor, paste(var, "dev", sep=".")] <- dev
  }
}

Optimizer <- glmerControl(optimizer = "bobyqa",
                          optCtrl = list(maxfun=2e5))

model_arf <- list()

## ARF
### Stricter criteria

In [13]:
%%R
model <- glmer(c_neg_count_w_shift ~ arf
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = poisson,
      control = Optimizer)

model_arf <- append(model_arf, list(neg_a114 = model))

In [14]:
%%R
model <- glmer(c_neg_count_w_shift ~ arf_041
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = poisson,
      control = Optimizer)

model_arf <- append(model_arf, list(neg_a041 = model))

In [15]:
%%R
model <- glmer(c_neg_count_w_shift ~ arf_025
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = poisson,
      control = Optimizer)

model_arf <- append(model_arf, list(neg_a025 = model));

In [16]:
%%R
model <- glmer(arm ~ arf
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf
      + bc_neg_count_w.dev:arf
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arf <- append(model_arf, list(arm_a114 = model))

In [17]:
%%R
model <- glmer(arm ~ arf_041
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf_041
      + bc_neg_count_w.dev:arf_041
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arf <- append(model_arf, list(arm_a041 = model))

In [18]:
%%R
model <- glmer(arm ~ arf_025
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf_025
      + bc_neg_count_w.dev:arf_025
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arf <- append(model_arf, list(arm_a025 = model))

In [19]:
%%R
model <- glmer(has_price_drop_shift ~ arf
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf
      + bc_neg_count_w.dev:arf
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arf <- append(model_arf, list(drop_a114 = model))

In [20]:
%%R
model <- glmer(has_price_drop_shift ~ arf_041
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf_041
      + bc_neg_count_w.dev:arf_041
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arf <- append(model_arf, list(drop_a041 = model))

In [21]:
%%R
model <- glmer(has_price_drop_shift ~ arf_025
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf_025
      + bc_neg_count_w.dev:arf_025
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arf <- append(model_arf, list(drop_a025 = model))

R[write to console]: boundary (singular) fit: see ?isSingular



In [22]:
%%R -o results

cols = c("B", 'se', 'z', 'p')

results = data.frame()

for (name in names(model_arf)){
    model <- round(data.frame(summary(model_arf[[name]])[10]),3)
    colnames(model) <- cols
    rownames(model) <- paste(name, rownames(model), sep = '__')
    model['or'] <- round(exp(model$B), 3)
    
    results <- rbind(results, model)
}

In [23]:
def sig_stars(x):
    if x < .001: 
        return "***"
    if x < .01: 
        return "**"
    if x < .05:
        return "*"
    if x < .1: 
        return "."
    else: 
        return ""


results\
    .assign(star = lambda x: x['p'].apply(sig_stars))\
    .assign(coef = lambda x: x["B"].apply(lambda v: f'{v:.3f}') + x["star"],
            se_ = lambda x: x['se'].apply(lambda v: f'({v:.3f})'))[['coef', 'se_']]\
    .reset_index()\
    .melt(id_vars = 'index')\
    .assign(
        condition = lambda x: x['index'].str.split('__').str[0],
        coefficient = lambda x: x['index'].str.split('__').str[1]\
                                          .str.replace('_041', '')\
                                          .str.replace('_025', ''))\
    .drop('index', axis = 1)\
    .pivot_table(index= ['coefficient', 'variable'], columns= 'condition', values = 'value', aggfunc='max')

Unnamed: 0_level_0,condition,arm_a025,arm_a041,arm_a114,drop_a025,drop_a041,drop_a114,neg_a025,neg_a041,neg_a114
coefficient,variable,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
(Intercept),coef,-5.961***,-5.964***,-6.036***,-2.503***,-2.505***,-2.501***,-0.396**,-0.415**,-0.460**
(Intercept),se_,(0.148),(0.148),(0.151),(0.105),(0.106),(0.106),(0.149),(0.150),(0.151)
arf,coef,0.865,0.633,0.654**,0.265,0.197,0.010,0.067,0.697*,0.474**
arf,se_,(0.556),(0.456),(0.241),(0.217),(0.166),(0.073),(0.427),(0.319),(0.151)
arf:bc_neg_count_w.dev,coef,-0.160,0.158,0.052,0.254,0.263,0.104,,,
arf:bc_neg_count_w.dev,se_,(0.295),(0.198),(0.091),(0.281),(0.180),(0.084),,,
arf:bc_neg_count_w.m,coef,0.127,-0.146,0.007,-0.362,-0.016,-0.010,,,
arf:bc_neg_count_w.m,se_,(0.646),(0.537),(0.335),(0.346),(0.236),(0.116),,,
bc_int_shipment_count_w.dev,coef,0.175,0.174,0.182,0.297*,0.301*,0.300*,0.129***,0.128***,0.128***
bc_int_shipment_count_w.dev,se_,(0.186),(0.186),(0.186),(0.116),(0.116),(0.116),(0.021),(0.021),(0.021)


## ARM
### Internal and external cut-offs

In [24]:
%%R

model_arm <- list()

model <- glmer(arm ~ arf
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf
      + bc_neg_count_w.dev:arf
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arm <- append(model_arm, list(arm = model))

In [25]:
%%R

model <- glmer(arm_t__tps ~ arf
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf
      + bc_neg_count_w.dev:arf
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arm <- append(model_arm, list(arm_t__tps = model))

In [26]:
%%R

model <- glmer(arm_p__tps ~ arf
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf
      + bc_neg_count_w.dev:arf
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arm <- append(model_arm, list(arm_p__tps = model))

In [27]:
%%R

model <- glmer(arm_s__tps ~ arf
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf
      + bc_neg_count_w.dev:arf
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arm <- append(model_arm, list(arm_s__tps = model))

In [28]:
%%R
model <- glmer(arm_tp__tps ~ arf
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf
      + bc_neg_count_w.dev:arf
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arm <- append(model_arm, list(arm_tp__tps = model))

In [29]:
%%R
model <- glmer(arm_ts__tps ~ arf
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf
      + bc_neg_count_w.dev:arf
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arm <- append(model_arm, list(arm_ts__tps = model))

In [30]:
%%R
model <- glmer(arm_ps__tps ~ arf
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf
      + bc_neg_count_w.dev:arf
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arm <- append(model_arm, list(arm_ps__tps = model))

In [31]:
%%R
model <- glmer(arm_a ~ arf
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf
      + bc_neg_count_w.dev:arf
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arm <- append(model_arm, list(arm_a = model))

In [32]:
%%R
model <- glmer(arm_tps__tp ~ arf
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf
      + bc_neg_count_w.dev:arf
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arm <- append(model_arm, list(arm_tps__tp = model))

In [33]:
%%R
model <- glmer(arm_tps__ts ~ arf
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf
      + bc_neg_count_w.dev:arf
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arm <- append(model_arm, list(arm_tps__ts = model))

In [34]:
%%R
model <- glmer(arm_tps__ps ~ arf
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf
      + bc_neg_count_w.dev:arf
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arm <- append(model_arm, list(arm_tps__ps = model))

In [35]:
%%R
model <- glmer(arm_tps__t ~ arf
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf
      + bc_neg_count_w.dev:arf
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arm <- append(model_arm, list(arm_tps__t = model))

In [36]:
%%R
model <- glmer(arm_tps__p ~ arf
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf
      + bc_neg_count_w.dev:arf
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arm <- append(model_arm, list(arm_tps__p = model))

In [37]:
%%R
model <- glmer(arm_tps__s ~ arf
      + bc_neg_count_w.m
      + bc_neg_count_w.dev
      + bc_neg_count_w.m:arf
      + bc_neg_count_w.dev:arf
      + bc_pos_count_w.m
      + bc_pos_count_w.dev
      + bc_item_count_w.m
      + bc_item_count_w.dev
      + bc_int_shipment_count_w.m
      + bc_int_shipment_count_w.dev
      + me_ + me_2 
      + w_ + w_2 
      + (1 | vendor) + (1 | w),
      data, 
      family = binomial, 
      control = Optimizer)

model_arm <- append(model_arm, list(arm_tps__s = model))

In [38]:
%%R -o results2

cols <- c("B", 'se', 'z', 'p')

results2 <- data.frame()

for (name in names(model_arm)){
    model <- round(data.frame(summary(model_arm[[name]])[10]),3)
    colnames(model) <- cols
    rownames(model) <- paste(name, rownames(model), sep = '___')
    model['or'] <- round(exp(model$B), 3)
     
    results2 <- rbind(results2, model)
}

In [42]:
def sig_stars(x):
    if x < .001: 
        return "***"
    if x < .01: 
        return "**"
    if x < .05:
        return "*"
    if x < .1: 
        return "."
    else: 
        return ""


results2\
    .assign(star = lambda x: x['p'].apply(sig_stars))\
    .assign(coef = lambda x: x["B"].apply(lambda v: f'{v:.3f}') + x["star"],
            se_ = lambda x: x['se'].apply(lambda v: f'({v:.3f})'))[['coef', 'se_']]\
    .reset_index()\
    .melt(id_vars = 'index')\
    .assign(
        condition = lambda x: x['index'].str.split('___').str[0],
        coefficient = lambda x: x['index'].str.split('___').str[1]\
                                          .str.replace('_041', '')\
                                          .str.replace('_025', ''))\
    .drop('index', axis = 1)\
    .loc[lambda x: (x['coefficient'].str.contains('arf'))]\
    .pivot_table(index= ['coefficient', 'variable'], columns= 'condition', values = 'value', aggfunc='max').T

coefficient,arf,arf,arf:bc_neg_count_w.dev,arf:bc_neg_count_w.dev,arf:bc_neg_count_w.m,arf:bc_neg_count_w.m
variable,coef,se_,coef,se_,coef,se_
condition,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
arm,0.654**,(0.241),0.052,(0.091),0.007,(0.335)
arm_a,0.393,(0.258),0.038,(0.083),-0.367,(0.355)
arm_p__tps,0.492*,(0.233),0.161.,(0.091),0.049,(0.324)
arm_ps__tps,0.509*,(0.235),0.145,(0.090),0.015,(0.326)
arm_s__tps,0.712**,(0.243),0.052,(0.090),-0.036,(0.338)
arm_t__tps,-0.038,(0.245),-0.008,(0.079),-0.032,(0.337)
arm_tp__tps,0.353,(0.248),0.055,(0.084),-0.278,(0.341)
arm_tps__p,0.328.,(0.180),-0.004,(0.059),-0.643*,(0.262)
arm_tps__ps,0.293.,(0.157),-0.016,(0.064),-0.434.,(0.228)
arm_tps__s,-0.058,(0.124),-0.012,(0.062),-0.156,(0.181)
