In [1]:
import json
import sys
import pandas as pd

sys.path.append('../')

from functions import *

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
data = pd.read_csv("../../data/demeaned.csv")
df = data.groupby(["id", "ispolice", "sellerfeedbackscore", "bidcount", "apple", "amazon", "samsung", "others", "increment_residual"])["residual"].apply(lambda x: x.values).reset_index()

valid_bids = list(df[df.ispolice == 1].bidcount.value_counts().index)
include = df[(df.bidcount > 1) & (df.bidcount.isin(valid_bids))]

bids = list(include.residual)

logged_feedback = np.log(include.sellerfeedbackscore+1)
logged_feedback = transform_covariates(logged_feedback, 100)
include.sellerfeedbackscore = logged_feedback
include = include.reset_index(drop=True)

o_covariates = np.array(include[["ispolice", "sellerfeedbackscore"]])
o_covariates = list([list(cov) for cov in o_covariates])
o_covariates = [[c[0],round(c[1], 5)] for c in o_covariates]

apple_covs = [c for i, c in enumerate(o_covariates) if i in list(include[include.apple == 1].index)]
samsung_covs = [c for i, c in enumerate(o_covariates) if i in list(include[include.samsung == 1].index)]
amazon_covs = [c for i, c in enumerate(o_covariates) if i in list(include[include.amazon == 1].index)]
others_covs = [c for i, c in enumerate(o_covariates) if i in list(include[include.others == 1].index)]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  include.sellerfeedbackscore = logged_feedback


### Import dumps

In [3]:
with open(f"./final dumps/CMF_main.json", "r") as f:
    d = json.load(f)
    covariates = d["covariates"]
    median_lower = d["lower"]
    median_upper = d["upper"]
    
    covariates = [[c[0],round(c[1], 5)] for c in covariates]
    median_lower = {f"{[eval(k)[0], round(eval(k)[1],5)]}": v for k, v in median_lower.items()}
    median_upper = {f"{[eval(k)[0], round(eval(k)[1],5)]}": v for k, v in median_upper.items()}

#### Main specification

In [4]:
def loss_function1(c):
    cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
    return get_loss_function(covariates, median_upper, median_lower, cef)

point1, interval1 = get_estimates(loss_function1, 4, [0,0.5,0,0])

print(point1)
print(interval1)

[-0.90745165  0.28223717  0.10400445  0.04094814]
[-0.9199388497314462, -0.8950663933139305, 0.13893647675033916, 0.38034447428177187, 0.10214789387783955, 0.10584955993725813, 0.026805688926432095, 0.050669398793987325]


#### Dummies specification

In [5]:
percentile = lambda p: np.percentile(np.array(include.sellerfeedbackscore), p)

estimates = []

for i in range(2):
    
    covs1 = [c for c in covariates if c[1] >= percentile(0) and c[1] < percentile(91.5) and c[0] == i]
    loss_function = lambda c: get_loss_function(covs1, median_upper, median_lower, lambda cov: c)
    
    point, interval = get_estimates(loss_function, 1, [0])
    estimates += [c[0] for c in interval]
    
    covs1 = [c for c in covariates if c[1] >= percentile(91.5) and c[1] < percentile(100) and c[0] == i]
    loss_function = lambda c: get_loss_function(covs1, median_upper, median_lower, lambda cov: c)
    
    point, interval = get_estimates(loss_function, 1, [0])
    estimates += [c[0] for c in interval]
        
print(estimates)

[-0.3247559666026739, -0.29741491346275456, 0.38966097091661517, 0.49064331054688776, -0.37125561079858116, -0.10598501381650231, -0.3969580529112641, -0.04990031278250435]


#### Brand-specific specification

In [6]:
def loss_function3(c):
    cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
    return get_loss_function(amazon_covs, median_upper, median_lower, cef)

point3, interval3 = get_estimates(loss_function3, 4, [0,0.5,0,0])

print(point3)
print(interval3)

[-0.87786815  0.4391263   0.09566554  0.02321946]
[-0.8999110385309034, -0.8569160993873751, 0.2516395994338509, 0.5768730653151491, 0.09241019762821258, 0.09874761328443689, 0.004821222465710407, 0.036827004722374206]


In [47]:
def loss_function4(c):
    cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
    return get_loss_function(samsung_covs, median_upper, median_lower, cef)

point4, interval4 = get_estimates(loss_function4, 4, [0,0.5,0,0])

print(point4)
print(interval4)

[-0.97612787  0.40452798  0.12601     0.04570228]
[-1.0018857430455579, -0.949975114849752, -0.015397906670380532, 0.720817390988442, 0.1224278176621307, 0.1296752618436335, 0.004099837692607663, 0.07703735938009097]


In [8]:
def loss_function5(c):
    cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
    return get_loss_function(apple_covs, median_upper, median_lower, cef)

point5, interval5 = get_estimates(loss_function5, 4, [0,0.5,0,0])

print(point5)
print(interval5)

[-0.77528482  0.49835632  0.08366161 -0.00210577]
[-0.7971862885394333, -0.7531665086391135, 0.3638035833252901, 0.7504153249461836, 0.07998655868467609, 0.0873158659277836, -0.015460349953916024, 0.02292959844772547]


In [9]:
def loss_function6(c):
    cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
    return get_loss_function(others_covs, median_upper, median_lower, cef)

point6, interval6 = get_estimates(loss_function6, 4, [0,0.5,0,0])

print(point6)
print(interval6)

[-1.07340031  0.50323414  0.13494833  0.03249643]
[-1.1145233223056168, -1.0328194025802702, 0.35365344313474995, 0.6626504240258898, 0.13022434009197315, 0.13950684479554476, 0.01766539029380184, 0.048309660299615205]


### Bootstrap Confidence Intervals

In [10]:
prefixed = [filename for filename in os.listdir("./final dumps") if filename.startswith("bootstrap_main")]

results = []

for name in prefixed:
    with open(f"./final dumps/{name}", "r") as f:
        d = json.load(f)
        covariates = d["covariates"]
        median_lower = d["lower"]
        median_upper = d["upper"]
        
        covariates = [[c[0],round(c[1], 5)] for c in covariates]
        median_lower = {f"{[eval(k)[0], round(eval(k)[1],5)]}": v for k, v in median_lower.items()}
        median_upper = {f"{[eval(k)[0], round(eval(k)[1],5)]}": v for k, v in median_upper.items()}
        
        results.append((covariates, median_lower, median_upper))

#### Main specification

In [11]:
estimates1 = []

for i, result in enumerate(results):
    
    def loss_function1(c):
        cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
        return get_loss_function(result[0], result[2], result[1], cef)
    
    point1, interval1 = get_estimates(loss_function1, 4, [0,0.5,0,0])
    
    estimates1.append(interval1)

In [12]:
conf_intervals1 = report_intervals(estimates1, 95)

95% confidence interval for variable 0:
[-1.0810805468684106, -0.3010392487266563]
95% confidence interval for variable 1:
[-0.7828472164970424, 0.6867565956049403]
95% confidence interval for variable 2:
[0.02197846563836084, 0.1472108526984288]
95% confidence interval for variable 3:
[-0.03498061749570955, 0.1284245014492054]


#### Dummies specification

In [55]:
estimates2 = []

percentile = lambda p: np.percentile(np.array(include.sellerfeedbackscore), p)

for result in results:
    
    estimates = []

    for i in range(2):

        covs1 = [c for c in result[0] if c[1] >= percentile(0) and c[1] < percentile(91.5) and c[0] == i]
        loss_function = lambda c: get_loss_function(covs1, result[2], result[1], lambda cov: c)

        point, interval = get_estimates(loss_function, 1, [0])
        estimates += [c[0] for c in interval]

        covs1 = [c for c in result[0] if c[1] >= percentile(91.5) and c[1] < percentile(100) and c[0] == i]
        loss_function = lambda c: get_loss_function(covs1, result[2], result[1], lambda cov: c)

        point, interval = get_estimates(loss_function, 1, [0])
        estimates += [c[0] for c in interval]

    estimates2.append(estimates)

In [56]:
report_intervals(estimates2, 95)

95% confidence interval for variable 0:
[-0.3299511941075753, 0.0020011543577281676]
95% confidence interval for variable 1:
[0.1408001864173434, 0.6334632345073752]
95% confidence interval for variable 2:
[-0.40295774739439705, 0.1452468826884603]
95% confidence interval for variable 3:
[-0.44990847742539486, 0.21725736384777597]


[[-0.3299511941075753, 0.0020011543577281676],
 [0.1408001864173434, 0.6334632345073752],
 [-0.40295774739439705, 0.1452468826884603],
 [-0.44990847742539486, 0.21725736384777597]]

#### Brand-specific specification

In [50]:
estimates_samsung = []

for i, result in enumerate(results):
    
    covs = [c for c in samsung_covs if c in result[0]]
    
    def loss_function(c):
        cef = lambda cov: c[0]*(1-cov[0])+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
        return get_loss_function(covs, result[2], result[1], cef)
    
    point, interval = get_estimates(loss_function, 4, [0,0.4,0,0])
    
    estimates_samsung.append(interval)

In [51]:
report_intervals(estimates_samsung,95)

95% confidence interval for variable 0:
[-1.2758881815931227, -0.3615791727757852]
95% confidence interval for variable 1:
[-0.026594139194354622, 0.8574711294092471]
95% confidence interval for variable 2:
[0.02062497437988826, 0.16469691453406002]
95% confidence interval for variable 3:
[-0.10253137020437217, 0.0007102971548719609]


[[-1.2758881815931227, -0.3615791727757852],
 [-0.026594139194354622, 0.8574711294092471],
 [0.02062497437988826, 0.16469691453406002],
 [-0.10253137020437217, 0.0007102971548719609]]

In [38]:
estimates_amazon = []

for i, result in enumerate(results):
    
    covs = [c for c in amazon_covs if c in result[0]]
    
    def loss_function(c):
        cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
        return get_loss_function(covs, result[2], result[1], cef)
    
    point, interval = get_estimates(loss_function, 4, [0,0.5,0,0])
    
    estimates_amazon.append(interval)

In [43]:
report_intervals(estimates_amazon,95)

95% confidence interval for variable 0:
[-1.099374749563581, -0.2773724695036865]
95% confidence interval for variable 1:
[-0.922631325513298, 0.7236865015523338]
95% confidence interval for variable 2:
[0.01148470148998482, 0.146069248454594]
95% confidence interval for variable 3:
[-0.04049514732381121, 0.14019035697702847]


[[-1.099374749563581, -0.2773724695036865],
 [-0.922631325513298, 0.7236865015523338],
 [0.01148470148998482, 0.146069248454594],
 [-0.04049514732381121, 0.14019035697702847]]

In [39]:
estimates_apple = []

for i, result in enumerate(results):
    
    covs = [c for c in apple_covs if c in result[0]]
    
    def loss_function(c):
        cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
        return get_loss_function(covs, result[2], result[1], cef)
    
    point, interval = get_estimates(loss_function, 4, [0,0.5,0,0])
    
    estimates_apple.append(interval)

In [41]:
report_intervals(estimates_apple,95)

95% confidence interval for variable 0:
[-0.9935493722732301, -0.14813230265378377]
95% confidence interval for variable 1:
[-0.48172847049910655, 0.7998356595987787]
95% confidence interval for variable 2:
[0.02011073558705111, 0.13417857201164954]
95% confidence interval for variable 3:
[-0.045096466991176844, 0.07065553183450685]


[[-0.9935493722732301, -0.14813230265378377],
 [-0.48172847049910655, 0.7998356595987787],
 [0.02011073558705111, 0.13417857201164954],
 [-0.045096466991176844, 0.07065553183450685]]

In [52]:
estimates_others = []

for i, result in enumerate(results):
    
    covs = [c for c in others_covs if c in others_covs]
    
    def loss_function(c):
        cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
        return get_loss_function(covs, result[2], result[1], cef)
    
    point, interval = get_estimates(loss_function, 4, [0,0.5,0,0])
    
    estimates_others.append(interval)

In [53]:
report_intervals(estimates_others,95)

95% confidence interval for variable 0:
[-1.186827521014219, -0.3947472217454029]
95% confidence interval for variable 1:
[-0.36630234968898684, 0.6969546800087092]
95% confidence interval for variable 2:
[0.039267691345530445, 0.16125919728398544]
95% confidence interval for variable 3:
[-0.03233094722326156, 0.06687213340049457]


[[-1.186827521014219, -0.3947472217454029],
 [-0.36630234968898684, 0.6969546800087092],
 [0.039267691345530445, 0.16125919728398544],
 [-0.03233094722326156, 0.06687213340049457]]