In [1]:
import json
import sys
import pandas as pd

sys.path.append('../')

from functions import *

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
data = pd.read_csv("../../data/demeaned.csv")
df = data.groupby(["id", "ispolice", "endtime", "sellerfeedbackscore", "bidcount", "apple", "amazon", "samsung", "others", "increment_residual"])["residual"].apply(lambda x: x.values).reset_index()
df.endtime = pd.to_datetime(df.endtime)
df = df.sort_values("endtime").reset_index(drop=True)
df["early"] = (df.index < 476).astype(int)

valid_bids = list(df[df.ispolice == 1].bidcount.value_counts().index)
include = df[(df.bidcount > 1) & (df.bidcount.isin(valid_bids))]

bids = list(include.residual)

logged_feedback = np.log(include.sellerfeedbackscore+1)
logged_feedback = transform_covariates(logged_feedback, 100)
include.sellerfeedbackscore = logged_feedback
include = include.reset_index(drop=True)

o_covariates = np.array(include[["ispolice", "sellerfeedbackscore"]])
o_covariates = list([list(cov) for cov in o_covariates])
o_covariates = [[c[0],round(c[1], 5)] for c in o_covariates]

apple_covs = [c for i, c in enumerate(o_covariates) if i in list(include[include.apple == 1].index)]
samsung_covs = [c for i, c in enumerate(o_covariates) if i in list(include[include.samsung == 1].index)]
amazon_covs = [c for i, c in enumerate(o_covariates) if i in list(include[include.amazon == 1].index)]
others_covs = [c for i, c in enumerate(o_covariates) if i in list(include[include.others == 1].index)]

early_covs = [c for i, c in enumerate(o_covariates) if i in list(include[include.early == 1].index)]
late_covs = [c for i, c in enumerate(o_covariates) if i in list(include[include.early == 0].index)]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  include.sellerfeedbackscore = logged_feedback


### Import dumps

In [3]:
with open(f"./final dumps/CMF_main.json", "r") as f:
    d = json.load(f)
    covariates = d["covariates"]
    median_lower = d["lower"]
    median_upper = d["upper"]
    
    covariates = [[c[0],round(c[1], 5)] for c in covariates]
    median_lower = {f"{[eval(k)[0], round(eval(k)[1],5)]}": v for k, v in median_lower.items()}
    median_upper = {f"{[eval(k)[0], round(eval(k)[1],5)]}": v for k, v in median_upper.items()}

#### Main specification

Table 4 Interval Estimates

In [4]:
def loss_function1(c):
    cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
    return get_loss_function(covariates, median_upper, median_lower, cef)

point1, interval1 = get_estimates(loss_function1, 4, [0,0.5,0,0])

print(point1)
print(interval1)

[-0.90745165  0.28223717  0.10400445  0.04094814]
[-0.9199388497314462, -0.8950663933139305, 0.13893647675033916, 0.38034447428177187, 0.10214789387783955, 0.10584955993725813, 0.026805688926432095, 0.050669398793987325]


#### Dummies specification

Table 5 Interval Estimates

In [5]:
percentile = lambda p: np.percentile(np.array(include.sellerfeedbackscore), p)

estimates = []

for i in range(2):
    
    covs1 = [c for c in covariates if c[1] >= percentile(0) and c[1] < percentile(91.5) and c[0] == i]
    loss_function = lambda c: get_loss_function(covs1, median_upper, median_lower, lambda cov: c)
    
    point, interval = get_estimates(loss_function, 1, [0])
    estimates += [c[0] for c in interval]
    
    covs1 = [c for c in covariates if c[1] >= percentile(91.5) and c[1] < percentile(100) and c[0] == i]
    loss_function = lambda c: get_loss_function(covs1, median_upper, median_lower, lambda cov: c)
    
    point, interval = get_estimates(loss_function, 1, [0])
    estimates += [c[0] for c in interval]
        
print(estimates)

[-0.3247559666026739, -0.29741491346275456, 0.38966097091661517, 0.49064331054688776, -0.37125561079858116, -0.10598501381650231, -0.3969580529112641, -0.04990031278250435]


#### Brand-specific specification

Table 10 Amazon Interval Estimates

In [6]:
def loss_function3(c):
    cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
    return get_loss_function(amazon_covs, median_upper, median_lower, cef)

point3, interval3 = get_estimates(loss_function3, 4, [0,0.5,0,0])

print(point3)
print(interval3)

[-0.87815194  0.50191071  0.09569396  0.0077988 ]
[-0.899862519100599, -0.8571986867300674, 0.39245455854887, 0.730288359878228, 0.09243688314783557, 0.09877548096133065, -0.003057203787388995, 0.03046290558692028]


Table 10 Samsung Interval Estimates

In [7]:
def loss_function4(c):
    cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
    return get_loss_function(samsung_covs, median_upper, median_lower, cef)

point4, interval4 = get_estimates(loss_function4, 4, [0,0.5,0,0])

print(point4)
print(interval4)

[-1.01288497  0.50213164  0.11745596  0.02150855]
[-1.0387935802288093, -0.9878180994037266, 0.18133681535700896, 0.9383040661408254, 0.11351952201964623, 0.1211273848327085, -0.01027288264626452, 0.06472055398339122]


Table 10 Apple Interval Estimates

In [8]:
def loss_function5(c):
    cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
    return get_loss_function(apple_covs, median_upper, median_lower, cef)

point5, interval5 = get_estimates(loss_function5, 4, [0,0.5,0,0])

print(point5)
print(interval5)

[-0.77255189  0.44892765  0.0832867   0.01236126]
[-0.7946626465591154, -0.7504345579644409, 0.23795500135126593, 0.6062368711876722, 0.07961059885383935, 0.08694228460701353, -0.008270800106799387, 0.027879734958750214]


Table 10 Others Interval Estimates

In [9]:
def loss_function6(c):
    cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
    return get_loss_function(others_covs, median_upper, median_lower, cef)

point6, interval6 = get_estimates(loss_function6, 4, [0,0.5,0,0])

print(point6)
print(interval6)

[-1.08632746  0.50336191  0.13633031  0.03377791]
[-1.127478008890618, -1.0459915376852535, 0.3536497106499733, 0.6626525754929058, 0.1315970803703769, 0.14085675034966103, 0.018934078368863936, 0.04957834376676532]


#### Time-specific specification

Table 11 First Round Interval Estimates

In [10]:
def loss_function7(c):
    cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
    return get_loss_function(early_covs, median_upper, median_lower, cef)

point7, interval7 = get_estimates(loss_function7, 4, [0,0.5,0,0])

print(point7)
print(interval7)

[-0.86658465  0.50116455  0.09396965  0.01164114]
[-0.8862100510325743, -0.8479864284187909, 0.38582845280328193, 0.6365670943686139, 0.09093249871458325, 0.09682409139993836, 0.00020554061253534839, 0.02508016804253118]


Table 11 Second Round Interval Estimates

In [11]:
def loss_function8(c):
    cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
    return get_loss_function(late_covs, median_upper, median_lower, cef)

point8, interval8 = get_estimates(loss_function8, 4, [0,0.5,0,0])

print(point8)
print(interval8)

[-0.92583672  0.49962652  0.10949088  0.02501286]
[-0.9424638982503822, -0.9090985269846944, 0.20259830534789353, 0.7338237649694392, 0.10712627706404632, 0.11187347220690211, -0.003544049642591064, 0.047672750314096554]


### Bootstrap Confidence Intervals

In [12]:
prefixed = [filename for filename in os.listdir("./final dumps") if filename.startswith("bootstrap_main")]

results = []

for name in prefixed:
    with open(f"./final dumps/{name}", "r") as f:
        d = json.load(f)
        covariates = d["covariates"]
        median_lower = d["lower"]
        median_upper = d["upper"]
        
        covariates = [[c[0],round(c[1], 5)] for c in covariates]
        median_lower = {f"{[eval(k)[0], round(eval(k)[1],5)]}": v for k, v in median_lower.items()}
        median_upper = {f"{[eval(k)[0], round(eval(k)[1],5)]}": v for k, v in median_upper.items()}
        
        results.append((covariates, median_lower, median_upper))

#### Main specification

In [13]:
estimates1 = []

for i, result in enumerate(results):
    
    def loss_function1(c):
        cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
        return get_loss_function(result[0], result[2], result[1], cef)
    
    point1, interval1 = get_estimates(loss_function1, 4, [0,0.5,0,0])
    
    estimates1.append(interval1)

Table 4 Confidence Intervals

In [14]:
conf_intervals1 = report_intervals(estimates1, 95)

95% confidence interval for variable 0:
[-1.0810805468684106, -0.3010392487266563]
95% confidence interval for variable 1:
[-0.7828472164970424, 0.6867565956049403]
95% confidence interval for variable 2:
[0.02197846563836084, 0.1472108526984288]
95% confidence interval for variable 3:
[-0.03498061749570955, 0.1284245014492054]


#### Dummies specification

In [15]:
estimates2 = []

percentile = lambda p: np.percentile(np.array(include.sellerfeedbackscore), p)

for result in results:
    
    estimates = []

    for i in range(2):

        covs1 = [c for c in result[0] if c[1] >= percentile(0) and c[1] < percentile(91.5) and c[0] == i]
        loss_function = lambda c: get_loss_function(covs1, result[2], result[1], lambda cov: c)

        point, interval = get_estimates(loss_function, 1, [0])
        estimates += [c[0] for c in interval]

        covs1 = [c for c in result[0] if c[1] >= percentile(91.5) and c[1] < percentile(100) and c[0] == i]
        loss_function = lambda c: get_loss_function(covs1, result[2], result[1], lambda cov: c)

        point, interval = get_estimates(loss_function, 1, [0])
        estimates += [c[0] for c in interval]

    estimates2.append(estimates)

Table 5 Confidence Intervals

In [16]:
report_intervals(estimates2, 95)

95% confidence interval for variable 0:
[-0.3299511941075753, 0.0020011543577281676]
95% confidence interval for variable 1:
[0.1408001864173434, 0.6334632345073752]
95% confidence interval for variable 2:
[-0.40295774739439705, 0.1452468826884603]
95% confidence interval for variable 3:
[-0.44990847742539486, 0.21725736384777597]


[[-0.3299511941075753, 0.0020011543577281676],
 [0.1408001864173434, 0.6334632345073752],
 [-0.40295774739439705, 0.1452468826884603],
 [-0.44990847742539486, 0.21725736384777597]]

#### Brand-specific specification

In [17]:
estimates_samsung = []

for i, result in enumerate(results):
    
    covs = [c for c in samsung_covs if c in result[0]]
    
    def loss_function(c):
        cef = lambda cov: c[0]*(1-cov[0])+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
        return get_loss_function(covs, result[2], result[1], cef)
    
    point, interval = get_estimates(loss_function, 4, [0,0.4,0,0])
    
    estimates_samsung.append(interval)

Table 10 Samsung Confidence Intervals

In [18]:
report_intervals(estimates_samsung,95)

95% confidence interval for variable 0:
[-1.2739462786345834, -0.35141800326862904]
95% confidence interval for variable 1:
[-0.02661613922534855, 0.8588444974618918]
95% confidence interval for variable 2:
[0.0201321340781108, 0.16443615405624748]
95% confidence interval for variable 3:
[-0.10253135018689004, 0.0007108926945862543]


[[-1.2739462786345834, -0.35141800326862904],
 [-0.02661613922534855, 0.8588444974618918],
 [0.0201321340781108, 0.16443615405624748],
 [-0.10253135018689004, 0.0007108926945862543]]

In [19]:
estimates_amazon = []

for i, result in enumerate(results):
    
    covs = [c for c in amazon_covs if c in result[0]]
    
    def loss_function(c):
        cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
        return get_loss_function(covs, result[2], result[1], cef)
    
    point, interval = get_estimates(loss_function, 4, [0,0.5,0,0])
    
    estimates_amazon.append(interval)

Table 10 Amazon Confidence Intervals

In [20]:
report_intervals(estimates_amazon,95)

95% confidence interval for variable 0:
[-1.098888433579965, -0.28567523184148075]
95% confidence interval for variable 1:
[-0.5095797130521579, 0.7739827258084678]
95% confidence interval for variable 2:
[0.011646725417094442, 0.1459846431979715]
95% confidence interval for variable 3:
[-0.03966619475703053, 0.0682268492734002]


[[-1.098888433579965, -0.28567523184148075],
 [-0.5095797130521579, 0.7739827258084678],
 [0.011646725417094442, 0.1459846431979715],
 [-0.03966619475703053, 0.0682268492734002]]

In [21]:
estimates_apple = []

for i, result in enumerate(results):
    
    covs = [c for c in apple_covs if c in result[0]]
    
    def loss_function(c):
        cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
        return get_loss_function(covs, result[2], result[1], cef)
    
    point, interval = get_estimates(loss_function, 4, [0,0.5,0,0])
    
    estimates_apple.append(interval)

Table 10 Apple Confidence Intervals

In [22]:
report_intervals(estimates_apple,95)

95% confidence interval for variable 0:
[-0.9922061863524653, -0.14035883826184867]
95% confidence interval for variable 1:
[-0.8694077218147527, 0.79965824296318]
95% confidence interval for variable 2:
[0.01979307415094355, 0.13413468379092947]
95% confidence interval for variable 3:
[-0.04643231736222965, 0.1370384975347045]


[[-0.9922061863524653, -0.14035883826184867],
 [-0.8694077218147527, 0.79965824296318],
 [0.01979307415094355, 0.13413468379092947],
 [-0.04643231736222965, 0.1370384975347045]]

In [23]:
estimates_others = []

for i, result in enumerate(results):
    
    covs = [c for c in others_covs if c in result[0]]
    
    def loss_function(c):
        cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
        return get_loss_function(covs, result[2], result[1], cef)
    
    point, interval = get_estimates(loss_function, 4, [0,0.5,0,0])
    
    estimates_others.append(interval)

Table 10 Others Confidence Intervals

In [24]:
report_intervals(estimates_others,95)

95% confidence interval for variable 0:
[-1.1944246251873, -0.39725908669172183]
95% confidence interval for variable 1:
[-0.3686556514612886, 0.6968456694605154]
95% confidence interval for variable 2:
[0.040047001763240817, 0.16193861485783587]
95% confidence interval for variable 3:
[-0.03207786601724698, 0.06749234360287949]


[[-1.1944246251873, -0.39725908669172183],
 [-0.3686556514612886, 0.6968456694605154],
 [0.040047001763240817, 0.16193861485783587],
 [-0.03207786601724698, 0.06749234360287949]]

#### Time-specific estimates

In [25]:
estimates_early = []

for i, result in enumerate(results):
    
    covs = [c for c in early_covs if c in result[0]]
    
    def loss_function(c):
        cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
        return get_loss_function(covs, result[2], result[1], cef)
    
    point, interval = get_estimates(loss_function, 4, [0,0.5,0,0])
    
    estimates_early.append(interval)

Table 11 First Round Confidence Intervals

In [26]:
report_intervals(estimates_early, 95)

95% confidence interval for variable 0:
[-1.0789279849790325, -0.24126944460477576]
95% confidence interval for variable 1:
[-0.4759515676211425, 0.6985894814757462]
95% confidence interval for variable 2:
[0.014491019836170469, 0.14339843063977145]
95% confidence interval for variable 3:
[-0.0316428779285742, 0.06408554802720093]


[[-1.0789279849790325, -0.24126944460477576],
 [-0.4759515676211425, 0.6985894814757462],
 [0.014491019836170469, 0.14339843063977145],
 [-0.0316428779285742, 0.06408554802720093]]

In [27]:
estimates_late = []

for i, result in enumerate(results):
    
    covs = [c for c in late_covs if c in result[0]]
    
    def loss_function(c):
        cef = lambda cov: c[0]+c[1]*cov[0]+c[2]*cov[1]*(1-cov[0])+c[3]*cov[1]*cov[0]
        return get_loss_function(covs, result[2], result[1], cef)
    
    point, interval = get_estimates(loss_function, 4, [0,0.5,0,0])
    
    estimates_late.append(interval)

Table 11 Second Round Confidence Intervals

In [28]:
report_intervals(estimates_late, 95)

95% confidence interval for variable 0:
[-1.1097033611270979, -0.30590764217629246]
95% confidence interval for variable 1:
[-1.0846104614055028, 0.847416485574725]
95% confidence interval for variable 2:
[0.02921982574074949, 0.14603584303218595]
95% confidence interval for variable 3:
[-0.07605944190902861, 0.17363535591430765]


[[-1.1097033611270979, -0.30590764217629246],
 [-1.0846104614055028, 0.847416485574725],
 [0.02921982574074949, 0.14603584303218595],
 [-0.07605944190902861, 0.17363535591430765]]