Here we estimate the following conditional median function:
$$
\text{med}[valuation_i|ispolice_i, sellerfeedbackscore_i]=\alpha+\beta_1 ispolice_i+\beta_2 sellerfeedbackscore_i
$$
using the entire sample where the number of bids received is strictly between 3 and 12.

In [1]:
import pandas as pd
import numpy as np
from scipy import optimize
from main import estimate_median, get_loss_function, transform_covariates

data = pd.read_csv("../../data/demeaned.csv")
df = data.groupby(["id", "ispolice", "sellerfeedbackscore", "bidcount", "apple", "amazon"])["bids"].apply(lambda x: x.values).reset_index()

In [2]:
include = df[(df.bidcount > 3) & (df.bidcount < 12)]

bids = list(include.bids)

logged_feedback = np.log(include.sellerfeedbackscore+1)
logged_feedback = transform_covariates(logged_feedback, 100)
include.sellerfeedbackscore = logged_feedback

covariates = np.array(include[["ispolice", "sellerfeedbackscore"]])
covariates = list([list(cov) for cov in covariates])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [3]:
median_upper, median_lower = estimate_median(bids, covariates, (0,9))

calculating values for covariate: [0.0, 9.869815537797194] (1/97)
total time elapsed: 0.00018111799999998013s
calculating values for covariate: [0.0, 3.4599452524466194] (2/97)
total time elapsed: 11.745001254s
calculating values for covariate: [0.0, 5.3228978543165555] (3/97)
total time elapsed: 25.413597884999998s
calculating values for covariate: [0.0, 5.439956999893489] (4/97)
total time elapsed: 37.780208802s
calculating values for covariate: [0.0, 1.4606755448912938] (5/97)
total time elapsed: 50.133579372s
calculating values for covariate: [0.0, 5.724391198805521] (6/97)
total time elapsed: 62.81763041199999s
calculating values for covariate: [0.0, 3.5636780469053466] (7/97)
total time elapsed: 74.809073286s
calculating values for covariate: [0.0, 2.440017827490736] (8/97)
total time elapsed: 86.337803323s
calculating values for covariate: [0.0, 6.926257377649698] (9/97)
total time elapsed: 98.045221716s
calculating values for covariate: [0.0, 4.257356013476545] (10/97)
total ti

calculating values for covariate: [0.0, 8.288236466386568] (81/97)
total time elapsed: 984.452227641s
calculating values for covariate: [0.0, 6.3836558487392425] (82/97)
total time elapsed: 995.765655001s
calculating values for covariate: [0.0, 7.307635641656483] (83/97)
total time elapsed: 1006.969052057s
calculating values for covariate: [0.0, 6.882170651082656] (84/97)
total time elapsed: 1018.082968108s
calculating values for covariate: [1.0, 10.394073463078007] (85/97)
total time elapsed: 1029.2971318730001s
calculating values for covariate: [0.0, 8.007850269063637] (86/97)
total time elapsed: 1044.7591175450002s
calculating values for covariate: [0.0, 6.33767709149798] (87/97)
total time elapsed: 1057.136864387s
calculating values for covariate: [0.0, 6.07562674059974] (88/97)
total time elapsed: 1070.6770655290002s
calculating values for covariate: [0.0, 5.25221821313404] (89/97)
total time elapsed: 1083.7502285080002s
calculating values for covariate: [0.0, 7.680821506025506] (

In [4]:
def loss_function(c):
    a, b1, b2 = c
    cef = lambda cov: a+b1*cov[0]+b2*cov[1]
    return get_loss_function(covariates, median_upper, median_lower, cef)

b_hat = optimize.brute(loss_function, ranges=[(0,2), (-1,1), (-1,1)])
# interval_lower = optimize.newton(lambda a_l, b_l: loss_function(a_l, b_l)-loss_function(b_hat)-10, b_hat-0.1)
# interval_upper = optimize.newton(lambda b_l, b_u: loss_function(b_l, b_u)-loss_function(b_hat)-10, b_hat+0.1)

In [5]:
def fun(c):
    return loss_function(c)-loss_function(b_hat)-10

print(optimize.newton(fun, [b_hat[0]+5,b_hat[1]+5,b_hat[2]+4]))
print(optimize.newton(fun, [b_hat[0]-2,b_hat[1]-2,b_hat[2]-2]))

[ 0.18945957  0.1948784  -0.01004716]
[-0.01405975 -0.00478646 -0.01663363]


In [6]:
loss_function([0.13918062, 0.13635494, 0.06331243])

180.79211345877812

In [7]:
b_hat

array([ 0.0072902 ,  0.03465769, -0.00030587])

In [8]:
def loss_function(c):
    a, b1, b2, b3 = c
    cef = lambda cov: a+b1*cov[0]+b2*cov[1]+b3*cov[0]*cov[1]
    return get_loss_function(covariates, median_upper, median_lower, cef)

b_hat_2 = optimize.brute(loss_function, ranges=[(0,2), (-1,1), (-1,1), (-1,1)])
# interval_lower = optimize.newton(lambda a_l, b_l: loss_function(a_l, b_l)-loss_function(b_hat)-10, b_hat-0.1)
# interval_upper = optimize.newton(lambda b_l, b_u: loss_function(b_l, b_u)-loss_function(b_hat)-10, b_hat+0.1)

In [10]:
interval_lower_b1 = optimize.newton(lambda b_l: loss_function([b_hat[0], b_l, b_hat[2]])-loss_function(b_hat)-1, b_hat[1]-0.1)
interval_upper_b1 = optimize.newton(lambda b_l: loss_function([b_hat[0], b_l, b_hat[2]])-loss_function(b_hat)-1, b_hat[1]+0.1)

interval_lower_b2 = optimize.newton(lambda b_l: loss_function([b_hat[0], b_hat[1], b_l]])-loss_function(b_hat)-1, b_hat[2]-0.1)
interval_upper_b2 = optimize.newton(lambda b_l: loss_function([b_hat[0], b_hat[1], b_l]])-loss_function(b_hat)-1, b_hat[2]+0.1)

interval_lower_a = optimize.newton(lambda b_l: loss_function([b_l, b_hat[1], b_hat[2]])-loss_function(b_hat)-1, b_hat[0]-0.1)
interval_upper_a = optimize.newton(lambda b_l: loss_function([b_l, b_hat[1], b_hat[2]])-loss_function(b_hat)-1, b_hat[0]+0.1)


SyntaxError: closing parenthesis ')' does not match opening parenthesis '[' (<ipython-input-10-15b61a5d1a22>, line 4)

In [9]:
interval_lower_a, interval_upper_a, interval_lower_b, interval_upper_b

array([ 0.00745954, -0.32908332, -0.00033118,  0.03668607])