Here we estimate the following conditional expectation function:
$$
\mathbb{E}[valuation_i|ispolice_i, log\_sellerfeedbackscore_i]=\alpha+\beta_1 ispolice_i+\beta_2 log\_sellerfeedbackscore_i
$$
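using listings of Apple tablets where the number of bids received is strictly between 3 and 12 (i.e. 4 to 11 bids), further restricted to bid counts that also occur among police listings of Apple tablets.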

In [1]:
import pandas as pd
import numpy as np
from scipy import optimize
from main import estimate_median, get_loss_function, transform_covariates

# Load the bid-level data and collapse it so that each combination of the
# grouping keys becomes one row whose "bids" entry is the array of its bids.
group_keys = ["id", "ispolice", "sellerfeedbackscore", "bidcount", "apple", "amazon"]
data = pd.read_csv("../../data/demeaned.csv")
df = (
    data.groupby(group_keys)["bids"]
    .apply(lambda bid_series: bid_series.values)
    .reset_index()
)

In [2]:
# Bid counts that occur among rows with apple == 1 and ispolice == 1.
apple_bids = list(df[(df.apple == 1) & (df.ispolice == 1)].bidcount.value_counts().index)

# Sample: apple rows with strictly more than 3 and fewer than 12 bids, whose bid
# count also appears in `apple_bids`.
in_sample = (
    (df.bidcount > 3)
    & (df.bidcount < 12)
    & (df.apple == 1)
    & df.bidcount.isin(apple_bids)
)
# .copy() makes `include` an independent frame, so the column assignment below
# no longer writes to a slice of `df` (which raised SettingWithCopyWarning).
include = df[in_sample].copy()

bids = list(include.bids)

# Replace the raw feedback score with log(score + 1), then pass it through
# transform_covariates.
# NOTE(review): the second argument (100) presumably sets the number of covariate
# groups (the estimation log counts ".../100") — confirm against main.transform_covariates.
logged_feedback = np.log(include.sellerfeedbackscore + 1)
logged_feedback = transform_covariates(logged_feedback, 100)
include["sellerfeedbackscore"] = logged_feedback

# One [ispolice, transformed log feedback] pair per row, as a list of lists.
covariates = [
    list(row) for row in include[["ispolice", "sellerfeedbackscore"]].to_numpy()
]

  covariates[order] = covariates[order].mean()
  ret = ret.dtype.type(ret / rcount)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [3]:
# estimate_mean is called here, but the import cell only pulls estimate_median
# from `main`, so a fresh kernel (Restart & Run All) fails with NameError.
# NOTE(review): assumes `main` exports estimate_mean — confirm.
from main import estimate_mean

# Slow step: the captured progress log shows roughly 50 s per covariate value,
# with 100 values in total.
# NOTE(review): the meaning of the (0, 9) argument is not visible in this
# notebook — confirm against main.estimate_mean.
expected_upper, expected_lower = estimate_mean(bids, covariates, (0, 9))

calculating values for covariate: [1.0, 9.962043589246067] (1/100)
total time elapsed: 0.00045846200000010384s


  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0]


calculating values for covariate: [0.0, 5.602118820879701] (2/100)
total time elapsed: 49.654754234s
calculating values for covariate: [0.0, 1.791759469228055] (3/100)
total time elapsed: 95.906465216s
calculating values for covariate: [0.0, 4.795790545596741] (4/100)
total time elapsed: 149.424620993s
calculating values for covariate: [0.0, 6.070737728002491] (5/100)
total time elapsed: 205.398089943s
calculating values for covariate: [0.0, 3.912023005428146] (6/100)
total time elapsed: 254.85261978800003s
calculating values for covariate: [0.0, 5.529429087511423] (7/100)
total time elapsed: 304.548315229s
calculating values for covariate: [0.0, 3.713572066704308] (8/100)
total time elapsed: 354.918927834s
calculating values for covariate: [0.0, 3.4965075614664802] (9/100)
total time elapsed: 404.494109241s
calculating values for covariate: [0.0, 5.575949103146316] (10/100)
total time elapsed: 454.794881331s
calculating values for covariate: [0.0, 1.9459101490553132] (11/100)
total ti

In [4]:
def loss_function(c):
    """Evaluate the loss of a linear CEF candidate.

    `c` unpacks into (intercept, ispolice coefficient, feedback coefficient).
    The candidate CEF is intercept + b1 * cov[0] + b2 * cov[1], where each
    `cov` is one [ispolice, sellerfeedbackscore] entry of the notebook-level
    `covariates`. The result of `get_loss_function` for that candidate is
    returned and used as the objective by `optimize.brute`.
    """
    intercept, police_coef, feedback_coef = c

    def cef(cov):
        return intercept + police_coef * cov[0] + feedback_coef * cov[1]

    return get_loss_function(covariates, expected_upper, expected_lower, cef)

# Search bounds for (intercept, ispolice coefficient, feedback coefficient).
# optimize.brute evaluates the loss on a grid over these ranges and, with its
# default settings, polishes the best grid point with a local minimizer, so the
# returned estimate is not guaranteed to stay inside the bounds.
linear_ranges = [(0, 2), (-1, 1), (-1, 1)]
b_hat = optimize.brute(loss_function, ranges=linear_ranges)
# TODO: interval bounds are not computed. An earlier draft used optimize.newton
# to look for points where the loss exceeds loss_function(b_hat) by 10, but it
# called loss_function with two arguments, which the function does not accept.

In [5]:
b_hat

array([ 0.76410628,  0.43918193, -0.0051102 ])

In [6]:
def loss_function(c):
    """Evaluate the loss of a CEF candidate with an interaction term.

    `c` unpacks into (intercept, ispolice coefficient, feedback coefficient,
    interaction coefficient). The candidate CEF is
    intercept + b1 * cov[0] + b2 * cov[1] + b3 * cov[0] * cov[1], where each
    `cov` is one [ispolice, sellerfeedbackscore] entry of the notebook-level
    `covariates`. The result of `get_loss_function` for that candidate is
    returned and used as the objective by `optimize.brute`.

    NOTE(review): this reuses the name of the three-coefficient loss_function
    defined in an earlier cell and silently replaces it once this cell runs.
    """
    intercept, police_coef, feedback_coef, interaction_coef = c

    def cef(cov):
        return (
            intercept
            + police_coef * cov[0]
            + feedback_coef * cov[1]
            + interaction_coef * cov[0] * cov[1]
        )

    return get_loss_function(covariates, expected_upper, expected_lower, cef)

# Search bounds for (intercept, ispolice coefficient, feedback coefficient,
# interaction coefficient). optimize.brute evaluates the loss on a grid over
# these ranges and, with its default settings, polishes the best grid point with
# a local minimizer, so the returned estimate may fall outside the bounds.
interaction_ranges = [(0, 2), (-1, 1), (-1, 1), (-1, 1)]
b_hat_2 = optimize.brute(loss_function, ranges=interaction_ranges)
# TODO: interval bounds are not computed. An earlier draft used optimize.newton
# around b_hat (not b_hat_2) and called loss_function with two arguments, which
# the function does not accept.

In [7]:
b_hat_2

array([ 0.7641037 ,  0.10040117, -0.00511256,  0.03313733])