Here we estimate the following conditional expectation function:
$$
\mathbb{E}[valuation_i|ispolice_i, log\_sellerfeedbackscore_i]=\alpha+\beta_1 ispolice_i + \beta_2log\_sellerfeedbackscore_i,
$$
using all auctions in the sample that received exactly 6 bids.

In [1]:
import pandas as pd
import numpy as np
from scipy import optimize
from main import estimate_mean, get_loss_function

# Read the demeaned bid data, then collapse it to one row per distinct
# (id, ispolice, sellerfeedbackscore, bidcount, apple, amazon) combination,
# with that group's "bids" values gathered into a single array.
data = pd.read_csv("../../data/demeaned.csv")
df = (
    data
    .groupby(["id", "ispolice", "sellerfeedbackscore", "bidcount", "apple", "amazon"])["bids"]
    .apply(lambda group_bids: group_bids.values)
    .reset_index()
)

In [2]:
# Keep only the rows whose bid count is exactly six.  The explicit .copy()
# makes `include` an independent frame, so overwriting a column below no
# longer triggers pandas' SettingWithCopyWarning.
include = df[df.bidcount == 6].copy()

# One entry per retained row; each entry is that row's array of bids.
bids = list(include.bids)

# log(1 + score) stays finite when the feedback score is zero.
logged_feedback = np.log(include.sellerfeedbackscore + 1)
# NOTE(review): transform_covariates is not imported in any visible cell (only
# estimate_mean and get_loss_function are imported from main) -- presumably it
# lives in main as well; add it to the import cell so Restart & Run All works.
# The meaning of the second argument (100) is not visible here -- TODO confirm.
logged_feedback = transform_covariates(logged_feedback, 100)
include["sellerfeedbackscore"] = logged_feedback

# Plain nested list of [ispolice, transformed log feedback] pairs, in the same
# row order as `bids`.
covariates = [list(cov) for cov in np.array(include[["ispolice", "sellerfeedbackscore"]])]

  covariates[order] = covariates[order].mean()
  ret = ret.dtype.type(ret / rcount)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [3]:
# Estimate the upper and lower expected values for every covariate pair.
# This is the expensive step of the notebook: the recorded run logged about
# 20-27 s per covariate point (roughly 2,037 s after 85 of 100 points), so
# consider caching the result before re-running downstream cells.
# NOTE(review): (0,9) is presumably the range over which estimate_mean
# integrates/searches -- confirm against main.estimate_mean.
expected_upper, expected_lower = estimate_mean(bids, covariates, (0,9))

calculating values for covariate: [0.0, 7.6290038896529575] (1/100)
total time elapsed: 0.008287569000000161s


  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0]


calculating values for covariate: [0.0, 4.653960350157523] (2/100)
total time elapsed: 26.894708074s
calculating values for covariate: [0.0, 6.882437470997846] (3/100)
total time elapsed: 55.598566273s
calculating values for covariate: [0.0, 5.605802066295998] (4/100)
total time elapsed: 79.223135931s
calculating values for covariate: [0.0, 3.8918202981106265] (5/100)
total time elapsed: 101.878826628s
calculating values for covariate: [0.0, 7.789868559054706] (6/100)
total time elapsed: 129.235715138s
calculating values for covariate: [0.0, 5.420534999272286] (7/100)
total time elapsed: 150.208779248s
calculating values for covariate: [0.0, 6.410174881966167] (8/100)
total time elapsed: 170.177853923s
calculating values for covariate: [0.0, 5.545177444479562] (9/100)
total time elapsed: 190.915861865s
calculating values for covariate: [0.0, 6.862757913051401] (10/100)
total time elapsed: 215.476407237s
calculating values for covariate: [0.0, 6.7464121285733745] (11/100)
total time ela

calculating values for covariate: [0.0, 6.093569770045136] (81/100)
total time elapsed: 1962.459825656s
calculating values for covariate: [0.0, 2.302585092994046] (82/100)
total time elapsed: 1981.360362358s
calculating values for covariate: [0.0, 4.700480365792417] (83/100)
total time elapsed: 2000.407768847s
calculating values for covariate: [0.0, 5.8916442118257715] (84/100)
total time elapsed: 2020.841184765s
calculating values for covariate: [1.0, 9.955277308666151] (85/100)
total time elapsed: 2037.516462813s


In [4]:
def loss_function(c):
    """Loss of the linear CEF  a + b1*ispolice + b2*log_feedback.

    Parameters
    ----------
    c : sequence of 3 floats
        (a, b1, b2): intercept, coefficient on cov[0] (ispolice) and
        coefficient on cov[1] (transformed log seller feedback score).

    Returns
    -------
    Whatever ``get_loss_function`` returns for this candidate CEF given the
    notebook-level ``covariates``, ``expected_upper`` and ``expected_lower``
    (presumably a scalar loss, since it is minimised by ``optimize.brute``).
    """
    intercept, coef_police, coef_feedback = c

    def cef(cov):
        # Candidate conditional expectation evaluated at one covariate pair.
        return intercept + coef_police * cov[0] + coef_feedback * cov[1]

    return get_loss_function(covariates, expected_upper, expected_lower, cef)

# Grid search for (a, b1, b2): intercept in [0, 2], both slopes in [-1, 1].
# NOTE(review): with scipy's defaults, brute also polishes the best grid point
# with a local optimiser, so b_hat need not lie on the grid -- see scipy docs.
b_hat = optimize.brute(loss_function, ranges=[(0,2), (-1,1), (-1,1)])
# TODO: a draft interval_lower / interval_upper search was left commented out
# here (optimize.newton on loss_function(.) - loss_function(b_hat) - 10, started
# at b_hat -/+ 0.1). It called loss_function with two positional arguments,
# which does not match the single-argument signature, so it must be reworked
# before being restored.

In [5]:
b_hat

array([0.59476515, 0.15436438, 0.00078184])

In [6]:
def loss_function(c):
    """Loss of the CEF with an interaction term.

    The candidate CEF is  a + b1*ispolice + b2*log_feedback
    + b3*ispolice*log_feedback.

    NOTE: this rebinds the name ``loss_function`` that an earlier cell defines
    with three parameters; after this cell runs, the four-parameter version is
    the one in scope.

    Parameters
    ----------
    c : sequence of 4 floats
        (a, b1, b2, b3): intercept, coefficient on cov[0] (ispolice),
        coefficient on cov[1] (transformed log seller feedback score) and
        coefficient on their product.

    Returns
    -------
    Whatever ``get_loss_function`` returns for this candidate CEF given the
    notebook-level ``covariates``, ``expected_upper`` and ``expected_lower``
    (presumably a scalar loss, since it is minimised by ``optimize.brute``).
    """
    intercept, coef_police, coef_feedback, coef_interaction = c

    def cef(cov):
        # Candidate conditional expectation evaluated at one covariate pair.
        return (
            intercept
            + coef_police * cov[0]
            + coef_feedback * cov[1]
            + coef_interaction * cov[0] * cov[1]
        )

    return get_loss_function(covariates, expected_upper, expected_lower, cef)

# Grid search for (a, b1, b2, b3): intercept in [0, 2], the two slopes and the
# interaction coefficient each in [-1, 1].
b_hat_2 = optimize.brute(loss_function, ranges=[(0,2), (-1,1), (-1,1), (-1,1)])
# TODO: a draft interval_lower / interval_upper search was left commented out
# here (optimize.newton on loss_function(.) - loss_function(b_hat) - 10, started
# at b_hat -/+ 0.1). It referenced b_hat rather than b_hat_2 and called
# loss_function with two positional arguments, which does not match the
# single-argument signature, so it must be reworked before being restored.

In [7]:
b_hat_2

array([ 0.5947999 , -0.43161861,  0.0007786 ,  0.04905682])