Here we estimate the following conditional expectation function:
$$
\mathbb{E}[valuation_i|ispolice_i]=\alpha+\beta_1 ispolice_i,
$$
using the entire sample where the number of bids received is strictly between 3 and 12.

In [1]:
import pandas as pd
import numpy as np
from scipy import optimize
from main import estimate_median

def get_loss_function(covariates, upper_dict, lower_dict, cef):
    _sum = 0
    for covariate in covariates:
        upper = upper_dict[f"{covariate}"]
        lower = lower_dict[f"{covariate}"]
        if cef(covariate) > upper:
            _sum += (cef(covariate)-upper)**2
        if cef(covariate) < lower:
            _sum += (cef(covariate)-lower)**2
    return _sum

data = pd.read_csv("../../data/demeaned.csv")
df = data.groupby(["id", "ispolice", "sellerfeedbackscore", "bidcount", "apple", "amazon"])["bids"].apply(lambda x: x.values).reset_index()

In [2]:
include = df[(df.bidcount > 5) & (df.bidcount < 12)]

p = np.random.permutation(len(include))

l1 = include.ispolice.values
l2 = include.bidcount.values
l1 = l1[p]
l2 = l2[p]
include.loc[:,"ispolice"] = l1
include.loc[:,"bidcount"] = l2

bids = list(include.bids)
covariates = [[cov] for cov in list(include.ispolice)]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


In [3]:
median_upper, median_lower = estimate_median(bids, covariates, (0,9))

calculating values for covariate: [0] (1/2)
total time elapsed: 0.00016762200000020044s
calculating values for covariate: [1] (2/2)
total time elapsed: 15.213898440000001s


In [4]:
def loss_function(c):
    a, b = c
    cef = lambda cov: a+b*cov[0]
    return get_loss_function(covariates, median_upper, median_lower, cef)

b_hat = optimize.brute(loss_function, ranges=[(0,2), (-1,1)])
# interval_lower = optimize.newton(lambda a_l, b_l: loss_function(a_l, b_l)-loss_function(b_hat)-10, b_hat-0.1)
# interval_upper = optimize.newton(lambda b_l, b_u: loss_function(b_l, b_u)-loss_function(b_hat)-10, b_hat+0.1)

In [5]:
b_hat

array([0.0043125 , 0.04037829])

In [6]:
median_upper, median_lower

({'[0]': 0.00494384765625, '[1]': 0.06866455078125},
 {'[0]': 0.00384521484375, '[1]': 0.04229736328125})

In [7]:
import matplotlib.pyplot as plt

x_points = np.linspace(-1,1,num=50)
y_points = np.linspace(-1,1,num=50)

fig = plt.figure(figsize = (10,10))
ax = plt.axes(projection='3d')

X, Y = np.meshgrid(x_points, y_points)
dps = ([np.ravel(X), np.ravel(Y)])
zs = np.array([loss_function(pair) for pair in list(zip(np.ravel(X), np.ravel(Y)))])
Z = zs.reshape(X.shape)

ax.plot_surface(X,Y,Z)
ax.set_ylabel(r"$\beta_1$")
ax.set_xlabel(r"$\beta_0$")
ax.set_zlabel(r"Loss function")

plt.tight_layout()
fig.savefig("loss_func.png", dpi=fig.dpi, bbox_inches="tight")
plt.close()