In [1]:
import sys
import pandas as pd
import numpy as np
import seaborn as sns
 
sys.path.append('../')

from functions import *

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
import matplotlib
import matplotlib.pyplot as plt

matplotlib.rcParams.update(matplotlib.rcParamsDefault)
plt.style.use('tableau-colorblind10')
plt.rcParams['figure.figsize'] = (8,8)
plt.rcParams['figure.dpi'] = 300
plt.rcParams['text.color'] = 'black'
plt.rcParams['axes.labelcolor'] = 'black'
plt.rcParams['xtick.color'] = 'black'
plt.rcParams['ytick.color'] = 'black'
plt.rcParams['axes.titlepad'] = 5
plt.rcParams['axes.titlesize'] = 9
plt.rcParams['axes.labelsize'] = 8
plt.rcParams['axes.labelpad'] = 5
plt.rcParams['legend.fontsize'] = 7
plt.rcParams['font.family'] = 'serif'
plt.rcParams['axes.facecolor'] = 'white'

In [3]:
data = pd.read_csv("../../data/demeaned.csv")
df = data.groupby(["id", "ispolice", "sellerfeedbackscore", "bidcount", "apple", "amazon", "increment_residual"])["residual"].apply(lambda x: x.values).reset_index()

valid_bids = list(df[df.ispolice == 1].bidcount.value_counts().index)
include = df[(df.bidcount > 1) & (df.bidcount.isin(valid_bids))]

bids = list(include.residual)

logged_feedback = np.log(include.sellerfeedbackscore+1)
logged_feedback = transform_covariates(logged_feedback, 100)
include.sellerfeedbackscore = logged_feedback

covariates = np.array(include[["ispolice", "sellerfeedbackscore"]])
covariates = list([list(cov) for cov in covariates])
incremented = list(include.increment_residual)

pdfs, pdfs_delta = get_order_statistic_pdfs(bids, covariates, incremented)

_min = min([c[1] for c in covariates])
median = np.percentile([c[1] for c in covariates], 50)
_max = max([c[1] for c in covariates])

n_upper_med, n_lower_med = get_estimated_distributions(pdfs, pdfs_delta, [0, median], (-4,6))
p_upper_med, p_lower_med = get_estimated_distributions(pdfs, pdfs_delta, [1, median], (-4,6))

n_upper_min, n_lower_min = get_estimated_distributions(pdfs, pdfs_delta, [0, _min], (-4,6))
p_upper_min, p_lower_min = get_estimated_distributions(pdfs, pdfs_delta, [1, _min], (-4,6))

n_upper_max, n_lower_max = get_estimated_distributions(pdfs, pdfs_delta, [0, _max], (-4,6))
p_upper_max, p_lower_max = get_estimated_distributions(pdfs, pdfs_delta, [1, _max], (-4,6))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  include.sellerfeedbackscore = logged_feedback


In [4]:
values = np.linspace(-4,5,num=50)

n_upper_vals_med = [n_upper_med(v,-1) for v in values]
n_lower_vals_med = [n_lower_med(v,1) for v in values]

p_upper_vals_med = [p_upper_med(v,-1) for v in values]
p_lower_vals_med = [p_lower_med(v,1) for v in values]


n_upper_vals_min = [n_upper_min(v,-1) for v in values]
n_lower_vals_min = [n_lower_min(v,1) for v in values]

p_upper_vals_min = [p_upper_min(v,-1) for v in values]
p_lower_vals_min = [p_lower_min(v,1) for v in values]


n_upper_vals_max = [n_upper_max(v,-1) for v in values]
n_lower_vals_max = [n_lower_max(v,1) for v in values]

p_upper_vals_max = [p_upper_max(v,-1) for v in values]
p_lower_vals_max = [p_lower_max(v,1) for v in values]

In [5]:
fig, (ax1, ax2, ax3) = plt.subplots(3,1, constrained_layout=True)

### Evaluated at minimum

In [6]:
ax1.plot(values, n_upper_vals_min, label="Non-police upper bound", color="red")
ax1.plot(values, n_lower_vals_min, label="Non-police lower bound", color="red", linestyle="--")

ax1.plot(values, p_upper_vals_min, label="Police upper bound", color="blue")
ax1.plot(values, p_lower_vals_min, label="Police lower bound", color="blue", linestyle="--")

ax1.set_title("Lowest Feedback")

Text(0.5, 1.0, 'Lowest Feedback')

### Evaluated at median

In [7]:
ax2.plot(values, n_upper_vals_med, label="Non-police upper bound", color="red")
ax2.plot(values, n_lower_vals_med, label="Non-police lower bound", color="red", linestyle="--")

ax2.plot(values, p_upper_vals_med, label="Police upper bound", color="blue")
ax2.plot(values, p_lower_vals_med, label="Police lower bound", color="blue", linestyle="--")

ax2.set_ylabel("Cumulative probability")
ax2.set_title("Median Feedback")
ax2.legend(loc="lower right")

<matplotlib.legend.Legend at 0x7f9770055550>

### Evaluated at maximum

In [8]:
ax3.plot(values, n_upper_vals_max, label="Non-police upper bound", color="red")
ax3.plot(values, n_lower_vals_max, label="Non-police lower bound", color="red", linestyle="--")

ax3.plot(values, p_upper_vals_max, label="Police upper bound", color="blue")
ax3.plot(values, p_lower_vals_max, label="Police lower bound", color="blue", linestyle="--")

ax3.set_xlabel("(Normalised) Willingness to pay")
ax3.set_title("Highest Feedback")

Text(0.5, 1.0, 'Highest Feedback')

In [9]:
plt.tight_layout()

fig.savefig("bounds.png", dpi=fig.dpi, bbox_inches="tight")

  plt.tight_layout()
