# Implementing the baseball example of Brown (2008)

In [1]:
import os

# Working directory for this example. The default is the original author's
# local checkout; set the KW_NPEB_WDIR environment variable to run the
# notebook from another location without editing this cell.
WDIR = os.environ.get(
    'KW_NPEB_WDIR',
    'C:/Users/billt/PycharmProjects/KW_NPEB/examples/bayesball/',
)
# Later cells import `utils` and `kw_mle`; presumably those packages are
# resolved relative to this directory -- confirm against the repository layout.
os.chdir(WDIR)

In [2]:
import numpy as np
import pandas as pd
from utils.utils import *

from kw_mle.kw_mle import KWMLE

## Preprocessing

In [3]:
# Location of the Brown (2008) CSV. The default is the original author's local
# path; set the BROWN_2008_CSV environment variable to point at another copy
# without editing this cell.
DATA_PATH = os.environ.get(
    'BROWN_2008_CSV',
    'D:/py_projects/npeb/MAP/data/brown_2008.csv',
)
df_raw = pd.read_csv(DATA_PATH)

# Split the raw table into the training and test samples.
train, test = get_train_test(df_raw)

# Transformed response built from the H and AB columns (presumably hits and
# at-bats -- confirm against the data dictionary).
train_label = variance_stabilizing(train['H'].values, train['AB'].values)
test_label = variance_stabilizing(test['H'].values, test['AB'].values)

# Standard deviation attached to each transformed value: 1 / (2 * sqrt(AB)).
std_train = 1/(2*np.sqrt(train['AB'].values))
std_test = 1/(2*np.sqrt(test['AB'].values))

## Fit the model

In [4]:
# Build the KWMLE estimator from the transformed training labels and their
# standard deviations (both computed in the preprocessing cell).
kw_mle = KWMLE(train_label, stds=std_train)
# Last expression of the cell: whatever fit() returns is shown as the cell
# output (11 in the saved run; its meaning is not documented here).
kw_mle.fit()

11


## Compare with the naive and James-Stein estimators

In [5]:
# All three predictors are evaluated on the training sample and stored in
# frames indexed by the training rows, so rows can be looked up by label.
train_rows = train.index
naive_pred_raw = pd.DataFrame(train_label, index=train_rows)

npeb_fitted = kw_mle.prediction(train_label, std_train)
npeb_pred_raw = pd.DataFrame(npeb_fitted, index=train_rows)

js_fitted = james_stein_prediction(train_label, std_train)
js_pred_raw = pd.DataFrame(js_fitted, index=train_rows)

# Select, from each frame, the rows labelled by the test index.
test_rows = test.index
naive_pred, npeb_pred, js_pred = (
    frame.loc[test_rows]
    for frame in (naive_pred_raw, npeb_pred_raw, js_pred_raw)
)

In [6]:
# Flatten each prediction frame into a 1-D array before scoring.
naive_flat = np.array(naive_pred).flatten()
npeb_flat = np.array(npeb_pred).flatten()
js_flat = np.array(js_pred).flatten()

# Score every predictor against the transformed test labels.
tse_naive = tse(np.array(test_label), naive_flat, std_test)
tse_npeb = tse(np.array(test_label), npeb_flat, std_test)
tse_js = tse(np.array(test_label), js_flat, std_test)

# Report each error relative to the naive predictor's error.
rel_tse_npeb = tse_npeb / tse_naive
rel_tse_js = tse_js / tse_naive
print("relative total squared error of nonparametric empirical Bayes: ", rel_tse_npeb)
print("relative total squared error of James-Stein estimator: ", rel_tse_js)

relative total squared error of nonparametric empirical Bayes:  0.6874632544417727
relative total squared error of James-Stein estimator:  0.5350256561846592
