In [1]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
from scipy.stats import norm

from chapters.chapter7 import CI
from chapters.chapter8 import bootstrap_ci_percentile
from chapters.chapter9 import parametric_bootstrap_variance

## Estimators

In [2]:
df = pd.read_csv('../data/ch09q03.csv')
n = len(df)
df.head()

Unnamed: 0,value
0,3.23
1,-2.5
2,1.88
3,-0.68
4,4.43


In [3]:
mu_hat = df.mean()
mu_hat

value    1.1908
dtype: float64

In [4]:
v_hat = np.var(df)
v_hat

value    3.303503
dtype: float64

In [5]:
sigma_hat = np.sqrt(v_hat)
sigma_hat

value    1.817554
dtype: float64

In [6]:
alpha = 0.05
z95 = norm.ppf(1 - alpha)
z95

1.6448536269514722

In [7]:
tau_hat = z95 * sigma_hat + mu_hat
tau_hat

value    4.180411
dtype: float64

## Delta method confidence interval

In [10]:
se_delta = (v_hat + (v_hat * z95**2) / 2) / n
se_delta

value    0.310896
dtype: float64

In [11]:
alpha = 0.05
z_alpha = norm.ppf(1 - alpha)

ci_delta = CI(tau_hat - z_alpha * se_delta, tau_hat + z_alpha * se_delta)
ci_delta

CI(lower=value    3.669033
dtype: float64, upper=value    4.691788
dtype: float64)

## Bootstrapped confidence interval

Since we are trying to estimate the 95th percentile, it could make sense to use the percentile function as our statistic.  We will create a non-parametric bootstrapped confidence interval this way.

In [12]:
statistic = lambda xs: np.percentile(xs, 95)
statistic(df)

4.3439999999999985

In the parametric case under consideration, we can write down the 95th percentile as a function of the mean and standard deviation.

In [13]:
pstatistic = lambda xs: np.mean(xs) + z95 * np.std(xs)
pstatistic(df)

value    4.180411
dtype: float64

In [14]:
v_pboot = parametric_bootstrap_variance(dist=norm(mu_hat, v_hat), n=n, statistic=pstatistic, iters=10000, random_state=484488)
se_pboot = np.sqrt(v_pboot)
se_pboot

1.0031753544847091

In [15]:
ci_pboot = CI(tau_hat - z_alpha * se_pboot, tau_hat + z_alpha * se_pboot)
ci_pboot

CI(lower=value    2.530334
dtype: float64, upper=value    5.830487
dtype: float64)

In [16]:
ci_boot = bootstrap_ci_percentile(df, statistic, 10000, a=alpha, random_state=46111110)
ci_boot

CI(lower=2.3300000000000001, upper=5.4299999999999997)

## Comparison of confidence intervals

In this case, the delta method yields a tighter confidence interval.