In [17]:
import sys
sys.path.append('..')

from scipy.stats import binom, norm
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from chapters.chapter7 import CI
from chapters.chapter8 import bootstrap, bootstrap_variance
from chapters.chapter9 import parametric_bootstrap, parametric_bootstrap_variance

%load_ext autoreload
%autoreload 2

%matplotlib inline
plt.style.use('ggplot')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## MLE

The MLE for $\psi$ is $\hat \psi = \hat p_1 - \hat p_2$.

The Fisher information matrix is
$$
\begin{pmatrix}
    \frac{n1}{p_1 (1-p_1)} & 0 \\
    0 & \frac{n1}{p_1 (1-p_1)}
\end{pmatrix}
$$

The delta method standard error is $\sqrt{\frac{p_1 (1 - p_1)}{n_1} + \frac{p_2 (1 - p_2)}{n_2}}$

In [26]:
n1 = 200
n2 = n1
x1 = 160
x2 = 148
p1_hat = x1 / n1
p2_hat = x2 / n2

psi_hat = p1_hat - p2_hat
mle = psi_hat
psi_hat

0.06000000000000005

In [29]:
se_delta = np.sqrt((p1_hat * (1 - p1_hat)) / n1 + (p2_hat * (1 - p2_hat)) / n2)
se_delta

0.041976183723630711

In [30]:
alpha = 0.05
z_alpha = norm.ppf(1 - alpha)
z_alpha

1.6448536269514722

In [31]:
ci_delta = CI(mle - z_alpha * se_delta, mle + z_alpha * se_delta)
ci_delta

CI(lower=-0.0090446780433952739, upper=0.12904467804339537)

## Parametric Bootstrap

In [28]:
dist1 = binom(n1, p1_hat)
dist2 = binom(n2, p2_hat)

statistic = np.mean

seed1 = 121111
seed2 = 951

iters = 10000

boots1 = parametric_bootstrap(dist1, 1, statistic, iters, random_state=seed1)
boots2 = parametric_bootstrap(dist2, 1, statistic, iters, random_state=seed2)

boots = (xx1/n1 - xx2/n2 for xx1, xx2 in zip(boots1, boots2))
se_boot = np.var(list(boots))
se_boot

0.0017500987677499999

In [32]:
ci_boot = CI(psi_hat - z_alpha * se_boot, psi_hat + z_alpha * se_boot)
ci_boot

CI(lower=0.057121343694343164, upper=0.062878656305656949)