In [2]:
import numpy as np
import altair as alt
import pandas as pd
import scipy
from scipy.stats import norm, lognorm
import math
# Turn off Altair's max-rows guard on chart data.
# NOTE(review): no chart is built in the visible cells — confirm this call is still needed.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [3]:
# Significance level: the intervals below are built at the (1 - alpha) level, i.e. 90%.
alpha = 0.1

In [4]:
# Upper alpha/2 quantile of the normal distribution (norm.ppf is the inverse CDF);
# it multiplies the standard error in the normal-approximation interval.
z_alpha_2 = norm.ppf(1-(alpha/2))

In [5]:
# Number of Bernoulli trials in sample 1 (denominator of p1_hat).
n1 = 50

In [6]:
# Number of Bernoulli trials in sample 2 (denominator of p2_hat).
n2 = 50

In [7]:
# Number of successes observed in sample 1 (numerator of p1_hat).
s1 = 40

In [8]:
# Number of successes observed in sample 2 (numerator of p2_hat).
s2 = 30

In [9]:
# Closed-form MLE of the success probability p1: the observed proportion s1 / n1.
p1_hat = s1 / n1

In [10]:
# Closed-form MLE of the success probability p2: the observed proportion s2 / n2.
p2_hat = s2 / n2

In [11]:
# Point estimate of the difference in success probabilities, tau = p2 - p1.
# NOTE(review): with s1 = 40, s2 = 30 and n1 = n2 = 50 this evaluates to -0.2, yet the
# recorded 90% interval output (0.052, 0.348) is centred on +0.2. The saved outputs
# appear to come from a different kernel state — confirm the intended sign / group
# labelling and re-run from a fresh kernel.
tau_hat = p2_hat - p1_hat

In [12]:
# Number of bootstrap replicates.
B = 10**4

In [13]:
# Parametric bootstrap of tau_hat: every replicate redraws both samples from
# Bernoulli models with the fitted success probabilities and recomputes p2 - p1.
#
# The mean of n Bernoulli(p) draws has the same distribution as Binomial(n, p) / n,
# so all B replicates are generated in two vectorised calls instead of a Python loop.
# A dedicated, seeded Generator makes the result reproducible under Restart & Run All.
bootstrap_rng = np.random.default_rng(0)
p1_hat_bootstrap = bootstrap_rng.binomial(n=n1, p=p1_hat, size=B) / n1
p2_hat_bootstrap = bootstrap_rng.binomial(n=n2, p=p2_hat, size=B) / n2
tau_hat_bootstrap = p2_hat_bootstrap - p1_hat_bootstrap
# Kept as a plain list (length B) so the variable has the same type as before.
bootstraps = tau_hat_bootstrap.tolist()

In [14]:
# Bootstrap standard error of tau_hat: standard deviation of the bootstrap replicates
# (np.std with its default ddof=0).
se_hat = np.std(bootstraps)

In [15]:
# Normal-approximation interval: point estimate plus/minus z_{alpha/2} standard errors.
margin_of_error = z_alpha_2 * se_hat
confidence_interval = (tau_hat - margin_of_error, tau_hat + margin_of_error)

In [16]:
# Report the bootstrap standard error of tau_hat.
print("Standard Error Estimation:", se_hat)

Standard Error Estimation: 0.08967972738584791


In [17]:
# Report the normal-approximation interval; (1-alpha)*100 is a float, so the label reads "90.0%".
print(f"{(1-alpha)*100}% Confidence Interval:", confidence_interval)

90.0% Confidence Interval: (0.05248997514536888, 0.3475100248546312)


In [32]:
# Monte Carlo draws from the posteriors
#   p1 | data ~ Beta(s1 + 1, n1 - s1 + 1)
#   p2 | data ~ Beta(s2 + 1, n2 - s2 + 1)
# (presumably the result of a flat Beta(1, 1) prior on each p — the prior is not
# stated in the notebook; confirm).
n_posterior_draws = 1000
p1_posterior_data = np.random.beta(a=s1 + 1, b=n1 - s1 + 1, size=n_posterior_draws)
p2_posterior_data = np.random.beta(a=s2 + 1, b=n2 - s2 + 1, size=n_posterior_draws)

# Each pair of draws gives one posterior draw of tau = p2 - p1.
# NOTE(review): with s1 = 40 and s2 = 30 these draws centre near -0.19, but the
# recorded "Simulated Posterior Mean" output is +0.19 — the saved output looks stale;
# confirm the intended sign and re-run.
tau_data = np.subtract(p2_posterior_data, p1_posterior_data)

In [33]:
# Monte Carlo estimate of the posterior mean of tau: the average of the simulated draws.
posterior_mean = np.mean(tau_data)

In [34]:
# Report the Monte Carlo posterior mean of tau.
print("Simulated Posterior Mean:", posterior_mean)

Simulated Posterior Mean: 0.19025734768248623


In [35]:
# Equal-tailed (1 - alpha) interval for tau taken from the simulated posterior draws:
# alpha/2 of the draws fall below the lower endpoint and alpha/2 above the upper one.
# (The printed label says "Confidence Interval", but the endpoints are posterior quantiles.)
tau_lower = np.quantile(tau_data, alpha/2)
tau_upper = np.quantile(tau_data, 1-(alpha/2))
simulated_confidence_interval = (tau_lower, tau_upper)
print("Simulated Confidence Interval:", simulated_confidence_interval)

Simulated Confidence Interval: (0.050348844986673184, 0.34445072937367166)


In [36]:
# Part e): posterior simulation for psi, the log odds ratio of p2 relative to p1.

In [42]:
# psi = log[(p2 / (1 - p2)) / (p1 / (1 - p1))], the log odds ratio, evaluated
# draw-by-draw on the paired posterior samples of p1 and p2.
odds_p2 = p2_posterior_data / (1 - p2_posterior_data)
odds_p1 = p1_posterior_data / (1 - p1_posterior_data)
psi_data = np.log(odds_p2 / odds_p1)


In [43]:
# Monte Carlo estimate of the posterior mean of psi: the average of the simulated draws.
psi_mean_hat = psi_data.mean()


In [44]:
# Equal-tailed (1 - alpha) interval for psi from the simulated posterior draws.
psi_lower = np.quantile(psi_data, alpha/2)
psi_upper = np.quantile(psi_data, 1-(alpha/2))
psi_simulated_confidence_interval = (psi_lower, psi_upper)


In [45]:
# Report the Monte Carlo posterior mean of psi (the label text matches the one used for tau).
print("Simulated Posterior Mean:", psi_mean_hat)

Simulated Posterior Mean: -0.9413800528768976


In [46]:
# Report the equal-tailed interval for psi computed from the posterior draws.
print("Simulated Confidence Interval:", psi_simulated_confidence_interval)

Simulated Confidence Interval: (-1.7378943193317054, -0.2371447965040797)
