In [52]:
import numpy as np
import altair as alt
import pandas as pd
import scipy
from scipy.stats import norm, lognorm
import math
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [53]:
n = 100

In [54]:
alpha = 0.05

In [55]:
miu = 5

In [56]:
B = 1000

In [57]:
real_theta = math.exp(miu)

In [58]:
x_list = np.random.normal(miu, 1, n)

In [96]:
theta_hat = math.exp(np.mean(x_list))

In [97]:
bootstraps = []
for i in range(B):
    theta_hat_b = math.exp(np.mean(np.random.choice(x_list, replace=True, size=len(x_list))))
    bootstraps.append(theta_hat_b)

In [98]:
se = math.sqrt(np.var(bootstraps))

In [99]:
print("Estimated Standard Error:", se)

Estimated Standard Error: 11.2421877741058


In [100]:
z_alpha_2 = norm.ppf(1-(alpha/2))

In [None]:
bootstrap_lower_quantile= np.quantile(bootstraps, alpha/2)

bootstrap_upper_quantile = np.quantile(bootstraps, 1-(alpha/2))

In [102]:
normal_confidence_interval = (theta_hat - z_alpha_2 * se, theta_hat + z_alpha_2 * se)
pivotal_confidence_interval = (2*theta_hat - bootstrap_upper_quantile, 2*theta_hat - bootstrap_lower_quantile)
percentile_confidence_interval = (bootstrap_lower_quantile, bootstrap_upper_quantile)

In [103]:
print("True Theta:", real_theta)

True Theta: 148.4131591025766


In [104]:
print("Estimated Theta:", theta_hat)

print(f"{(1-alpha)*100}% Confidence Intervals:")

print("Normal:", normal_confidence_interval)
print("Pivotal:", pivotal_confidence_interval)
print("Percentile:", percentile_confidence_interval)

Estimated Theta: 115.95799542185416
95.0% Confidence Intervals:
Normal: (93.92371227717028, 137.99227856653806)
Pivotal: (91.8738635357681, 135.60742684344208)
Percentile: (96.30856400026624, 140.04212730794023)


In [105]:
histogram_data = pd.DataFrame({
        'x': bootstraps,
        })

In [106]:
histogram_chart = alt.Chart(histogram_data).transform_density(
    'x',
    as_=['x', 'density']
).mark_line(filled=False).encode(
    alt.X('x:Q'),
    alt.Y('density:Q')
)

In [107]:
histogram_chart

In [141]:
data_range = np.linspace(148, 149, 100)

In [142]:
# Real sampling distribution calculated analytically
# X_mean = N(miu, n^-2)

real_distribution = lognorm.pdf(x=data_range, s=n**(-2), scale=math.exp(miu))

In [169]:
true_histogram_data = pd.DataFrame({
        'x': data_range,
        'y': real_distribution
        })

In [170]:
true_histogram_chart = alt.Chart(true_histogram_data).mark_line(filled=False).encode(
    alt.X('x'),
    alt.Y('y')
)

In [171]:
true_histogram_chart

In [173]:
true_histogram_chart | histogram_chart