In [33]:
import numpy as np
import altair as alt
import pandas as pd
import scipy
from scipy.stats import norm, lognorm
import math
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [34]:
n = 100

In [35]:
alpha = 0.05

In [36]:
miu = 5

In [37]:
B = 10000

In [38]:
real_theta = math.exp(miu)

In [39]:
x_list = np.random.normal(miu, 1, n)

In [57]:
# MLE and non-parametric estimator are equal in this case
miu_hat = np.mean(x_list)
theta_hat = math.exp(miu_hat)

In [58]:
non_parametric_bootstraps = []
for i in range(B):
    theta_hat_b = math.exp(np.mean(np.random.choice(x_list, replace=True, size=len(x_list))))
    non_parametric_bootstraps.append(theta_hat_b)

In [59]:
non_parametric_se = math.sqrt(np.var(non_parametric_bootstraps))

In [60]:
print("Estimated Standard Error non-parametric:", non_parametric_se)

Estimated Standard Error non-parametric: 17.00524139920685


In [61]:
z_alpha_2 = norm.ppf(1-(alpha/2))

In [62]:
normal_confidence_interval = (
    theta_hat - (z_alpha_2 * non_parametric_se), 
    theta_hat + (z_alpha_2 * non_parametric_se)
)

In [63]:
print("True Theta:", real_theta)

True Theta: 148.4131591025766


In [64]:
print("Estimated Theta:", theta_hat)

print(f"{(1-alpha)*100}% Confidence Interval non-parametric:")

print("Normal:", normal_confidence_interval)

Estimated Theta: 169.73466091919482
95.0% Confidence Interval non-parametric:
Normal: (136.40500022833987, 203.06432161004977)


In [71]:
parametric_bootstraps = []
for i in range(B):
    theta_hat_b = math.exp(np.mean(np.random.normal(miu_hat, 1, size=len(x_list))))
    parametric_bootstraps.append(theta_hat_b)

In [72]:
parametric_se = math.sqrt(np.var(parametric_bootstraps))

In [73]:
print("Estimated Standard Error parametric:", parametric_se)

Estimated Standard Error parametric: 16.927830102684602


In [74]:
print("True Theta:", real_theta)

True Theta: 148.4131591025766


In [75]:
normal_confidence_interval_parametric = (
    theta_hat - (z_alpha_2 * parametric_se), 
    theta_hat + (z_alpha_2 * parametric_se)
)

In [76]:
print("Estimated Theta (MLE):", theta_hat)

print(f"{(1-alpha)*100}% Confidence Interval parametric:")

print("Normal:", normal_confidence_interval_parametric)

Estimated Theta (MLE): 169.73466091919482
95.0% Confidence Interval parametric:
Normal: (136.55672358152003, 202.91259825686961)


In [77]:
# Delta method SE computed analytically

se_delta_method = (math.exp(miu_hat)) / math.sqrt(n)

In [78]:
normal_confidence_interval_delta_method = (
    theta_hat - (z_alpha_2 * se_delta_method), 
    theta_hat + (z_alpha_2 * se_delta_method)
)

In [79]:
print("Estimated Theta (MLE):", theta_hat)

print(f"{(1-alpha)*100}% Confidence Interval delta method:")

print("Normal:", normal_confidence_interval_delta_method)

Estimated Theta (MLE): 169.73466091919482
95.0% Confidence Interval delta method:
Normal: (136.46727868622082, 203.00204315216882)


In [80]:
print("Estimated Theta (MLE):", theta_hat)

print(f"{(1-alpha)*100}% Confidence Intervals:")
print("Delta Method:", normal_confidence_interval_delta_method)
print("Parametric:", normal_confidence_interval_parametric)
print("Non-parametric:", normal_confidence_interval)

Estimated Theta (MLE): 169.73466091919482
95.0% Confidence Intervals:
Delta Method: (136.46727868622082, 203.00204315216882)
Parametric: (136.55672358152003, 202.91259825686961)
Non-parametric: (136.40500022833987, 203.06432161004977)


In [98]:
histogram_data_non_parametric = pd.DataFrame({
        'x': non_parametric_bootstraps,
        })
histogram_data_parametric = pd.DataFrame({
        'x': parametric_bootstraps,
        })

delta_data_range = np.linspace(110, 240, 400)
delta_method_data = pd.DataFrame({
        'x': delta_data_range,
        'y': [norm.pdf(x, loc=theta_hat, scale=se_delta_method) 
            for x in delta_data_range]
        })

In [107]:
histogram_chart_non_parametric = alt.Chart(histogram_data_non_parametric).transform_density(
    'x',
    as_=['x', 'density']
).mark_line(filled=False, color='blue').encode(
    alt.X('x:Q'),
    alt.Y('density:Q')
)

histogram_chart_parametric = alt.Chart(histogram_data_parametric).transform_density(
    'x',
    as_=['x', 'density']
).mark_line(filled=False, color='red').encode(
    alt.X('x:Q'),
    alt.Y('density:Q')
)

distribution_chart_delta = alt.Chart(delta_method_data).mark_line(filled=False, color='green').encode(
    alt.X('x:Q'),
    alt.Y('y:Q')
)


In [100]:
data_range = np.linspace(148, 149, 100)

In [101]:
# Real sampling distribution calculated analytically
# X_mean = N(miu, n^-2)

real_distribution = lognorm.pdf(x=data_range, s=n**(-2), scale=math.exp(miu))

In [102]:
true_histogram_data = pd.DataFrame({
        'x': data_range,
        'y': real_distribution
        })

In [108]:
true_histogram_chart = alt.Chart(true_histogram_data).mark_line(filled=False, color='black').encode(
    alt.X('x'),
    alt.Y('y')
)

In [113]:
histogram_chart_non_parametric + histogram_chart_parametric + distribution_chart_delta | true_histogram_chart