# Confidence intervals

In [1]:
import numpy as np
from scipy.stats import norm, t

In [10]:
# Generate a reproducible synthetic sample of N normally distributed values.
np.random.seed(42)  # seed the legacy global RNG so re-runs give the same X

N = 1000   # sample size
mu = 5     # true mean of the population
sigma = 2  # true standard deviation of the population

# Shift and scale standard-normal draws: X = mu + sigma * Z, with Z ~ N(0, 1)
X = mu + sigma * np.random.randn(N)

In [11]:
# Z-confidence interval: two-sided 95% interval for the mean, built from
# standard-normal quantiles and the estimated standard error.
mu_hat = X.mean()          # point estimate of the mean
sigma_hat = X.std(ddof=1)  # sample standard deviation (ddof=1)

# 2.5% and 97.5% quantiles of the standard normal distribution
z_left, z_right = (norm.ppf(q) for q in (0.025, 0.975))

# z_left is negative, so adding it to mu_hat yields the lower bound
z_lower = mu_hat + z_left * sigma_hat / np.sqrt(N)
z_upper = mu_hat + z_right * sigma_hat / np.sqrt(N)

print(mu_hat, z_lower, z_upper)

5.0386641116446516 4.917281476837209 5.160046746452094


In [13]:
# t-confidence interval: same construction as the Z interval, but the
# quantiles come from Student's t distribution with N-1 degrees of freedom.
# Reuses mu_hat and sigma_hat computed in the Z-interval cell.
t_left, t_right = (t.ppf(q, df=N-1) for q in (0.025, 0.975))

# t_left is negative, so adding it to mu_hat yields the lower bound
t_lower = mu_hat + t_left * sigma_hat / np.sqrt(N)
t_upper = mu_hat + t_right * sigma_hat / np.sqrt(N)

print(mu_hat, t_lower, t_upper)

5.0386641116446516 4.917134237206596 5.1601939860827075


### Interpretation of the confidence interval
If we repeat this experiment many times, constructing a 95% CI from each new sample, then about 95% of those intervals should contain the true value of the parameter (here, the mean $\mu$).

In [28]:
def experiment():
    """Run one simulated study and check whether its 95% t-interval covers ``mu``.

    Draws a fresh sample of ``N`` values using the module-level ``mu`` and
    ``sigma``, builds the two-sided 95% t-confidence interval for the mean
    from that sample, and tests whether the true mean lies strictly inside.

    Returns:
        A truthy value if ``lower < mu < upper``, otherwise a falsy one.
    """
    sample = mu + sigma * np.random.randn(N)

    center = sample.mean()
    spread = sample.std(ddof=1)  # sample standard deviation (ddof=1)

    # 2.5% and 97.5% quantiles of Student's t with N-1 degrees of freedom
    q_lo, q_hi = (t.ppf(q, df=N-1) for q in (0.025, 0.975))

    lower = center + q_lo * spread / np.sqrt(N)
    upper = center + q_hi * spread / np.sqrt(N)

    # Strict inequalities on both sides: the endpoints do not count as covered
    return lower < mu < upper

def multi_experiment(M):
    """Repeat ``experiment`` ``M`` times and return the observed coverage rate.

    Args:
        M: Number of independent repetitions to run.

    Returns:
        The mean of the ``M`` outcomes, i.e. the fraction of repetitions in
        which ``experiment()`` reported that the interval covered the mean.
    """
    covered = np.array([experiment() for _ in range(M)])
    return covered.mean()

In [31]:
# Empirical coverage over 10000 repetitions; for a 95% CI this should be close to 0.95
multi_experiment(M=10000)

0.946