In [5]:
# ------------
#  APPROACH 1
# ------------

import numpy as np 
from scipy.stats import norm

# Inputs: one standard deviation / mean pair, evaluated at several sample sizes.
s = 2     # standard deviation
x = 10    # mean
n = [5, 10, 20, 40, 80, 160, 1000]

# Standard error of the mean, one entry per sample size
# (np.sqrt converts the list to an array, so the division is element-wise).
SEM = s / np.sqrt(n)

# 97.5th percentile of the standard normal -> two-sided 95% interval.
z_score = norm.ppf(0.975)

# The interval is symmetric, so compute the half-width once and reuse it.
half_width = z_score * SEM
CI_lower = x - half_width
CI_upper = x + half_width

# One report line per sample size.
for size, lower, upper in zip(n, CI_lower, CI_upper):
    print(f"n = {size}:  CI = ({lower:.3f}, {upper:.3f})")

n = 5:  CI = (8.247, 11.753)
n = 10:  CI = (8.760, 11.240)
n = 20:  CI = (9.123, 10.877)
n = 40:  CI = (9.380, 10.620)
n = 80:  CI = (9.562, 10.438)
n = 160:  CI = (9.690, 10.310)
n = 1000:  CI = (9.876, 10.124)


In [6]:
# ------------
#  APPROACH 2
# ------------

import numpy as np 
from scipy.stats import t

# Inputs: one standard deviation / mean pair, evaluated at several sample sizes.
s = 2     # standard deviation
x = 10    # mean
n = np.array([5, 10, 20, 40, 80, 160, 1000])

# Standard error of the mean for every sample size.
SEM = s / np.sqrt(n)

# Critical value of Student's t at the 97.5th percentile; the degrees of
# freedom are n - 1, so the critical value is an array (one per sample size).
degrees_of_freedom = n - 1
t_score = t.ppf(0.975, df=degrees_of_freedom)

# Symmetric interval: mean -/+ (standard error * critical value).
half_width = SEM * t_score
CI_lower, CI_upper = x - half_width, x + half_width

# One report line per sample size.
for size, lower, upper in zip(n, CI_lower, CI_upper):
    print(f"n = {size}:  CI = ({lower:.3f}, {upper:.3f})")

n = 5:  CI = (7.517, 12.483)
n = 10:  CI = (8.569, 11.431)
n = 20:  CI = (9.064, 10.936)
n = 40:  CI = (9.360, 10.640)
n = 80:  CI = (9.555, 10.445)
n = 160:  CI = (9.688, 10.312)
n = 1000:  CI = (9.876, 10.124)


In [7]:
# ------------
#  APPROACH 3
# ------------

import numpy as np

# Parameters of the normal distribution the synthetic samples are drawn from.
mu = 10
sigma = 2
n_array = np.array([5, 10, 20, 40, 80, 160, 1000])

n_boot = 10000   # number of bootstrap resamples per sample size
rng = np.random.default_rng(42)   # fixed seed so the run is repeatable

# One (sample size, observed mean, CI lower, CI upper) tuple per sample size.
results = []

for sample_size in n_array:
    # Draw the "observed" data set for this sample size.
    data = rng.normal(mu, sigma, sample_size)

    # Bootstrap: resample the data with replacement n_boot times and keep the
    # mean of every resample. The draws are made one at a time, in order, from
    # the shared generator.
    boot_means = np.array([
        rng.choice(data, size=sample_size, replace=True).mean()
        for _ in range(n_boot)
    ])

    # Percentile interval: the 2.5th and 97.5th percentiles of the bootstrap means.
    CI_lower, CI_upper = np.percentile(boot_means, [2.5, 97.5])

    results.append((sample_size, data.mean(), CI_lower, CI_upper))

# Report one line per sample size.
for sample_size, observed_mean, lower, upper in results:
    print(f"n={sample_size:4d}, observed mean={observed_mean:.3f}, bootstrap CI=({lower:.3f}, {upper:.3f})")

n=   5, observed mean=9.602, bootstrap CI=(7.543, 11.475)
n=  10, observed mean=9.732, bootstrap CI=(8.637, 10.970)
n=  20, observed mean=9.474, bootstrap CI=(8.533, 10.507)
n=  40, observed mean=10.698, bootstrap CI=(9.983, 11.404)
n=  80, observed mean=9.793, bootstrap CI=(9.298, 10.292)
n= 160, observed mean=9.921, bootstrap CI=(9.595, 10.251)
n=1000, observed mean=9.932, bootstrap CI=(9.807, 10.057)


In [8]:
# ------------
#  APPROACH 4
# ------------
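# NOTE: as written, this cell is identical to Approach 2 (Student's t interval):
# same inputs, same formula, same output. TODO: replace with the intended fourth method.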

import numpy as np 
from scipy.stats import t

# Inputs: one standard deviation / mean pair, evaluated at several sample sizes.
s = 2     # standard deviation
x = 10    # mean
n = np.array([5, 10, 20, 40, 80, 160, 1000])

# Standard error of the mean for every sample size.
SEM = s / np.sqrt(n)

# Student's t critical value at the 97.5th percentile with n - 1 degrees of freedom.
t_score = t.ppf(0.975, df=n-1)

# Build both bounds in one step: row 0 is mean - margin, row 1 is mean + margin.
margin = SEM * t_score
bounds = x + np.outer([-1.0, 1.0], margin)
CI_lower, CI_upper = bounds

# One report line per sample size.
for size, lower, upper in zip(n, CI_lower, CI_upper):
    print(f"n = {size}:  CI = ({lower:.3f}, {upper:.3f})")


n = 5:  CI = (7.517, 12.483)
n = 10:  CI = (8.569, 11.431)
n = 20:  CI = (9.064, 10.936)
n = 40:  CI = (9.360, 10.640)
n = 80:  CI = (9.555, 10.445)
n = 160:  CI = (9.688, 10.312)
n = 1000:  CI = (9.876, 10.124)
