Compute confidence/credible intervals based on the four methods above for simulated data sampled from a population that is Gaussian distributed with mean u=10 and standard deviation o=2, for n=5, 10, 20, 40, 80, 160, 1000 at a 95% confidence level.

In [2]:
import numpy as np
from scipy.stats import norm
import math


Method 1: The simple, analytic approach with large n and/or known standard deviation.

In [10]:
#Create a list containing the range of sample sizes.
sample_size = [5, 10, 20, 40, 80, 160, 1000]
confidence_level = 0.95
z_critical_value = 1.96     #based on (confidence level)/2 and z-table

#Calculate the confidence interval using z critical values
population_stdev = 2
sample_mean = 10
for x in sample_size:
    random_sample = norm.rvs(size = x, loc = sample_mean, scale = population_stdev)
    sample_mean = np.mean(random_sample)
    standard_error = population_stdev/math.sqrt(x)
    CI_neg = sample_mean - (z_critical_value * standard_error)
    CI_pos = sample_mean + (z_critical_value * standard_error)
    print(f"Given a sample size of {x}, the confidence interval is {CI_neg:4f} to {CI_pos:4f}.")

Given a sample size of 5, the confidence interval is 8.580436 to 12.086590.
Given a sample size of 10, the confidence interval is 8.777994 to 11.257220.
Given a sample size of 20, the confidence interval is 9.059593 to 10.812670.
Given a sample size of 40, the confidence interval is 9.885386 to 11.124999.
Given a sample size of 80, the confidence interval is 10.152924 to 11.029463.
Given a sample size of 160, the confidence interval is 10.289867 to 10.909673.
Given a sample size of 1000, the confidence interval is 10.515073 to 10.762996.


Method 2: The simple, analytic approach with small n and unknown population standard deviation

In [11]:
#Define t-distribution based on t-distribution table
t_distribution = [2.776, 2.262, 2.093, 1.990, 1.974, 1.962]

#Calculate the confidence interval using t-distributions and sample standard deviations
for x, t in zip(sample_size, t_distribution):
    random_sample = norm.rvs(size = x, loc = sample_mean)
    sample_mean = np.mean(random_sample)
    sample_standard_deviation = np.std(random_sample)
    standard_error = sample_standard_deviation/math.sqrt(x)
    CI_neg = sample_mean - (t * standard_error)
    CI_pos = sample_mean + (t * standard_error)
    print(f"Given a sample size of {x}, the confidence interval is {CI_neg:4f} to {CI_pos:4f}.")

Given a sample size of 5, the confidence interval is 9.709815 to 11.347657.
Given a sample size of 10, the confidence interval is 9.752557 to 10.987901.
Given a sample size of 20, the confidence interval is 9.634542 to 10.619209.
Given a sample size of 40, the confidence interval is 9.802453 to 10.331424.
Given a sample size of 80, the confidence interval is 9.938577 to 10.333933.
Given a sample size of 160, the confidence interval is 9.988712 to 10.268703.


Method 3: Bootstrapped confidence intervals

In [25]:
#Generate random data set and same in a list
for x in sample_size:
    random_sample = norm.rvs(size = x, loc = sample_mean)
    random_sample_data = random_sample.tolist()
    print(random_sample_data)

#Calculate bootstrap confidence interval
n_resample = 1000       #number of resamples
confidence_level = 0.95
for x in sample_size:
    bootstrap_resample = [np.random.choice(random_sample_data, size=x, replace=True) for _ in range(n_resample)]      #generate bootstraps
    bootstrap_means = np.mean(bootstrap_resample)
    lower_percentile = (1 - confidence_level) / 2 * 100
    upper_percentile = (1 + confidence_level) / 2 * 100
    CI_neg, CI_pos = np.percentile(bootstrap_means, [lower_percentile, upper_percentile])
    print(f"Bootstrap confidence interval for the mean of sample size {x}: ({CI_neg:.2f}, {CI_pos:.2f})")


[11.766198305472193, 9.155450406324825, 9.328881343787447, 9.547422081576771, 11.419373653317567]
[9.865654051093673, 8.64617079819886, 10.522299022140464, 10.553026452553526, 10.842546873910585, 11.102704361078516, 11.512927213171217, 10.27896566445633, 11.128123017801173, 10.152323372567992]
[9.430771974551694, 9.737227812244898, 9.774701341727312, 10.570022379279765, 9.887872906237728, 10.01953082167794, 10.231707728219781, 9.686232983196692, 9.783743625047721, 11.456877695131967, 10.707016770031327, 9.777521262774616, 11.167053602142737, 9.56728605938687, 9.991712872957834, 10.538148167850277, 8.51940173035919, 10.087161223233304, 8.623514613922136, 10.155820420784618]
[10.05010495377911, 10.11712419041126, 10.201251804781867, 9.905865879208688, 9.272124114477165, 9.438011799211807, 10.032965992224343, 9.17661017562467, 10.579523833437783, 11.47437642643288, 8.586665935554311, 8.313925812014261, 11.55221253931608, 10.829077992351602, 9.977443765048749, 12.026258185603579, 9.1313050

Method 4: Bayesian credible intervals

In [None]:
##I think this method is the same as Method 1, but I am not sure. I do not know how to start this one.