In [8]:
import pandas as pd
import numpy as np
import scipy.stats as stats

# Coverage experiment for the 95% confidence interval of a mean.
#
# A synthetic "population" of 10,000 standard-normal draws is created, then
# `num_samples` samples of `sample_size` values are drawn from it. For each
# sample a z-interval around the sample mean is built using the (known)
# population standard deviation, and we record whether the interval contains
# the population mean. The share of covering intervals should be close to 95%.
population = np.random.normal(loc=0, scale=1, size=10000)

population_mean = np.mean(population)
# np.std defaults to ddof=0, i.e. the population standard deviation.
population_std = np.std(population)

num_samples = 100
sample_size = 20

# The standard error depends only on the population and the sample size, so it
# is computed once rather than on every iteration.
se = population_std / np.sqrt(sample_size)

# One sample mean per draw; each draw is taken without replacement.
sample_means = np.array([
    np.mean(np.random.choice(population, size=sample_size, replace=False))
    for _ in range(num_samples)
])

# norm.interval broadcasts over `loc`, giving every interval in a single call.
lower_bounds, upper_bounds = stats.norm.interval(0.95, loc=sample_means, scale=se)
covers_mean = (lower_bounds <= population_mean) & (population_mean <= upper_bounds)

# One (lower, mean, upper, covered) tuple per sample.
sample_stats = list(zip(lower_bounds, sample_means, upper_bounds, covers_mean))

table = pd.DataFrame(
    sample_stats,
    columns=["Lower Bound", "Sample Mean", "Upper Bound", "Mean in CI"],
)
# Expose the 0-based sample number as a regular "Sample" column.
table.index.name = 'Sample'
table = table.reset_index()
print(table)

# Summing the boolean column counts the intervals that contain the mean.
true_count = table["Mean in CI"].sum()
false_count = len(table) - true_count

percentage_true = (true_count / len(table)) * 100
percentage_false = (false_count / len(table)) * 100

print(percentage_true)
print(percentage_false)


    Sample  Lower Bound  Sample Mean  Upper Bound  Mean in CI
0        0    -0.277686     0.156298     0.590281        True
1        1     0.086373     0.520357     0.954340       False
2        2    -0.732888    -0.298904     0.135079        True
3        3    -0.354032     0.079952     0.513935        True
4        4    -0.646044    -0.212060     0.221923        True
..     ...          ...          ...          ...         ...
95      95    -0.592209    -0.158225     0.275758        True
96      96    -0.144526     0.289457     0.723441        True
97      97    -0.264965     0.169019     0.603002        True
98      98    -0.232584     0.201400     0.635383        True
99      99    -0.629664    -0.195680     0.238303        True

[100 rows x 5 columns]
96.0
4.0
