In [14]:
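# The Logic of Confidence Intervals: repeatedly sample a known population and count how often each sample's 95% interval covers the true mean.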

import pandas as pd
import numpy as np
import scipy.stats as stats

# Fixed seed so that Restart Kernel -> Run All reproduces the same table.
SEED = 42
rng = np.random.default_rng(SEED)

# Simulated population; its mean is the "true parameter" the intervals try to cover.
population = rng.normal(loc=100, scale=20, size=10000)

population_mean = np.mean(population)
# ddof=0 is intentional: every member of the population is available here.
population_std = np.std(population)

num_samples = 100
sample_size = 20

# Standard error of the mean with known sigma. It depends only on the
# population and the sample size, so it is computed once, outside the loop.
se = population_std / np.sqrt(sample_size)

# One (lower, mean, upper, covered?) record per simulated sample.
sample_stats = []

for i in range(num_samples):
    sample = rng.choice(population, size=sample_size, replace=False)
    sample_mean = np.mean(sample)
    # 95% z-interval centred on this sample's mean.
    ci = stats.norm.interval(0.95, loc=sample_mean, scale=se)

    mean_in_interval = ci[0] <= population_mean <= ci[1]

    sample_stats.append((ci[0], sample_mean, ci[1], mean_in_interval))


# Build the frame once from the records; the sample number becomes a regular
# 'Sample' column without mutating the frame in place.
table = (
    pd.DataFrame(
        sample_stats,
        columns=["Lower Bound", "Sample Mean", "Upper Bound", "Mean in CI"],
    )
    .rename_axis('Sample')
    .reset_index()
)
print(table)

print(f'The population mean is {population_mean}')
mean_of_means = table['Sample Mean'].mean()
print(f'The mean of the sample means is {mean_of_means}')
max_upper = table['Upper Bound'].max()
min_lower = table['Lower Bound'].min()
print(f'The max upper bound is {max_upper}')
print(f'The min lower bound is {min_lower}')

# Coverage: share of intervals that contain the population mean
# (expected to be close to 95 for a 95% interval).
true_count = table["Mean in CI"].sum()
false_count = len(table) - true_count
percentage_true = (true_count / len(table)) * 100
percentage_false = (false_count / len(table)) * 100
print(f'The percentage of intervals containing the true parameter is {percentage_true}')
print(f'The percentage of intervals not containing the true parameter is {percentage_false}')

    Sample  Lower Bound  Sample Mean  Upper Bound  Mean in CI
0        0    89.690966    98.439678   107.188390        True
1        1    88.627337    97.376050   106.124762        True
2        2    97.958410   106.707122   115.455834        True
3        3    99.177348   107.926060   116.674773        True
4        4    86.163881    94.912594   103.661306        True
..     ...          ...          ...          ...         ...
95      95    84.072372    92.821084   101.569796        True
96      96    90.629894    99.378606   108.127318        True
97      97    93.908385   102.657097   111.405810        True
98      98    90.290667    99.039380   107.788092        True
99      99    90.285017    99.033729   107.782442        True

[100 rows x 5 columns]
The population mean is 100.28916533674712
The mean of the sample means is 100.28068240000336
The max upper bound is 126.78916426248327
The min lower bound is 79.71011438387191
The percentage of intervals containing the true paramete

In [74]:
import numpy as np
from scipy.stats import norm
from scipy.stats import t

# Simulated sample of 300 draws from a normal distribution (mean 100, sd 20).
# The earlier hard-coded 7-element list was overwritten before any use, so it
# has been dropped.
x = np.random.normal(loc=100, scale=20, size=300)

def confidence_intervals(series, alpha):
    """Print a two-sided confidence interval for the mean of ``series``.

    A normal (Z) critical value is used when the sample has at least 30
    observations; otherwise a Student-t critical value with ``n - 1`` degrees
    of freedom is used.

    Parameters
    ----------
    series : sequence of numbers
        The sample observations. Must contain at least two values.
    alpha : float
        Significance level, strictly between 0 and 1. The resulting interval
        has confidence level ``1 - alpha`` (e.g. ``alpha=0.05`` gives a 95%
        interval). Note this is NOT the confidence level itself.

    Returns
    -------
    None
        The summary is printed; nothing is returned.

    Raises
    ------
    ValueError
        If ``series`` has fewer than two observations or ``alpha`` is not
        strictly between 0 and 1.
    """
    n = len(series)
    if n < 2:
        raise ValueError("series must contain at least two observations")
    if not 0 < alpha < 1:
        raise ValueError("alpha must be strictly between 0 and 1")

    dof = n - 1
    mean = np.mean(series)
    # Sample standard deviation (n - 1 denominator): the data are a sample,
    # not the whole population, and this matches the t distribution's dof.
    std = np.std(series, ddof=1)
    se = std / np.sqrt(n)

    # Two-sided interval: put alpha / 2 in each tail.
    upper_tail = 1 - (alpha / 2)
    if n >= 30:
        cv = stats.norm.ppf(upper_tail)
        test = "Z"
    else:
        # The original passed an undefined name here; the degrees of freedom
        # computed above are what the t quantile needs.
        cv = stats.t.ppf(upper_tail, df=dof)
        test = "T"
    moe = cv * se

    print(f'The sample size is {n}.')
    print(f'The standard deviation is {std}')
    print(f'The sample mean is {mean}.')
    print(f'The test is {test}')
    print(f'The CV is {cv}')
    print(f'The sample mean has a margin of error of: {moe}.')
    print(f'The sample mean has an upper confidence interval of: {mean + moe}')
    print(f'The sample mean has a lower confidence interval of {mean - moe}')

# confidence_intervals takes the significance level alpha, so 0.05 -> 95% interval.
confidence_intervals(x, 0.05)

# Confidence levels to compare.
levels = [0.90, 0.95, 0.99]

# Convert each confidence level to its significance level (alpha = 1 - level);
# passing the level itself would produce 10%/5%/1% intervals instead.
for level in levels:
    confidence_intervals(x, 1 - level)

The sample size is 300.
The standard deviation is 19.879444758346796
The sample mean is 100.05007244414492.
The test is Z
The CV is 1.959963984540054
The sample mean has a margin of error of: 2.249529608990056.
The sample mean has an upper confidence interval of: 102.29960205313498
The sample mean has a lower confidence interval of 97.80054283515486


In [58]:
# Confidence levels to compare.
levels = [0.90, 0.95, 0.99]

# confidence_intervals expects the significance level alpha, so convert each
# confidence level with alpha = 1 - level before calling it.
for level in levels:
    confidence_intervals(x, 1 - level)

-1.9599639845400545