In [9]:
#The Logic of Confidence Intervals

import pandas as pd
import numpy as np
import scipy.stats as stats

population = np.random.normal(loc=100, scale=20, size=10000)

population_mean = np.mean(population)
population_std = np.std(population)

num_samples = 100
sample_size = 20

sample_stats = []

for i in range(num_samples):
    sample = np.random.choice(population, size=sample_size, replace=False)
    sample_mean = np.mean(sample)
    se = population_std / np.sqrt(sample_size)
    ci = stats.norm.interval(0.95, loc=sample_mean, scale=se)
    
    mean_in_interval = ci[0] <= population_mean <= ci[1]

    sample_stats.append((ci[0], sample_mean, ci[1], mean_in_interval))


table = pd.DataFrame(sample_stats, columns=["Lower Bound", "Sample Mean", "Upper Bound", "Mean in CI"])
table.index.name = 'Sample'
table.reset_index(inplace=True)
print(table)

print(f'The population mean is {population_mean}')
mean_of_means = table['Sample Mean'].mean()
print(f'The mean of the sample means is {mean_of_means}')
max_upper = table['Upper Bound'].max()
min_lower = table['Lower Bound'].min()
print(f'The max upper bound is {max_upper}')
print(f'The min lower bound is {min_lower}')

true_count = table["Mean in CI"].sum()
false_count = len(table) - true_count
percentage_true = (true_count / len(table)) * 100
percentage_false = (false_count / len(table)) * 100
print(f'The percentage of intervals containing the true parameter is {percentage_true}') 
print(f'The percentage of intervals not containing the true parameter is {percentage_false}')

    Sample  Lower Bound  Sample Mean  Upper Bound  Mean in CI
0        0    89.374175    98.072060   106.769946        True
1        1    79.851535    88.549420    97.247305       False
2        2    92.833545   101.531431   110.229316        True
3        3    92.083337   100.781222   109.479108        True
4        4    85.386925    94.084810   102.782696        True
..     ...          ...          ...          ...         ...
95      95    91.429776   100.127661   108.825546        True
96      96    96.240591   104.938477   113.636362        True
97      97    87.846538    96.544423   105.242309        True
98      98   100.403973   109.101858   117.799744       False
99      99    91.201524    99.899409   108.597295        True

[100 rows x 5 columns]
The population mean is 99.39449335694499
The mean of the sample means is 98.73831185354564
The max upper bound is 120.23938658563334
The min lower bound is 79.38998283577895
The percentage of intervals containing the true parameter 

In [7]:
import numpy as np
from scipy.stats import norm
from scipy.stats import t

x = [1, 2, 3, 4, 5, 6, 7]

#x = np.random.normal(loc=100, scale=20, size=300)

def confidence_intervals(series, alpha):
    n = len(series)
    dof = n - 1
    mean = np.mean(series)
    std = np.std(series)
    se = std / np.sqrt(n)
    if len(series) >= 30:
        cv = stats.norm.ppf(1-(alpha/2))
        test = "Z"
    else:
        cv = stats.t.ppf(1-(alpha/2), df=dof)
        test = "T"
    moe = cv * se
    upper = mean + moe
    lower = mean - moe

    print(f'The sample size is {n}.')
    print(f'The standard deviation is {std}')
    print(f'The sample mean is {mean}.')
    print(f'Alpha is {alpha}')
    print(f'The confidence level is {1-alpha}')
    print(f'The test is {test}')
    print(f'The CV is {cv}')
    print(f'The sample mean has a margin of error of: {moe}.')
    print(f'The sample mean has an upper confidence interval of: {upper}')
    print(f'The sample mean has a lower confidence interval of {lower}')

#confidence_intervals(x,0.05)

alphas = [0.1, 0.05, 0.01]

for i in alphas:
    confidence_intervals(x,i)

The sample size is 7.
The standard deviation is 2.0
The sample mean is 4.0.
Alpha is 0.1
The confidence level is 0.9
The test is T
The CV is 1.9431802803927816
The sample mean has a margin of error of: 1.46890622128116.
The sample mean has an upper confidence interval of: 5.46890622128116
The sample mean has a lower confidence interval of 2.53109377871884
The sample size is 7.
The standard deviation is 2.0
The sample mean is 4.0.
Alpha is 0.05
The confidence level is 0.95
The test is T
The CV is 2.4469118487916806
The sample mean has a margin of error of: 1.8496914948571628.
The sample mean has an upper confidence interval of: 5.849691494857163
The sample mean has a lower confidence interval of 2.1503085051428372
The sample size is 7.
The standard deviation is 2.0
The sample mean is 4.0.
Alpha is 0.01
The confidence level is 0.99
The test is T
The CV is 3.707428021324907
The sample mean has a margin of error of: 2.802552156599421.
The sample mean has an upper confidence interval of: 6.

In [3]:
import scipy.stats as stats
alpha=.10
cv = stats.norm.ppf(1-(alpha/2))
print(cv)
ci = stats.norm.interval(0.95, loc=sample_mean, scale=se)
print(ci)

1.6448536269514722
(92.68420112704213, 110.16269274208715)
