# Exercise 3.3 | Closeness Between Sample / Population

In this exercise we generate samples and test confidence intervals.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

## Wait Times: Probability of Closeness

How close is the sample mean to the true mean?

In [2]:
# Population parameters used throughout the notebook:
#   mu    - population mean
#   sigma - population standard deviation
#   n     - number of observations in each sample
mu, sigma, n = 12, 2.5, 30

In [4]:
# Standard error of the mean: the spread of sample means for samples of size n
se = np.divide(sigma, np.sqrt(n))
se.item()

0.45643546458763845

The probability that the sample mean falls below the point 2 standard errors above the population mean, i.e. $P(\bar{X} < \mu + 2\,SE)$.


In [None]:
# P(sample mean < mu + 2*se) under Normal(loc=mu, scale=se)
stats.norm.cdf(mu + 2 * se, loc=mu, scale=se).item()

0.9772498680518208

The probability that the sample mean falls below the point 2 standard errors BELOW the population mean, i.e. $P(\bar{X} < \mu - 2\,SE)$.


In [None]:
# P(sample mean < mu - 2*se) under Normal(loc=mu, scale=se)
stats.norm.cdf(mu - 2 * se, loc=mu, scale=se).item()

The probability of being within 2 standard errors of the population mean


In [None]:
# P(mu - 2*se < sample mean < mu + 2*se): CDF at the upper edge minus CDF at the lower edge
(
    stats.norm.cdf(mu + 2 * se, loc=mu, scale=se).item()
    - stats.norm.cdf(mu - 2 * se, loc=mu, scale=se).item()
)

0.9544997361036416

The probability of being within 1 standard error of the population mean


In [None]:
# P(mu - se < sample mean < mu + se): CDF at the upper edge minus CDF at the lower edge
(
    stats.norm.cdf(mu + se, loc=mu, scale=se).item()
    - stats.norm.cdf(mu - se, loc=mu, scale=se).item()
)

0.6826894921370861

#### 90% confidence interval

In [33]:
# Upper bound: the 95th percentile of Normal(mu, se), leaving 5% in the upper tail
upper = stats.norm.ppf(q=0.95, loc=mu, scale=se).item()

In [34]:
# Lower bound: the 5th percentile of Normal(mu, se), leaving 5% in the lower tail
lower = stats.norm.ppf(q=0.05, loc=mu, scale=se).item()

In [35]:
# Display the interval between the 5th and 95th percentiles of Normal(mu, se),
# i.e. the range holding the central 90% of sample means (5% in each tail).
[lower, upper] # 90% confidence interval

[11.249230470603742, 12.750769529396257]

## Wait Times: Sampling

Let's go all in and test whether a 90% confidence interval truly contains 90% of sample means.

## Class Data

In [39]:
# Draw one sample of n observations from Normal(mu, sigma) and report its mean.
# Reuses the notebook's mu / sigma / n instead of repeating the literals 12, 2.5, 30,
# so this cell stays in sync if the population parameters are changed.
np.random.normal(mu, sigma, n).mean().item()

12.159326320189054

In [41]:
# Hand-entered results collected from the class, one 0/1 flag per entry.
# NOTE(review): presumably 1 means that entry's sample mean fell inside
# [lower, upper] and 0 means it did not (the same encoding the simulation
# uses for `sample_ci`) — confirm against how the class data was gathered.
class_sample_ci = [
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
]

In [42]:
# Share of class entries flagged 1 (the mean of the 0/1 flags)
np.asarray(class_sample_ci).mean().item()

0.96875

## Simulation Data

In [46]:
# Monte Carlo check of the interval: draw many samples of size n from
# Normal(mu, sigma) and measure the share of sample means that land strictly
# inside (lower, upper).
sample_size = 100_000  # number of simulated samples (each sample itself has n observations)

# Fixed seed so that Restart & Run All reproduces the same coverage figure.
rng = np.random.default_rng(42)

# One row per simulated sample; averaging across the columns yields every sample mean at once.
sample_means = rng.normal(mu, sigma, size=(sample_size, n)).mean(axis=1)

# 1 when the sample mean lies strictly between lower and upper, 0 otherwise.
# Kept as a plain list of ints, matching the original loop's result type.
sample_ci = ((sample_means > lower) & (sample_means < upper)).astype(int).tolist()

# Observed coverage: should be close to 0.90 if the interval is correct.
np.mean(sample_ci).item()

0.89894