In [None]:
import matplotlib
from datascience import *
%matplotlib inline
import matplotlib.pyplot as plots
import numpy as np
from _collections_abc import Iterator
plots.style.use('fivethirtyeight')

# Lecture 24

## Review: Lecture 20
From a random sample, construct a 95% confidence interval for the average age of the mothers in the population.

In [None]:
# Original sample: read baby.csv into a Table named `births`, which the
# bootstrap and CLT cells below both draw from.

births = Table.read_table('baby.csv')
# Preview a few rows instead of displaying the whole table.
births.show(3)

In [None]:
def one_bootstrap_mean():
    """Return the mean 'Maternal Age' of one bootstrap resample of `births`.

    `births.sample()` is called with no arguments, so the resample uses the
    Table.sample defaults (per the datascience docs: as many rows as the
    original table, drawn at random with replacement).
    """
    resampled_ages = births.sample().column('Maternal Age')
    return np.average(resampled_ages)

In [None]:
# Generate means from 3000 bootstrap samples.
# The means are collected in a list and converted to an array once:
# np.append copies the entire array on every call, so growing the array
# inside the loop does quadratic work for no benefit.
num_repetitions = 3000
bstrap_means = np.array([one_bootstrap_mean() for _ in range(num_repetitions)])

In [None]:
# The middle 95% of the bootstrap means runs from the 2.5th to the 97.5th
# percentile; those two values are the endpoints of the confidence interval.
left, right = (percentile(p, bstrap_means) for p in (2.5, 97.5))

print(f"Left End: {left}", f"Right End: {right}", sep="\n")

In [None]:
# Histogram of the bootstrap means, with the 95% interval drawn as a thick
# yellow segment along the horizontal axis.
resampled_means = Table().with_column('Bootstrap Sample Mean', bstrap_means)
resampled_means.hist(bins=15)
plots.plot([left, right], [0, 0], label='95% CI', lw=8, color='yellow')
plots.legend();

## Method Based on CLT

In [None]:
# Summary statistics of the one sample we have: its size, mean and SD.
sampled_ages = births.column('Maternal Age')
sample_size, sample_average, sample_SD = (
    len(sampled_ages),
    np.average(sampled_ages),
    np.std(sampled_ages),
)

print(
    f"Sample Size: {sample_size}",
    f"Sample Average: {sample_average}",
    f"Sample SD: {sample_SD}",
    sep="\n",
)

### Approximating the confidence interval

To get an approximate 95% confidence interval for the population mean, we add $2 \cdot \frac{\text{Population SD}}{\sqrt{\text{Sample Size}}}$ to the sample average on one side and subtract it from the sample average on the other side. The problem is that we don't know the population SD.

In [None]:
# Try estimating it from the sample: use the sample SD in place of the
# unknown population SD, then divide by the square root of the sample size.

estimated_SD_of_sample_average = sample_SD / (sample_size**0.5)
estimated_SD_of_sample_average

In [None]:
# Approximate 95% confidence interval for population mean, displayed as a
# (lower, upper) pair: sample average minus / plus 2 estimated SDs of the
# sample average.

sample_average - 2*estimated_SD_of_sample_average, sample_average + 2*estimated_SD_of_sample_average

In [None]:
# Compare to Bootstrap: reprint the percentile-based endpoints computed earlier.
print(f"Left End: {left}", f"Right End: {right}", sep="\n")

<br><br><br><br>

---

Return to Slides

---

<br><br><br><br>

## SD of 0/1 population

In [None]:
# A 0/1 population of size 10: the first number_of_ones entries are 1 and
# the remaining entries are 0.
pop_size = 10
number_of_ones = 4
zero_one_population = np.concatenate(
    [np.ones(number_of_ones), np.zeros(pop_size - number_of_ones)]
)

print(
    f"Population: {zero_one_population}",
    f"Standard Deviation: {np.round(np.std(zero_one_population),2)}",
    sep="\n",
)

In [None]:
def sd_of_zero_one_population(number_of_ones, population_size=None):
    """Return the SD of a population made up only of 1s and 0s.

    The population has `number_of_ones` elements equal to 1 and
    `population_size - number_of_ones` elements equal to 0.

    Parameters
    ----------
    number_of_ones : int
        How many elements of the population are 1.
    population_size : int, optional
        Total number of elements in the population. When omitted, the
        notebook-level `pop_size` is used, so one-argument calls (such as
        those made through `Table.apply`) follow that global.

    Returns
    -------
    float
        `np.std` of the 0/1 population.

    Raises
    ------
    ValueError
        If `number_of_ones` is negative or larger than the population size.
    """
    if population_size is None:
        # Resolved at call time so the function tracks the notebook's pop_size.
        population_size = pop_size
    if not 0 <= number_of_ones <= population_size:
        raise ValueError(
            f"number_of_ones must be between 0 and {population_size}, "
            f"got {number_of_ones}"
        )
    zero_one_population = np.append(np.ones(number_of_ones),
                                    np.zeros(population_size - number_of_ones))
    return np.std(zero_one_population)

In [None]:
# One row for every possible count of ones, 0 through pop_size, together
# with the proportion of the population that count represents.
possible_ones = np.arange(pop_size+1)
zero_one_pop = (
    Table()
    .with_column('Number of Ones', possible_ones)
    .with_column('Proportion of Ones', possible_ones / pop_size)
)
zero_one_pop.show()

In [None]:
# Compute the population SD for each row's count of ones, then attach the
# results to the table as a 'Pop SD' column and display it.
sds = zero_one_pop.apply(sd_of_zero_one_population, 'Number of Ones')
zero_one_pop = zero_one_pop.with_column('Pop SD', sds)
zero_one_pop.show()

In [None]:
# NOTE(review): this import repeats the one in the notebook's first cell, and
# Iterator is not referenced in this cell — confirm whether it is still needed.
from _collections_abc import Iterator
# Plot the population SD against the proportion of ones.
zero_one_pop.scatter('Proportion of Ones', 'Pop SD')

<br><br><br><br>

---

Return to Slides

---

<br><br><br><br>