In [None]:
from datascience import *
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
import numpy as np

# Bootstrap Case Study: Mean Maternal Age

**Please run all cells before this cell, including the import cell at the top of the notebook.**

In [None]:
# Read the sample from baby.csv into a table and preview its first 5 rows.
births = Table.read_table('baby.csv')
births.show(5)

In [None]:
# Histogram of the 'Maternal Age' column in the sample.
births.hist('Maternal Age')

In [None]:
# Mean of the 'Maternal Age' column in the original sample; later cells
# plot this value against the bootstrap distribution.
sample_mean = np.mean(births.column('Maternal Age'))
sample_mean

## Bootstrap confidence interval

**Please run all cells before this cell, including the import cell at the top of the notebook.**

In [None]:
def one_bootstrap_mean():
    """Return the mean 'Maternal Age' of one bootstrap resample of `births`.

    The resample has the same number of rows as `births` and is drawn
    with replacement.
    """
    resample = births.sample(births.num_rows, with_replacement=True)
    resampled_ages = resample.column('Maternal Age')
    return np.mean(resampled_ages)

In [None]:
# Simulate the bootstrap distribution of the sample mean.
# The means are gathered in a list and converted to an array once: calling
# np.append inside the loop copies the whole array on every iteration.
repetitions = 1000
bootstrap_means = np.array([one_bootstrap_mean() for _ in range(repetitions)])

# Endpoints of the middle 95% of the bootstrap means
# (2.5th and 97.5th percentiles).
left = percentile(2.5, bootstrap_means)
right = percentile(97.5, bootstrap_means)

In [None]:
# Histogram of the simulated bootstrap means.
bootstrap_table = Table().with_column('Bootstrap means', bootstrap_means)
bootstrap_table.hist()

# Overlay the interval [left, right] as a gold bar on the horizontal axis
# and mark the observed sample mean with a blue dot on top of it.
interval_x = [left, right]
interval_y = [0, 0]
plots.plot(interval_x, interval_y, color="gold", lw=3, zorder=1)
plots.scatter(sample_mean, 0, color="blue", zorder=2)

# Report the sample mean and the interval endpoints.
print("Our Sample Mean = ", sample_mean)
print("A 95% Bootstrap CI for the Mean = [", left, ",", right, "]")

## Use Bootstrap CI for hypothesis testing

**Please run all cells before this cell, including the import cell at the top of the notebook.**

### Hypotheses

#### Null hypothesis: population average mother age is 30
#### Alternative hypothesis: population average mother age is NOT 30

### Methods:
1. Determine the cutoff for the P-value: p%
2. Construct a (100-p)% Bootstrap confidence interval for the population average
3. Make a decision, where x is the population average stated in the null hypothesis (here, x = 30)
    - If x is not in the interval, reject the null
    - If x is in the interval, do not reject the null

#### Example 1: cutoff for P-value is 5%

Since the cutoff is 5%, we need to construct a (100-5)% = 95% Bootstrap confidence interval.

In [None]:
# Re-simulate the bootstrap distribution of the sample mean.
# The means are gathered in a list and converted to an array once: calling
# np.append inside the loop copies the whole array on every iteration.
repetitions = 1000
bootstrap_means = np.array([one_bootstrap_mean() for _ in range(repetitions)])

# A 5% cutoff corresponds to the middle 95% of the bootstrap means
# (2.5th and 97.5th percentiles).
left = percentile(2.5, bootstrap_means)
right = percentile(97.5, bootstrap_means)

# Histogram of the bootstrap means with the interval (gold bar) and the
# observed sample mean (blue dot) drawn on the horizontal axis.
Table().with_column('Bootstrap means', bootstrap_means).hist()
plots.plot([left,right], [0,0], color="gold", lw=3, zorder=1);
plots.scatter(sample_mean,0,color="blue", zorder=2);
print("A 95% Bootstrap CI for the Mean = [", left, ",", right, "]")

##### Conclusion: 
30 is not in the 95% Bootstrap confidence interval; therefore, we reject the null hypothesis at the 5% cutoff.

#### Example 2: cutoff for P-value is 1%

Since the cutoff is 1%, we need to construct a (100-1)% = 99% Bootstrap confidence interval.

In [None]:
# Re-simulate the bootstrap distribution of the sample mean.
# The means are gathered in a list and converted to an array once: calling
# np.append inside the loop copies the whole array on every iteration.
repetitions = 1000
bootstrap_means = np.array([one_bootstrap_mean() for _ in range(repetitions)])

# A 1% cutoff corresponds to the middle 99% of the bootstrap means
# (0.5th and 99.5th percentiles).
left = percentile(0.5, bootstrap_means)
right = percentile(99.5, bootstrap_means)

# Histogram of the bootstrap means with the interval (gold bar) and the
# observed sample mean (blue dot) drawn on the horizontal axis.
Table().with_column('Bootstrap means', bootstrap_means).hist()
plots.plot([left,right], [0,0], color="gold", lw=3, zorder=1);
plots.scatter(sample_mean,0,color="blue", zorder=2);
print("A 99% Bootstrap CI for the Mean = [", left, ",", right, "]")

##### Conclusion: 
30 is not in the 99% Bootstrap confidence interval; therefore, we reject the null hypothesis at the 1% cutoff.