# Chapter 3 Problems

#### Notebook setup

In [1]:
# load Python modules
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Useful colors
snspal = sns.color_palette()
blue, orange, purple = snspal[0], snspal[1], snspal[4]
# red = sns.color_palette("tab10")[3]

# High-resolution please
%config InlineBackend.figure_format = 'retina'

# Where to store figures
DESTDIR = "figures/stats/intro_to_NHST"

In [3]:
# Seed NumPy's global random number generator so that repeated runs
# of the notebook produce the same random draws.
np.random.seed(42)

$\def\stderr#1{\mathbf{se}_{#1}}$
$\def\stderrhat#1{\hat{\mathbf{se}}_{#1}}$
$\newcommand{\Mean}{\textbf{Mean}}$
$\newcommand{\Var}{\textbf{Var}}$
$\newcommand{\Std}{\textbf{Std}}$
$\newcommand{\Freq}{\textbf{Freq}}$
$\newcommand{\RelFreq}{\textbf{RelFreq}}$
$\newcommand{\DMeans}{\textbf{DMeans}}$
$\newcommand{\Prop}{\textbf{Prop}}$
$\newcommand{\DProps}{\textbf{DProps}}$

$$
\newcommand{\CI}[1]{\textbf{CI}_{#1}}
\newcommand{\CIL}[1]{\textbf{L}_{#1}}
\newcommand{\CIU}[1]{\textbf{U}_{#1}}
\newcommand{\ci}[1]{\textbf{ci}_{#1}}
\newcommand{\cil}[1]{\textbf{l}_{#1}}
\newcommand{\ciu}[1]{\textbf{u}_{#1}}
$$


(this cell contains the macro definitions like $\stderr{\overline{\mathbf{x}}}$, $\stderrhat{}$, $\Mean$, ...)

#### Problem NN: alt t-test for the mean of Batch 04 (Example 1BT)

In [4]:
muK0 = 1000   # hypothesized population mean under H0 (expected kombucha volume)

In [5]:
# Read the full kombucha dataset, then keep only the volumes recorded for batch 4
kombucha = pd.read_csv("../datasets/kombucha.csv")
ksample04 = kombucha.loc[kombucha["batch"] == 4, "volume"]

# Sample size and observed sample mean for this batch
n04 = ksample04.shape[0]
obsmean04 = np.mean(ksample04)

In [6]:
# bootstrap estimate for standard error of the mean
from stats_helpers import gen_boot_dist

# Re-seed right before resampling so this cell gives the same output on every run;
# the seed call must stay ahead of gen_boot_dist.
np.random.seed(42)
# presumably resamples ksample04 and applies np.mean to each bootstrap sample
# -- TODO confirm against stats_helpers.gen_boot_dist
kbars_boot04 = gen_boot_dist(ksample04, estfunc=np.mean)
# Standard deviation of the bootstrap values (np.std defaults to ddof=0),
# used below as the estimated standard error.
sehat_boot04 = np.std(kbars_boot04)
sehat_boot04

1.225161704465105

In [7]:
# compute the t statistic using bootstrap se:
# (observed sample mean - hypothesized mean muK0) / bootstrap standard error
obst04bt = (obsmean04 - muK0) / sehat_boot04
obst04bt

3.1289747190340322

In [8]:
from scipy.stats import t as tdist
from stats_helpers import tailprobs
# Reference distribution: Student's t with n04 - 1 degrees of freedom
rvT04 = tdist(n04 - 1)
# p-value for the observed t statistic with a two-sided alternative;
# presumably the combined probability of both tails under rvT04 -- confirm in stats_helpers.tailprobs
pvalue04bt = tailprobs(rvT04, obst04bt, alt="two-sided")
pvalue04bt

0.003314349648233716

The $p$-value is very small ($p \approx 0.0033$),
so our decision is to reject $H_0$.

#### Problem NN: alt t-test for the mean of Batch 01 (Example 2BT)

In [9]:
muK0 = 1000   # hypothesized population mean under H0 (expected kombucha volume)

In [10]:
# Read the full kombucha dataset, then keep only the volumes recorded for batch 1
kombucha = pd.read_csv("../datasets/kombucha.csv")
ksample01 = kombucha.loc[kombucha["batch"] == 1, "volume"]

# Sample size and observed sample mean for this batch
n01 = ksample01.shape[0]
obsmean01 = np.mean(ksample01)

In [11]:
# bootstrap estimate for standard error of the mean
from stats_helpers import gen_boot_dist
# Re-seed right before resampling so this cell gives the same output on every run;
# the seed call must stay ahead of gen_boot_dist.
np.random.seed(42)
# presumably resamples ksample01 and applies np.mean to each bootstrap sample
# -- TODO confirm against stats_helpers.gen_boot_dist
kbars_boot01 = gen_boot_dist(ksample01, estfunc=np.mean)
# Standard deviation of the bootstrap values (np.std defaults to ddof=0),
# used below as the estimated standard error.
sehat_boot01 = np.std(kbars_boot01)
sehat_boot01

1.530831183342292

In [12]:
# compute the t statistic using bootstrap se:
# (observed sample mean - hypothesized mean muK0) / bootstrap standard error
obst01bt = (obsmean01 - muK0) / sehat_boot01
obst01bt

-0.5854662550335628

In [13]:
from scipy.stats import t as tdist
from stats_helpers import tailprobs
# Reference distribution: Student's t with n01 - 1 degrees of freedom
rvT01 = tdist(n01-1)
# p-value for the observed t statistic with a two-sided alternative;
# presumably the combined probability of both tails under rvT01 -- confirm in stats_helpers.tailprobs
pvalue01bt = tailprobs(rvT01, obst01bt, alt="two-sided")
pvalue01bt

0.5616069624592427

The $p$-value is very large ($p \approx 0.56$),
so we have no reason to reject $H_0$.