# Hypothesis Testing

This notebook demonstrates hypothesis testing, p-values, t-tests, two-sample tests, matched pairs, and practical significance.

## 1. Setup & Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

np.random.seed(42)

## 2. One-Sample z-Test for a Proportion

In [2]:
n = 10
x = 7
phat = x / n
p0 = 0.5

SE = (p0 * (1 - p0) / n) ** 0.5
z = (phat - p0) / SE

p_one_sided = 1 - stats.norm.cdf(z)
p_two_sided = 2 * (1 - stats.norm.cdf(abs(z)))

z, p_one_sided, p_two_sided

(1.2649110640673513, 0.1029516053660342, 0.2059032107320684)

## 3. One-Sample t-Test (Small Sample)

In [3]:
data = np.array([15.6, 15.2, 16.1, 15.8, 15.3])
mu0 = 15

n = len(data)
xbar = data.mean()
s = data.std(ddof=1)
SE = s / (n ** 0.5)

t_stat = (xbar - mu0) / SE
df = n - 1

p_one_sided_t = 1 - stats.t.cdf(t_stat, df=df)
p_two_sided_t = 2 * (1 - stats.t.cdf(abs(t_stat), df=df))

xbar, t_stat, df, p_one_sided_t, p_two_sided_t

(15.6, 3.651483716701099, 4, 0.010871489232618381, 0.021742978465236762)

## 4. Two-Sample z-Test for Proportions

In [4]:
n1 = 1000
p1_hat = 0.55
n2 = 1500
p2_hat = 0.58

diff_hat = p2_hat - p1_hat

SE_diff = (
    p1_hat * (1 - p1_hat) / n1 +
    p2_hat * (1 - p2_hat) / n2
) ** 0.5

z_diff = diff_hat / SE_diff
p_two_sided_diff = 2 * (1 - stats.norm.cdf(abs(z_diff)))

diff_hat, SE_diff, z_diff, p_two_sided_diff

(0.029999999999999916,
 0.020245987256738065,
 1.4817751102760186,
 0.13840014388805066)

## 5. Paired t-Test

In [5]:
husbands = np.array([35, 42, 29, 50, 38])
wives    = np.array([33, 40, 30, 47, 37])

d = husbands - wives

n = len(d)
d_bar = d.mean()
s_d = d.std(ddof=1)
SE_d = s_d / (n ** 0.5)

t_paired = d_bar / SE_d
df = n - 1

p_one_sided_paired = 1 - stats.t.cdf(t_paired, df=df)

d_bar, s_d, t_paired, df, p_one_sided_paired

(1.4, 1.5165750888103102, 2.06418738616856, 4, 0.05396941114613829)

## 6. Sign Test

In [6]:
signs = (husbands > wives).astype(int)

n = len(signs)
x = signs.sum()
phat = x / n

p0 = 0.5
SE_sign = (p0 * (1 - p0) / n) ** 0.5

z_sign = (phat - p0) / SE_sign
p_one_sided_sign = 1 - stats.norm.cdf(z_sign)

z_sign, p_one_sided_sign

(1.341640786499874, 0.08985624743949994)

## 7. Statistical vs Practical Significance

In [7]:
mu_control = 15.00
mu_treatment = 15.05
sigma = 1.0
n_large = 100_000

control = np.random.normal(mu_control, sigma, size=n_large)
treatment = np.random.normal(mu_treatment, sigma, size=n_large)

xbar_c = control.mean()
xbar_t = treatment.mean()

SE_large = ((sigma**2) / n_large + (sigma**2) / n_large) ** 0.5
z_large = (xbar_t - xbar_c) / SE_large
p_two_sided_large = 2 * (1 - stats.norm.cdf(abs(z_large)))

(xbar_c, xbar_t), (xbar_t - xbar_c), z_large, p_two_sided_large

((15.00096686814095, 15.050981034036045),
 0.050014165895095886,
 11.183507477938601,
 0.0)