In [1]:
import numpy as np

#### Lecture Note
* When null is true, F = 1.
* This example illustrates ANOVA test on 4 samples of different sizes.


<img src="figs/anova.png" alt="Drawing" style="height: 500px;"/>

In [2]:
p = np.array([1.5, 1.3, 1.8, 1.6, 1.3])
x = np.array([1.6, 1.7, 1.9, 1.2])
y = np.array([2.0, 1.4, 1.5, 1.5, 1.8, 1.7, 1.4])
z = np.array([2.9, 3.1, 2.8, 2.7])

combined = [p, x, y, z]

k = len(combined)
N = sum(len(g) for g in combined)
ns = list(map(len, combined))

##### Degree of freedom

In [3]:
df_between = k - 1
df_within = N - k # aka df error term
df_total = N - 1

#### Sample means

In [4]:
# grand mean
mu_grand = np.mean(np.concatenate(combined))

# sample means
mu_g = list(map(np.mean, combined))
mu_p, mu_x, mu_y, mu_z = mu_g

print("""
mu_p = %.2f
mu_x = %.2f
mu_y = %.2f
mu_z = %.2f
mu_grand = %.2f
"""%(mu_p, mu_x, mu_y, mu_z, mu_grand))


mu_p = 1.50
mu_x = 1.60
mu_y = 1.61
mu_z = 2.88
mu_grand = 1.84



##### Between sample variation
* Numerator of F-statistic.
* The greater the variation between sample, the __more__ likely to reject null.

In [5]:
ss_between = sum(n * (mu_sample - mu_grand) ** 2 for mu_sample, n in zip(mu_g, ns))
ss_between

5.449428571428573

##### Within sample variation (aka error term)
* Denominator of F-statistic.
* The greater the variation between sample, the __less__ likely to reject null.

In [6]:
ss_within = sum(sum((sample - mu_sample) ** 2) for sample, mu_sample in zip(combined, mu_g))
ss_within

0.8360714285714287

##### F-statistic

In [7]:
ms_between = ss_between / df_between
ms_between

1.816476190476191

In [8]:
ms_within = ss_within / df_within
ms_within

0.05225446428571429

In [9]:
F = ms_between / ms_within
F

34.7621244482415

##### Critical value [lookup](http://www.socr.ucla.edu/applets.dir/f_table.html)

In [10]:
# lookup critical value from table, with DDOF = (df_between, df_within), alpha = 0.05
F_c = 3.2389

In [11]:
if F < F_c:
    print("reject null, with F-score = %.2f"%F)
else:
    print("cannot reject null, with F-score = %.2f"%F)

cannot reject null, with F-score = 34.76


##### Effect sizes
* $\eta^2$: what proportion of difference is attributable to groups (i.e. different treatments)? This is similar to $r^2$ in t-test.
* cohen's d: must be calculated for each pair; difference in sample mean divided by $\sqrt{MS_{within}}$ (analogous to standard deviation)

In [12]:
# eta
ss_between / (ss_within + ss_between)

0.8669841017307408

In [13]:
# cohen's d, between placebo and x group
abs(mu_p - mu_x) / np.sqrt(ms_within)

0.43745995117849606

##### Tuky's HSD
* Lookup [here](https://www2.stat.duke.edu/courses/Spring98/sta110c/qtable.html) with treatment $k=4$ (placebo is counted as a treatment) and degree of freedom for error term (df_within) $N-k=16$.
* Formally defined as: $q^*\sqrt{MS_{within} / n}$

In [14]:
q = 3.65 # alpha = 0.05
hsd = q * np.sqrt(ms_within / min(ns)) # use smallest sample size (conservative)

hsd

0.4171810459639881