<a href="https://colab.research.google.com/github/tngzng/latex_to_python/blob/main/anova.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# analysis of variance (anova)

In [3]:
from typing import *

GROUPS = [
  [1.0, 2.0, 3.0],
  [2.0, 3.0, 4.0],
  [3.0, 4.0, 5.0],
]

## sample mean
sample mean of jth group

$\bar{y_j} = 1/n_j \sum_{i=1}^{n_j} y_{ij} $

In [4]:
def sample_mean(observations: List[float]) -> float:
  return sum(observations) / len(observations)

observations = GROUPS[0]
sample_mean(observations)

2.0

## grand mean
overall sample mean

$ \bar{\bar{y}} = 1/N \sum_{j=1}^k \sum_{i=1}^{n_j} y_{ij} $

where:
- N is the total observations (across all groups)

In [5]:
def overall_mean(groups: List[List[float]]) -> float:
  group_sums = [sum(g) for g in groups]
  N = sum([len(g) for g in groups])
  return sum(group_sums) / N

overall_mean(GROUPS)

3.0

## treatment sum of squares
difference of jth group mean to overall mean

$\mathrm{SST} = \sum_j \left( \bar{y_j} - \bar{\bar{y}} \right) ^ 2$

In [6]:
def treatment_sum_of_squares(groups: List[List[float]]) -> float:
  means = [sample_mean(g) for g in groups]
  grand_mean = overall_mean(groups)
  squared_dists = [(mean - grand_mean)**2 for mean in means]
  return sum(squared_dists)

treatment_sum_of_squares(GROUPS)

2.0

## error sum of squares 
difference of each observation and its corresponding group mean

$\mathrm{SSE} = \sum_j \sum_i \left( y_{ij} - \bar{y_j} \right) ^ 2$ 

In [7]:
def error_sum_of_squares(groups: List[List[float]]) -> float:
  errors = []
  for group in groups: 
    mean = sample_mean(group) 
    squared_dists = [(i - mean)**2 for i in group]
    errors.append(sum(squared_dists))
  
  return sum(errors)

error_sum_of_squares(GROUPS)

6.0

## treatment mean square
measures the variability of the treatment means $\bar{y_j}$

$\mathrm{MST} = \mathrm{SST} / (k - 1)$

where:
- k is the number of groups

In [8]:
def treatment_mean_square(groups: List[List[float]]) -> float:
  sum_of_squares = treatment_sum_of_squares(groups)
  k = len(groups)
  return sum_of_squares / (k - 1)

treatment_mean_square(GROUPS)

1.0

## error mean square
measures the variability within groups

$\mathrm{MSE} = \mathrm{SSE} / (N - k)$

where:
- N is the total observations (across all groups)
- k is the number of groups


In [9]:
def error_mean_square(groups: List[List[float]]) -> float:
  sum_of_squares = error_sum_of_squares(groups)
  N = sum([len(g) for g in groups])
  k = len(groups)
  return sum_of_squares / (N - k)

error_mean_square(GROUPS)

1.0

## f-test
compares variability between treatment means to the variability within the groups. 

(large values of F suggest large variation between the groups.)

$F = \mathrm{MST} / \mathrm{MSE}$

In [10]:
def f_test(groups: List[List[float]]) -> float:
  MST = treatment_mean_square(groups)
  MSE = error_mean_square(groups)
  return MST / MSE

f_test(GROUPS)

1.0

## total sum of squares

$\mathrm{TSS} = \mathrm{SST} + \mathrm{SSE}$ 

In [11]:
def total_sum_of_squares(groups: List[List[float]]) -> float:
  SST = treatment_sum_of_squares(groups)
  SSE = error_sum_of_squares(groups)
  return SST + SSE 

total_sum_of_squares(GROUPS)

8.0