# Example 6.8 (A univariate ANOVA table and F-test for treatment effects)

In [1]:
import numpy as np
import pandas as pd
from IPython.display import display, Math
from scipy import stats

In [2]:
# Observed responses, one array per treatment group (group sizes are 3, 2 and 3).
p1, p2, p3 = (np.array(obs) for obs in ([9, 6, 9], [0, 2], [3, 1, 2]))

In [3]:
# Stack the samples into one (groups x largest sample size) matrix. Shorter
# samples are right-padded with NaN so unequal group sizes fit in a rectangular
# array; the pad width is derived from the longest sample instead of being
# hard-coded for p2. The float cast is required because NaN has no int encoding.
samples = (p1, p2, p3)
max_size = max(s.size for s in samples)
x = np.vstack([
    np.pad(s.astype(float), (0, max_size - s.size), mode='constant', constant_values=np.nan)
    for s in samples
])
x

array([[ 9.,  6.,  9.],
       [ 0.,  2., nan],
       [ 3.,  1.,  2.]])

In [4]:
# g: number of treatment groups (rows of x); n1..n3: per-group sample sizes;
# n: total number of observations across all groups.
g = len(x)
n1, n2, n3 = (sample.size for sample in (p1, p2, p3))
n = sum((n1, n2, n3))

In [5]:
# Grand mean: average over all observations pooled across the three groups.
pooled = np.hstack((p1, p2, p3))
xbar = pooled.mean(axis=0)
display(Math(fr'\bar{{x}} = {xbar:.0f}'))

<IPython.core.display.Math object>

In [6]:
# Per-group means as a (g, 1) column vector; nanmean skips the NaN padding.
xbarl = np.nanmean(x, axis=1, keepdims=True)
# Build the bmatrix rows from however many groups there are rather than
# indexing exactly three entries. The join is done outside the f-string because
# backslashes are not allowed inside f-string expressions before Python 3.12.
xbarl_rows = r' \\ '.join(f'{m:.0f}' for m in xbarl[:, 0])
display(Math(fr'\bar{{x}}_{{\ell}} = \begin{{bmatrix}} {xbarl_rows} \end{{bmatrix}}'))

<IPython.core.display.Math object>

In [7]:
# Sum of squares of the raw observations; the NaN padding cells are excluded.
observed = x[~np.isnan(x)]
SS_obs = (observed**2).sum()
SS_obs

216.0

In [8]:
# Matrix holding the grand mean in every cell. It is shaped like x (not a
# hard-coded 3x3) so the boolean mask below always matches its dimensions.
mean_matrix = np.full(x.shape, xbar)
# Sum of squares attributable to the grand mean: one xbar**2 term per real
# observation (cells that are NaN padding in x are masked out).
SS_mean = (mean_matrix[~np.isnan(x)]**2).sum()

In [9]:
# Treatment sum of squares: squared deviation of each group mean from the grand
# mean, counted once per real observation in that group. The (g, 1) deviations
# are broadcast to x's shape (instead of a hard-coded 3x3) so the NaN mask fits.
SS_tr = (np.broadcast_to(xbarl - xbar, x.shape)[~np.isnan(x)]**2).sum()

In [10]:
# Residual sum of squares: squared deviation of every observation from its own
# group mean (xbarl broadcasts across each row); NaN padding is masked out.
residuals = x - xbarl
SS_res = (residuals[~np.isnan(x)]**2).sum()

In [11]:
# Total (corrected) sum of squares: squared deviation of every observation from
# the grand mean; NaN padding is masked out.
deviations = x - mean_matrix
SS_cor = (deviations[~np.isnan(x)]**2).sum()

In [12]:
# ANOVA table, built one row per source of variation. Every cell is a LaTeX
# fragment so the frame can be rendered as a math array.
df = pd.DataFrame(
    [
        (r'\text{Treatments}',
         f'SS_{{tr}} = {SS_tr}',
         f'g - 1 = {g} - 1 = {g - 1}'),
        (r'\text{Residual}',
         f'SS_{{res}} = {SS_res}',
         fr'\sum_{{\ell = 1}}^{{g}} n_{{\ell}} - g = ({n1} + {n2} + {n3}) - {g} = {n - g}'),
        (r'\text{Total (corrected)}',
         f'SS_{{cor}} = {SS_cor}',
         fr'\sum_{{\ell = 1}}^{{g}} n_{{\ell}} - 1 = ({n1} + {n2} + {n3}) - 1 = {n - 1}'),
    ],
    columns=['Source of variation', 'Sum of squares', 'Degrees of freedom'],
)

In [13]:
def create_latex_table(df: pd.DataFrame, hline_after: str = r'\text{Residual}') -> str:
    r"""Render a DataFrame of LaTeX fragments as a LaTeX ``array`` environment.

    The column names form the header row (each wrapped in ``\text{...}``),
    followed by a horizontal rule and then one array row per DataFrame row.
    Cell contents are inserted verbatim, so they must already be valid
    math-mode LaTeX.

    Parameters
    ----------
    df : pd.DataFrame
        Table to render. Any number of columns is supported; every column is
        left-aligned. Cells are converted with ``str``.
    hline_after : str, optional
        A horizontal rule is drawn below every row whose first cell equals
        this value; all other rows are followed by an empty spacer row.
        Defaults to ``\text{Residual}``.

    Returns
    -------
    str
        The LaTeX source, suitable for ``IPython.display.Math``.
    """
    # One left-aligned column spec per DataFrame column (was hard-coded 'lll').
    parts = [
        r'\begin{array}{' + 'l' * len(df.columns) + '} ',
        ' & '.join(fr'\text{{{c}}}' for c in df) + r' \\',
        r'\hline \\',
    ]

    for row in df.itertuples(index=False, name=None):
        cells = [str(cell) for cell in row]
        parts.append('&'.join(cells) + r' \\')
        # Separate the designated row from what follows with a rule; otherwise
        # emit an empty row as vertical spacing.
        if cells and cells[0] == hline_after:
            parts.append(r'\hline \\')
        else:
            parts.append(r'\\')

    parts.append(r'\end{array}')
    return ''.join(parts)

In [14]:
# Typeset the ANOVA table as a math array.
anova_table_latex = create_latex_table(df)
display(Math(anova_table_latex))

<IPython.core.display.Math object>

$$
H_{0}: \tau_{1} = \tau_{2} = \tau_{3} = 0 \hspace{0.4cm} \text{(no treatment effect)}
$$

In [15]:
# F statistic: ratio of the treatment mean square to the residual mean square.
MS_tr = SS_tr / (g - 1)
MS_res = SS_res / (n - g)
F = MS_tr / MS_res

# Show the formula symbolically, then with the numbers plugged in, then the value.
F_symbolic = r'\frac{\text{SS}_{\text{tr}} / (g - 1)}{\text{SS}_{\text{res}} / (\sum n_{\ell} - g)}'
F_numeric = fr'\frac{{{SS_tr:.0f} / {g - 1} }}{{{SS_res:.0f} / {n - g} }}'
display(Math(f'F = {F_symbolic} ={F_numeric} ={F}'))

<IPython.core.display.Math object>

In [16]:
# Upper-alpha critical value of the F distribution with (g - 1, n - g) degrees
# of freedom, i.e. the (1 - alpha) quantile.
alpha = 0.01
dfn, dfd = g - 1, n - g
f_crit = stats.f.ppf(1 - alpha, dfn=dfn, dfd=dfd)
display(Math(fr'F_{{g - 1, n - g}}(\alpha) = F_{{{dfn}, {dfd}}}({alpha}) = {f_crit:.2f}'))

<IPython.core.display.Math object>

In [17]:
# Compare the observed F with the critical value and state the conclusion.
# Both outcomes share one message; only the relation symbol and verdict differ.
reject_null = F > f_crit
relation = '>' if reject_null else r'\leq'
verdict = 'reject' if reject_null else 'fail to reject'
display(Math(fr'\text{{We have that }} F = {F:.1f} {relation} F_{{g - 1, n - g}}(\alpha) = '
             fr'{f_crit:.2f} \text{{, so we would {verdict} the null hypothesis that }} '
             r'\tau_{1} = \tau_{2} = \tau_{3} = 0'))

<IPython.core.display.Math object>

<table>
    <row>
    </row>
</table>