# Exercise 6.8

In [1]:
import pathlib
import numpy as np
import pandas as pd
import scipy.linalg as la
from IPython.display import display, Math
from scipy import stats
import chapter_6_utils

In [2]:
def load_data(path=None):
    """Read the Exercise 6.8 workbook into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the Excel file. When omitted, the workbook is looked up
        at ``../../Data/Exercise6.8.xlsx`` relative to the working directory.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_excel`` returns for the file.
    """
    if path is None:
        # Build the path from components so the separator is chosen by
        # pathlib; a literal backslash path only resolves on Windows.
        path = pathlib.Path('..', '..', 'Data', 'Exercise6.8.xlsx')
    return pd.read_excel(path)

In [3]:
# Read the exercise data once; the cells below all work from `df`.
df = load_data()

In [4]:
# Mean of each response among the rows belonging to treatment 3.
df.loc[df['Treat'] == 3, ['resp1', 'resp2']].mean()

resp1    3.0
resp2    2.0
dtype: float64

In [5]:
# Total number of observations.
n = df.shape[0]
# Number of response variables (resp1 and resp2).
p = 2
# Number of treatment groups.
g = df.Treat.nunique()
# Per-treatment sample sizes. value_counts() orders by frequency, so sort by
# treatment label to make n1, n2, n3 line up with treatments in label order.
n1, n2, n3 = df.Treat.value_counts().sort_index().tolist()

## (a)

From (6-39)
$$
    \begin{array}{ccccccc}
        \textbf{x}_{\ell j} & = & \bar{\textbf{x}} & + & (\bar{\textbf{x}}_{\ell} - \bar{\textbf{x}}) & + & (\textbf{x}_{\ell j} - \bar{\textbf{x}}_{\ell}) \\
        \textbf{(observation)}
        &
        &
        \left(
            \begin{array}{c}
                \text{overall sample} \\
                \text{mean } \hat{\bm{\mu}}
            \end{array}
        \right)
        &
        &
        \left(
            \begin{array}{c}
                \text{estimated} \\
                \text{treatment} \\
                \text{effect } \hat{\bm{\tau}}_{\ell}
            \end{array}
        \right)
        &
        &
        \left(
            \begin{array}{c}
                \text{residual } \\
                \text{effect } \hat{\textbf{e}}_{\ell j}
            \end{array}
        \right)
    \end{array}
$$

In [6]:
# Build the observation breakdown for each response column in turn.
resp1_breakdown, resp2_breakdown = (
    chapter_6_utils.manova_obs_breakdown(df, trt_col='Treat', var_col=response)
    for response in ('resp1', 'resp2')
)

In [7]:
# Spacing values handed to the display helper; the resp2 display cell reuses them.
some_spacing = ['0.5cm','2.0cm','2.2cm','2.5cm']
chapter_6_utils.display_manova_obs_breakdown(resp1_breakdown, spacing=some_spacing)

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

In [8]:
# Same breakdown display for the second response, reusing `some_spacing`.
chapter_6_utils.display_manova_obs_breakdown(resp2_breakdown, spacing=some_spacing)

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

## (b)

What's displayed above is $\textbf{X}_{v} = \textbf{M}_{v} + \textbf{T}_{v} + \textbf{E}_{v}$, where $v$ identifies which response variable we're looking at.
For the sums of squares and cross-products in the MANOVA table we need a matrix result, which comes from some block computations. An example of what that looks like for the treatment effect is below. The Hadamard product, denoted by $\circ$, performs elementwise multiplication of the entries of matrices of the same dimensions. The code in the `compute_manova_ss_matrices` function is more similar to the first form (elementwise products that are then summed).

$$
\left[
    \begin{array}{cc}
        \text{sum}(\textbf{T}_{1} \circ \textbf{T}_{1}) & \text{sum}(\textbf{T}_{1} \circ \textbf{T}_{2}) \\
        \text{sum}(\textbf{T}_{2} \circ \textbf{T}_{1}) & \text{sum}(\textbf{T}_{2} \circ \textbf{T}_{2})
    \end{array}
\right]
=
\left[
    \begin{array}{cc}
        \text{tr}(\textbf{T}_{1}^{\prime} \textbf{T}_{1}) & \text{tr}(\textbf{T}_{1}^{\prime} \textbf{T}_{2}) \\
        \text{tr}(\textbf{T}_{2}^{\prime} \textbf{T}_{1}) & \text{tr}(\textbf{T}_{2}^{\prime} \textbf{T}_{2})
    \end{array}
\right]
$$

In [9]:
def compute_manova_ss_matrices(a1: np.ndarray, a2: np.ndarray, *more: np.ndarray) -> np.ndarray:
    """Sum-of-squares and cross-products matrix for equally shaped arrays.

    Entry ``[i, j]`` of the result is the sum over all positions of the
    elementwise product of the i-th and j-th input arrays. NaN entries are
    replaced by ``np.nan_to_num`` before multiplying, so they contribute zero.

    Parameters
    ----------
    a1, a2 : array-like
        Arrays for the first two variables; all inputs must share one shape.
    *more : array-like
        Optional arrays for further variables.

    Returns
    -------
    np.ndarray
        Square matrix with one row and one column per input array.

    Raises
    ------
    ValueError
        Raised by ``np.stack`` when the inputs do not share a shape.
    """
    # Axis 0 indexes the variable; the remaining axes hold that variable's data.
    stacked = np.nan_to_num(np.stack([a1, a2, *more]))
    # Contract over every data axis, whatever the dimensionality of the inputs.
    data_axes = list(range(1, stacked.ndim))
    return np.tensordot(stacked, stacked, axes=(data_axes, data_axes))

In [10]:
# Treatment sum-of-squares and cross-products matrix, built from the
# TreatmentEffect arrays of both responses.
B = compute_manova_ss_matrices(resp1_breakdown.TreatmentEffect, resp2_breakdown.TreatmentEffect)

In [11]:
# Residual sum-of-squares and cross-products matrix, built from the
# Residual arrays of both responses.
W = compute_manova_ss_matrices(resp1_breakdown.Residual, resp2_breakdown.Residual)

In [12]:
# Total (corrected) SS matrix: the matrix for the raw observations minus the
# matrix for the overall-mean arrays. Here T means "total", not "treatment".
T = (
    compute_manova_ss_matrices(resp1_breakdown.Obs, resp2_breakdown.Obs)
    - compute_manova_ss_matrices(resp1_breakdown.Mean, resp2_breakdown.Mean)
)

In [13]:
# Render the MANOVA table: one row each for the treatment (B), residual (W) and
# corrected total (T) matrices, alongside their degrees of freedom.
display(Math(r'\begin{array}{lll}'
             r'\text{Source} & \text{Matrix of sum of squares} &  \\'
             r'\text{of variation} & \text{and cross products} & \text{Degrees of freedom} \\'
             r'\hline \\'
             r'\text{Treatment} & '
             f'{chapter_6_utils.create_array_text(B)} & '
             fr'{g} - 1 = {g - 1} \\ \\'
             r'\text{Residual} & '
             f'{chapter_6_utils.create_array_text(W)} &'
             fr'{n1} + {n2} + {n3} - {g} = {n - g} \\ \\'
             r'\hline \\'
             r'\text{Total (corrected)} & '
             f'{chapter_6_utils.create_array_text(T)} & '
             f'{(n - 1)}'
             r'\end{array}'
             ))

<IPython.core.display.Math object>

In [14]:
# Check that the total matrix decomposes as T = B + W. The two sides are
# accumulated along different paths, so compare with a floating-point
# tolerance rather than demanding bit-for-bit equality.
np.allclose(T, B + W)

True

## (c)

In [15]:
# Wilks' lambda: the determinant ratio |W| / |B + W|.
lmbda_star = la.det(W)/la.det(B + W)

In [16]:
# Show how Lambda* is obtained: symbolic ratio, expanded 2x2 determinants,
# determinant values, and the final number.
display(Math(r'\Lambda^{\star}'
             '='
             r'\frac{\left|\textbf{W}\right|}{\left|\textbf{B} + \textbf{W}\right|}'
             '='
             # The denominator is written with the entries of T, the total matrix.
             fr'\frac{{{W[0,0]:.0f}({W[1,1]:.0f}) - ({W[0,1]:.0f})^{{2}} }}{{{T[0,0]:.0f}({T[1,1]:.0f}) - ({T[0,1]:.0f})^{{2}} }}'
             '='
             # Denominator must be |B + W|, matching how lmbda_star is computed.
             fr'\frac{{{la.det(W):.0f}}}{{{la.det(B + W):.0f}}}'
             '='
             f'{lmbda_star:.4f}'
             ))

<IPython.core.display.Math object>

In [17]:
# Transform Wilks' lambda into the statistic that is compared with an F
# critical value further down (the form used for p = 2 responses).
test_stat = ((n - g - 1)/(g-1)) * (1 - np.sqrt(lmbda_star))/np.sqrt(lmbda_star)

In [18]:
# Show the F* formula symbolically, then with n, g and Lambda* substituted,
# then the resulting value.
display(Math(r'F^{\star} = \left( \frac{ \sum n_{\ell} - g - 1 }{ g - 1 } \right)'
             r'\left( \frac{ 1 - \sqrt{\Lambda^{\star}} }{ \sqrt{\Lambda^{\star}} } \right)'
             '='
             fr'\left( \frac{{ {n} - {g} - 1 }}{{ {g} - 1 }} \right)'
             fr'\left( \frac{{ 1 - \sqrt{{ {lmbda_star:.4f} }} }}{{ \sqrt{{ {lmbda_star:.4f} }} }} \right)'
             '='
             f'{test_stat:.4f}'
             ))

<IPython.core.display.Math object>

In [19]:
# Significance level used for both tests in this part.
alpha = 0.01
# Upper-alpha critical value of the F distribution with 2(g-1) and 2(n-g-1)
# degrees of freedom.
f_crit = stats.f.ppf(1-alpha, dfn=2*(g-1), dfd=2*(n-g-1))

In [20]:
# State which sampling distribution applies for the current p and g, and show
# the critical value computed in the previous cell.
display(Math(fr'\text{{Since}} \hspace{{0.2cm}} p = {p}'
             fr'\hspace{{0.2cm}} \text{{and}} \hspace{{0.2cm}} '
             fr'g = {g} \hspace{{0.2cm}} \text{{using Table 6.3}},'
             r'\left( \frac{ \sum n_{\ell} - g - 1 }{ g - 1 } \right)'
             r'\left( \frac{ 1 - \sqrt{\Lambda^{\star}} }{ \sqrt{\Lambda^{\star}} } \right)'
             r'\sim F_{2(g-1), 2(\sum n_{\ell} - g - 1)}(\alpha)'
             '='
             f'{f_crit:.4f}'
             ))

<IPython.core.display.Math object>

In [21]:
# Compare F* with its critical value and state the decision about the null
# hypothesis of no treatment effects (tau_1 = tau_2 = tau_3 = 0).
if test_stat > f_crit:
    display(Math(fr'\text{{We have that }} F^{{\star}} = {test_stat:.3f} > F_{{\text{{crit}}}} = F_{{{2*(g-1)}, {2*(n-g-1)} }} \left( {alpha} \right) = '
                 fr'{f_crit:.3f} \text{{, so we would reject the null hypothesis that }} '
                 r'\bm{\tau}_{1} = \bm{\tau}_{2} = \bm{\tau}_{3} = \textbf{0}'))
else:
    display(Math(fr'\text{{We have that }} F^{{\star}} = {test_stat:.3f} \leq F_{{\text{{crit}}}} = F_{{{2*(g-1)}, {2*(n-g-1)} }} \left( {alpha} \right) = '
                fr'{f_crit:.3f} \text{{, so we would fail to reject the null hypothesis that }} '
                r'\bm{\tau}_{1} = \bm{\tau}_{2} = \bm{\tau}_{3} = \textbf{0}'))

<IPython.core.display.Math object>

In [22]:
# Bartlett's statistic: -(n - 1 - (p + g)/2) * ln(Lambda*).
test_stat_bartlett = -(n - 1 - (p + g)/2)*np.log(lmbda_star)

In [23]:
# Show Bartlett's statistic symbolically, then with the numbers substituted,
# then the resulting value.
display(Math(r'X^{\star}'
             '='
             '-(n - 1 - (p + g)/2)'
             r'\ln'
             r'\left('
             r'\frac{|\textbf{W}|}{|\textbf{B} + \textbf{W}|}'
             r'\right)'
             '='
             f'-({n} - 1 - ({p} + {g})/2)'
             r'\ln'
             r'\left('
             # The denominator is the determinant of the sum, |B + W|; it is
             # not the sum of the two determinants.
             fr'\frac{{ {la.det(W):.0f} }}{{ {la.det(B + W):.0f} }}'
             r'\right)'
             '='
             f'{test_stat_bartlett:.4f}'
             ))

<IPython.core.display.Math object>

In [24]:
# Upper-alpha critical value of the chi-square distribution with p(g-1)
# degrees of freedom.
bartlett_crit = stats.chi2.ppf(1-alpha, df=p*(g-1))

In [25]:
# Show the chi-square critical value: symbolic degrees of freedom, the
# substituted values, then the number.
display(Math(r'X_{\text{crit}}'
             '='
             r'\chi_{p(g-1)}^{2}'
             '='
             fr'\chi_{{ {p}({g-1}) }}^{{2}}'
             '='
             f'{bartlett_crit:.4f}'
             ))

<IPython.core.display.Math object>

In [26]:
# Decide Bartlett's test by comparing its own statistic with the chi-square
# critical value; the F statistic and F critical value belong to the test above.
# Double-quoted raw strings let the apostrophe appear without a backslash.
if test_stat_bartlett > bartlett_crit:
    display(Math(fr"\text{{For Bartlett's test, we have that }} X^{{\star}} = {test_stat_bartlett:.3f} > X_{{\text{{crit}}}} = \chi_{{ {p}({g-1}) }}^{{2}} \left( {alpha} \right) = "
                 fr'{bartlett_crit:.3f} \text{{, so we would reject the null hypothesis that }} '
                 r'\bm{\tau}_{1} = \bm{\tau}_{2} = \bm{\tau}_{3} = \textbf{0}'))
else:
    display(Math(fr"\text{{For Bartlett's test, we have that }} X^{{\star}} = {test_stat_bartlett:.3f} \leq X_{{\text{{crit}}}} = \chi_{{ {p}({g-1}) }}^{{2}} \left( {alpha} \right) = "
                fr'{bartlett_crit:.3f} \text{{, so we would fail to reject the null hypothesis that }} '
                r'\bm{\tau}_{1} = \bm{\tau}_{2} = \bm{\tau}_{3} = \textbf{0}'))

<IPython.core.display.Math object>