# Exercise 6.8

In [None]:
import pathlib
import numpy as np
import pandas as pd
import scipy.linalg as la
from collections import namedtuple
from IPython.display import display, Math
from scipy import stats

In [3]:
def load_data() -> pd.DataFrame:
    '''
    Load the exercise data from its Excel workbook.
    Returns:
        pd.DataFrame: Whatever pd.read_excel returns for '../../Data/Exercise6.8.xlsx'.
        The path is relative, so it is resolved against the current working directory.
    '''
    # Build the path from its components so pathlib picks the separator. A literal
    # backslash-separated string is only split into directories on Windows.
    f = pathlib.Path('..', '..', 'Data', 'Exercise6.8.xlsx')
    df = pd.read_excel(f)
    return df

In [4]:
# Read the exercise workbook into a DataFrame; later cells use its Treat, resp1 and resp2 columns.
df = load_data()

In [None]:
# Total number of observations (rows) across all treatments.
n = df.shape[0]
# Number of response variables analysed below (resp1 and resp2).
p = 2
# Number of distinct treatments (groups).
g = df.Treat.nunique()
# Per-treatment sample sizes in treatment-label order. value_counts() alone sorts by
# descending frequency, which would not line n1, n2, n3 up with treatments 1, 2, 3.
n1, n2, n3 = df.Treat.value_counts().sort_index().tolist()

## (a)

In [None]:
def obs_breakdown(df: pd.DataFrame, trt_col: str, var_col: str) -> namedtuple:
    '''
    Split the observations of one variable into overall mean, treatment effect, and residual.
    Each returned array has one row per treatment (in groupby order) and one column per
    observation slot. Rows of treatments with fewer observations than the largest treatment
    are padded on the right with NaN.
    Args:
        df (pd.DataFrame): Input data with a column for treatments and columns for each variable.
        trt_col (str): The column with the treatments (groups).
        var_col (str): The column with the variable to break down.
    Return:
        namedtuple: The named tuple has elements: 'Variable', 'Obs', 'Mean', 'TreatmentEffect',
        'Residual'.
    '''
    ObsBreakdown = namedtuple('ObsBreakdown', ['Variable', 'Obs', 'Mean', 'TreatmentEffect', 'Residual'])
    data = df[[trt_col, var_col]].copy()

    # One row per treatment, as many columns as the largest treatment has observations.
    shape = [data[trt_col].nunique(), data[trt_col].value_counts().max()]
    obs_a, global_mean_a, trt_effect_a, residual_a = (np.full(shape, np.nan) for _ in range(4))

    overall_mean = np.mean(data[var_col])
    for row, (_, members) in enumerate(data.groupby(trt_col)):
        values = members[var_col].to_numpy()
        width = values.shape[0]
        treatment_mean = np.mean(values)

        # Scalars broadcast across the filled part of the row; the padding stays NaN.
        obs_a[row, :width] = values
        global_mean_a[row, :width] = overall_mean
        trt_effect_a[row, :width] = treatment_mean - overall_mean
        residual_a[row, :width] = values - treatment_mean

    return ObsBreakdown(var_col, obs_a, global_mean_a, trt_effect_a, residual_a)

In [308]:
def create_array_text(a: np.ndarray) -> str:
    '''
    Create a text string with the latex code to generate an array.
    Every entry is shown as an integer and NaN entries are rendered as empty cells.
    Values are rounded to the nearest integer (Python round(), ties go to the even
    integer) instead of being truncated, so floating-point round-off such as
    1.9999999999 is shown as 2 rather than 1.
    Args:
        a (np.ndarray): Two-dimensional array to generate a latex array for.
    Returns:
        str: Latex code for the input array.
    '''
    _, n_cols = a.shape
    # One right-aligned column per array column.
    col_spec = 'r' * n_cols
    header = fr'\left[\begin{{array}}{{ {col_spec} }}'
    footer = r'\end{array}\right]'

    def _cell(v) -> str:
        # NaN marks padding for groups with fewer observations; leave those cells blank.
        return '' if np.isnan(v) else str(int(round(float(v))))

    # Every row, including the last, is terminated with a latex line break.
    rows = (' & '.join(_cell(v) for v in row) + r' \\ ' for row in a)
    return header + ''.join(rows) + footer

def display_obs_breakdown(data: namedtuple, spacing: list[str]) -> None:
    '''
    Show an observation breakdown as latex: the variable name, the equation
    observation = mean + treatment effect + residual, and a row of labels under it.
    Args:
        data (namedtuple): Breakdown to show. The attributes Variable, Obs, Mean,
        TreatmentEffect and Residual are read; the four arrays have a row for each
        group and a column for each observation.
        spacing list[str]: A list with 4 string elements, the horizontal space placed
        before each of the four labels.
        Something like, ['0.5cm','2.0cm','2.2cm','2.5cm'].
    '''
    assert len(spacing)==4, 'Spacing must have 4 string elements.'
    components = (data.Obs, data.Mean, data.TreatmentEffect, data.Residual)
    obs_tex, mean_tex, trt_tex, resid_tex = map(create_array_text, components)

    display(Math(fr'\text{{Variable: {data.Variable}}}'))
    display(Math(f'{obs_tex} = {mean_tex} + {trt_tex} + {resid_tex}'))

    # One caption per matrix, each preceded by its caller-supplied horizontal gap.
    captions = ('observation', 'mean', 'treatment effect', 'residual')
    display(Math(''.join(fr'\hspace{{ {gap} }}\text{{({caption})}}'
                         for gap, caption in zip(spacing, captions))))

From (6-39)
$$
    \begin{array}{ccccccc}
        \textbf{x}_{\ell j} & = & \bar{\textbf{x}} & + & (\bar{\textbf{x}}_{\ell} - \bar{\textbf{x}}) & + & (\textbf{x}_{\ell j} - \bar{\textbf{x}}_{\ell}) \\
        \textbf{(observation)}
        &
        &
        \left(
            \begin{array}{c}
                \text{overall sample} \\
                \text{mean } \hat{\bm{\mu}}
            \end{array}
        \right)
        &
        &
        \left(
            \begin{array}{c}
                \text{estimated} \\
                \text{treatment} \\
                \text{effect } \hat{\bm{\tau}}_{\ell}
            \end{array}
        \right)
        &
        &
        \left(
            \begin{array}{c}
                \text{residual } \\
                \text{effect } \hat{\textbf{e}}_{\ell j}
            \end{array}
        \right)
    \end{array}
$$

In [309]:
# Break each response variable down into mean, treatment-effect and residual arrays.
resp1_breakdown, resp2_breakdown = (
    obs_breakdown(df, trt_col='Treat', var_col=response) for response in ('resp1', 'resp2')
)

In [310]:
# Horizontal gaps placed before the four labels (observation, mean, treatment effect,
# residual) that are printed under the matrices.
some_spacing = ['0.5cm','2.0cm','2.2cm','2.5cm']
display_obs_breakdown(resp1_breakdown, spacing=some_spacing)

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

In [311]:
# Same display for the second response, reusing the label spacing defined for the first.
display_obs_breakdown(resp2_breakdown, spacing=some_spacing)

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

## (b)

Okay, what's displayed above is, $\textbf{X}_{v} = \textbf{M}_{v} + \textbf{T}_{v} + \textbf{E}_{v}$, where $v$ identifies which measurement we're looking at.
For the sums of squares and cross-products in the MANOVA table we need a matrix result, which comes from some block computations. An example of what that looks like for the treatment effect is below. The Hadamard product, denoted by $\circ$, performs elementwise multiplication of the entries of matrices of the same dimensions. The code in the `compute_ss_matrices` function is more similar to the first (Hadamard) form.

$$
\left[
    \begin{array}{cc}
        \text{sum}(\textbf{T}_{1} \circ \textbf{T}_{1}) & \text{sum}(\textbf{T}_{1} \circ \textbf{T}_{2}) \\
        \text{sum}(\textbf{T}_{2} \circ \textbf{T}_{1}) & \text{sum}(\textbf{T}_{2} \circ \textbf{T}_{2})
    \end{array}
\right]
=
\left[
    \begin{array}{cc}
        \text{tr}(\textbf{T}_{1}^{\prime} \textbf{T}_{1}) & \text{tr}(\textbf{T}_{1}^{\prime} \textbf{T}_{2}) \\
        \text{tr}(\textbf{T}_{2}^{\prime} \textbf{T}_{1}) & \text{tr}(\textbf{T}_{2}^{\prime} \textbf{T}_{2})
    \end{array}
\right]
$$

In [295]:
def compute_ss_matrices(a1: np.ndarray, a2: np.ndarray):
    '''
    Compute the 2x2 matrix of sums of squares and cross-products of two equally shaped arrays.
    Entry (i, j) of the result is the sum over all positions of the elementwise product
    of array i and array j. NaN entries are replaced via np.nan_to_num before the
    products are taken, so NaN padding contributes zero to every sum.
    Args:
        a1 (np.ndarray): Component array for the first variable (groups x observations).
        a2 (np.ndarray): Component array for the second variable, same shape as a1.
    Returns:
        np.ndarray: The 2x2 sums of squares and cross-products matrix.
    '''
    # Axis 0 indexes the variable; axes 1 and 2 are group and observation within group.
    layers = np.nan_to_num(np.stack((a1, a2)))
    group_and_obs_axes = [1, 2]
    # Contract both data axes, leaving only the variable-by-variable matrix.
    return np.tensordot(layers, layers, axes=(group_and_obs_axes, group_and_obs_axes))

In [None]:
# Treatment sum of squares and cross-products matrix B, built from the treatment-effect
# arrays of the two responses.
B = compute_ss_matrices(resp1_breakdown.TreatmentEffect, resp2_breakdown.TreatmentEffect)

In [None]:
# Residual sum of squares and cross-products matrix W, built from the residual arrays
# of the two responses.
W = compute_ss_matrices(resp1_breakdown.Residual, resp2_breakdown.Residual)

In [None]:
# T is the corrected Total SSCP matrix (not the treatment-effect one): the SSCP of the raw
# observations minus the SSCP of the overall-mean arrays.
T = np.subtract(
    compute_ss_matrices(resp1_breakdown.Obs, resp2_breakdown.Obs),
    compute_ss_matrices(resp1_breakdown.Mean, resp2_breakdown.Mean),
)

In [317]:
# MANOVA table: one row per source of variation, showing its sum of squares and
# cross-products matrix and its degrees of freedom.
display(Math(r'\begin{array}{lll}'
             r'\text{Source} & \text{Matrix of sum of squares} &  \\'
             r'\text{of variation} & \text{and cross products} & \text{Degrees of freedom} \\'
             r'\hline \\'
             r'\text{Treatment} & '
             f'{create_array_text(B)} & '
             fr'{g} - 1 = {g - 1} \\ \\'
             r'\text{Residual} & '
             # The residual row must show W (the residual matrix), not the treatment matrix B.
             f'{create_array_text(W)} &'
             fr'{n1} + {n2} + {n3} - {g} = {n - g} \\ \\'
             r'\hline \\'
             r'\text{Total (corrected)} & '
             f'{create_array_text(T)} & '
             f'{(n - 1)}'
             r'\end{array}'
             ))

<IPython.core.display.Math object>

In [318]:
# Sanity check that the corrected total decomposes as T = B + W. The matrices are built
# from floating-point means, so compare with a tolerance instead of exact equality.
np.allclose(T, B + W)

True

## (c)

In [321]:
# Wilks' lambda: ratio of the determinant of the residual matrix W to the determinant of B + W.
lmbda_star = la.det(W)/la.det(B + W)

In [322]:
# Show Wilks' lambda symbolically, then as expanded 2x2 determinants, then numerically.
# T is used for the entries of the denominator because T = B + W.
display(Math(r'\Lambda^{\star}'
             '='
             r'\frac{\left|\textbf{W}\right|}{\left|\textbf{B} + \textbf{W}\right|}'
             '='
             fr'\frac{{{W[0,0]:.0f}({W[1,1]:.0f}) - ({W[0,1]:.0f})^{{2}} }}{{{T[0,0]:.0f}({T[1,1]:.0f}) - ({T[0,1]:.0f})^{{2}} }}'
             '='
             # The denominator is |B + W|; det(T + W) would be |B + 2W| and would not match
             # the value used to compute lmbda_star.
             fr'\frac{{{la.det(W):.0f}}}{{{la.det(B + W):.0f}}}'
             '='
             f'{lmbda_star:.4f}'
             ))

<IPython.core.display.Math object>

In [None]:
# F statistic built from Wilks' lambda: ((n - g - 1)/(g - 1)) * (1 - sqrt(lambda))/sqrt(lambda).
test_stat = ((n - g - 1)/(g-1)) * (1 - np.sqrt(lmbda_star))/np.sqrt(lmbda_star)

In [325]:
# Show the test statistic as formula = formula with numbers plugged in = value.
display(Math('='.join([
    r'F^{\star} = \left( \frac{ \sum n_{\ell} - g - 1 }{ g - 1 } \right)'
    r'\left( \frac{ 1 - \sqrt{\Lambda^{\star}} }{ \sqrt{\Lambda^{\star}} } \right)',
    fr'\left( \frac{{ {n} - {g} - 1 }}{{ {g} - 1 }} \right)'
    fr'\left( \frac{{ 1 - \sqrt{{ {lmbda_star:.4f} }} }}{{ \sqrt{{ {lmbda_star:.4f} }} }} \right)',
    f'{test_stat:.4f}',
])))

<IPython.core.display.Math object>

In [327]:
# Significance level of the test.
alpha = 0.01
# Upper-alpha critical value of the F distribution with 2(g - 1) and 2(n - g - 1)
# degrees of freedom.
f_crit = stats.f.ppf(1-alpha, dfn=2*(g-1), dfd=2*(n-g-1))

In [330]:
# State which distribution (Table 6.3) applies for this p and g, followed by the critical value.
display(Math(''.join([
    fr'\text{{Since}} \hspace{{0.2cm}} p = {p}',
    fr'\hspace{{0.2cm}} \text{{and}} \hspace{{0.2cm}} ',
    fr'g = {g} \hspace{{0.2cm}} \text{{using Table 6.3}},',
    r'\left( \frac{ \sum n_{\ell} - g - 1 }{ g - 1 } \right)',
    r'\left( \frac{ 1 - \sqrt{\Lambda^{\star}} }{ \sqrt{\Lambda^{\star}} } \right)',
    r'\sim F_{2(g-1), 2(\sum n_{\ell} - g - 1)}',
    '=',
    f'{f_crit:.4f}',
])))

<IPython.core.display.Math object>

In [326]:
# Compare the statistic with the critical value and state the decision. Both outcomes
# share one message; only the relation sign and the verb differ.
reject_null = test_stat > f_crit
relation = '>' if reject_null else r'\leq'
decision = 'reject' if reject_null else 'fail to reject'
display(Math(fr'\text{{We have that }} F^{{\star}} = {test_stat:.3f} {relation} F_{{\text{{crit}}}} = F_{{{2*(g-1)}, {2*(n-g-1)} }} \left( {alpha} \right) = '
             fr'{f_crit:.3f} \text{{, so we would {decision} the null hypothesis that }} '
             # The null hypothesis covers all three treatment effects: tau_1 = tau_2 = tau_3 = 0.
             r'\bm{\tau}_{1} = \bm{\tau}_{2} = \bm{\tau}_{3} = \textbf{0}'))

<IPython.core.display.Math object>