# Exercise 6.6

In [1]:
import pathlib
import numpy as np
import pandas as pd
import scipy.linalg as la
from IPython.display import display, Math
from scipy import stats

In [2]:
def load_data():
    """Read the Exercise 6.8 workbook and keep only Treatments 2 and 3.

    The workbook is looked up at ``../../Data/Exercise6.8.xlsx`` relative to
    the current working directory.

    Returns:
        pandas.DataFrame: an independent copy of the rows whose ``Treat``
        value is 2 or 3.
    """
    # Build the path from separate segments so the correct separator is used
    # on every OS (a literal back-slash path is a single file name on POSIX).
    f = pathlib.Path('..', '..', 'Data', 'Exercise6.8.xlsx')
    df = pd.read_excel(f)
    # Only keep Treatments 2 and 3; .copy() detaches the result from the
    # full frame so later modifications cannot trigger chained-assignment
    # warnings.
    return df[df.Treat.isin([2, 3])].copy()

In [3]:
# Rows for Treatments 2 and 3 only; the later cells all read from this frame.
df = load_data()

In [4]:
# Problem dimensions used throughout the notebook.
n = len(df.index)            # total number of observations across both treatments
p = 2                        # number of response variables (resp1, resp2)
g = df['Treat'].nunique()    # number of treatment groups being compared

### (a)

In [5]:
# Pooled covariance matrix:
#   S_pooled = sum_l (n_l - 1) S_l / (n - g)
# The loop variable is called `grp` so that it does not shadow the group
# count `g` that appears in the denominator of the same expression.
weighted_covs = [
    (grp.shape[0] - 1) * grp[['resp1', 'resp2']].cov()
    for _, grp in df.groupby('Treat')
]
S_pooled = (sum(weighted_covs) / (n - g)).to_numpy()

In [6]:
# Assemble the LaTeX for the 2 x 2 pooled covariance matrix, then render it.
S_pooled_latex = ''.join([
    r'\textbf{S}_{\text{pooled}}',
    '=',
    r'\left[',
    r'\begin{array}{rr}',
    fr'{S_pooled[0,0]} & {S_pooled[0,1]} \\',
    f'{S_pooled[1,0]} & {S_pooled[1,1]}',
    r'\end{array}',
    r'\right]',
])
display(Math(S_pooled_latex))

<IPython.core.display.Math object>

### (b)

Using the test from page 285,
$$
    H_{0}: \bm{\mu}_{2} - \bm{\mu}_{3} = \bm{\delta}_{0} = \textbf{0}
$$

$$
    T^{2}
    =
    {(\bar{\textbf{x}}_{2} - \bar{\textbf{x}}_{3} - \bm{\delta}_{0})}^{\prime}
    {\left[
        \left(
            \frac{1}{n_{2}}
            +
            \frac{1}{n_{3}}
        \right)
        \textbf{S}_{\text{pooled}}
    \right]}^{-1}
    (\bar{\textbf{x}}_{2} - \bar{\textbf{x}}_{3} - \bm{\delta}_{0})
    > c^{2}
$$

Here,
$$
    \bm{\delta}_{0}
    =
    \textbf{0}
    =
    \left[
        \begin{array}{c}
            0 \\
            0
        \end{array}
    \right]
$$

In [7]:
# Significance level used for the test in (b) and the intervals in (c).
alpha = 0.01
# Per-treatment sample sizes, ordered by treatment label (2 first, then 3).
n2, n3 = df.value_counts('Treat').sort_index()
# Matrix of group mean vectors: one row per response, one column per treatment.
# The response columns are selected explicitly (as is done for S_pooled) so
# that any additional or non-numeric column in the sheet cannot add rows to
# xbar or make the mean fail.
xbar = df.groupby('Treat')[['resp1', 'resp2']].mean().T.to_numpy()

In [8]:
# Contrast column vector: right-multiplying the matrix of group means by it
# gives (mean of group two) - (mean of group three).
c = np.array([1, -1]).reshape(2, 1)
# The null hypothesis is that the difference between groups two and three is
# zero, so \bm{\delta}_{0} is the zero vector.
delta0 = np.zeros((2, 1))

In [9]:
# Difference of the mean vectors (group two minus group three), obtained by
# applying the contrast vector to the matrix of group means.
d = np.matmul(xbar, c)


In [10]:
# Two-sample Hotelling T^2 statistic:
#   T^2 = (d - delta0)' [ (1/n2 + 1/n3) S_pooled ]^{-1} (d - delta0)
centered_diff = d - delta0
diff_cov = (1/n2 + 1/n3) * S_pooled
T2 = (centered_diff.T @ la.inv(diff_cov) @ centered_diff).item()

In [11]:
# Critical value c^{2} from page 286:
#   c^2 = [(n2 + n3 - 2) p / (n2 + n3 - (p + 1))] * F_{p, n2 + n3 - (p + 1)}(alpha)
const = (p * (n2 + n3 - 2)) / (n2 + n3 - (p + 1))
f_value = stats.f.ppf(1 - alpha, dfn=p, dfd=n2 + n3 - (p + 1))
c2 = f_value * const

In [18]:
# Show the critical value three ways: symbolic formula, formula with the
# numbers substituted, and the evaluated product.
c2_latex = ''.join([
    'c^{2}',
    '=',
    r'\frac{(n_{2} + n_{3}  - 2)p}{(n_{2} + n_{3} - (p + 1))}',
    r'F_{p, n_{2} + n_{3} - (p+1)}(\alpha)',
    '=',
    fr'\frac{{ ({n2 + n3 - 2})({p}) }}{{ {n2 + n3 - (p + 1)} }}',
    fr'F_{{ {p}, {n2 + n3 - (p+1)} }}({alpha})',
    '=',
    fr'{const:.3f} \times {f_value:.2f}',
    '=',
    f'{c2:.2f}',
])
display(Math(c2_latex))

<IPython.core.display.Math object>

In [17]:
# Decision rule: reject H0 when T^2 exceeds the critical value c^2.
# The message shows c^2 as (multiplier) x (F quantile), matching how c2 was
# computed; c^2 is not the F quantile on its own.
reject_h0 = T2 > c2
relation = '>' if reject_h0 else r'\leq'
decision = 'reject' if reject_h0 else 'fail to reject'
display(Math(fr'\text{{We have that }} T^{{2}} = {T2:.2f} {relation} c^{{2}} = '
             fr'{const:.3f} \times F_{{ {p}, {n2+n3-(p+1)} }}({alpha}) = '
             fr'{c2:.2f} \text{{, so we would {decision} the null hypothesis that }} '
             r'\bm{\mu}_{2} = \bm{\mu}_{3} \text{ (the mean vectors for the two groups are equal)}.'
             ))

<IPython.core.display.Math object>

### (c)

Using Result 6.3 on page 288

$$
    \textbf{a}^{\prime}
    {(\bar{\textbf{X}}_{2} - \bar{\textbf{X}}_{3})}
    \pm
    c
    \sqrt{
        \textbf{a}^{\prime}
        \left(
            \frac{1}{n_{2}}
            +
            \frac{1}{n_{3}}
        \right)
        \textbf{S}_{\text{pooled}}
        \textbf{a}
    }
$$
where
$$
    c^{2}
    =
    \frac{(n_{2} + n_{3} - 2)p}{n_{2} + n_{3} - (p + 1)}
    F_{p, n_{2} + n_{3} - (p + 1)}
    (\alpha)
$$

Set up the $\textbf{a}$ vectors,
$$
\textbf{a}_{1}
=
\begin{bmatrix}
    1 \\
    0
\end{bmatrix}
\hspace{0.2cm}
\text{and}
\hspace{0.2cm}
\textbf{a}_{2}
=
\begin{bmatrix}
    0 \\
    1
\end{bmatrix}
$$

In [14]:
# Unit column vectors that pick out the first and the second response, so that
# a' (mu_2 - mu_3) is the difference in a single component.
a1 = np.array([1, 0]).reshape(2, 1)
a2 = np.array([0, 1]).reshape(2, 1)

In [15]:
# Simultaneous intervals a'd -/+ c * sqrt((1/n2 + 1/n3) a' S_pooled a),
# one row per vector a; columns are (lower bound, upper bound).
ci99 = np.vstack([
    a.T @ d + np.array([-1, 1]) * np.sqrt(c2) * np.sqrt((1/n2 + 1/n3) * (a.T @ S_pooled @ a)).item()
    for a in (a1, a2)
])

In [16]:
# One line per response: the interval written out as a formula, followed by
# its numeric lower and upper bounds.
for i, (lower, upper) in enumerate(ci99):
    mu_diff_tex = fr'\mu_{{ 2{i+1} }} - \mu_{{ 3{i+1} }}'
    display(Math(fr'{mu_diff_tex}:'
                 fr'({xbar[i,0]:.1f} - {xbar[i,1]:.1f})'
                 fr'\pm \sqrt{{{c2:.2f}}} '
                 fr'\sqrt{{\left(\frac{{1}}{{{n2}}} + \frac{{1}}{{{n3}}}\right) {S_pooled[i,i]:.2f}}}'
                 r'\hspace{0.4cm}\text{or}\hspace{0.4cm}'
                 fr'{lower:.3f} \leq {mu_diff_tex} \leq {upper:.3f}'
                 ))

<IPython.core.display.Math object>

<IPython.core.display.Math object>