# Exercise 6.1

In [1]:
import pathlib
import numpy as np
import pandas as pd
import numpy.linalg as la
from IPython.display import display, Math
from scipy import stats

In [2]:
def load_data() -> pd.DataFrame:
    """Load the Table 6.1 workbook from the project's data directory.

    The file is looked up at ``../../data/Table6.1.xlsx``, a relative path
    that is resolved against the current working directory.

    Returns:
        The worksheet contents as returned by ``pandas.read_excel``.
    """
    # Build the path from components with the generic ``Path`` class:
    # ``WindowsPath`` cannot be instantiated on non-Windows systems, and a
    # backslash-separated string is not split into components on POSIX.
    fpth = pathlib.Path('..', '..', 'data', 'Table6.1.xlsx')
    return pd.read_excel(fpth)

In [3]:
# Read the Table 6.1 workbook and preview its first three rows.
effluent_df = load_data()
effluent_df.head(3)

Unnamed: 0,Sample j,Commercial lab BOD,Commercial lab SS,State lab of hygiene BOD,State lab of hygiene SS
0,1,6,27,25,15
1,2,6,23,28,13
2,3,18,64,36,22


In [4]:
# Paired differences, commercial lab minus state lab of hygiene:
# d1 is the BOD difference and d2 is the SS difference.
effluent_df = effluent_df.assign(
    d1=effluent_df['Commercial lab BOD'] - effluent_df['State lab of hygiene BOD'],
    d2=effluent_df['Commercial lab SS'] - effluent_df['State lab of hygiene SS'],
)
effluent_df.head(3)

Unnamed: 0,Sample j,Commercial lab BOD,Commercial lab SS,State lab of hygiene BOD,State lab of hygiene SS,d1,d2
0,1,6,27,25,15,-19,12
1,2,6,23,28,13,-22,10
2,3,18,64,36,22,-18,42


In [5]:
# Collect the two difference columns into an array: one row per sample,
# one column per difference variable.
X = effluent_df.loc[:, ['d1', 'd2']].to_numpy()
n = X.shape[0]  # number of rows (samples)
p = X.shape[1]  # number of columns (difference variables)
alpha = 0.05    # significance level used by the test and the intervals below

In [6]:
# Column means of the differences, kept as a (p, 1) column vector so that it
# can be used directly in the matrix products below.
xbar = X.mean(axis=0).reshape(-1, 1)
xbar

array([[-9.36363636],
       [13.27272727]])

In [7]:
# Covariance matrix of the differences; rowvar=False tells np.cov that each
# column of X is a variable and each row is an observation.
S = np.cov(X, rowvar=False)
S

array([[199.25454545,  88.30909091],
       [ 88.30909091, 418.61818182]])

$$
\begin{aligned}
    H_{0}: & \boldsymbol{\delta} = \textbf{0} \\
    H_{a}: & \boldsymbol{\delta} \ne \textbf{0}
\end{aligned}
$$

$$
T^{2}
=
n
{(\bar{\textbf{d}} - \boldsymbol{\delta})}^{\prime}
\textbf{S}_{d}^{-1}
(\bar{\textbf{d}} - \boldsymbol{\delta})
=
n
{(\bar{\textbf{d}} - \textbf{0})}^{\prime}
\textbf{S}_{d}^{-1}
(\bar{\textbf{d}} - \textbf{0})
=
n
{\bar{\textbf{d}}}^{\prime}
\textbf{S}_{d}^{-1}
\bar{\textbf{d}}
$$

In [8]:
# Hotelling's T^2 statistic, n * dbar' S^{-1} dbar.  The product S^{-1} dbar
# is obtained by solving S z = dbar instead of forming the explicit inverse,
# which is the numerically preferred way to apply an inverse to a vector.
T2 = (n * xbar.T @ la.solve(S, xbar)).item()
T2

13.639312140174692

In [14]:
# Critical value that T^2 is compared against: the upper-alpha quantile of
# F(p, n-p) scaled by (n-1)p/(n-p).  Despite its name, f_crit holds this
# scaled value, not the raw F quantile.
f_crit = (
    ((n - 1) * p) / (n - p)
    * stats.f.ppf(1 - alpha, dfn=p, dfd=n - p)
)
f_crit

9.458877175763888

In [13]:
# Reject H0 when the observed T^2 exceeds the critical value.
print('Reject the null hypothesis.' if T2 > f_crit
      else 'Fail to reject the null hypothesis.')

Reject the null hypothesis.


In [11]:
# Construct the 95% simultaneous confidence intervals: one row per component,
# with the lower bound in column 0 and the upper bound in column 1.
half_width = np.sqrt(f_crit) * np.sqrt(np.diag(S)[:, np.newaxis] / n)
ci95 = np.hstack([xbar - half_width, xbar + half_width])

In [12]:
# Show each simultaneous interval three ways: the symbolic formula, the same
# formula with the sample values substituted, and the resulting bounds.
for i in range(p):
    display(Math(fr'\delta_{{{i+1}}}: \bar{{d}}_{{{i+1}}}'
                  + r'\pm'
                  + fr'\sqrt{{\frac{{(n-1)p}}{{n-p}}F_{{p, n-p}}(\alpha)}}'
                  # The quantity under this root is the i-th diagonal entry of
                  # S, i.e. a variance, so it is labelled s^2 rather than S.
                  + fr'\sqrt{{\frac{{s_{{d_{i+1}}}^{{2}}}}{{n}}}} ='
                  + fr'{xbar[i].item():.2f}'
                  + r'\pm'
                  + fr'\sqrt{{{f_crit:.2f}}}'
                  # Format the variance directly; rounding first and then
                  # formatting would round the value twice.
                  + fr'\sqrt{{\frac{{{S[i, i]:.2f}}}{{{n}}}}}'
                  + r'\hspace{0.4cm}\text{or}\hspace{0.4cm}'
                  + fr'({ci95[i,0]:.2f}, {ci95[i,1]:.2f})'
                  ))

<IPython.core.display.Math object>

<IPython.core.display.Math object>