In [1]:
import numpy as np
import scipy.stats as stats
from math import sqrt

In [2]:
alpha = 0.05

## 7.14

In Example 5.2.2, we assumed that for the height and weight data of Table 3.1,
the population covariance matrix is:

$\sum = \left( \begin{array}{cc} 20 & 100 \\ 100 & 1000 \end{array} \right)$

Test this as a hypothesis using (7.2).

In [3]:
E_0 = np.asarray([[20, 100], [100, 1000]])
data = np.loadtxt('T3_1_HEIGHTWT.dat', dtype=int)[:, 1:]
nu, p = data.shape
data.shape

(20, 2)

In [4]:
S = np.cov(data, rowvar=False)
eig_vals, _ = np.linalg.eig(S @ np.linalg.pinv(E_0))
eig_vals = eig_vals[:p]

In [5]:
u = nu * (np.sum(eig_vals - np.log(eig_vals)) - p)
u

11.677624284744414

In [6]:
u_t = (1 - (1 / (6 * nu - 1) * (2 * p + 1 - (2 / (p + 1))))) * u
u_t

11.252388666532433

In [7]:
df = (0.5 * p * (p + 1))
xi_critical = stats.chi2.ppf(1 - alpha, df=df)
xi_critical

7.814727903251179

In [8]:
if u_t > xi_critical:
    print('Reject H_0')
else:
    print('Accept H_0')

Reject H_0


## 7.15

$H_{0} : \sum = σ^2I$

and

$H_{0} : C \sum C^{'} = σ^2I$

for the calculator speed data of Table 6.12.

In [3]:
data = np.loadtxt('T6_12_CALCSPD.DAT', dtype=int)
n, p = data.shape
nu = n - 1
data.shape

(5, 4)

part I

In [4]:
S = np.cov(data, rowvar=False)

In [5]:
u = (p ** p)  * (np.linalg.det(S) / (np.trace(S) ** p)) 
u_t = -(nu - (2 * (p**2) + p + 2) / (6 * p)) * np.log(u)
u, u_t

(5.9358646550531846e-05, 23.51878917015389)

In [6]:
eig_vals, _ = np.linalg.eig(S)
eig_vals = eig_vals[:p]

In [7]:
u = (p ** p)  * (np.prod(eig_vals) / (np.sum(eig_vals) ** p)) 
u_t = - (nu - (2 * (p**2) + p + 2) / (6 * p)) * np.log(u)
u, u_t

(5.935864655048137e-05, 23.51878917015594)

In [8]:
df = (0.5 * p * (p + 1) - 1)
xi_critical = stats.chi2.ppf(1 - alpha, df=df)
xi_critical

16.918977604620448

In [9]:
if u_t >= xi_critical:
    print('Reject H_0')
else:
    print('Accept H_0')

Reject H_0


part II

In [10]:
C = np.asarray([
    [3/sqrt(12), -1/sqrt(12), -1/sqrt(12), -1/sqrt(12)],
    [0, 2/sqrt(6), -1/sqrt(6), -1/sqrt(6)],
    [0, 0, 1/sqrt(2), -1/sqrt(2)],
])
S = np.cov(data, rowvar=False)
S_ort = C @ S @ C.T
p_ort = p - 1

In [11]:
u = (p_ort ** p_ort)  * (np.linalg.det(S_ort) / (np.trace(S_ort) ** p_ort)) 
u_t = - (nu - (2 * (p_ort**2) + p_ort + 2) / (6 * p_ort)) * np.log(u)
u, u_t

(0.47090219714321957, 2.0501187744088436)

In [12]:
df = (0.5 * p_ort * (p_ort + 1) - 1)
xi_critical = stats.chi2.ppf(1 - alpha, df=df)
xi_critical

11.070497693516351

In [13]:
if u_t >= xi_critical:
    print('Reject H_0')
else:
    print('Accept H_0')

Accept H_0


## 7.28

Test independence of $(y_1, y_2)$ and $(x_1, x_2)$ for the sons data in Table 3.7.

In [14]:
data = np.loadtxt('T3_7_SONS.DAT', dtype=int)
k = 2
n, p = data.shape
data.shape

(25, 4)

In [15]:
son_a = data[:, :2]
n_a, p_a = son_a.shape

son_b = data[:, 2:]
n_b, p_b = son_b.shape

nu = min(n_a, n_b) - k

In [16]:
S = np.cov(data, rowvar=False)
s_xx = S[:2, :2]
s_yy = S[-2:, -2:]

In [17]:
L = np.linalg.det(S) / (np.linalg.det(s_xx) * np.linalg.det(s_yy))
a2 = p**2 - (p_a ** 2 + p_b ** 2)
a3 = p**3 - (p_a ** 3 + p_b ** 3)
f = a2 * 0.5
c = 1 - (2 * a3 + 3 * a2) / (12 * f * nu)
L_t = -nu * c * np.log(L)
L, L_t

(0.37716288143958254, 19.989101840898154)

In [18]:
xi_critical = stats.chi2.ppf(1 - alpha, df=f)
xi_critical

9.487729036781154

In [19]:
if L_t > xi_critical:
    print('Reject H_0')
else:
    print('Accept H_0')

Reject H_0
