# Exercise 5.30

In [1]:
import pathlib
import numpy as np
import pandas as pd
import numpy.linalg as la
from IPython.display import display, Math
from scipy import stats

Energy consumption in 2001, by state, from the major sources
- $x_{1}$ = petroleum
- $x_{2}$ = natural gas
- $x_{3}$ = hydroelectric power
- $x_{4}$ = nuclear electric power

In [2]:
xbar = np.array([[0.766], [0.508], [0.438], [0.161]])
S = np.array([[0.856, 0.635, 0.173, 0.096],
              [0.635, 0.568, 0.128, 0.067],
              [0.173, 0.127, 0.171, 0.039],
              [0.096, 0.067, 0.039, 0.043]])
n, p = 50, 4

## (a)

Large sample Bonferroni confidence intervals (use normal distribution)
$$
\textbf{a}^{\prime} \bar{\textbf{x}}
\pm
z(\alpha/2m)
\sqrt{\frac{\textbf{a}^{\prime} \textbf{S} \textbf{a}}{n}}
$$

Setup a matrix like what's in (3-38) on page 144
$$
\textbf{A}
=
\begin{bmatrix}
    \textbf{a}_{1}^{\prime} \\
    \textbf{a}_{2}^{\prime} \\
    \textbf{a}_{3}^{\prime} \\
    \textbf{a}_{4}^{\prime} \\
    \textbf{a}_{5}^{\prime} \\
    \textbf{a}_{6}^{\prime}
\end{bmatrix}
=
\begin{bmatrix}
    1 &  0 & 0 & 0 \\
    0 &  1 & 0 & 0 \\
    0 &  0 & 1 & 0 \\
    0 &  0 & 0 & 1 \\
    1 &  1 & 1 & 1 \\
    1 & -1 & 0 & 0
\end{bmatrix}
$$

In [3]:
# Stack all the different vectors a we want into the matrix A.
A = np.vstack([np.eye(4),
               np.array([1, 1, 1, 1]).reshape(1, p),
               np.array([1, -1, 0, 0]).reshape(1, p)
               ])
A

array([[ 1.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.],
       [ 0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1., -1.,  0.,  0.]])

In [4]:
alpha = 0.05
m = A.shape[0]
z_crit = stats.norm().ppf(1-alpha/(2*m))

In [5]:
bonf_lrg_ci = A @ xbar + np.array([-1, 1]) * z_crit * np.sqrt(np.diag(A @ S @ A.T).reshape(m, 1)/ n)
bonf_lrg_ci

array([[0.4208014 , 1.1111986 ],
       [0.22680583, 0.78919417],
       [0.28371269, 0.59228731],
       [0.08363111, 0.23836889],
       [1.13494782, 2.61105218],
       [0.11158266, 0.40441734]])

Alternatively, we could have also looped through the rows of $A$ and used the formula above
```python
for i in range(A.shape[0]):
    print(A[i,:] @ xbar + np.array([-1, 1]) * stats.norm().ppf(1-alpha/(2*m)) * np.sqrt(A[i,:] @ S @ A[i,:].T / n))
```

In [6]:
for i in range(bonf_lrg_ci.shape[0]):
    display(Math(fr'{(A[i] @ xbar).item():.2f} \pm {stats.norm.ppf(1-alpha/(2*m)):.2f} '\
                 fr'\frac{{\sqrt{{{A[i] @ S @ A[i].T:.2f}}}}}{{\sqrt{{{n}}}}}\hspace{{0.5cm}} \text{{contains}}'\
                 fr'\hspace{{0.5cm}}\mu_{i+1}\hspace{{1cm}}\text{{or}}'\
                 fr'\hspace{{1cm}}{bonf_lrg_ci[i,0]:.2f} \leq \mu_{i+1} '\
                 fr'\leq {bonf_lrg_ci[i,1]:.2f}'))

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

where $\mu_{5} = \mu_{1} + \mu_{2} + \mu_{3} + \mu_{4}$ and $\mu_{6} = \mu_{1} - \mu_{2}$.

## (b)

Large sample simultaneous $T^{2}$ confidence intervals (use $\chi^{2}$ distribution)
$$
\textbf{a}^{\prime} \bar{\textbf{x}}
\pm
\sqrt{\chi_{p}^{2}(\alpha/2m)}
\sqrt{\frac{\textbf{a}^{\prime} \textbf{S} \textbf{a}}{n}}
$$

In [7]:
chi2_crit = stats.chi2(df=p).ppf(1-alpha/m)

In [8]:
T2_lrg_ci = A @ xbar + np.array([-1, 1]) * np.sqrt(chi2_crit) * np.sqrt(np.diag(A @ S @ A.T).reshape(m, 1)/ n)
T2_lrg_ci

array([[0.28178339, 1.25021661],
       [0.11356361, 0.90243639],
       [0.22157827, 0.65442173],
       [0.05247319, 0.26952681],
       [0.83772023, 2.90827977],
       [0.05261763, 0.46338237]])

In [9]:

for i in range(T2_lrg_ci.shape[0]):
    display(Math(fr'{(A[i] @ xbar).item():.2f} \pm \sqrt{{{stats.chi2(df=p).ppf(1-alpha/m):.2f}}} '\
                 fr'\frac{{\sqrt{{{A[i] @ S @ A[i].T:.2f}}}}}{{\sqrt{{{n}}}}}\hspace{{0.5cm}} \text{{contains}}'\
                 fr'\hspace{{0.5cm}}\mu_{i+1}\hspace{{1cm}}\text{{or}}'\
                 fr'\hspace{{1cm}}{T2_lrg_ci[i,0]:.2f} \leq \mu_{i+1} '\
                 fr'\leq {T2_lrg_ci[i,1]:.2f}'))

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

where $\mu_{5} = \mu_{1} + \mu_{2} + \mu_{3} + \mu_{4}$ and $\mu_{6} = \mu_{1} - \mu_{2}$.

In [10]:
# How much shorter is the Bonferroni interval than the T^{2}?
z_crit / np.sqrt(chi2_crit)

0.7129011885927707