# Example 6.12

In [1]:
import numpy as np
from collections import namedtuple
from IPython.display import display, Math
from scipy import linalg as la
from scipy import stats

In [2]:
# Record type for one nursing home group's summary statistics:
# the sample size (n), the sample mean vector (xbar) and the sample covariance matrix (S).
Data = namedtuple('Data', 'n xbar S')

- $X_{1} = $ cost of nursing labor
- $X_{2} = $ cost of dietary labor
- $X_{3} = $ cost of plant operation and maintenance labor
- $X_{4} = $ cost of housekeeping and laundry labor

In [3]:
# Map every cost category to the position at which its value is stored
# within each group's measurements.
cost_category_index = {
    category: position
    for position, category in enumerate((
        'cost of nursing labor',
        'cost of dietary labor',
        'cost of plant operation and maintenance labor',
        'cost of housekeeping and laundry labor',
    ))
}

# Number of cost categories.
p = len(cost_category_index)

Group
- $\ell = 1$: private
- $\ell = 2$: nonprofit
- $\ell = 3$: government

In [4]:
# Map every ownership group to its 1-based group number (the subscript ell).
group_category_index = {
    group: ell
    for ell, group in enumerate(('private', 'nonprofit', 'government'), start=1)
}

In [5]:
def create_nursing_home_data() -> 'dict[str, Data]':
    """Return the hard-coded summary statistics for the three nursing home groups.

    For each group the function stores the sample size, the 4x1 sample mean
    vector (a column vector) and the 4x4 sample covariance matrix. Each
    covariance matrix is typed in as its lower triangle only and is then
    mirrored into a full symmetric matrix.

    Returns:
        A dictionary keyed by group name ('private', 'nonprofit',
        'government') whose values are ``Data(n, xbar, S)`` records.
    """
    def symmetrize(lower):
        """Mirror a lower-triangular array into a full symmetric matrix."""
        # Only the strict lower triangle is transposed, so the diagonal is not doubled.
        return np.tril(lower) + np.tril(lower, -1).T

    # Sample sizes of the private, nonprofit and government groups.
    n1, n2, n3 = 271, 138, 107

    # Sample mean vectors, reshaped to column vectors of shape (4, 1).
    xbar1 = np.array([2.066, 0.480, 0.082, 0.360])[:, np.newaxis]
    xbar2 = np.array([2.167, 0.596, 0.124, 0.418])[:, np.newaxis]
    xbar3 = np.array([2.273, 0.521, 0.125, 0.383])[:, np.newaxis]

    # Sample covariance matrices, entered as lower triangles.
    S1 = symmetrize(np.array([[ 0.291, 0.000, 0.000, 0.000],
                              [-0.001, 0.011, 0.000, 0.000],
                              [ 0.002, 0.000, 0.001, 0.000],
                              [ 0.010, 0.003, 0.000, 0.010]]))

    S2 = symmetrize(np.array([[0.561, 0.000, 0.000, 0.000],
                              [0.011, 0.025, 0.000, 0.000],
                              [0.001, 0.004, 0.005, 0.000],
                              [0.037, 0.007, 0.002, 0.019]]))

    S3 = symmetrize(np.array([[0.261,  0.000, 0.000, 0.000],
                              [0.030,  0.017, 0.000, 0.000],
                              [0.003, -0.000, 0.004, 0.000],
                              [0.018,  0.006, 0.001, 0.013]]))

    # Store all of our nursing home data in a dictionary keyed by group name.
    return {
        'private': Data(n=n1, xbar=xbar1, S=S1),
        'nonprofit': Data(n=n2, xbar=xbar2, S=S2),
        'government': Data(n=n3, xbar=xbar3, S=S3),
    }

In [6]:
# Build the per-group summary statistics (n, xbar, S) once; the cells below read them from this dictionary.
nursing_home_data = create_nursing_home_data()

In [7]:
# g: how many groups are being compared.
g = len(nursing_home_data)

# p: how many measurements (cost categories) are recorded within each group.
p = len(cost_category_index)

# n: total number of observations, summed over the groups.
n = sum(group.n for group in nursing_home_data.values())

# xbar: overall mean vector, i.e. the group means weighted by group size.
xbar = sum(group.n * group.xbar for group in nursing_home_data.values()) / n

In [8]:
# Pooled covariance matrix: each group's S weighted by (n_l - 1),
# summed, then divided by (n - g).
S_pooled = sum(
    (group.n - 1) * group.S for group in nursing_home_data.values()
) / (n - g)

In [9]:
# Correction factor u: the first bracket depends on the sample sizes only,
# the second bracket on p and g only.
u = (
    sum(1/(group.n - 1) for group in nursing_home_data.values()) - (1/(n - g))
) * (
    (2*p**2 + 3*p - 1) / (6*(p + 1)*(g - 1))
)
display(Math(''.join((
    'u =',
    r'\left[ \sum_{\ell} \frac{1}{(n_{\ell} - 1)} - \frac{1}{\sum_{\ell}(n_{\ell} - 1)} \right]',
    r'\left[ \frac{2p^{2} + 3p - 1}{6(p+1)(g-1)} \right] = ',
    f'{u:.4f}',
))))

<IPython.core.display.Math object>

In [10]:
# Statistic M = (n - g) ln|S_pooled| - sum_l (n_l - 1) ln|S_l|.
# In the book, M is 289.3, and here it's 244.146. The difference has to do with np.log(la.det(S_pooled)).
# We don't have enough precision for each group sample covariance matrix.

# Compute each log-determinant term once and reuse it for both M and the rendered equation.
ln_det_S_pooled = np.log(la.det(S_pooled))
weighted_ln_det_S = sum((data.n - 1)*np.log(la.det(data.S)) for data in nursing_home_data.values())
M = (n - g)*ln_det_S_pooled - weighted_ln_det_S

# The numeric line shows the (n - g) multiplier explicitly so that the
# displayed arithmetic actually evaluates to the displayed value of M.
display(Math(r' M = '
             r'\left[ \sum_{\ell} (n_{\ell} - 1) \right]'
             r'\ln \left| \textbf{S}_{\text{pooled}} \right| -'
             r'\sum_{\ell} \left[ (n_{\ell} - 1) \ln \left| \textbf{S}_{\ell} \right| \right]'
             '='
             f'{n - g}({ln_det_S_pooled:.3f}) - ({weighted_ln_det_S:.3f})'
             '='
             f'{M:.1f}'
             ))

<IPython.core.display.Math object>

In [11]:
# Test statistic C: M scaled by the correction factor (1 - u).
C = (1 - u) * M
display(Math(''.join((
    'C =',
    '(1 - u)M =',
    f'(1 - {u:.4f}){M:.1f} =',
    f'{C:.1f}',
))))

<IPython.core.display.Math object>

In [12]:
# Degrees of freedom of the chi-square reference distribution.
# The expression evaluates to a float (e.g. 20.0) because of the 1/2 factor.
nu = (1/2)*p*(p+1)*(g-1)

# Render without a fractional part, matching how nu is formatted in the cells below.
display(Math(r'\nu ='
             r'\frac{1}{2}p(p+1)(g-1) ='
             f'{nu:.0f}'
             ))

<IPython.core.display.Math object>

In [13]:
# Significance level of the test.
alpha = 0.001

# Upper-alpha critical value of the chi-square distribution with nu degrees of freedom.
chi2_crit = stats.chi2.ppf(1 - alpha, nu)

# The rendered chain of equalities: symbolic form, with p and g substituted,
# with nu evaluated, and finally the numeric critical value.
display(Math(
    r'X_{\text{crit}}^{2} ='
    + '='.join((
        r'\chi_{ \nu }^{2}',
        r'\chi_{ p(p+1)(g-1)/2 }^{2}',
        fr'\chi_{{ {p}({p}+1)({g}-1)/2}}^{{2}} \left( {alpha} \right)',
        fr'\chi_{{ {nu:.0f} }}^{{2}} \left( {alpha} \right)',
        f'{chi2_crit:.3f}',
    ))
))

<IPython.core.display.Math object>

In [14]:
# Compare the test statistic with the critical value and report the decision.
# Both outcomes share one template so the two messages cannot drift apart
# (previously only one branch formatted nu without a fractional part).
reject_null = C > chi2_crit
relation = '>' if reject_null else r'\leq'
verdict = 'reject' if reject_null else 'fail to reject'

display(Math(fr'\text{{We have that }} C = {C:.3f} {relation} '
             r'X_{\text{crit}}^{2}'
             '='
             fr'\chi^{{2}}_{{{nu:.0f}}} \left( {alpha} \right) = {chi2_crit:.3f}'
             fr'\text{{, so we would {verdict} the null hypothesis that }} '
             r'\bm{\Sigma}_{1} = \bm{\Sigma}_{2} = \bm{\Sigma}_{3} = \bm{\Sigma}'
             ))

<IPython.core.display.Math object>