# Example 6.11

In [1]:
import numpy as np
from collections import namedtuple
from IPython.display import display, Math
from scipy import stats
from typing import Literal

In [2]:
# Record type for one nursing home group's summary statistics:
# the group size (n), its mean vector (xbar), and its matrix S.
Data = namedtuple(typename='Data', field_names=('n', 'xbar', 'S'))

- $X_{1} = $ cost of nursing labor
- $X_{2} = $ cost of dietary labor
- $X_{3} = $ cost of plant operation and maintenance labor
- $X_{4} = $ cost of housekeeping and laundry labor

In [3]:
# Map every cost category to the (0-based) position at which its value is
# stored within each group's statistics.
cost_category_index = {
    category: position
    for position, category in enumerate((
        'cost of nursing labor',
        'cost of dietary labor',
        'cost of plant operation and maintenance labor',
        'cost of housekeeping and laundry labor',
    ))
}
# Number of cost categories.
p = len(cost_category_index)

Group
- $\ell = 1$: private
- $\ell = 2$: nonprofit
- $\ell = 3$: government

In [4]:
# Map every group name to its 1-based group label (the subscript ell).
group_category_index = {
    group_name: ell
    for ell, group_name in enumerate(('private', 'nonprofit', 'government'), start=1)
}

In [5]:
def create_nursing_home_data() -> "dict[str, Data]":
    '''Return the summary statistics for the three nursing home groups.

    Returns
    -------
    dict
        Maps each group name ('private', 'nonprofit', 'government') to a
        Data tuple holding the group size n, the (4, 1) mean column vector
        xbar, and the symmetric (4, 4) matrix S.
    '''
    def symmetrize(lower: np.ndarray) -> np.ndarray:
        # Only the lower triangle is typed in below; mirror the strictly-lower
        # part across the diagonal to obtain the full symmetric matrix.
        return np.tril(lower) + np.tril(lower, -1).T

    # Group sizes.
    n1, n2, n3 = 271, 138, 107

    # Group mean vectors, stored as column vectors.
    xbar1 = np.array([2.066, 0.480, 0.082, 0.360])[:,np.newaxis]
    xbar2 = np.array([2.167, 0.596, 0.124, 0.418])[:,np.newaxis]
    xbar3 = np.array([2.273, 0.521, 0.125, 0.383])[:,np.newaxis]

    S1 = symmetrize(np.array([[ 0.291, 0.000, 0.000, 0.000],
                              [-0.001, 0.011, 0.000, 0.000],
                              [ 0.002, 0.000, 0.001, 0.000],
                              [ 0.010, 0.003, 0.000, 0.010]]))

    S2 = symmetrize(np.array([[0.561, 0.000, 0.000, 0.000],
                              [0.011, 0.025, 0.000, 0.000],
                              [0.001, 0.004, 0.005, 0.000],
                              [0.037, 0.007, 0.002, 0.019]]))

    S3 = symmetrize(np.array([[0.261,  0.000, 0.000, 0.000],
                              [0.030,  0.017, 0.000, 0.000],
                              [0.003, -0.000, 0.004, 0.000],
                              [0.018,  0.006, 0.001, 0.013]]))

    # Store all of our nursing home data in a dictionary keyed by group name.
    return {'private': Data(n=n1, xbar=xbar1, S=S1),
            'nonprofit': Data(n=n2, xbar=xbar2, S=S2),
            'government': Data(n=n3, xbar=xbar3, S=S3)}

In [6]:
# Build the per-group summary statistics (n, xbar, S), keyed by group name.
nursing_home_data = create_nursing_home_data()

In [7]:
# g: how many groups are being compared.
g = len(nursing_home_data)

# p: how many measurements are recorded within each group.
p = len(cost_category_index)

# n: total number of observations, pooled over every group.
n = sum(group.n for group in nursing_home_data.values())

# xbar: overall mean vector, i.e. the group means weighted by group size.
xbar = sum(group.n*group.xbar for group in nursing_home_data.values())/n

In [8]:
def construct_ci(data: "dict[str, Data]",
                 alpha: float,
                 group1: str,
                 group2: str,
                 cost_category: str) -> np.ndarray:
    r'''Construct the 100(1-\alpha)% simultaneous Bonferroni CI for a difference in treatment effects.

    The interval is for tau_{group1,k} - tau_{group2,k}, where k is the
    selected cost category. The worked computation is also rendered as
    LaTeX through IPython's display machinery.

    Parameters
    ----------
    data : dict
        Maps each group name to a Data(n, xbar, S) tuple. All sample
        quantities (n, g, the overall mean and W) are derived from this
        argument rather than from notebook globals.
    alpha : float
        Overall significance level; it is divided by p*g*(g-1) to obtain
        the tail probability used for each interval.
    group1, group2 : str
        Names of the two groups to compare (keys of group_category_index
        and of data).
    cost_category : str
        Name of the cost variable (key of cost_category_index).

    Returns
    -------
    np.ndarray
        Array of shape (2,) holding the lower and upper interval limits.

    Raises
    ------
    AssertionError
        If a group name or the cost category is not a recognised key.
    '''

    # Make sure input group strings and cost category string are valid entries.
    assert group1 in group_category_index, f'Invalid group1 value: {group1}'
    assert group2 in group_category_index, f'Invalid group2 value: {group2}'
    assert cost_category in cost_category_index, f'Invalid cost category: {cost_category}'
    assert group1 in data, f'No data supplied for group1: {group1}'
    assert group2 in data, f'No data supplied for group2: {group2}'

    # Derive every sample quantity from `data` so the result does not depend
    # on notebook-level state. These locals intentionally use the textbook
    # symbols (and shadow any same-named globals).
    g = len(data)                                       # number of groups
    p = len(cost_category_index)                        # number of variables
    n = sum(t.n for t in data.values())                 # total sample size
    xbar = sum(t.n*t.xbar for t in data.values())/n     # overall mean vector

    first, second = data[group1], data[group2]

    # Estimated treatment effects: group mean minus overall mean.
    tau_1 = first.xbar - xbar
    tau_2 = second.xbar - xbar

    # Within-groups sum of squares and cross-products matrix.
    W = sum([(t.n - 1)*t.S for t in data.values()])

    # Select the variable of interest.
    group1_idx = group_category_index.get(group1)
    group2_idx = group_category_index.get(group2)
    cost_idx = cost_category_index.get(cost_category)

    diff = (tau_1[cost_idx] - tau_2[cost_idx]).item()
    w_kk = W[cost_idx, cost_idx]
    std_err = np.sqrt(((1/first.n) + (1/second.n))*w_kk/(n-g))
    # Upper alpha/(p*g*(g-1)) percentile of the t distribution with n - g d.f.
    t_crit = stats.t.ppf(1-alpha/(p*g*(g-1)), df=n - g)
    ci = diff + np.array([-1, 1])*t_crit*std_err

    display(Math(r'\begin{array}{rl}'
             fr'\tau_{{ {group1_idx} {cost_idx+1}}} - \tau_{{ {group2_idx} {cost_idx+1}}} '
             fr'\text{{ belongs to }} '
             fr'\hat{{\tau}}_{{ {group1_idx} {cost_idx+1}}} - \hat{{\tau}}_{{ {group2_idx} {cost_idx+1}}} \pm '
             f't_{{ {n-g} }} ( {alpha/(p*g*(g-1)):.5f} ) &'
             fr'\sqrt{{ \left( \frac{{1}}{{n_{{ {group1_idx} }}}} + \frac{{1}}{{n_{{ {group2_idx} }}}} \right) \frac{{w_{{ {cost_idx+1} {cost_idx+1} }}}}{{n - g}} }} \\'
             fr' & = {diff:.3f} \pm {t_crit:.2f} ( {std_err:.5f} ) \\'
             fr' & = {diff:.3f} \pm {t_crit*std_err:.2f} \text{{, or }} ({ci[0]:.3f}, {ci[1]:.3f})'
             r'\end{array}'
             ))
    return ci

In [9]:
# Overall significance level for the simultaneous intervals.
alpha = 0.05

# Private vs. government, plant operation and maintenance labor.
ci13 = construct_ci(
    data=nursing_home_data,
    alpha=alpha,
    group1='private',
    group2='government',
    cost_category='cost of plant operation and maintenance labor',
)

<IPython.core.display.Math object>

In [10]:
# Private vs. nonprofit, plant operation and maintenance labor.
ci12 = construct_ci(
    data=nursing_home_data,
    alpha=alpha,
    group1='private',
    group2='nonprofit',
    cost_category='cost of plant operation and maintenance labor',
)

<IPython.core.display.Math object>

In [11]:
# Nonprofit vs. government, plant operation and maintenance labor.
ci23 = construct_ci(
    data=nursing_home_data,
    alpha=alpha,
    group1='nonprofit',
    group2='government',
    cost_category='cost of plant operation and maintenance labor',
)

<IPython.core.display.Math object>