In [None]:
# standard library imports

# 3rd party library imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Apply seaborn's default plot theme to the figures drawn in the cells below.
sns.set()

We begin by reading the data and summarizing the variables.

In [None]:
# Load the data set used throughout the notebook and preview the first rows.
#
# pandas >= 2.0 includes the literal string "None" in read_csv's default NA
# markers.  The hypotheses below refer to a "none" handicap group, so the
# control group is presumably labelled "None" in the CSV (TODO confirm against
# the file).  With the defaults that label would be parsed as NaN and silently
# dropped by every groupby, leaving four groups instead of five.  Restrict the
# NA markers to the empty string so the label survives as an ordinary category.
df = pd.read_csv('case0601.csv', keep_default_na=False, na_values=[''])
df.head()

In [None]:
# Descriptive statistics of every remaining column, one row per Handicap group.
df.groupby(by='Handicap').describe()

## Robustness to Assumptions

In [None]:
# Side-by-side view of Score by Handicap group:
# box plots on the left, kernel density estimates on the right.
fig, axes = plt.subplots(ncols=2, figsize=[12, 6.4])
box_ax, kde_ax = axes
_ = sns.boxplot(x='Handicap', y='Score', data=df, ax=box_ax)
_ = sns.kdeplot(x='Score', hue='Handicap', data=df, ax=kde_ax)

In [None]:
# Q-Q plot of all scores against a fitted normal, with a 45-degree reference line.
scores = df['Score']
sm.graphics.qqplot(scores, line='45', fit=True)
plt.show()

There do not appear to be any violations of normality, equal variance, or independence.

# Are any handicap groups different?

$
\begin{align}
H_0: &\: \mu_{amputee} = \mu_{crutches} = \mu_{hearing} = \mu_{none} = \mu_{wheelchair} \\
H_a: &\: \text{at least one} \: \mu_i \ne \mu_j \: \text{for} \: i, j \in \{amputee, crutches, hearing, none, wheelchair\} \\
\end{align}
$

In [None]:
# One-way ANOVA F-test: one sample of scores per Handicap group.
args = []
for _, grp in df.groupby('Handicap'):
    args.append(grp['Score'])
scipy.stats.f_oneway(*args)

This is reasonable evidence that at least one group may be different.

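Display 6.4: Compute the contrast between the average of the wheelchair and crutches means and the average of the amputee and hearing means. We start with the pooled standard deviation and its degrees of freedom.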


In [None]:
# Per-group summary of Score (count, mean, std, ...), indexed by Handicap.
# It was previously never defined, so this cell failed with NameError on a
# fresh kernel.  Selecting the 'Score' column before describe() keeps the
# result's columns flat ('count', 'mean', 'std', ...), which is what this cell
# and the contrast cells below index into.
grp_summary = df.groupby('Handicap')['Score'].describe()

# Pooled standard deviation:
#   sp^2 = sum_i (n_i - 1) * s_i^2 / (N - I)
# where N is the total sample size and I is the number of groups.
num = ((grp_summary['count'] - 1) * grp_summary['std'] ** 2).sum()
dof = grp_summary['count'].sum() - len(grp_summary)
sp = np.sqrt(num / dof)
print(sp, dof)

The coefficients for the linear combination, with the groups taken in alphabetical order (amputee, crutches, hearing, none, wheelchair), are $C_1 = -\frac{1}{2}$, $C_2 = \frac{1}{2}$, $C_3 = -\frac{1}{2}$, $C_4 = 0$, $C_5 = \frac{1}{2}$.

Estimate the linear combination.

In [None]:
# Attach the contrast coefficients as column 'C'.  They are assigned by
# position, so they follow the row order of grp_summary.
contrast_coefs = [-0.5, 0.5, -0.5, 0, 0.5]
grp_summary['C'] = contrast_coefs
grp_summary

In [None]:
# Estimate of the linear combination: sum of C_i * (group mean)_i.
weighted_means = grp_summary['mean'].mul(grp_summary['C'])
g = weighted_means.sum()
g

Find the standard error of the estimate.

In [None]:
# Standard error of the contrast: sp * sqrt(sum_i C_i^2 / n_i).
c_sq_over_n = grp_summary['C'].pow(2).div(grp_summary['count'])
se = sp * np.sqrt(c_sq_over_n.sum())
se

Construct a 95% confidence interval.

In [None]:
# 95% interval from a t distribution with `dof` degrees of freedom,
# centred on the estimate g and scaled by its standard error.
contrast_dist = scipy.stats.t(dof, loc=g, scale=se)
contrast_dist.interval(0.95)