In [None]:
import pathlib

import numpy as np
import pandas as pd
import scipy.stats
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Apply seaborn's default plot theme and render floats with one decimal place.
sns.set()
pd.set_option("display.float_format", "{:.1f}".format)

We begin by reading the data and summarizing the variables.

In [None]:
# Read the data set, preview its first rows, and show summary statistics
# computed separately for each value of the Judge column.
df = pd.read_csv(pathlib.Path('case0502.csv'))
print(df.head())
df.groupby('Judge').describe()

<ol type="a">
    <li>Is there evidence that women are underrepresented on the Spock judge’s venires compared to the venires of the other judges?</li>
    <li>Is there any evidence that there are differences in women’s representation  in the venires of the other six judges?</li>
</ol>

In [None]:
# Box plots of Percent for each judge; the returned Axes is bound to `_`
# so its repr is not echoed as the cell output.
_ = sns.boxplot(x='Judge', y='Percent', data=df)

## Is any judge different?

$
\begin{align}
H_0: &\mu_i = \mu_j \: \text{for all} \: i, j \in \{A, B, C, D, E, F, Spock\} \\
H_a: &\text{at least one} \: \mu_i \ne \mu_j \: \text{for} \: i, j \in \{A, B, C, D, E, F, Spock\} \\
\end{align}
$

In [None]:
# Switch float display to four decimals for the ANOVA table printed below.
pd.set_option("display.float_format", "{:.4f}".format)

# One-way ANOVA: regress Percent on Judge, then tabulate the fit.
model = smf.ols(formula='Percent ~ Judge', data=df).fit()
adf = sm.stats.anova_lm(model)
print(adf)

There is strong evidence that at least one judge's percentages are different ($F_{6,39}$ = 6.7184, $p$-value = 0.0001).

In [None]:
# Pull the residual sum of squares and its degrees of freedom out of the
# ANOVA table of the per-judge model.
sse = adf.at['Residual', 'sum_sq']
dof_sse = adf.at['Residual', 'df']

## Are judges A-F different?

$
\begin{align}
H_0: &\mu_A = \mu_B = \mu_C = \mu_D = \mu_E = \mu_F \: (\mu_{Spock} \: \text{unrestricted}) \\
H_a: &\text{at least one} \: \mu_i \ne \mu_j \: \text{for} \: i, j \in \{A, B, C, D, E, F\} \\
\end{align}
$

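The necessary data values for this hypothesis test are not directly produced by **sm.stats.anova_lm** and **smf.ols**.  The sum of squares (SSE) for the alternative hypothesis (full model in the book's parlance) is provided above.  A separate ANOVA fit based on a two-group model, with the Spock judge in one group and the other six judges pooled into a second group, supplies the residual sum of squares for the null hypothesis (the reduced model).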

In [None]:
# Collapse every judge other than "Spock's" into a single "Other" group.
df['Judge2'] = df['Judge'].where(df['Judge'] == "Spock's", 'Other')

# Fit the two-group model and print its ANOVA table.
model = smf.ols(formula='Percent ~ C(Judge2, Treatment)', data=df).fit()
adf2 = sm.stats.anova_lm(model)
print(adf2)

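There is strong evidence that the mean percentage of women on the Spock judge's venires differs from the mean percentage for the other six judges taken together ($F_{1,44}$ = 32.1454, $p$-value < 0.0001).  Note that this two-group ANOVA compares the Spock judge with judges A-F pooled; by itself it does not test for differences among judges A-F.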

In [None]:
# Box plot of Percent by judge (the returned Axes is the cell's output).
sns.boxplot(data=df, x='Judge', y='Percent')

In [None]:
# Overlay a kernel density estimate and a rug of the raw observations for
# each judge. `sns.distplot` is deprecated (and removed in recent seaborn),
# so the equivalent `kdeplot` + `rugplot` pair is used instead.
groups = list(df.groupby('Judge'))
# One colour per judge so that each rug matches its density curve.
palette = sns.color_palette(n_colors=len(groups))
for color, (judge, grp) in zip(palette, groups):
    ax = sns.kdeplot(grp['Percent'], color=color, label=judge)
    sns.rugplot(grp['Percent'], color=color, ax=ax)
# Explicit legend: labelled curves are not guaranteed to get one automatically.
ax.legend(title='Judge');

$\it{H_0}:  \mu_{Spock} = \mu_A = \mu_B = \mu_C = \mu_D = \mu_E = \mu_F$

$\it{H_a}:$  at least one $\mu$ is different

First, we perform the test manually.

In [None]:
# Manual one-way ANOVA table for Percent grouped by Judge.
index = ['Between Groups', 'Within Groups', 'Total']
columns = ['Sum of Squares', 'd.f.', 'Mean Square', 'F Statistic', 'p-value']
# Start from an all-NaN table; cells that are never filled in stay NaN.
dfss = pd.DataFrame(np.nan, index=index, columns=columns)

# Grand mean of all observations, used by the total and between-group sums.
mu = df['Percent'].mean()
# Group once and reuse the grouping for both per-group sums below.
by_judge = df.groupby('Judge')['Percent']

# Total SS: squared deviations of every observation from the grand mean.
dfss.at['Total', 'Sum of Squares'] = ((df['Percent'] - mu) ** 2).sum()

# Within-group SS: squared deviations from each judge's own mean.
ssg = [((pct - pct.mean()) ** 2).sum() for _, pct in by_judge]
dfss.at['Within Groups', 'Sum of Squares'] = sum(ssg)

# Between-group SS: squared deviation of each judge's mean from the grand
# mean, weighted by that judge's number of observations.
ssb = [len(pct) * (pct.mean() - mu) ** 2 for _, pct in by_judge]
dfss.at['Between Groups', 'Sum of Squares'] = sum(ssb)

n = len(df)
I = len(df.Judge.unique())
dfss['d.f.'] = [I - 1, n - I, n - 1]

dfss['Mean Square'] = dfss['Sum of Squares'] / dfss['d.f.']

f_stat = dfss.at['Between Groups', 'Mean Square'] / dfss.at['Within Groups', 'Mean Square']
dfss.at['Between Groups', 'F Statistic'] = f_stat

# Upper-tail probability of the F distribution gives the p-value.
dfss.at['Between Groups', 'p-value'] = scipy.stats.f.sf(
    f_stat,
    dfss.at['Between Groups', 'd.f.'],
    dfss.at['Within Groups', 'd.f.'],
)

dfss

In [None]:
# Cross-check with scipy's one-way ANOVA, passing one sample of Percent
# values per judge as a separate positional argument.
judges = df.Judge.unique()
pargs = [df['Percent'][df['Judge'] == name] for name in judges]
scipy.stats.f_oneway(*pargs)

The p-value of 6.1e-5 is sufficient to reject the null hypothesis and conclude that the mean percentage of at least one judge is not the same as the mean of the other judges.

We can also do this with statsmodels.

In [None]:
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Fit Percent on Judge and report the result as a Type II ANOVA table.
model = ols(formula='Percent ~ Judge', data=df).fit()
aov_table = sm.stats.anova_lm(model, typ=2)
print(aov_table)



$\it{H_0}:  \mu_{Spock} = \mu_0, \text{ where } \mu_0 = \mu_A = \mu_B = \mu_C = \mu_D = \mu_E = \mu_F$

$\it{H_a}:  \mu_{Spock} \neq \mu_0$


In [None]:
# Fresh all-NaN table for the second test; cells never filled in stay NaN.
index = ['Between Groups', 'Within Groups', 'Total']
columns = ['Sum of Squares', 'd.f.', 'Mean Square', 'F Statistic', 'p-value']
dfss = pd.DataFrame(np.nan, index=index, columns=columns)

n = len(df)
I = len(df.Judge.unique())
# Degrees of freedom per row: I - 2 for the difference between the two
# models, n - I for the per-judge model's residual, and n - 2 for the
# two-group model's residual.
dfss['d.f.'] = [I - 2, n - I, n - 2]
dfss

In order to facilitate further computations, we need to be able to apply a grouping for "Spock" or "not Spock".

In [None]:
df2 = df.set_index('Judge')


def fcn(x):
    """Return ``x`` unchanged when it equals "Spock's", otherwise "Other"."""
    return x if x == "Spock's" else "Other"


# Grouping by a callable applies it to each index label, so rows fall into
# "Spock's" or "Other"; sum the squared deviations from each group's mean.
ss = [
    (grp.Percent.sub(grp.Percent.mean()) ** 2).sum()
    for _, grp in df2.groupby(fcn)
]
dfss.at['Total', 'Sum of Squares'] = sum(ss)
dfss

In [None]:
# Squared deviations of Percent from each judge's own mean, summed per judge
# and then across judges.
ss = [
    (grp.Percent.sub(grp.Percent.mean()) ** 2).sum()
    for _, grp in df.groupby('Judge')
]
dfss.at['Within Groups', 'Sum of Squares'] = sum(ss)
dfss

In [None]:
# The "Between Groups" sum of squares is the "Total" entry minus the
# "Within Groups" entry.
ss_col = dfss['Sum of Squares']
dfss.at['Between Groups', 'Sum of Squares'] = ss_col['Total'] - ss_col['Within Groups']
dfss



In [None]:
# Mean square for every row: sum of squares divided by its degrees of freedom.
dfss['Mean Square'] = dfss['Sum of Squares'].div(dfss['d.f.'])

# F statistic: between-group mean square over within-group mean square.
f_stat = dfss.at['Between Groups', 'Mean Square'] / dfss.at['Within Groups', 'Mean Square']
dfss.at['Between Groups', 'F Statistic'] = f_stat

# Upper-tail probability of the F distribution gives the p-value.
dfss.at['Between Groups', 'p-value'] = scipy.stats.f.sf(
    f_stat,
    dfss.at['Between Groups', 'd.f.'],
    dfss.at['Within Groups', 'd.f.'],
)

dfss