In [None]:
from IPython.display import display, Markdown, Latex

import os

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Pick the location of the Dunnett's table and of the helper module depending
# on whether a local `src` directory exists in the current working directory.
if os.path.exists('src'):
    path_dunnetts_table = "src/Dunnetts_Table.csv"

    # Enable IPython's autoreload extension so edits to imported modules are
    # re-imported automatically before each cell runs.
    %load_ext autoreload
    %autoreload 2

    from src.stattests import f_test, dunnetts_q_value, dunnetts_test, _make_dunnetts_q_value
else:
    # No local `src`: the table is read from the `../input` directory instead
    # (presumably a hosted-notebook dataset mount — confirm).
    path_dunnetts_table = "../input/dunnetts-table/Dunnetts_Table.csv"

    # Clone the repository so the same helpers can be imported from it below.
    !git clone https://github.com/P-Mihail/medstats_ex.git
    from medstats_ex.src.stattests import f_test, dunnetts_q_value, dunnetts_test, _make_dunnetts_q_value # type: ignore

`Dunnetts_Table.csv` provides critical values of the two-tailed Dunnett’s test for several values of alpha.

columns:
* $\nu$ - degrees of freedom (N – m), where N is the total number of observations across all groups and m is the number of groups
* $\alpha$ - significance level
* *l* - number of groups including control

# 1. Table view variations

In [None]:
# View 1 -- the table as stored in the CSV: the first three columns
# (nu, alpha, l) form the index and the remaining data is squeezed to a Series.
df = pd.read_csv(path_dunnetts_table, index_col=[0, 1, 2]).squeeze()
display(Markdown("---\n\n### *1. raw pd.Series from csv file:*"))
display(df)
q_example = df.loc[(5., 0.05, 6)]
display(Markdown(f"For example: q($\\nu$=5, $\\alpha$=0.05, *l*=6) = `df.loc[5.,0.05,6]` = {q_example}"))

# View 2 -- textbook layout: rows keyed by (nu, alpha), one column per l.
df = (
    pd.read_csv(path_dunnetts_table, index_col=[0, 1, 2])
    .unstack(level=-1)
    .droplevel(level=0, axis=1)
)
display(Markdown("---\n\n### *2. like in statistics books*"))
display(df)
q_example = df.loc[(5., 0.05), 6]
display(Markdown(f"For example: q($\\nu$=5, $\\alpha$=0.05, *l*=6) = `df.loc[(5.,0.05), 6]` = {q_example}"))

# View 3 -- alpha becomes the outer index level so that each significance
# level can be sliced out as its own nu-by-l table.
df = (
    pd.read_csv(path_dunnetts_table, index_col=[1, 0, 2])
    .unstack(level=-1)
    .droplevel(level=0, axis=1)
)
display(Markdown("---\n\n### *3. separate tables by alpha*"))
# One sub-table per distinct alpha, in the order the alphas appear in the index.
df_001, df_005, df_01 = [df.loc[alpha] for alpha in df.index.unique(level=0)]
display(Markdown("For example $\\alpha$=0.05 `df_005`"))
display(df_005)
q_example = df_005.loc[5., 6]
display(Markdown(f"For example: q($\\nu$=5, $\\alpha$=0.05, *l*=6) = `df_005.loc[5., 6]` = {q_example}"))

In [None]:
# `df` is the alpha-first table from the previous cell: index level 0 is alpha,
# level 1 is nu, and the columns are l.
# The last nu entry is skipped before casting to int (presumably it is the
# infinite-degrees-of-freedom row, which int() cannot convert -- confirm).
nu_values = [int(nu) for nu in df.index.unique(1).values[:-1]]
alpha_values = df.index.unique(0).values
l_values = df.columns.values

domain_md = (
    "Domain of the table:"
    f"    \n* $\\nu$: {nu_values}"
    f"    \n* $\\alpha$: {alpha_values}"
    f"    \n* l: {l_values}"
)
display(Markdown(domain_md))

# 2. Interpolation of intermediate values

In [None]:
# Sanity check: at a point that is tabulated exactly (nu=5, alpha=0.05, l=6)
# the lookup must reproduce the table entry. A tolerance-based comparison is
# used instead of `==` so that floating-point round-off in the lookup or
# interpolation cannot cause a spurious failure.
np.testing.assert_allclose(dunnetts_q_value(5, 0.05, 6), 3.62)

# 3. Usage example

src: https://www.statology.org/dunnetts-test-r/

Suppose a teacher wants to know whether or not two new studying techniques have the potential to increase exam scores for her students. To test this, she randomly splits her class of 30 students into the following three groups:

* Control Group: 10 students
* New Study Technique 1: 10 students
* New Study Technique 2: 10 students

After one week of using their assigned study technique, each student takes the same exam.

In [None]:
# Exam scores for each study group (10 students per group).
data = {"control": [76, 77, 77, 81, 82, 82, 83, 84, 85, 89],
        "new_1": [81, 82, 83, 83, 83, 84, 87, 90, 92, 93],
        "new_2": [77, 78, 79, 88, 89, 90, 91, 95, 95, 98]}

# One column per group, one row per student.
df = pd.DataFrame(data)
display(df)

# Per-group summary statistics. The aggregations are given by name because
# passing NumPy callables (np.mean, np.std) to `agg` is deprecated in pandas:
# they are currently mapped to the pandas methods, but a future version will
# call them directly, which would silently change `std` from the sample
# estimate (ddof=1) to the population estimate (ddof=0).
# Row order (count, mean, std) matters: later cells unpack `agg.values`
# positionally.
agg = df.agg(['count', 'mean', 'std'])
display(agg)

In [None]:
plt.style.use('ggplot')

# Draw on an explicit Axes so the figure can be titled and labelled and stands
# on its own when the notebook is skimmed.
fig, ax = plt.subplots()
sns.boxplot(data=df, ax=ax)
# Trailing semicolon keeps the return value of `ax.set` out of the cell output.
ax.set(title="Exam scores by study group", xlabel="Group", ylabel="Exam score");

\* The first step is to test the hypothesis that there are statistically significant differences among the groups.

In [None]:
# `agg` was built with the aggregations count, mean, std (in that order), so
# its rows unpack into per-group sizes, means and standard deviations.
group_sizes, group_means, group_stds = agg.values
f_test(group_sizes, group_means, group_stds)

In [None]:
# Same per-group statistics as for the F-test (rows of `agg`: count, mean, std),
# plus the group labels; the group at position 0 is passed as the control.
group_names = [column for column in df.columns]
group_sizes, group_means, group_stds = agg.values
dunnetts_test(group_sizes, group_means, group_stds, names=group_names, ctrl_group=0)

* The control group and the first group show no statistically significant difference.
* The control group and the second group probably have a statistically significant difference.