In [1]:
import numpy as np
import pandas as pd
from scipy import stats as st

<img src="figs/chi2.png" alt="Drawing" style="height: 200px;"/>

In [2]:
# Observed contingency table: one row per response ("Yes"/"No"),
# one column per group ("hit", "smashes", "control").
df = pd.DataFrame(
    {
        "hit": [7, 43],
        "smashes": [16, 34],
        "control": [6, 44],
    },
    index=pd.Index(["Yes", "No"], name="response"),
)
df

Unnamed: 0_level_0,control,hit,smashes
response,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Yes,6,7,16
No,44,43,34


#### Calculate expected value
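* Expected count: $e_{rc}=\frac{R_r \times C_c}{n}$, where $R_r$ is the total of row $r$, $C_c$ is the total of column $c$, and $n$ is the grand total.
* Degrees of freedom: $(n_r-1)\times(n_c-1)$, where $n_r$ and $n_c$ are the numbers of rows and columns.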

In [3]:
# Grand total of all observed counts, followed by the table dimensions
# (number of rows, number of columns).
n = df.values.sum()
nr, nc = df.shape

In [4]:
# Expected counts under independence:
#   e[r, c] = (row r total / grand total) * (column c total)
# Plain 2-D ndarrays are used instead of np.matrix, which NumPy no longer
# recommends; shapes are kept as (1, n_cols) and (n_rows, 1) as before.
col_sum = df.sum(axis=0).values.reshape(1, -1)  # column totals as a row vector

row_totals = df.sum(axis=1)
row_prop = (row_totals / row_totals.sum()).values.reshape(-1, 1)  # row shares as a column vector

# Broadcasting (n_rows, 1) * (1, n_cols) yields the (n_rows, n_cols) outer product.
e = row_prop * col_sum

##### Calculate $\chi^2$

In [5]:
# Observed counts as a plain 2-D ndarray. `.values` already yields one, so the
# detour through np.matrix (a class NumPy no longer recommends) is dropped.
o = df.values

In [6]:
# Pearson's statistic: add up (expected - observed)^2 / expected over every cell.
cell_terms = (e - o) ** 2 / e
chi2 = sum(sum(cell_terms))  # inner sum collapses rows, outer sum collapses columns
chi2

7.779994300370477

In [7]:
# Upper-tail probability of the chi-square statistic.
# The degrees of freedom are derived from the table shape, (nr - 1) * (nc - 1),
# rather than hard-coded, and the survival function is used because
# 1 - cdf loses precision when the tail probability is small.
# note: for this 2 x 3 table (2 degrees of freedom) the critical value
# at alpha = 0.05 is 5.99
p = st.chi2.sf(chi2, df=(nr - 1) * (nc - 1))
p

0.02044540430346964

In [8]:
# Compare the p-value with the significance level and report the decision.
alpha = 0.05
verdict_template = (
    "reject null, with p-val = %.5f"
    if p < alpha
    else "cannot reject null, with p-val = %.5f"
)
print(verdict_template % p)

reject null, with p-val = 0.02045


##### Effect size
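* Cramér's V: $\sqrt{\frac{\chi^2}{n(k-1)}}$, where $n$ is the total number of observations and $k = \min(n_r, n_c)$ is the smaller of the number of rows and the number of columns.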
* Analogous to Cohen's d.

In [9]:
# Cramer's V: chi-square scaled by the sample size and by (k - 1),
# where k is the smaller table dimension.
k = min(nr, nc)
np.sqrt(chi2 / (n * (k - 1)))

0.2277424612783846

##### Use scipy

In [10]:
# Cross-check with SciPy on the same observed table; the result unpacks as
# (statistic, p-value, degrees of freedom, expected counts).
# NOTE: this rebinds chi2, p and e, which earlier cells computed by hand.
chi2, p, dof, e = st.chi2_contingency(observed=o)

In [11]:
# Display the p-value returned by st.chi2_contingency in the previous cell.
p

0.020445404303469625