# Paired 2 x 2 cross table
Alternative to the z-test and chi-square test

In [None]:
# Enable the commands below when running this program on Google Colab.
# !pip install arviz==0.7
# !pip install pymc3==3.8
# !pip install Theano==1.0.4

import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

import pymc3 as pm

import math

# Plot style: the dark-grid seaborn look. Matplotlib 3.6 renamed the bundled
# seaborn styles to 'seaborn-v0_8-*' and later releases dropped the old names,
# so use whichever spelling the installed version actually provides.
_darkgrid_style = ('seaborn-darkgrid'
                   if 'seaborn-darkgrid' in plt.style.available
                   else 'seaborn-v0_8-darkgrid')
plt.style.use(_darkgrid_style)

# Show three significant decimals in NumPy and pandas output.
np.set_printoptions(precision=3)
pd.set_option('display.precision', 3)

## Q. Agreement/disagreement with propositions A and B was collected. Analyse the relationship between agreement/disagreement with the two propositions.

In [None]:
# Observed 2 x 2 table: rows are the answers to proposition A,
# columns are the answers to proposition B.
row_labels = ['A Agree', 'A Disagree']
col_labels = ['B Agree', 'B Disagree']
data = pd.DataFrame([[55, 16], [14, 35]], index=row_labels, columns=col_labels)

# Flatten the table row by row, i.e. in the cell order (1,1), (1,2), (2,1), (2,2).
observed = [data.loc[row, col] for row in row_labels for col in col_labels]

display(data)

# Grand total of all four cells.
N = data.values.sum()

In [None]:
with pm.Model() as model:
  # Prior distribution: four independent Uniform(0, 1) weights, normalised so
  # that the cell probabilities (p11, p12, p21, p22) sum to 1.
  # NOTE(review): normalised uniforms are not the same prior as a flat
  # Dirichlet(1, 1, 1, 1) on the simplex -- confirm this is the intended prior.
  p_ = pm.Uniform('p_', 0, 1, shape=4)
  p = pm.Deterministic('p', p_ / pm.math.sum(p_))

  # Likelihood: the four observed counts form one multinomial draw of size N.
  x = pm.Multinomial('x', n=N, p=p, observed=observed)

  # Marginal probability of each row (answer to A) ...
  p1d = pm.Deterministic('p1d', p[0] + p[1])  # p1. = p11 + p12
  p2d = pm.Deterministic('p2d', p[2] + p[3])  # p2. = p21 + p22

  # ... and of each column (answer to B).
  pd1 = pm.Deterministic('pd1', p[0] + p[2])  # p.1 = p11 + p21
  pd2 = pm.Deterministic('pd2', p[1] + p[3])  # p.2 = p12 + p22

  # Pearson's residual: e_ij = (p_ij - p_i. * p_.j) / sqrt(p_i. * p_.j),
  # where p_i. * p_.j is the cell probability expected under independence.
  # pp is listed in the same cell order as p: (1,1), (1,2), (2,1), (2,2).
  pp = [p1d * pd1, p1d * pd2,
        p2d * pd1, p2d * pd2]
  e = pm.Deterministic('e', (p - pp) / pm.math.sqrt(pp))

  # Cramer's association coefficient. For a 2 x 2 table min(rows, cols) - 1 = 1,
  # so V reduces to the square root of the summed squared residuals.
  V = pm.Deterministic('V', pm.math.sqrt(pm.math.sum(e**2)))

  # Fixed seed so that a fresh "Restart & Run All" reproduces the same draws,
  # and therefore the same summaries and plots in the cells below.
  trace = pm.sample(21000, chains=5, random_seed=42)

In [None]:
# Drop the first draws of the trace before inspecting it; every later cell
# works on this shortened `chain`.
n_discard = 1000
chain = trace[n_discard:]

# Visual convergence check: trace plots for the sampled variables.
pm.traceplot(chain)
plt.show()

In [None]:
# Posterior summary of the cell probabilities (p), Pearson's residuals (e)
# and Cramer's association coefficient (V).
summary_vars = ['p', 'e', 'V']
pm.summary(chain, var_names=summary_vars)

### Independence and association

In [None]:
# If A_i and B_j are independent, the residual e_ij is 0.
# A positive residual means the cell occurs more often than independence would predict;
# a negative residual means it occurs less often.
cell_labels = ['e11', 'e12', 'e21', 'e22']
residual_draws = chain['e']

# One box per cell, drawn from that cell's column of posterior draws.
plt.boxplot(
    [residual_draws[:, k] for k in range(len(cell_labels))],
    labels=cell_labels)
plt.show()

# e11 > 0: A person who agrees with 'A' is likely to agree with 'B'.
# e22 > 0: A person who disagrees with 'A' is likely to disagree with 'B'.
# e12 < 0: A person who agrees with 'A' is less likely to disagree with 'B'.
# e21 < 0: A person who disagrees with 'A' is less likely to agree with 'B'.

In [None]:
# Share of posterior draws in which each Pearson residual is positive (egz)
# or negative (elz), laid out like the original 2 x 2 table.
residual_samples = chain['e']
table_axes = dict(columns=['B Agree', 'B Disagree'], index=['A Agree', 'A Disagree'])

# The mean over draws gives one value per cell in the order e11, e12, e21, e22;
# reshaping to (2, 2) puts them back into rows = A, columns = B.
egz = pd.DataFrame((residual_samples > 0).mean(axis=0).reshape(2, 2), **table_axes)
elz = pd.DataFrame((residual_samples < 0).mean(axis=0).reshape(2, 2), **table_axes)

print('e > 0')
display(egz)

print('e < 0')
display(elz)