# Wine ordered by customers who have visited "Restaurant A" twice.

In [None]:
# Enable the commands below when running this program on Google Colab.
# !pip install arviz==0.7
# !pip install pymc3==3.8
# !pip install Theano==1.0.4

import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

import pymc3 as pm

import math

# Plot styling: apply matplotlib's 'seaborn-darkgrid' style sheet to all figures.
# NOTE(review): newer matplotlib releases renamed this style sheet
# ('seaborn-v0_8-darkgrid') — confirm against the installed version.
plt.style.use('seaborn-darkgrid')
# Print NumPy arrays and pandas objects with 3 digits of precision.
np.set_printoptions(precision=3)
pd.set_option('display.precision', 3)

In [None]:
wines = ['Red', 'Rose', 'White']

# Contingency table of wine orders (cell values are customer counts).
# NOTE(review): rows are presumably the first-visit order and columns the
# second-visit order — confirm against the data source.
data = pd.DataFrame([[13, 6, 21], [7, 17, 7], [13, 6, 13]], columns=wines, index=wines)
display(data)

# Table dimensions are read from the data instead of being hard-coded:
#   a = kinds of wines (ordered at first time), b = kinds of wines (ordered at second time)
a, b = data.shape
N = data.sum().sum()  # total number of observations in the table

# Cell counts flattened row by row (row 0 left-to-right, then row 1, ...),
# i.e. the order [n11, n12, n13, n21, ..., n33] that the model's `p` vector uses.
observed = data.to_numpy().ravel().tolist()
print(observed)

## Bayesian analysis

In [None]:
# Fixed seed so that the posterior draws (and every number derived from them
# in the cells below) are reproducible across kernel restarts.
RANDOM_SEED = 1234

with pm.Model() as model:
  # Prior distribution: a * b independent Uniform(0, 1) weights, normalised so
  # that the cell probabilities `p` sum to 1.
  # NOTE(review): normalised uniforms are not the same prior as a flat
  # Dirichlet on the simplex — confirm this choice is intended.
  p_ = pm.Uniform('p_', 0, 1, shape=(a * b))
  p = pm.Deterministic('p', p_ / pm.math.sum(p_))

  # Likelihood: the N observations are distributed over the a * b cells.
  x = pm.Multinomial('x', n=N, p=p, observed=observed)

  # Marginal probability (row sums of the row-major flattened table)
  p1d = pm.Deterministic('p1d', p[0] + p[1] + p[2])  # p1. = p11 + p12 + p13
  p2d = pm.Deterministic('p2d', p[3] + p[4] + p[5])  # p2. = p21 + p22 + p23
  p3d = pm.Deterministic('p3d', p[6] + p[7] + p[8])  # p3. = p31 + p32 + p33

  # Marginal probability (column sums)
  pd1 = pm.Deterministic('pd1', p[0] + p[3] + p[6])  # p.1 = p11 + p21 + p31
  pd2 = pm.Deterministic('pd2', p[1] + p[4] + p[7])  # p.2 = p12 + p22 + p32
  pd3 = pm.Deterministic('pd3', p[2] + p[5] + p[8])  # p.3 = p13 + p23 + p33

  # Pearson's residual: e_ij = (p_ij - p_i. * p_.j) / sqrt(p_i. * p_.j).
  # The expected-under-independence probabilities are stacked into one tensor
  # (same cell order as `p`) so the arithmetic below is tensor-to-tensor.
  pp = pm.math.stack([p1d * pd1, p1d * pd2, p1d * pd3,
                      p2d * pd1, p2d * pd2, p2d * pd3,
                      p3d * pd1, p3d * pd2, p3d * pd3])
  e = pm.Deterministic('e', (p - pp) / pm.math.sqrt(pp))

  # Cramer's association coefficient: V = sqrt(sum(e_ij^2) / (min(a, b) - 1))
  V = pm.Deterministic('V', pm.math.sqrt(pm.math.sum(e**2) / (min(a, b) - 1)))

  # 21000 draws on each of 5 chains, seeded for reproducibility.
  trace = pm.sample(21000, chains=5, random_seed=RANDOM_SEED)

In [None]:
# Drop the first 1000 stored draws of the trace; all later cells analyse `chain`.
chain = trace[1000:]
# Trace plots of the sampled variables.
pm.traceplot(chain)
plt.show()

In [None]:
# Posterior summary table for the cell probabilities `p`, Cramer's V and the
# row (p1d..p3d) and column (pd1..pd3) marginal probabilities.
pm.summary(chain, var_names=['p', 'V', 'p1d', 'p2d', 'p3d', 'pd1', 'pd2', 'pd3'])

### Independence and association

In [None]:
# Box plot of the posterior samples of every Pearson residual e_ij,
# one box per cell of the 3x3 table (e11, e12, ..., e33).
e_samples = chain['e']
cell_labels = ['e{}{}'.format(i, j) for i in (1, 2, 3) for j in (1, 2, 3)]
plt.boxplot(
    [e_samples[:, k] for k in range(len(cell_labels))],
    labels=cell_labels)
plt.show()

In [None]:
# Posterior mean of Cramer's association coefficient V.
print("Cramer's association coefficient: {:.3f}".format(chain['V'].mean()))
# Rule of thumb for reading V:
#   0.5  - 1.0 : strong association
#   0.25 - 0.5 : association
#   0.1  - 0.25: weak association
#   below 0.1  : very weak association
#   0          : no association

In [None]:
# Posterior probability that each Pearson residual is positive (egz) or
# negative (elz), laid out as a table with the same row/column labels as the data.
e_draws = chain['e']
table_shape = (len(wines), len(wines))

# Fraction of draws satisfying the condition, per cell, reshaped row by row.
egz = pd.DataFrame((e_draws > 0).mean(axis=0).reshape(table_shape),
                   columns=wines, index=wines)
elz = pd.DataFrame((e_draws < 0).mean(axis=0).reshape(table_shape),
                   columns=wines, index=wines)

for caption, table in (('e > 0', egz), ('e < 0', elz)):
  print(caption)
  display(table)

### RQ1: 一回目に「ロゼ」を選んだ客は二回目も「ロゼ」を選び「白」を避けること、一回目に「赤」を選んだ客は二回目は「ロゼ」を避けること

In [None]:
# Posterior samples of the Pearson residuals, one array per table cell.
# Names read e_<row>_<col>, following the row-major order of chain['e'].
e_red_red = chain['e'][:,0]
e_red_rose = chain['e'][:,1]
e_red_white = chain['e'][:,2]

e_rose_red = chain['e'][:,3]
e_rose_rose = chain['e'][:,4]
e_rose_white = chain['e'][:,5]

e_white_red = chain['e'][:,6]
e_white_rose = chain['e'][:,7]
e_white_white = chain['e'][:,8]

# Probability that all three statements hold simultaneously. The residuals are
# correlated across posterior draws, so the conjunction must be evaluated draw
# by draw (element-wise AND) and then averaged; multiplying the three marginal
# probabilities would only be valid if the statements were independent.
val_1 = ((e_rose_rose > 0) & (e_rose_white < 0) & (e_red_rose < 0)).mean()
print('prob(rose -> rose & rose !-> white & red !-> rose) = {:.3f}'.format(val_1))