In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pandas import read_csv, concat

## Section 1: Descriptive results

In [2]:
## Load data from the CSV stored under the local `data` directory.
csv_path = os.path.join('data', 'ccnp.csv')
data = read_csv(csv_path)

### 1.1 Failure rate

In [3]:
from statsmodels.stats.proportion import proportions_ztest

## Summarize failure rate per group.
## K = sum of `total`, N = number of rows, % = mean of `total`.
gb = data.groupby('group').total.agg(['sum','count','mean']).round(3)
gb.columns = ['K','N','%']

## Perform proportions test.
## The counts are read from the summary table rather than typed in by hand,
## so the test always reflects the data that was actually loaded.
zval, pval = proportions_ztest(gb['K'].values, gb['N'].values)
print('z = %0.3f, p = %0.3f' %(zval, pval))

## Display the summary table as the cell output.
gb

z = 0.672, p = 0.502


Unnamed: 0_level_0,K,N,%
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
healthy,6,20,0.3
patient,10,45,0.222


### 1.2 Failure rate by item

In [4]:
## Column-wise mean of every column whose name matches 'infreq'.
infreq_items = data.filter(regex='infreq')
infreq_items.mean().round(3)

infreq_01    0.000
infreq_02    0.121
infreq_03    0.197
infreq_04    0.000
infreq_05    0.000
infreq_06    0.000
infreq_07    0.000
infreq_08    0.188
dtype: float64

### 1.3 Bayes factors

Identify the difference in proportions that corresponds to a [small effect size](https://ncss-wpengine.netdna-ssl.com/wp-content/themes/ncss/pdf/Procedures/PASS/Tests_for_Two_Proportions_using_Effect_Size.pdf) (Cohen's $h \geq 0.2$).


In [5]:
## Base probability (mean of the per-group proportions) and the offset
## applied symmetrically around it.
p = gb['%'].mean()
o = 0.05

## Effect size h: difference between the arcsine-transformed
## proportions p + o and p - o.
phi_upper = 2 * np.arcsin(np.sqrt(p + o))
phi_lower = 2 * np.arcsin(np.sqrt(p - o))
h = phi_upper - phi_lower
print('h = %0.3f' %h)

h = 0.229


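Compute Bayes factors comparing three point hypotheses: both groups share the base rate $p$ (m0), the patient rate is $p + o$ and the healthy rate is $p - o$ (m1), or the patient rate is $p - o$ and the healthy rate is $p + o$ (m2).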

In [6]:
from scipy.stats import binom

## Observed counts (K) and sample sizes (N) per group, read from the summary
## table by group label instead of being hard-coded, so they cannot drift
## from the loaded data.
k_pat, n_pat = int(gb.loc['patient', 'K']), int(gb.loc['patient', 'N'])
k_hc, n_hc = int(gb.loc['healthy', 'K']), int(gb.loc['healthy', 'N'])

## Compute model (log-)likelihood.
## m0: both groups share the base rate p.
## m1: patient rate is p + o, healthy rate is p - o.
## m2: patient rate is p - o, healthy rate is p + o.
m0 = binom(n_pat, p).logpmf(k_pat) + binom(n_hc, p).logpmf(k_hc)
m1 = binom(n_pat, p + o).logpmf(k_pat) + binom(n_hc, p - o).logpmf(k_hc)
m2 = binom(n_pat, p - o).logpmf(k_pat) + binom(n_hc, p + o).logpmf(k_hc)

## Compute Bayes factors.
## Each model fixes its rates, so every factor is the ratio of two
## likelihoods, obtained by exponentiating the log-likelihood difference.
print('BF(m1 / m0) = %0.3f' %np.exp(m1 - m0))
print('BF(m2 / m0) = %0.3f' %np.exp(m2 - m0))
print('BF(m2 / m1) = %0.3f' %np.exp(m2 - m1))

BF(m1 / m0) = 0.347
BF(m2 / m0) = 1.266
BF(m2 / m1) = 3.648
