
For populations $i$ we form the following contingency table and marginals.

|       | $T_+$ | $T_-$ |       |
|-------|-------|-------|-------|
| $T_-$ | $a_i$ | $b_i$ | $g_i$ |
| $T_-$ | $c_i$ | $d_i$ | $h_i$ |
|       | $e_i$ | $f_i$ | $n_i$ |

Then we have the following closed form formulas:


$$F=\pm \sqrt{(\frac{g_1e_2 - g_2 e_1}{n_1n_2} + \frac{a_1}{n_1} - \frac{a_2}{n_2})^2  -4 (\frac{g_1}{n_1} - \frac{g_2}{n_2})\frac{a_1e_2 - a_2 e_1}{n_1n_2 }}$$

$$\hat{\theta_1}= \frac{1}{2} -  \left( \frac{g_1}{n_1}(\frac{e_1}{n_1} - \frac{e_2}{n_2}) + \frac{g_1}{n_1}(\frac{g_1}{n_1} - \frac{g_2}{n_2}) + \frac{a_2}{n_2} - \frac{a_1}{n_1} \right)\frac{1}{2F}
$$


$$\hat{\theta_2}= \frac{1}{2} -  \left( \frac{g_2}{n_2}(\frac{e_1}{n_1} - \frac{e_2}{n_2}) + \frac{g_2}{n_2}(\frac{g_1}{n_1} - \frac{g_2}{n_2}) + \frac{a_2}{n_2} - \frac{a_1}{n_1} \right) \frac{1}{2F}
$$

$$\hat{se_1 }= \frac{\frac{g_1e_2 - e_1g_2}{n_1n_2} + \frac{a_2}{n_2} - \frac{a_1}{n_1} + F }
{2(\frac{e_2}{n_2} - \frac{e_1}{n_1})} $$

$$\hat{se_2}= \frac{\frac{g_2e_1 - e_2g_1}{n_1n_2} + \frac{a_2}{n_2} - \frac{a_1}{n_1} + F }
{2(\frac{g_2}{n_2} - \frac{g_1}{n_1})} $$

$$\hat{sp_1 }= \frac{\frac{f_1h_2 - h_1f_2}{n_1n_2} + \frac{d_1}{n_1} - \frac{d_2}{n_2} + F }
{2(\frac{e_2}{n_2} - \frac{e_1}{n_1})}$$

$$\hat{sp_2 }= \frac{\frac{f_2h_1 - h_2f_1}{n_1n_2} + \frac{d_1}{n_1} - \frac{d_2}{n_2} + F }
{2(\frac{g_2}{n_2} - \frac{g_1}{n_1})}$$

In [127]:
def get_marginals(table):
    output = []
    if t.shape != (2, 2, 2):
        raise Exception("More than two pops or tests, not implemented.")
    for i in range(2):
        colsums = table[:,:,i].sum(axis=1)
        rowsums = table[:,:,i].sum(axis=0)
        n = np.concatenate(t[:,:,i]).sum()
        flattened = t[:,:,i].ravel()
        # a, b, c, d, e, f, g, h, n
        output.append(list(flattened) + list(colsums) + list(rowsums) + [n])
    return output

def get_beta1(a1, a2, e1, e2, g1, g2, n1, n2, F):
    """
    false negative rate and sensitivity
    """
    numerator = (g1*e2 - e1*g2)/(n1*n2) + a2/n2 - a1/n1 + F
    denominator = 2 * (e2/n2 - e1/n1)
    return numerator/denominator

def get_beta2(a1, a2, e1, e2, g1, g2, n1, n2, F):
    numerator = (g2*e1 - e2* g1)/(n1*n2) + a2/n2 - a1/n1 + F
    denominator = 2 * (g2/n2 - g1/n1)
    return numerator/denominator


def get_alpha1(d1, d2, e1, e2, f1, f2, g1, g2, h1, h2, n1, n2, F):
    """
    false positive rate and specificity
    """
    numerator = (f1*h2-h1*f2)/(n1*n2) + d1/n1 - d2/n2 + F
    denominator = 2 * (e2/n2 - e1/n1)
    return numerator/denominator

def get_alpha2(d1, d2, e1, e2, f1, f2, g1, g2, h1, h2, n1, n2, F):
    numerator = (f2*h1-h2*f1)/(n1*n2) + d1/n1 - d2/n2 + F
    denominator = 2 * (g2/n2 - g1/n1)
    return numerator/denominator

def get_F(a1, a2, e1, e2, g1, g2, n1, n2):
    b = (g1 * e2 - g2 * e1)/ (n1*n2) + a1/n2 - a2/n2
    a = (g1/n1 - g2/n2)
    c = (a1 * e2 - a2 * e1) / (n1 * n2)
    return np.sqrt(b**2 - 4 * a * c)

def get_theta(x, a1, a2, e1, e2, g1, g2, F):
    a = (e1/n1 - e2/n2)
    b = (g1/n1 - g2/n2)
    c = a2/n2 - a1/n1
    return 1/2 - (x * a + x * b + c)/(2*F)
    
def get_metrics(table):
    pop1, pop2 = get_marginals(table)
    # a, b, c, d, e, f, g, h, n
    a1, b1, c1, d1, e1, f1, g1, h1, n1 = pop1
    a2, b2, c2, d2, e2, f2, g2, h2, n2 = pop2
    # a1, b1, c1, d1, g1, h1, e1, f1, n1 = pop1
    # a2, b2, c2, d2, g2, h2, e2, f2, n2 = pop2
    F = get_F(a1, a2, e1, e2, g1, g2, n1, n2)
    theta1 = get_theta(g1/n1, a1, a2, e1, e2, g1, g2, F)
    theta2 = get_theta(g2/n2, a1, a2, e1, e2, g1, g2, F)
    beta1 = get_beta1(a1, a2, e1, e2, g1, g2, n1, n2, F)
    beta2 = get_beta2(a1, a2, e1, e2, g1, g2, n1, n2, F)
    alpha1 = get_alpha1(d1, d2, e1, e2, f1, f2, g1, g2, h1, h2, n1, n2, F)
    alpha2 = get_alpha2(d1, d2, e1, e2, f1, f2, g1, g2, h1, h2, n1, n2, F)
    #print(a1, b1, c1, d1, e1, f1, g1, h1, n1)
    print(F, theta1, theta2, alpha1, alpha2, beta1, beta2)
    return theta1, theta2, alpha1, alpha2, beta1, beta2
    

print(get_metrics(t))

0.6518148886704512 0.03818147985466752 0.7167927000475725 1.004885228092351 1.0046289931052235 0.9802128079919155 0.9737580671838849
(0.03818147985466752, 0.7167927000475725, 1.004885228092351, 1.0046289931052235, 0.9802128079919155, 0.9737580671838849)


# 
False positive = $\alpha$ $\rightarrow 1 - \alpha = \text{specificity}$ 

Two sets of solutions are provided by these equations (depending on the sign ofF),
only  one  of  which  gives  reasonable  estimates,  assuming  that  $se + sp > 1$

In [128]:
# Alpha 1
pop1, pop2 = get_marginals(t)
# a, b, c, d, e, f, g, h, n
a1, b1, c1, d1, e1, f1, g1, h1, n1 = pop1
a2, b2, c2, d2, e2, f2, g2, h2, n2 = pop2
F = get_F(a1, a2, e1, e2, g1, g2, n1, n2)
numerator = (f1*h2-h1*f2)/(n1*n2) + a1/n1 - a2/n2 + F
denominator = 2 * (e2/n2 - e1/n1)
print(numerator/denominator)

0.0014556161997043241


In [139]:
# Alpha 2
pop1, pop2 = get_marginals(t)
# a, b, c, d, e, f, g, h, n
a1, b1, c1, d1, e1, f1, g1, h1, n1 = pop1
a2, b2, c2, d2, e2, f2, g2, h2, n2 = pop2
F = get_F(a1, a2, e1, e2, g1, g2, n1, n2)
numerator = (f2*h1-h2*f1)/(n1*n2) + a1/n1 - a2/n2 + F
denominator = 2 * (g2/n2 - g1/n1)
print(numerator/denominator)

0.008035240782781596


In [133]:
# Beta 1
pop1, pop2 = get_marginals(t)
# a, b, c, d, e, f, g, h, n
a1, b1, c1, d1, e1, f1, g1, h1, n1 = pop1
a2, b2, c2, d2, e2, f2, g2, h2, n2 = pop2
F = get_F(a1, a2, e1, e2, g1, g2, n1, n2)
numerator = (g1*e2-e1*g2)/(n1*n2) + d1/n1 - d2/n2 - F
denominator = 2 * (e2/n2 - e1/n1)
print(numerator/denominator)

0.022992320213212656


In [136]:
# Beta 2
pop1, pop2 = get_marginals(t)
# a, b, c, d, e, f, g, h, n
a1, b1, c1, d1, e1, f1, g1, h1, n1 = pop1
a2, b2, c2, d2, e2, f2, g2, h2, n2 = pop2
F = get_F(a1, a2, e1, e2, g1, g2, n1, n2)
numerator = (g2*e1 - e2* g1)/(n1*n2)+ d1/n1 - d2/n2 - F
denominator = 2 * (e2/n2 - e1/n1)
print(numerator/denominator + 0.0069)

0.030116803900731376


In [115]:
(g2 * e1 - e2 * g1)/(n1*n2), d1/n1 - d2/n2 + F, d1/n1, e2*g1, g2*e1

(0.00014853527712831797, 1.3342052063709051, 0.96, 16524.0, 16632.0)

In [137]:
def beta1(d1, d2, e1, e2, g1, g2, n1, n2, F):
    numerator = (g1*e2-e1*g2)/(n1*n2) + d1/n1 - d2/n2 - F
    denominator = 2 * (e2/n2 - e1/n1)
    return numerator/denominator

def beta2(d1, d2, e1, e2, g1, g2, n1, n2, F):
    numerator = (g2*e1 - e2* g1)/(n1*n2)+ d1/n1 - d2/n2 - F
    denominator = 2 * (e2/n2 - e1/n1)
    return numerator/denominator


def alpha1(a1, a2, e1, e2, f1, f2, h1, h2, n1, n2, F):
    numerator = (f1 * h2 - h1 * f2)/(n1 * n2) + a1/n1 - a2/n2 + F
    denominator = 2 * (e2/n2 - e1/n1)
    return numerator/denominator

def alpha2(a1, a2, e1, e2, f1, f2, h1, h2, n1, n2, F):
    numerator = (f2*h1-h2*f1)/(n1*n2) + a1/n1 - a2/n2 + F
    denominator = 2 * (e2/n2 - e1/n1)
    return numerator/denominator

print(alpha1(a1, a2, e1, e2, f1, f2, n1, n2, F), alpha2(a1, a2, e1, e2, f1, f2, n1, n2, F))
print(beta1(d1, d2, e1, e2, g1, g2, n1, n2, F), beta2(d1, d2, e1, e2, g1, g2, n1, n2, F))

0.0014556161997043241 0.008090356297479413
0.022992320213212656 0.023216803900731376


In [138]:
from tensorly.decomposition import non_negative_parafac
import numpy as np

n1 = 14 + 4 + 4 + 528
n2 = 887 + 31 + 37 + 367
n = n1 + n2
t = np.zeros((2,2,2))
t[0,0,0] = 14
t[0,1,0] = 4
t[1,0,0] = 4
t[1,1,0] = 528
t[0,0,1] = 887
t[0,1,1] = 31
t[1,0,1] = 37
t[1,1,1] = 367
weights, factors = non_negative_parafac(t, rank=2)#, normalize_factors=True)
print(factors, len(factors))

[array([[0.00019562, 0.0159695 ],
       [0.02906111, 0.00062292]]), array([[  0.78650237, 235.26755262],
       [119.88905038,   7.56921764]]), array([[151.540153  ,   3.71994295],
       [105.01591698, 236.08178926]])] 3


In [50]:
t.shape

(2, 2, 2)

In [49]:
t[:,:,0], t[:,:,0].sum(axis=1), t[:,:,0].sum(axis=1), np.concatenate(t[:,:,0]).sum()

(array([[ 14.,   4.],
        [  4., 528.]]),
 array([ 18., 532.]),
 array([ 18., 532.]),
 550.0)

In [58]:
list(t[:,:,0].ravel()) + list(t[:,:,0].sum(axis=1))

[14.0, 4.0, 4.0, 528.0, 18.0, 532.0]

In [None]:
Population One  & Model One & 14 & 4 \\ 
                & Model Two & 9 &  528 \\ 
Population Two  & Model One & 887 & 31 \\ 
                & Model Two & 37 & 367 \\\bottomrule 
Model       TPR  TNR   FPR   FNR  Prior
Model One 0.798 0.984 0.016 0.202 0.627
Model Two 0.895 0.955 0.045 0.105 0.627

In [5]:
887 / n2, 14 / n1

(0.6709531013615734, 0.025454545454545455)

In [34]:
# Right numbers, but why?

(887 + 31 + 37) / n2, (14 + 4 + 9) / ( 2 * (n2 +  n1))

(0.7223903177004538, 0.007211538461538462)

In [18]:
# Right numbers, but why?

(31 + 37) / (2 * (n1 + n2)), (4 + 9) / (n1 + n2)

(0.018162393162393164, 0.006944444444444444)

In [35]:
p = (887 + 31 + 37) / n2
q = (887 - 31) / n2

d = (14 + 4 + 9 + 887 + 31 + 37 ) / (n2 +  n1)
print(p * q)
print(d)

0.4677504628983271
0.5245726495726496


In [32]:
n2 / n1

2.403636363636364

In [None]:
from patsy import dmatrices
import statsmodels.api as sm

dept_levels = ['F', 'A', 'B', 'C', 'D', 'E']

y, X = dmatrices('n ~ admission + sex + C(department, levels=dept_levels)', df, return_type='dataframe')
sm.GLM(y, X, family=sm.families.Poisson()).fit().summary()

In [6]:
from scipy.stats.contingency import association

T = np.zeros((2,2,2))
t[0,0,0] = 14
t[0,1,0] = 4
t[1,0,0] = 4
t[1,1,0] = 528
t[0,0,1] = 887
t[0,1,1] = 31
t[1,0,1] = 37
t[1,1,1] = 367
association(T, method="cramer")

ValueError: `observed` must be an integer array.