Runs frequentist coverage testing for the difference of two binomial distribution probabilities using 95% posterior credible sets (see [2] for background).

The difference of two binomial probabilities was studied by Laplace in [1, p. 59]. Seeking to determine whether London had a higher ratio of boy to girl births than Paris, Laplace modeled the births in each city as a binomial distribution and computed a posterior for the difference of the two probabilities using a uniform prior.

Of course, we now know that the uniform prior is arbitrary and depends on the measurement scale. This notebook compares Laplace's original uniform prior against the Jeffreys-rule prior and against the modern reference-prior approach (Proposition 0.2 of [2]).

### References:
[1]: Laplace, P. (1778). Mémoire sur les probabilités. Translated by Richard J. Pulskamp.

[2]: Berger, J., J. Bernardo, and D. Sun (2022). Objective Bayesian inference and its relationship to frequentism.

## Imports

In [1]:
from bbai.model import DeltaBinomialModel
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import math
import pandas as pd

## Set up coverage simulation

In [2]:
# Credible level of the posterior sets under test. NOTE: despite its name,
# ``alpha`` holds the level itself (0.95), not the tail mass 1 - level.
alpha = 0.95
# Posterior-CDF bounds of the central (equal-tailed) credible set: a value
# belongs to the set exactly when its posterior CDF lies in [low, high].
low = (1 - alpha) / 2.0
high = 1.0 - low


def _binomial_pmf(n, p):
    """Return ``[P(K = k) for k in 0..n]`` for ``K ~ Binomial(n, p)``."""
    return [math.comb(n, k) * p ** k * (1 - p) ** (n - k) for k in range(n + 1)]


def coverage(m, theta, x, n1, n2):
    """Return the exact frequentist coverage of the model's credible set.

    Every outcome ``(k1, k2)`` of two independent binomial samples with
    success probabilities ``p1 = theta + x`` and ``p2 = x`` is enumerated.
    The model is fit to each outcome, and the sampling probabilities of the
    outcomes for which ``low <= m.cdf(theta) <= high`` are added up.

    Args:
        m: Object exposing ``fit(k1, n1 - k1, k2, n2 - k2)`` and
            ``cdf(value)``. Presumably a ``DeltaBinomialModel`` whose ``cdf``
            is the posterior CDF of ``p1 - p2`` -- confirm against the bbai
            documentation.
        theta: True difference ``p1 - p2``.
        x: True success probability of the second sample (``p2``).
        n1: Number of trials in the first sample.
        n2: Number of trials in the second sample.

    Returns:
        Total sampling probability (a float) of the outcomes whose fitted
        model places ``theta`` between the ``low`` and ``high`` CDF bounds.
    """
    p1 = theta + x
    p2 = x
    # Neither pmf depends on the other sample's outcome, so build each once
    # instead of recomputing the second one for every k1.
    pmf1 = _binomial_pmf(n1, p1)
    pmf2 = _binomial_pmf(n2, p2)

    res = 0.0
    for k1, prob1 in enumerate(pmf1):
        if prob1 == 0.0:
            # No outcome with this k1 can contribute to the sum.
            continue
        for k2, prob2 in enumerate(pmf2):
            prob = prob1 * prob2
            if prob == 0.0:
                # Impossible outcome (e.g. k2 > 0 when p2 == 0): fitting the
                # model would be wasted work.
                continue
            m.fit(k1, n1 - k1, k2, n2 - k2)
            # Query the CDF at the caller's theta itself; re-deriving it as
            # p1 - p2 could perturb it by floating-point round-off.
            if low <= m.cdf(theta) <= high:
                res += prob
    return res

In [3]:
def run_simulation(params, n1, n2):
    """Compute coverage for every prior over the whole parameter grid.

    Args:
        params: Iterable of ``(theta, xs)`` pairs, where ``xs`` is an
            iterable of ``x`` values to combine with that ``theta``.
        n1: Number of trials in the first sample.
        n2: Number of trials in the second sample.

    Returns:
        A list with one row per prior ('uniform', 'jeffreys', 'reference').
        Each row starts with the prior's name, followed by one coverage
        value per ``(theta, x)`` combination in grid order.
    """
    table = []
    for prior_name in ('uniform', 'jeffreys', 'reference'):
        # A fresh model is built for every grid point, then handed to
        # coverage() together with that point's true parameters.
        cells = [
            coverage(DeltaBinomialModel(prior=prior_name), theta, x, n1, n2)
            for theta, xs in params
            for x in xs
        ]
        table.append([prior_name, *cells])
    return table

In [4]:
def make_coverage_table(params, covs):
    """Arrange simulation rows into a DataFrame indexed by prior.

    Args:
        params: Iterable of ``(theta, xs)`` pairs; it determines the
            two-level column labels ``('theta=<theta>', 'x=<x>')``.
        covs: Rows of the form ``[prior, coverage, coverage, ...]`` with one
            coverage value per ``(theta, x)`` combination in grid order.

    Returns:
        A ``pandas.DataFrame`` whose index is named 'prior' and whose
        columns form a two-level MultiIndex of theta and x labels.
    """
    # The first column carries the prior name; it becomes the index below.
    header = [('', 'prior')]
    header.extend(
        (f'theta={theta}', f'x={x}')
        for theta, xs in params
        for x in xs
    )
    table = pd.DataFrame(covs, columns=pd.MultiIndex.from_tuples(header))
    prior_column = table.columns[0]
    table = table.set_index(prior_column)
    # set_index leaves the index named after the column key; use a plain name.
    table.index.name = 'prior'
    return table

## Run simulation

In [5]:
# Grid of true parameter values to test, as (theta, [x, ...]) pairs.
# coverage() uses p1 = theta + x and p2 = x, so every pair listed here keeps
# both probabilities inside [0, 1].
params = [
    (0.0, [0.0, 0.1, 0.25, 0.5]),
    (0.5, [0.0, 0.1, 0.25]),
    (0.98, [0.0, 0.005, 0.01]),
]

### $n_1=3, n_2=3$

In [6]:
# Coverage of each prior over the parameter grid with n1 = 3 and n2 = 3
# trials; the table built by the last expression is the cell's output.
covs = run_simulation(params, 3, 3)
make_coverage_table(params, covs)

Unnamed: 0_level_0,theta=0.0,theta=0.0,theta=0.0,theta=0.0,theta=0.5,theta=0.5,theta=0.5,theta=0.98,theta=0.98,theta=0.98
Unnamed: 0_level_1,x=0.0,x=0.1,x=0.25,x=0.5,x=0.0,x=0.1,x=0.25,x=0.0,x=0.005,x=0.01
prior,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
uniform,1.0,0.998542,0.986816,0.96875,0.875,0.927288,0.949219,0.0,0.0,0.0
jeffreys,1.0,0.998542,0.986816,0.96875,0.875,0.927288,0.949219,0.941192,0.941408,0.94148
reference,1.0,0.998542,0.986816,0.96875,1.0,0.97416,0.962402,0.941192,0.941408,0.94148


### $n_1=10, n_2=3$

In [7]:
# Coverage of each prior over the parameter grid with n1 = 10 and n2 = 3
# trials; the table built by the last expression is the cell's output.
covs = run_simulation(params, 10, 3)
make_coverage_table(params, covs)

Unnamed: 0_level_0,theta=0.0,theta=0.0,theta=0.0,theta=0.0,theta=0.5,theta=0.5,theta=0.5,theta=0.98,theta=0.98,theta=0.98
Unnamed: 0_level_1,x=0.0,x=0.1,x=0.25,x=0.5,x=0.0,x=0.1,x=0.25,x=0.0,x=0.005,x=0.01
prior,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
uniform,1.0,0.989656,0.983693,0.985596,0.945312,0.932969,0.945239,0.0,0.0,0.0
jeffreys,1.0,0.989592,0.978478,0.956299,0.945312,0.960052,0.951203,0.817073,0.846899,0.877521
reference,1.0,0.894403,0.928312,0.948242,0.944336,0.962462,0.95179,0.983822,0.975868,0.96616


### $n_1=10, n_2=10$

In [8]:
# Coverage of each prior over the parameter grid with n1 = 10 and n2 = 10
# trials; the table built by the last expression is the cell's output.
covs = run_simulation(params, 10, 10)
make_coverage_table(params, covs)

Unnamed: 0_level_0,theta=0.0,theta=0.0,theta=0.0,theta=0.0,theta=0.5,theta=0.5,theta=0.5,theta=0.98,theta=0.98,theta=0.98
Unnamed: 0_level_1,x=0.0,x=0.1,x=0.25,x=0.5,x=0.0,x=0.1,x=0.25,x=0.0,x=0.005,x=0.01
prior,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
uniform,1.0,0.99096,0.96516,0.957809,0.944336,0.947102,0.9547,0.0,0.0,0.0
jeffreys,1.0,0.949782,0.915044,0.947739,0.93457,0.947157,0.934719,0.817073,0.817698,0.817907
reference,1.0,0.949782,0.915044,0.947739,0.978516,0.950859,0.934762,0.983822,0.983311,0.983141
