Compute frequentist coverages for Bayesian logistic regression with a single variable using the Jeffreys prior.

For more background on frequentist coverage matching, see

Berger, J., J. Bernardo, and D. Sun (2022). Objective Bayesian inference and its relationship to frequentism.

## Compute frequentist coverage

In [1]:
from bbai.glm import BayesianLogisticRegression1
import numpy as np
import pandas as pd
from scipy.special import expit

In [2]:
# Nominal level of the two-tailed interval whose coverage is measured.
alpha = 0.95
# Equal-tail cut-offs: compute_coverage counts an outcome as covered when
# model.cdf(w_true) lies strictly between `low` and `high`.
low = (1 - alpha) / 2
high = 1 - low

def compute_coverage(x, w_true):
    """Return the exact coverage probability for design ``x`` at weight ``w_true``.

    Every one of the ``2 ** len(x)`` possible binary target vectors is
    enumerated. Each vector is weighted by its probability under a logistic
    model with weight ``w_true`` (the product of ``expit(+/- x_i * w_true)``),
    a fresh ``BayesianLogisticRegression1`` is fitted to it, and the vector
    counts as covered when ``model.cdf(w_true)`` (presumably the posterior CDF
    of the weight) lies strictly between the module-level ``low`` and ``high``.

    Parameters
    ----------
    x : sequence of float
        Values of the single regressor, one per observation.
    w_true : float
        The true regression weight used to generate the targets.

    Returns
    -------
    The summed probability of all covered target vectors (integer ``0`` when
    no vector is covered).
    """
    num_obs = len(x)
    coverage = 0
    for outcome in range(2 ** num_obs):
        # Bit i of `outcome` is the binary target of observation i.
        y = np.array([float((outcome >> i) & 1) for i in range(num_obs)])

        # Probability of observing exactly this target vector under w_true.
        likelihood = 1.0
        for x_i, y_i in zip(x, y):
            sign = 2 * y_i - 1.0  # +1 for a positive target, -1 otherwise
            likelihood *= expit(sign * x_i * w_true)

        model = BayesianLogisticRegression1()
        model.fit(x, y)
        cdf_at_truth = model.cdf(w_true)
        if not (low < cdf_at_truth < high):
            continue
        coverage += likelihood
    return coverage

## Compute coverages for a range of sample sizes n and true weights w

In [3]:
# Number of random designs averaged for each (n, w) combination.
N = 20
# Seed NumPy's global generator so the sampled designs are repeatable.
np.random.seed(0)
nx = [1, 3, 5, 10]  # numbers of observations per design
wx = [0.0, 0.5, 1.0, 2.0, 3.0, 5.0]  # true weights


def _mean_coverage(n, w):
    """Average compute_coverage over N designs of n points sampled uniformly between -1 and 1."""
    total = 0
    for _ in range(N):
        total += compute_coverage(np.random.uniform(-1, 1, size=n), w)
    return total / N


# One row per entry of nx, one column per entry of wx.
table = []
for n in nx:
    table.append([_mean_coverage(n, w) for w in wx])

In [4]:
# Show the coverages as a labelled table: rows are sample sizes, columns are true weights.
pd.DataFrame(
    data=table,
    index=[f'n={n}' for n in nx],
    columns=[f'w={w}' for w in wx],
)

Unnamed: 0,w=0.0,w=0.5,w=1.0,w=2.0,w=3.0,w=5.0
n=1,1.0,1.0,1.0,0.980229,0.966392,0.952218
n=3,1.0,0.919042,0.939453,0.963553,0.961761,0.969956
n=5,0.896875,0.901532,0.928691,0.969868,0.964223,0.968547
n=10,0.941309,0.939282,0.936445,0.917138,0.943845,0.971962
