In [1]:
import numpy as np

In [2]:
# A simple experimental data generating process
def gen_data(n, d, p, delta, base, rng=None):
    """Simulate one randomized experiment.

    Parameters
    ----------
    n : int
        Number of samples.
    d : int
        Number of covariates (must be at least 1; only the first one
        enters the outcome equations).
    p : float
        Probability of being assigned to treatment.
    delta : float
        Average treatment effect.
    base : float
        Mean outcome of the untreated.
    rng : optional
        Source of randomness exposing ``binomial`` and ``normal`` (for
        example ``np.random.default_rng(seed)``). Defaults to NumPy's
        global ``np.random`` state.

    Returns
    -------
    y : ndarray of shape (n,)
        Observed outcome.
    D : ndarray of shape (n,)
        Binary treatment indicator.
    X : ndarray of shape (n, d)
        Standard normal covariates.
    """
    if rng is None:
        rng = np.random
    D = rng.binomial(1, p, size=(n,))
    X = rng.normal(0, 1, size=(n, d))
    # Potential outcomes: the first covariate enters with opposite signs, so
    # the individual effect is delta + 2 * X[:, 0], which averages to delta.
    y0 = base - X[:, 0] + rng.normal(0, 1, size=(n,))
    y1 = delta + base + X[:, 0] + rng.normal(0, 1, size=(n,))
    # Only the potential outcome matching the assigned arm is observed.
    y = y1 * D + y0 * (1 - D)
    return y, D, X

In [3]:
# Fix the seed of NumPy's global RNG (the one gen_data draws from by default)
# so that Restart Kernel -> Run All reproduces the same numbers.
SEED = 123
np.random.seed(SEED)

n = 1000  # number of samples
delta = 1.0  # true treatment effect
base = .3  # baseline (untreated) mean outcome

In [4]:
# Simple two means estimate and calculation of variance
def twomeans(y, D):
    hat0 = np.mean(y[D==0]) # mean of outcome of un-treated
    hat1 = np.mean(y[D==1]) # mean of outcome of treated
    V0 = np.var(y[D==0]) / np.mean(1 - D) # asymptotic variance of the mean of outcome of untreated
    V1 = np.var(y[D==1]) / np.mean(D) # asymptotic variance of the mean of outcome of treated
    return hat0, hat1, V0, V1

In [5]:
# Generate one RCT sample: 2 covariates, 20% treatment probability.
y, D, X = gen_data(n, d=2, p=.2, delta=delta, base=base)
# Group means and their asymptotic variances.
hat0, hat1, V0, V1 = twomeans(y, D)

In [6]:
# Difference in means: mean outcome of the treated minus mean outcome of the untreated.
hat = hat1 - hat0 # estimate of effect
hat

0.8388466359701585

In [7]:
# V0 and V1 are the within-group variances already divided by the group
# shares (see twomeans), so their sum over n is the variance of hat1 - hat0.
stderr = np.sqrt((V0 + V1) / n) # standard error of estimate of effect
stderr

0.10802464049926344

In [8]:
# Normal-approximation interval: estimate plus/minus 1.96 standard errors.
ci = [hat - 1.96 * stderr, hat + 1.96 * stderr] # 95% confidence interval for effect
ci

[0.6271183405916022, 1.050574931348715]

In [9]:
# Coverage check: repeat the experiment 100 times and record whether the
# 95% confidence interval contains the true effect `delta`. The interval
# should contain it in about 95% of the repetitions.
cov = []
for _ in range(100):
    y, D, X = gen_data(n, d=2, p=.2, delta=delta, base=base)
    hat0, hat1, V0, V1 = twomeans(y, D)
    hat = hat1 - hat0
    stderr = np.sqrt((V0 + V1) / n)
    ci = [hat - 1.96 * stderr, hat + 1.96 * stderr]
    # True when the interval contains the true parameter.
    cov.append(ci[0] <= delta <= ci[1])

In [10]:
# Empirical coverage: fraction of simulated intervals that contained the true effect.
np.mean(cov)

0.94

In [11]:
# Relative effect (RE): draw a fresh sample and recompute the group statistics.
y, D, X = gen_data(n, d=2, p=.2, delta=delta, base=base)
hat0, hat1, V0, V1 = twomeans(y, D)

In [12]:
# In gen_data the untreated mean is `base` and the treated mean is
# `delta + base`, so the true RE is (delta + base) / base - 1 = delta / base.
true_re = delta / base # RE = effect / baseline
true_re

3.3333333333333335

In [13]:
# Plug-in estimate: ratio of the treated mean to the untreated mean, minus one.
hat = hat1 / hat0 - 1 # estimate of RE
hat

2.734126139580923

In [14]:
# standard error of RE based on the delta method:
# the gradient of hat1 / hat0 is (-hat1 / hat0**2, 1 / hat0), which gives
# Var(RE) ~ (V0 * hat1**2 / hat0**4 + V1 / hat0**2) / n
stderr = np.sqrt( (V0 * hat1**2 / hat0**4 + V1 / hat0**2) / n )
stderr

0.7009732747035597

In [15]:
# Coverage of the 95% CI for the RE: repeat the experiment 100 times and
# record whether the delta-method interval contains the true relative effect.
cov = []
true_re = delta / base
for _ in range(100):
    y, D, X = gen_data(n, d=2, p=.2, delta=delta, base=base)
    hat0, hat1, V0, V1 = twomeans(y, D)
    hat = hat1 / hat0 - 1
    stderr = np.sqrt( (V0 * hat1**2 / hat0**4 + V1 / hat0**2) / n )
    ci = [hat - 1.96 * stderr, hat + 1.96 * stderr]
    # True when the interval contains the true relative effect.
    cov.append(ci[0] <= true_re <= ci[1])

In [16]:
# Empirical coverage: fraction of simulated intervals that contained the true RE.
np.mean(cov)

0.97

In [17]:
# Vaccine trial confidence intervals from contingency table
# Suffix 0 is the baseline arm and suffix 1 the treatment arm (they feed
# hat0 and hat1 in the next cell).
N0cases = 169 # number of cases in the baseline arm
N0 = 20172 # size of the baseline arm
N1cases = 9 # number of cases in the treatment arm
N1 = 19965 # size of the treatment arm

In [18]:
# NOTE: this cell rebinds n, hat0, hat1, V0 and V1, which the simulation
# cells above also use; re-run those cells from the top rather than after this one.
# The outcome is binary, so each arm's variance is rate * (1 - rate); as in
# twomeans, it is divided by the arm's share of the overall sample.
hat0 = N0cases / N0 # estimate of baseline mean outcome
hat1 = N1cases / N1 # estimate of treatment mean outcome
n = N0 + N1 # overall samples 
V0 = hat0 * (1 - hat0) / (N0 / n) # asymptotic variance of baseline mean outcome
V1 = hat1 * (1 - hat1) / (N1 / n) # asymptotic variance of treatment mean outcome

In [19]:
# VE (presumably vaccine efficacy) is one minus the ratio of the treatment
# rate to the baseline rate, i.e. the negative of the relative effect.
hat = 1 - hat1 / hat0 # VE = -RE
hat

0.9461934124362605

In [20]:
# standard error of VE based on the delta method
# VE = -RE, so its variance equals that of the ratio hat1 / hat0:
# Var(VE) ~ (V0 * hat1**2 / hat0**4 + V1 / hat0**2) / n
stderr = np.sqrt( (V0 * hat1**2 / hat0**4 + V1 / hat0**2) / n )
stderr

0.018399068760881994

In [21]:
# Normal-approximation 95% confidence interval for VE: estimate plus/minus 1.96 standard errors.
ci = [hat - 1.96 * stderr, hat + 1.96 * stderr]
ci

[0.9101312376649319, 0.9822555872075892]