In [1]:
import numpy as np
import scipy as sp 
import pandas as pd
import time
from scipy import stats as sps
from scipy import optimize
from scipy import special
from sklearn.linear_model import LogisticRegression
from IPython.display import display, Markdown

In [2]:
from datetime import datetime

## Example data from Chapter 2 of "What If?"

In [3]:
# Toy dataset of 20 individuals stored as three parallel 0/1 lists of length 20.
# L, A and Y are presumably covariate, treatment and outcome (that is how the
# rest of this notebook uses them) -- confirm against the book's Chapter 2 table.
ch2data = {
    'L': [0]*8 + [1]*12,
    'A': [0]*4 + [1]*4 + [0]*3 + [1]*9,
    'Y': [0,1] + [0]*5 + [1]*3 + [0] + [1]*6 + [0]*3,
}
# Row labels, one per individual, in the same order as the lists above.
ch2names = ['Rheia', 'Kronos', 'Demeter', 'Hades', 'Hestia', 'Poseidon', 'Hera', 
            'Zeus', 'Artemis', 'Apollo', 'Leto', 'Ares', 'Athena', 'Hephaestus', 
            'Aphrodite', 'Cyclope', 'Persephone', 'Hermes', 'Hebe', 'Dionysus']
# Last expression of the cell: render the dataset as a labelled table.
pd.DataFrame(ch2data, index=ch2names)

Unnamed: 0,L,A,Y
Rheia,0,0,0
Kronos,0,0,1
Demeter,0,0,0
Hades,0,0,0
Hestia,0,1,0
Poseidon,0,1,0
Hera,0,1,0
Zeus,0,1,1
Artemis,1,0,1
Apollo,1,0,1


## Basic function definitions

A utility function for generating all binary vectors of a given length.

In [4]:
def all_binary_vectors(length):
    """Enumerate every boolean vector with `length` entries.

    Vectors are returned as lists of ``False``/``True`` in lexicographic
    order (``False`` sorts before ``True``; the first entry varies slowest).
    A non-positive `length` yields a single empty vector.
    """
    vectors = [[]]
    width = 0
    while width < length:
        # Grow every existing prefix by one position, False branch first.
        grown = []
        for prefix in vectors:
            grown.append(prefix + [False])
            grown.append(prefix + [True])
        vectors = grown
        width += 1
    return vectors

In [5]:
# Example: list all 2**3 = 8 boolean vectors of length 3, one per row.
pd.DataFrame(all_binary_vectors(3))

Unnamed: 0,0,1,2
0,False,False,False
1,False,False,True
2,False,True,False
3,False,True,True
4,True,False,False
5,True,False,True
6,True,True,False
7,True,True,True


Calculate the standardized means for each level of A for a given dataset

In [6]:
def fit_phat_Y_given_L_A(L, A, Y):
    """Tabulate the empirical mean of Y within every (L, A) stratum.

    Parameters
    ----------
    L : array-like
        Binary covariates, one row per sample; reshaped to ``(len(Y), M)``.
    A : array-like
        Binary treatment indicator, one entry per sample.
    Y : array-like
        Outcome, one entry per sample.

    Returns
    -------
    callable
        ``phat_Y_given_L_A(L, A)`` which looks up the tabulated stratum mean
        for each row of ``L`` (reshaped to ``(-1, M)``) and the matching entry
        of ``A`` (an array, or a scalar applied to every row). Strata that had
        no training samples yield NaN. Rows whose covariates match no binary
        level, or whose treatment is neither 0 nor 1, are left at 0.
    """
    A = np.asarray(A)
    Y = np.asarray(Y)
    L = np.reshape(np.asarray(L), (len(Y), -1)) # force L to be a 2D array
    M = L.shape[1]
    A_levels = [False, True]
    L_levels = all_binary_vectors(M)
    # NaN marks strata with no observations (the mean is undefined there).
    phat_Y_given_L_A_table = np.full((len(L_levels), len(A_levels)), np.nan)

    for i, l in enumerate(L_levels):
        # np.all replaces np.product(...) != 0, which no longer exists in NumPy 2.
        mask_l = np.all(L == l, axis=1)
        for j, a in enumerate(A_levels):
            stratum = Y[mask_l & (A == a)]
            if stratum.size:
                phat_Y_given_L_A_table[i, j] = np.mean(stratum)

    def phat_Y_given_L_A(L, A):
        A = np.asarray(A)
        L = np.reshape(np.asarray(L), (-1, M)) # force L to be a 2D array
        result = np.zeros(L.shape[0])
        for i, l in enumerate(L_levels):
            mask_l = np.all(L == l, axis=1)
            for j, a in enumerate(A_levels):
                result[mask_l & (A == a)] = phat_Y_given_L_A_table[i, j]
        return result

    return phat_Y_given_L_A

In [7]:
def fit_phat_L(L):
    """Tabulate the empirical probability of every joint level of binary L.

    Parameters
    ----------
    L : array-like
        Binary covariates, one row per sample; reshaped to ``(N, M)``.

    Returns
    -------
    callable
        ``phat_L(L)`` returning, for each row of ``L`` (reshaped to
        ``(-1, M)``), the fraction of training rows that had exactly that
        covariate pattern. Rows matching no binary pattern get 0.
    """
    L = np.asarray(L)
    L = np.reshape(L, (L.shape[0], -1)) # force L to be a 2D array
    M = L.shape[1]
    L_levels = all_binary_vectors(M)
    phat_L_table = np.zeros((len(L_levels),))

    for i, l in enumerate(L_levels):
        # np.all replaces np.product(...) != 0, which no longer exists in NumPy 2.
        phat_L_table[i] = np.mean(np.all(L == l, axis=1))

    def phat_L(L):
        L = np.reshape(np.asarray(L), (-1, M)) # force L to be a 2D array
        result = np.zeros((L.shape[0],))
        for i, l in enumerate(L_levels):
            # Levels are mutually exclusive, so each row is written at most once.
            result[np.all(L == l, axis=1)] = phat_L_table[i]
        return result

    return phat_L

In [8]:
def standardized_means_for_A(L, A, Y):
    """Estimate the mean outcome under each treatment level by standardization.

    Computes ``sum_l phat(Y | L=l, A=a) * phat(L=l)`` over every joint level
    ``l`` of the binary covariates, for ``a`` in ``[False, True]``.

    Parameters
    ----------
    L : array-like
        Binary covariates, one row per sample (1D input is treated as a
        single covariate).
    A : array-like
        Binary treatment indicator, one entry per sample.
    Y : array-like
        Outcome, one entry per sample.

    Returns
    -------
    dict
        ``{'A_level': [False, True], 'Y_mean': ndarray of shape (2,)}``.
        An entry of ``Y_mean`` is NaN when some covariate pattern that does
        occur in the data has no samples at that treatment level.
    """
    A = np.asarray(A)
    A_levels = [False, True]
    L = np.asarray(L)
    L = np.reshape(L, (L.shape[0], -1)) # force L to be a 2D array
    L_levels = all_binary_vectors(L.shape[1])
    Y_mean = np.zeros((len(A_levels),))

    phat_L = fit_phat_L(L)
    phat_Y_given_L_A = fit_phat_Y_given_L_A(L, A, Y)

    for l in L_levels:
        weight = phat_L(l)[0]
        if weight == 0:
            # Covariate pattern never observed: it carries no probability mass,
            # and its outcome mean is NaN, so 0 * NaN would poison the sum.
            continue
        for i, a in enumerate(A_levels):
            Y_mean[i] += phat_Y_given_L_A(l, a)[0] * weight

    return {
        'A_level': A_levels,
        'Y_mean': Y_mean,
    }

In [9]:
# Standardized means on the chapter 2 data (the dict keys L, A, Y map onto the function's parameters).
pd.DataFrame(standardized_means_for_A(**ch2data))

Unnamed: 0,A_level,Y_mean
0,False,0.5
1,True,0.5


In [10]:
def outcome_regression_for_A(L, A, Y):
    """Estimate the mean outcome under each treatment level by outcome regression.

    Fits the stratified outcome table, then for each treatment level in
    ``[False, True]`` predicts the outcome of every observed sample as if it
    had received that level and averages the predictions.

    Returns a dict with keys ``'A_level'`` (the two levels) and ``'Y_mean'``
    (ndarray with the corresponding averages).
    """
    treatment = np.array(A)
    levels = [False, True]
    covariates = np.array(L)
    covariates = np.reshape(covariates, (covariates.shape[0], -1))  # always 2D

    predict_outcome = fit_phat_Y_given_L_A(covariates, treatment, Y)

    # One average per treatment level, over the observed covariate rows.
    averages = np.array([
        np.mean(predict_outcome(covariates, level))
        for level in levels
    ])

    return {
        'A_level': levels,
        'Y_mean': averages,
    }

In [11]:
# Outcome-regression means on the chapter 2 data.
pd.DataFrame(outcome_regression_for_A(**ch2data))

Unnamed: 0,A_level,Y_mean
0,False,0.5
1,True,0.5


Calculate the inverse probability weighted means for each level of A for a given dataset

In [12]:
# Separately tabulate the frequency of A for each possible level of L
# (note: vectorized with respect to samples, so fast but a little ugly)
def fit_phat_A_given_L(L, A):
    """Tabulate the empirical treatment probabilities within each level of L.

    Parameters
    ----------
    L : array-like
        Binary covariates, one row per sample; reshaped to ``(len(A), M)``.
    A : array-like
        Binary treatment indicator, one entry per sample.

    Returns
    -------
    callable
        ``phat_A_given_L(L)`` returning an array of shape ``(n_rows, 2)``
        whose columns are the estimated probabilities of ``A == False`` and
        ``A == True`` for each row of ``L`` (reshaped to ``(-1, M)``). Rows
        whose covariate pattern was never observed during fitting get NaN;
        rows matching no binary pattern get 0.
    """
    A = np.asarray(A)
    L = np.reshape(np.asarray(L), (len(A), -1)) # force L to be a 2D array
    M = L.shape[1]
    A_levels = [False, True]
    L_levels = all_binary_vectors(M)
    # NaN marks covariate patterns with no observations (propensity undefined).
    phat_A_given_L_table = np.full((len(L_levels), len(A_levels)), np.nan)

    for i, l in enumerate(L_levels):
        # np.all replaces np.product(...) != 0, which no longer exists in NumPy 2.
        mask_l = np.all(L == l, axis=1)
        n_l = np.count_nonzero(mask_l)
        if n_l == 0:
            continue
        for j, a in enumerate(A_levels):
            phat_A_given_L_table[i, j] = np.count_nonzero(mask_l & (A == a)) / n_l

    def phat_A_given_L(L):
        L = np.reshape(np.asarray(L), (-1, M)) # force L to be a 2D array
        result = np.zeros((L.shape[0], len(A_levels)))
        for i, l in enumerate(L_levels):
            mask_l = np.all(L == l, axis=1)
            # Assign by mask rather than accumulating mask * value: a NaN table
            # entry must only reach the rows that belong to that stratum.
            result[mask_l] = phat_A_given_L_table[i]
        return result

    return phat_A_given_L

In [13]:
def ip_weighted_means_for_A(L, A, Y):
    """Estimate the mean outcome under each treatment level by inverse probability weighting.

    For each level ``a`` in ``[False, True]`` this averages
    ``Y * 1[A == a] / phat(A = a | L)`` over all samples.

    Parameters
    ----------
    L : array-like
        Binary covariates, one row per sample.
    A : array-like
        Binary treatment indicator, one entry per sample.
    Y : array-like
        Outcome, one entry per sample.

    Returns
    -------
    dict
        ``{'A_level': [False, True], 'Y_mean': list of two floats}``.
    """
    A = np.asarray(A)
    Y = np.asarray(Y)  # accept plain lists, as the sibling estimators do
    A_levels = [False, True]
    # The propensities do not depend on the treatment level: evaluate them once.
    propensity = fit_phat_A_given_L(L, A)(L)
    Y_mean = [np.mean(Y * (A == a) / propensity[:, i]) for i, a in enumerate(A_levels)]

    return {
        'A_level': A_levels,
        'Y_mean': Y_mean,
    }

In [14]:
# Inverse-probability-weighted means on the chapter 2 data.
pd.DataFrame(ip_weighted_means_for_A(**ch2data))

Unnamed: 0,A_level,Y_mean
0,False,0.5
1,True,0.5


Calculate the doubly robust means for each level of A for a given dataset

In [15]:
def doubly_robust_means_for_A(L, A, Y):
    """Estimate the mean outcome under each treatment level with a doubly robust estimator.

    Combines the stratified outcome model ``q = phat(Y | L, A=a)`` with the
    stratified propensity model ``f = phat(A=a | L)``: for each level ``a`` in
    ``[False, True]`` it averages ``q + 1[A == a] / f * (Y - q)`` over samples.

    Returns a dict with keys ``'A_level'`` (the two levels) and ``'Y_mean'``
    (ndarray with the corresponding estimates).
    """
    treatment = np.array(A)
    levels = [False, True]
    outcome = np.array(Y)

    propensity_model = fit_phat_A_given_L(L, treatment)
    predict_outcome = fit_phat_Y_given_L_A(L, treatment, outcome)
    propensity = propensity_model(L)

    def augmented_mean(column, level):
        # Outcome-model prediction plus the inverse-probability-weighted residual.
        predicted = predict_outcome(L, level)
        return np.mean(predicted + (treatment == level) / propensity[:, column] * (outcome - predicted))

    estimates = np.array([augmented_mean(column, level)
                          for column, level in enumerate(levels)])

    return {
        'A_level': levels,
        'Y_mean': estimates,
    }

In [16]:
# Doubly robust means on the chapter 2 data.
pd.DataFrame(doubly_robust_means_for_A(**ch2data))

Unnamed: 0,A_level,Y_mean
0,False,0.5
1,True,0.5


Calculate the means for each level of A using a marginal structural model (in this case, inverse probability of treatment weighting using logistic marginal models)

In [17]:
# Use a logistic regression model for the probability of A given L
def fit_marginal_phat_A_given_L(L, A):
    """Fit a logistic regression of treatment A on covariates L.

    Returns a function ``phat_A_given_L(L)`` that reshapes its argument to 2D
    (one row per sample) and returns the fitted model's ``predict_proba``
    output for those rows.

    NOTE(review): scikit-learn's LogisticRegression is presumably applying its
    default L2 penalty here, which would shrink the fitted probabilities
    relative to the plain stratified frequencies -- confirm this is intended.
    """
    design = np.reshape(np.array(L), (len(A), -1))  # one row per sample
    labels = np.array(A)

    classifier = LogisticRegression(solver='liblinear')
    classifier.fit(design, labels)

    def phat_A_given_L(L):
        rows = np.array(L)
        return classifier.predict_proba(np.reshape(rows, (rows.shape[0], -1)))

    return phat_A_given_L

In [18]:
def fit_marginal_phat_P_Y_given_L_A(L, A, Y):
    """Fit an inverse-probability-weighted logistic regression of Y on A alone.

    Each sample is weighted by the inverse of the fitted probability of the
    treatment it actually received (from ``fit_marginal_phat_A_given_L``);
    the outcome model then uses the treatment as its only feature.

    Parameters
    ----------
    L : array-like
        Covariates, one row per sample; only used to fit the weights.
    A : array-like
        Binary treatment indicator, one entry per sample.
    Y : array-like
        Binary outcome, one entry per sample.

    Returns
    -------
    callable
        ``phat_Y_given_L_A(L, A)`` returning the outcome model's
        ``predict_proba`` output for the 1D treatment array ``A``. ``L`` is
        accepted for signature parity with the stratified predictor but is
        not used, because the outcome model has no covariate terms.
    """
    L = np.reshape(np.asarray(L), (len(A), -1)) # force L to be a 2D array
    A = np.asarray(A)

    # Evaluate the propensity model once and reuse both columns.
    propensity = fit_marginal_phat_A_given_L(L, A)(L)
    weights = A / propensity[:, 1] + (1 - A) / propensity[:, 0]
    X = A[:, np.newaxis]
    model = LogisticRegression(solver='liblinear').fit(X, Y, sample_weight=weights)

    def phat_Y_given_L_A(L, A):
        A = np.asarray(A)
        return model.predict_proba(A[:, np.newaxis])

    return phat_Y_given_L_A

In [19]:
def marginal_structural_model_for_A(L, A, Y):
    """Estimate the mean outcome under each treatment level with a weighted outcome model.

    Fits the inverse-probability-weighted outcome model, then for each level
    in ``[False, True]`` sets every sample's treatment to that level and
    averages the second column of the predicted probabilities.

    Returns a dict with keys ``'A_level'`` (the two levels) and ``'Y_mean'``
    (list with the corresponding averages).
    """
    covariates = np.reshape(np.array(L), (len(A), -1))  # always 2D
    treatment = np.array(A)
    outcome = np.array(Y)

    predict = fit_marginal_phat_P_Y_given_L_A(covariates, treatment, outcome)

    levels = [False, True]
    averages = []
    for level in levels:
        # Counterfactual treatment vector: every sample receives `level`.
        everyone_at_level = np.ones_like(treatment) * level
        probabilities = predict(covariates, everyone_at_level)
        averages.append(np.mean(probabilities[:, 1]))

    return {
        'A_level': levels,
        'Y_mean': averages,
    }

In [20]:
# Marginal-structural-model means on the chapter 2 data.
pd.DataFrame(marginal_structural_model_for_A(**ch2data))

Unnamed: 0,A_level,Y_mean
0,False,0.480048
1,True,0.510726


#### Calculate the log of the odds ratio using g-estimation

For a given link function $g(x)$, assume a linear effect of an intervention to create a structural mean model (SMM):

$$g(\mathbb{E}(Y^a|L=l,A=a)) - g(\mathbb{E}(Y^{a=0}|L=l,A=a)) = \Psi_0 a + \Psi_L al$$

$$U = \mathbb{E}(Y^{a=0}|L=l,A=a) = g^{-1}(g(\mathbb{E}(Y^a|L=l,A=a)) - \Psi_0 a - \Psi_L al)$$


We first define a structural mean model (SMM) with link function  $g(x) = \mathrm{logit}(x)$ parameterized by $\Psi_0$ and $\Psi_L$:

$$\mathrm{logit}\left(\mathbb{E}\left(Y^a|L=l,A=a\right)\right) 
    - \mathrm{logit}\left(\mathbb{E}\left(Y^0|L=l,A=a\right)\right)
    = \gamma(a, l; \Psi_0, \Psi_L),$$
    
where

$$\gamma(a, l; \Psi_0, \Psi_L) = \Psi_0 a + \Psi_L al. $$
    
We can then define a variable $U(\Psi_0, \Psi_L)$ as

$$U(\Psi_0, \Psi_L) = \mathrm{expit}\left(\mathrm{logit}\left(\mathbb{E}\left(Y^a|L,A\right)\right) - \Psi_0 A - \Psi_L LA\right)$$

such that

$$\mathbb{E}\left(U | L,A\right) = \mathbb{E}\left(Y^{a=0} | L,A\right).$$

Because we assume $A \perp\!\!\perp Y^{a=0} | L$, we then find

$$\mathbb{E}\left(U | L,A \right)
    = \mathbb{E}\left(Y^{a=0} | L,A\right)
    = \mathbb{E}\left(Y^{a=0} | L\right)
    = \mathbb{E}\left(U | L \right)
.$$

More generally, for an arbitrary function $f(A,L)$, we have $f(A,L) \perp\!\!\perp U | L$.  Thus 

$$0 = cov(f(A,L), U)
    = \mathbb{E}\left(\sum_i (f(A, L = L_i) - \mathbb{E}(f(A,L_i) | L = L_i)) \cdot
        (U_i - \mathbb{E}(U | L = L_i))\right)
.$$

We can then use this equality with the family of functions

$$f_i(A,L) = \frac{\partial U}{\partial \Psi_i}, \quad i \in \{0, L\},$$

to provide a sufficient set of constraints to solve for $\Psi = (\Psi_0, \Psi_L)$. 

Note: This function appears to work as long as the magnitude of the various effect sizes ($|\beta|$) is approximately $1$ or less, but usually generates erroneously large effect sizes if $|\beta|$ is greater than that. Maybe this is due to the vanishing derivative of the $\mathrm{expit}(x)$ function when $|x| \gg 1$? 

In [21]:
def g_estimation_log_odds_for_A(L, A, Y):
    """Estimate the treatment log odds ratio by g-estimation of a logistic structural mean model.

    The model is ``gamma(a, l; psi) = a * ([l, 1] . psi)``, i.e. one
    coefficient per covariate plus an intercept, all multiplied by the
    treatment. ``psi`` is found as a root of the summed product of the
    centred derivative ``dU/dpsi`` and the centred ``U``, where
    ``U = expit(logit(phat(Y | L, A)) - gamma)`` and centring subtracts the
    expectation over A given L under the stratified propensity model.

    Parameters
    ----------
    L : array-like
        Binary covariates, one row per sample.
    A : array-like
        Binary treatment indicator, one entry per sample.
    Y : array-like
        Binary outcome, one entry per sample.

    Returns
    -------
    float
        The sample mean of ``gamma`` evaluated with every sample treated.
        This is an average of covariate-conditional log odds ratios.

    Warns
    -----
    RuntimeWarning
        If the root finder reports failure. The last iterate is still used,
        so the returned value should not be trusted in that case.
    """
    import warnings  # local import: only needed to report solver failures

    L = np.reshape(np.array(L), (len(A), -1)) # force L to be a 2D array
    A = np.array(A)
    a1 = np.ones_like(A)
    a0 = np.zeros_like(A)
    Y = np.array(Y)

    # Covariates plus a constant column, so psi[-1] acts as the intercept.
    L_augmented = np.hstack([L, np.ones((L.shape[0],1))])
    phat_A_given_L = fit_phat_A_given_L(L, A)(L)[:,1]
    phat_Y_given_L_A = fit_phat_Y_given_L_A(L, A, Y)
    logit_phat_Y_given_L_A = special.logit(phat_Y_given_L_A(L,A))
    logit_phat_Y_given_L_a1 = special.logit(phat_Y_given_L_A(L,a1))
    logit_phat_Y_given_L_a0 = special.logit(phat_Y_given_L_A(L,a0))

    def gamma(A, psi):
        # Linear predictor of the structural model; zero wherever A is 0.
        return (L_augmented * A[:,np.newaxis]).dot(psi)

    def cov_dU_U(psi):
        U = special.expit(logit_phat_Y_given_L_A - gamma(A, psi))
        U_given_a0 = special.expit(logit_phat_Y_given_L_a0 - gamma(a0, psi))
        U_given_a1 = special.expit(logit_phat_Y_given_L_a1 - gamma(a1, psi))
        E_U_given_L = (phat_A_given_L * U_given_a1 +
                       (1 - phat_A_given_L) * U_given_a0)

        # Derivative of U with respect to psi, up to a common sign that does
        # not affect where the estimating equation is zero.
        dUdPsi = L_augmented * A[:,np.newaxis] * (U*(1-U))[:,np.newaxis]
        E_dUdPsi_given_a0 = (L_augmented * a0[:,np.newaxis] *
                               (U_given_a0*(1-U_given_a0))[:,np.newaxis])
        E_dUdPsi_given_a1 = (L_augmented * a1[:,np.newaxis] *
                               (U_given_a1*(1-U_given_a1))[:,np.newaxis])
        E_dUdPsi_given_L = (phat_A_given_L[:,np.newaxis] * E_dUdPsi_given_a1 +
                       (1 - phat_A_given_L[:,np.newaxis]) * E_dUdPsi_given_a0)

        return np.sum((dUdPsi - E_dUdPsi_given_L) * (U - E_U_given_L)[:,np.newaxis], axis=0)

    solution = optimize.root(cov_dU_U, np.zeros(L.shape[1] + 1))
    if not solution.success:
        # Previously a failed solve was returned silently as if it were an estimate.
        warnings.warn(
            'g-estimation root finding did not converge: ' + str(solution.message),
            RuntimeWarning,
            stacklevel=2,
        )
    psi = solution.x

    return np.mean(gamma(a1, psi))

In [22]:
# g-estimation log odds ratio on the chapter 2 data.
g_estimation_log_odds_for_A(**ch2data)

0.0

### Utilities

A simple utility function for summarizing results for a given model

In [23]:
def summarize_model(data, title='', description=''):
    """Display a report comparing every estimator in this notebook on one dataset.

    `data` is a dict with entries 'L', 'A' and 'Y', and optionally the
    counterfactual outcomes 'Y_a_0' and 'Y_a_1'. The report consists of the
    title and description, the first 12 samples, summary statistics, and a
    table with one row per estimator giving the estimated mean outcome under
    each treatment level and the implied log odds ratio. The elapsed time is
    shown when the report took longer than one second. Nothing is returned.
    """
    started = time.monotonic()

    def show_markdown(text):
        display(Markdown(text))

    treatment = data['A']
    covariates = np.reshape(np.array(data['L']), (len(treatment), -1))  # always 2D
    outcome = data['Y']
    has_counterfactuals = 'Y_a_0' in data

    show_markdown('### ' + title)
    show_markdown(description)

    show_markdown('#### Sample data')
    sample_columns = [
        pd.DataFrame(covariates).add_prefix("L"),
        pd.Series(treatment, name='A'),
        pd.Series(outcome, name='Y'),
    ]
    display(pd.concat(sample_columns, axis=1).iloc[:12])

    show_markdown('#### Summary statistics')
    summary = {
        '$\\bar L$': [np.mean(covariates[:, k]) for k in range(covariates.shape[1])],
        '$\\bar A$': np.mean(treatment),
        '$\\bar Y$': np.mean(outcome),
    }
    if has_counterfactuals:
        summary.update({
            '$\\bar Y^c_0$': np.mean(data['Y_a_0']),
            '$\\bar Y^c_1$': np.mean(data['Y_a_1']),
        })
    display(pd.DataFrame({'statistic': summary.values()}, index=summary.keys()))

    # One record per table row: (label, mean under a=0, mean under a=1, log odds ratio).
    records = []

    if has_counterfactuals:
        truth_a0 = np.mean(data['Y_a_0'])
        truth_a1 = np.mean(data['Y_a_1'])
        records.append((
            'Underlying counterfactual data',
            truth_a0,
            truth_a1,
            special.logit(truth_a1) - special.logit(truth_a0),
        ))

    estimators = {
        "Standardized means": standardized_means_for_A,
        "Outcome regression": outcome_regression_for_A,
        "Inverse probability weighted means (IPW)": ip_weighted_means_for_A,
        "Doubly robust means": doubly_robust_means_for_A,
        "Marginal structural model (MSM)": marginal_structural_model_for_A,
    }
    for label, estimator in estimators.items():
        means = estimator(covariates, treatment, outcome)['Y_mean']
        records.append((
            label,
            means[0],
            means[1],
            special.logit(means[1]) - special.logit(means[0]),
        ))

    # g-estimation only yields a log odds ratio, so its mean columns stay blank.
    records.append((
        'g-estimation',
        '',
        '',
        g_estimation_log_odds_for_A(covariates, treatment, outcome),
    ))

    show_markdown('#### Estimators')
    display(pd.DataFrame({
        '$\\hat Y^{a=0}$': [record[1] for record in records],
        '$\\hat Y^{a=1}$': [record[2] for record in records],
        '$\\hat \\beta_a$': [record[3] for record in records],
    }, index=[record[0] for record in records]))

    elapsed = time.monotonic() - started
    if elapsed > 1:
        show_markdown(f'*Total execution time*: {elapsed} s')

In [24]:
# Run the full report on the chapter 2 data (it has no 'Y_a_0'/'Y_a_1' entries, so no counterfactual row is shown).
summarize_model(ch2data, 'Analysis of data from chapter 2', 'Validating the summary function with data from chapter 2')

### Analysis of data from chapter 2

Validating the summary function with data from chapter 2

#### Sample data

Unnamed: 0,L0,A,Y
0,0,0,0
1,0,0,1
2,0,0,0
3,0,0,0
4,0,1,0
5,0,1,0
6,0,1,0
7,0,1,1
8,1,0,1
9,1,0,1


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,[0.6]
$\bar A$,0.65
$\bar Y$,0.5


#### Estimators

Unnamed: 0,$\hat Y^{a=0}$,$\hat Y^{a=1}$,$\hat \beta_a$
Standardized means,0.5,0.5,0.0
Outcome regression,0.5,0.5,0.0
Inverse probability weighted means (IPW),0.5,0.5,0.0
Doubly robust means,0.5,0.5,8.881784e-16
Marginal structural model (MSM),0.480048,0.510726,0.1227615
g-estimation,,,0.0


## Testing with generated data

Start with a simple parameterized model...

In [25]:
def generate_simple_logistic_model_data(N=100000, P_L=0.5,
                                        beta_A_0=0, beta_A_L=0, 
                                        beta_Y_0=0, beta_Y_L=0, beta_Y_A=0, beta_Y_LA=0,
                                        debug_info=False, seed=None):
    """Simulate binary covariates, treatment and potential outcomes from logistic models.

    The sampling scheme is::

        L_m   ~ Bernoulli(P_L[m])
        A     ~ Bernoulli(expit(beta_A_0 + L . beta_A_L))
        Y^a=0 ~ Bernoulli(expit(beta_Y_0 + L . beta_Y_L))
        Y^a=1 ~ Bernoulli(expit(beta_Y_0 + L . beta_Y_L + beta_Y_A + L . beta_Y_LA))
        Y     = Y^a=1 where A is set, otherwise Y^a=0

    Parameters
    ----------
    N : int
        Number of samples.
    P_L : float or sequence of float
        Probability that each covariate is True; its length sets the number
        of covariates M.
    beta_A_0, beta_Y_0, beta_Y_A : float
        Scalar coefficients of the models above.
    beta_A_L, beta_Y_L, beta_Y_LA : float or sequence of float
        Per-covariate coefficients; a scalar is repeated for all M covariates.
    debug_info : bool
        When True, also return the probabilities used for sampling.
    seed : int or None
        When given, samples are drawn from ``np.random.default_rng(seed)`` so
        the result is reproducible. When None (the default) the global
        ``np.random`` state is used, exactly as before this parameter existed.

    Returns
    -------
    dict
        Keys 'L' (bool, shape (N, M)), 'A' (bool, shape (N,)), 'Y' (0/1
        integers, shape (N,)), 'Y_a_0' and 'Y_a_1' (bool, shape (N,)). With
        ``debug_info=True`` it also holds 'P_L', 'P_A_given_l',
        'P_Y_given_l_a_0', 'P_Y_given_l_a_1' and 'P_Y_given_l_a' (the outcome
        probability under the treatment each sample actually received).
    """
    # reshape and broadcast values into appropriately sized arrays
    P_L = np.array(P_L).reshape((-1,)) # expand P_L to a 1d array if needed
    M = len(P_L)

    def per_covariate(beta):
        # A scalar is repeated M times; a length-M sequence is used as given.
        return np.broadcast_to(np.ravel(beta), (M,)).copy()

    beta_A_L = per_covariate(beta_A_L)
    beta_Y_L = per_covariate(beta_Y_L)
    beta_Y_LA = per_covariate(beta_Y_LA)

    # Both np.random and a Generator provide uniform(size=...).
    rng = np.random if seed is None else np.random.default_rng(seed)

    L = rng.uniform(size=(N, M)) < P_L
    P_A_given_l = sps.logistic.cdf(beta_A_0 + L.dot(beta_A_L))
    A = rng.uniform(size=N) < P_A_given_l
    P_Y_given_l_a_0 = sps.logistic.cdf(beta_Y_0 + L.dot(beta_Y_L))
    P_Y_given_l_a_1 = sps.logistic.cdf(beta_Y_0 + L.dot(beta_Y_L) +
                                     beta_Y_A +
                                     L.dot(beta_Y_LA))
    Y_a_0 = rng.uniform(size=N) < P_Y_given_l_a_0
    Y_a_1 = rng.uniform(size=N) < P_Y_given_l_a_1
    # Observed outcome: the potential outcome of the treatment actually received.
    Y = Y_a_0 * (1 - A) + Y_a_1 * A

    result = {
        'L' : L,
        'A' : A,
        'Y' : Y,
        'Y_a_0': Y_a_0,
        'Y_a_1': Y_a_1,
    }
    if debug_info:
        # 'P_Y_given_l_a' used to reference an undefined name and raised NameError.
        result.update({
            'P_L' : P_L,
            'P_A_given_l' : P_A_given_l,
            'P_Y_given_l_a' : np.where(A, P_Y_given_l_a_1, P_Y_given_l_a_0),
            'P_Y_given_l_a_0' : P_Y_given_l_a_0,
            'P_Y_given_l_a_1' : P_Y_given_l_a_1,
        })
    return result

In [26]:
# All generator defaults: P_L=0.5 and every coefficient 0, so L, A and Y are each drawn with probability 0.5.
summarize_model(generate_simple_logistic_model_data(), 'Maximal entropy model', 'No interactions, 1:1 odds for L, A, and Y')

### Maximal entropy model

No interactions, 1:1 odds for L, A, and Y

#### Sample data

Unnamed: 0,L0,A,Y
0,True,False,0
1,False,False,0
2,True,True,1
3,True,False,0
4,True,True,1
5,True,False,1
6,True,True,0
7,False,False,0
8,True,False,1
9,False,True,1


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,[0.49806]
$\bar A$,0.50206
$\bar Y$,0.50009
$\bar Y^c_0$,0.50093
$\bar Y^c_1$,0.49927


#### Estimators

Unnamed: 0,$\hat Y^{a=0}$,$\hat Y^{a=1}$,$\hat \beta_a$
Underlying counterfactual data,0.50093,0.49927,-0.00664
Standardized means,0.501265,0.498925,-0.00936
Outcome regression,0.501265,0.498925,-0.00936
Inverse probability weighted means (IPW),0.501265,0.498925,-0.00936
Doubly robust means,0.501265,0.498925,-0.00936
Marginal structural model (MSM),0.501265,0.498925,-0.009359
g-estimation,,,-0.00936


In [27]:
# beta_Y_A=-1: treatment lowers the log-odds of Y by 1. All other coefficients
# keep their default of 0, so treatment assignment does not depend on L.
summarize_model(generate_simple_logistic_model_data(beta_Y_A=-1),
                'Simple randomized protective intervention', 'Assumes no effect from covariate')

### Simple randomized protective intervention

Assumes no effect from covariate

#### Sample data

Unnamed: 0,L0,A,Y
0,False,True,1
1,True,False,1
2,False,False,0
3,False,False,0
4,False,True,0
5,False,True,0
6,False,False,1
7,False,True,0
8,False,True,0
9,True,True,0


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,[0.50174]
$\bar A$,0.49955
$\bar Y$,0.38287
$\bar Y^c_0$,0.49867
$\bar Y^c_1$,0.26974


#### Estimators

Unnamed: 0,$\hat Y^{a=0}$,$\hat Y^{a=1}$,$\hat \beta_a$
Underlying counterfactual data,0.49867,0.26974,-0.990622
Standardized means,0.496713,0.268823,-0.987456
Outcome regression,0.496713,0.268823,-0.987456
Inverse probability weighted means (IPW),0.496713,0.268823,-0.987456
Doubly robust means,0.496713,0.268823,-0.987456
Marginal structural model (MSM),0.496703,0.268833,-0.987367
g-estimation,,,-0.987649


In [28]:
# beta_Y_L=1 makes the covariate raise the log-odds of Y by 1, while beta_Y_A=-1
# lowers it by 1; treatment assignment still does not depend on L.
summarize_model(generate_simple_logistic_model_data(beta_Y_A=-1, beta_Y_L=1),
                'Randomized, covariate is risk, intervention is protective',
                '1:1 odds of covariate')

### Randomized, covariate is risk, intervention is protective

1:1 odds of covariate

#### Sample data

Unnamed: 0,L0,A,Y
0,True,False,1
1,False,True,1
2,False,True,1
3,True,True,1
4,False,False,1
5,True,False,1
6,True,False,1
7,True,False,1
8,False,False,0
9,False,True,0


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,[0.50104]
$\bar A$,0.49748
$\bar Y$,0.49969
$\bar Y^c_0$,0.61796
$\bar Y^c_1$,0.38504


#### Estimators

Unnamed: 0,$\hat Y^{a=0}$,$\hat Y^{a=1}$,$\hat \beta_a$
Underlying counterfactual data,0.61796,0.38504,-0.949108
Standardized means,0.614427,0.383795,-0.939428
Outcome regression,0.614427,0.383795,-0.939428
Inverse probability weighted means (IPW),0.614427,0.383795,-0.939428
Doubly robust means,0.614427,0.383795,-0.939428
Marginal structural model (MSM),0.614411,0.383807,-0.939312
g-estimation,,,-0.996024


In [29]:
# beta_A_0=-2 sets the treatment probability to expit(-2), about 0.12, for every sample.
summarize_model(generate_simple_logistic_model_data(beta_A_0=-2, beta_Y_A=-1, beta_Y_L=1),
                'Randomized, covariate is risk, rare intervention is protective',
                '1:1 odds of covariate')

### Randomized, covariate is risk, rare intervention is protective

1:1 odds of covariate

#### Sample data

Unnamed: 0,L0,A,Y
0,False,False,1
1,True,False,1
2,False,False,1
3,True,False,1
4,True,False,1
5,False,True,0
6,False,False,0
7,True,False,1
8,False,False,1
9,False,False,0


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,[0.50224]
$\bar A$,0.11841
$\bar Y$,0.58931
$\bar Y^c_0$,0.61622
$\bar Y^c_1$,0.38354


#### Estimators

Unnamed: 0,$\hat Y^{a=0}$,$\hat Y^{a=1}$,$\hat \beta_a$
Underlying counterfactual data,0.61622,0.38354,-0.948084
Standardized means,0.616885,0.383991,-0.94899
Outcome regression,0.616885,0.383991,-0.94899
Inverse probability weighted means (IPW),0.616885,0.383991,-0.94899
Doubly robust means,0.616885,0.383991,-0.94899
Marginal structural model (MSM),0.616865,0.384026,-0.948756
g-estimation,,,-1.009511


In [30]:
# P_L=0.75 makes the covariate common: its odds are 0.75/0.25 = 3:1, which the
# displayed description now states (it previously said 1:1, copied from the cell above).
summarize_model(generate_simple_logistic_model_data(P_L=0.75, beta_A_0=-2, beta_Y_A=-1, beta_Y_L=1),
                'Randomized, common covariate is risk, rare intervention is protective',
                '3:1 odds of covariate')

### Randomized, common covariate is risk, rare intervention is protective

1:1 odds of covariate

#### Sample data

Unnamed: 0,L0,A,Y
0,False,False,0
1,True,False,1
2,False,False,0
3,True,False,0
4,True,False,1
5,True,False,1
6,False,False,0
7,True,False,1
8,True,True,1
9,True,True,0


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,[0.75195]
$\bar A$,0.11758
$\bar Y$,0.64536
$\bar Y^c_0$,0.67291
$\bar Y^c_1$,0.44685


#### Estimators

Unnamed: 0,$\hat Y^{a=0}$,$\hat Y^{a=1}$,$\hat \beta_a$
Underlying counterfactual data,0.67291,0.44685,-0.934782
Standardized means,0.672807,0.439337,-0.964762
Outcome regression,0.672807,0.439337,-0.964762
Inverse probability weighted means (IPW),0.672807,0.439337,-0.964762
Doubly robust means,0.672807,0.439337,-0.964762
Marginal structural model (MSM),0.672785,0.439382,-0.96448
g-estimation,,,-1.011617


In [31]:
# beta_A_L=1 makes the treatment probability depend on L, and beta_Y_L=1 makes
# the outcome depend on L as well, so L influences both A and Y in this run.
summarize_model(generate_simple_logistic_model_data(P_L=0.75,
                                                    beta_A_0=-2, beta_A_L=1, 
                                                    beta_Y_A=-1, beta_Y_L=1),
                'Common covariate is risk for disease and intervention, intervention is protective',
                '')

### Common covariate is risk for disease and intervention, intervention is protective



#### Sample data

Unnamed: 0,L0,A,Y
0,True,False,0
1,True,True,0
2,False,False,1
3,False,False,0
4,False,False,1
5,False,True,0
6,True,True,0
7,True,False,0
8,True,False,1
9,True,True,1


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,[0.7467]
$\bar A$,0.22965
$\bar Y$,0.61869
$\bar Y^c_0$,0.67067
$\bar Y^c_1$,0.44229


#### Estimators

Unnamed: 0,$\hat Y^{a=0}$,$\hat Y^{a=1}$,$\hat \beta_a$
Underlying counterfactual data,0.67067,0.44229,-0.94309
Standardized means,0.671421,0.443927,-0.939858
Outcome regression,0.671421,0.443927,-0.939858
Inverse probability weighted means (IPW),0.671421,0.443927,-0.939858
Doubly robust means,0.671421,0.443927,-0.939858
Marginal structural model (MSM),0.671398,0.443979,-0.939543
g-estimation,,,-0.982345


*Total execution time*: 1.009530761046335 s

In [32]:
# beta_Y_LA=-2 adds a treatment-by-covariate interaction: the treatment shifts the
# log-odds of Y by +1 when L is False and by 1 - 2 = -1 when L is True.
summarize_model(generate_simple_logistic_model_data(P_L=0.5,
                                                    beta_A_0=0, 
                                                    beta_Y_A=1, beta_Y_L=1, beta_Y_LA=-2),
                'Covariate and intervention are risks, but negate eachother',
                '1:1 odds of covariate')

### Covariate and intervention are risks, but negate eachother

1:1 odds of covariate

#### Sample data

Unnamed: 0,L0,A,Y
0,True,True,1
1,True,False,1
2,False,True,1
3,False,False,1
4,True,False,0
5,True,False,1
6,True,False,1
7,True,True,0
8,True,False,0
9,False,True,0


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,[0.50101]
$\bar A$,0.50114
$\bar Y$,0.61306
$\bar Y^c_0$,0.61612
$\bar Y^c_1$,0.616


#### Estimators

Unnamed: 0,$\hat Y^{a=0}$,$\hat Y^{a=1}$,$\hat \beta_a$
Underlying counterfactual data,0.61612,0.616,-0.000507
Standardized means,0.612296,0.614541,0.009466
Outcome regression,0.612296,0.614541,0.009466
Inverse probability weighted means (IPW),0.612296,0.614541,0.009466
Doubly robust means,0.612296,0.614541,0.009466
Marginal structural model (MSM),0.612292,0.614541,0.009484
g-estimation,,,0.010537


*Total execution time*: 1.1407756889238954 s

## Simulations with multi-dimensional $L$

In [33]:
# A two-element P_L produces two covariates (columns L0 and L1); all coefficients stay at 0.
summarize_model(generate_simple_logistic_model_data(P_L=(0.5,0.5)), 
    'Multidimensional maximal entropy model', 'No interactions, 1:1 odds for L, A, and Y')

### Multidimensional maximal entropy model

No interactions, 1:1 odds for L, A, and Y

#### Sample data

Unnamed: 0,L0,L1,A,Y
0,False,True,True,1
1,True,False,True,1
2,True,False,False,0
3,True,True,True,0
4,True,True,True,0
5,False,True,True,0
6,True,False,False,0
7,False,True,True,1
8,True,False,True,1
9,True,False,True,0


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,"[0.4996, 0.50067]"
$\bar A$,0.50053
$\bar Y$,0.49801
$\bar Y^c_0$,0.49687
$\bar Y^c_1$,0.50062


#### Estimators

Unnamed: 0,$\hat Y^{a=0}$,$\hat Y^{a=1}$,$\hat \beta_a$
Underlying counterfactual data,0.49687,0.50062,0.015
Standardized means,0.496104,0.499924,0.015282
Outcome regression,0.496104,0.499924,0.015282
Inverse probability weighted means (IPW),0.496104,0.499924,0.015282
Doubly robust means,0.496104,0.499924,0.015282
Marginal structural model (MSM),0.496114,0.499909,0.015182
g-estimation,,,0.015282


*Total execution time*: 1.0057426958810538 s

In [34]:
# Two covariates with P(L0)=0.2 and P(L1)=0.5. L0 raises the log-odds of treatment
# by 4 (baseline -2). The treatment shifts the log-odds of Y by -2 where L0 is set
# and by +2 where L1 is set; beta_Y_A keeps its default of 0.
summarize_model(generate_simple_logistic_model_data(
    P_L=(0.2,0.5), beta_A_0=-2, beta_A_L=(4,0), beta_Y_L=(2, 1), beta_Y_LA=(-2,2)), 
    'Treatment already targeted',
    'Treatment only benefits rare condition, commonly causes harm. Common risk factor present.')

### Treatment already targeted

Treatment only benefits rare condition, commonly causes harm. Common risk factor present.

#### Sample data

Unnamed: 0,L0,L1,A,Y
0,False,False,False,0
1,False,False,False,0
2,False,True,False,1
3,False,True,False,1
4,True,False,True,0
5,False,False,False,1
6,False,True,False,0
7,False,False,False,0
8,False,False,False,0
9,False,False,False,1


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,"[0.20012, 0.49972]"
$\bar A$,0.27287
$\bar Y$,0.65085
$\bar Y^c_0$,0.67449
$\bar Y^c_1$,0.72474


#### Estimators

Unnamed: 0,$\hat Y^{a=0}$,$\hat Y^{a=1}$,$\hat \beta_a$
Underlying counterfactual data,0.67449,0.72474,0.239533
Standardized means,0.673424,0.72448,0.243078
Outcome regression,0.673424,0.72448,0.243078
Inverse probability weighted means (IPW),0.673424,0.72448,0.243078
Doubly robust means,0.673424,0.72448,0.243078
Marginal structural model (MSM),0.673305,0.724286,0.24265
g-estimation,,,-17.814648


*Total execution time*: 5.420631361892447 s

In [35]:
# Same parameter values as the previous simulation with the two covariates'
# roles exchanged (every per-covariate tuple is reversed).
summarize_model(generate_simple_logistic_model_data(
    P_L=(0.5,0.2), beta_A_0=-2, beta_A_L=(0,4), beta_Y_L=(1, 2), beta_Y_LA=(2,-2)), 
    'Treatment already targeted, swapped L values',
    'Treatment only benefits rare condition, commonly causes harm. Common risk factor present.')

### Treatment already targeted, swapped L values

Treatment only benefits rare condition, commonly causes harm. Common risk factor present.

#### Sample data

Unnamed: 0,L0,L1,A,Y
0,True,True,True,0
1,True,True,True,1
2,True,False,False,1
3,True,True,True,1
4,True,False,False,1
5,True,False,False,0
6,True,False,False,1
7,True,False,False,1
8,True,False,False,1
9,False,False,False,1


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,"[0.4999, 0.2015]"
$\bar A$,0.27425
$\bar Y$,0.65459
$\bar Y^c_0$,0.67767
$\bar Y^c_1$,0.72597


#### Estimators

Unnamed: 0,$\hat Y^{a=0}$,$\hat Y^{a=1}$,$\hat \beta_a$
Underlying counterfactual data,0.67767,0.72597,0.231187
Standardized means,0.677981,0.730735,0.253847
Outcome regression,0.677981,0.730735,0.253847
Inverse probability weighted means (IPW),0.677981,0.730735,0.253847
Doubly robust means,0.677981,0.730735,0.253847
Marginal structural model (MSM),0.677986,0.731544,0.25794
g-estimation,,,-17.621491


*Total execution time*: 4.612512594088912 s

In [36]:
# Same covariate probabilities as the first targeted-treatment simulation, with
# every coefficient divided by 4.
summarize_model(generate_simple_logistic_model_data(
    P_L=(0.2,0.5), beta_A_0=-2/4, beta_A_L=(4/4,0/4), beta_Y_L=(2/4, 1/4), beta_Y_LA=(-2/4,2/4)), 
    'Treatment already targeted, effect size reduced',
    'Treatment only benefits rare condition, commonly causes harm. Common risk factor present.')

### Treatment already targeted, effect size reduced

Treatment only benefits rare condition, commonly causes harm. Common risk factor present.

#### Sample data

Unnamed: 0,L0,L1,A,Y
0,False,False,False,1
1,False,False,False,0
2,False,False,False,0
3,True,True,True,0
4,True,False,True,1
5,False,False,False,0
6,False,True,False,1
7,False,False,False,0
8,False,True,False,1
9,True,True,False,1


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,"[0.19701, 0.50136]"
$\bar A$,0.42406
$\bar Y$,0.56419
$\bar Y^c_0$,0.55585
$\bar Y^c_1$,0.58832


#### Estimators

Unnamed: 0,$\hat Y^{a=0}$,$\hat Y^{a=1}$,$\hat \beta_a$
Underlying counterfactual data,0.55585,0.58832,0.132689
Standardized means,0.553627,0.589247,0.145518
Outcome regression,0.553627,0.589247,0.145518
Inverse probability weighted means (IPW),0.553627,0.589247,0.145518
Doubly robust means,0.553627,0.589247,0.145518
Marginal structural model (MSM),0.55363,0.589266,0.145586
g-estimation,,,0.155748


*Total execution time*: 1.3699218060355633 s

## Notes for next time

- Consider adding g-estimation (Jamie Robins; different from the g-formula, performs well but is difficult to understand)
- Consider looking at propensity weighting of general hospital population and population with CAM screens to use model for delirium prediction with patients with CAM screens to predict risk in general population (a censoring problem)
- Consider simulating L to have same range as actual data (or sampled from actual data?) or subset of actual L?  Likely better to wait until new data import pipeline is working.
- (Later) will need to think about feature selection for real model; may want to filter first for features correlated with outcome or intervention, then go through by hand for likely causal relation given domain knowledge.
- Consider replacing existing non-parametric estimators - likely will require trying different models with outcome regression, IPW, and doubly robust (all should match if models are working properly).
- Could consider using the difference between IPW and outcome regression as part of the loss function? (unproven - only guarantees that they match)