In [1]:
import numpy as np
import scipy as sp 
import pandas as pd
from scipy import stats as sps
from sklearn.linear_model import LogisticRegression
from IPython.display import display, Markdown

In [2]:
from datetime import datetime

## Example data from Chapter 2 of "What If?"

In [3]:
# Example data from Chapter 2 of "What If?": one entry per individual,
# listed in the same order as `ch2names`.
ch2names = [
    'Rheia', 'Kronos', 'Demeter', 'Hades',
    'Hestia', 'Poseidon', 'Hera', 'Zeus',
    'Artemis', 'Apollo', 'Leto', 'Ares',
    'Athena', 'Hephaestus', 'Aphrodite', 'Cyclope',
    'Persephone', 'Hermes', 'Hebe', 'Dionysus',
]
ch2data = {
    # covariate
    'L': [0, 0, 0, 0, 0, 0, 0, 0,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
    # treatment
    'A': [0, 0, 0, 0, 1, 1, 1, 1,
          0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
    # outcome
    'Y': [0, 1, 0, 0, 0, 0, 0, 1,
          1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0],
}
pd.DataFrame(ch2data, index=ch2names)

Unnamed: 0,L,A,Y
Rheia,0,0,0
Kronos,0,0,1
Demeter,0,0,0
Hades,0,0,0
Hestia,0,1,0
Poseidon,0,1,0
Hera,0,1,0
Zeus,0,1,1
Artemis,1,0,1
Apollo,1,0,1


## Basic function definitions

A utility function for generating all binary vectors of a given length.

In [4]:
def all_binary_vectors(length):
    """Return every boolean vector of the given length as a list of lists.

    Vectors are ordered like binary counting with False < True and the
    first position most significant. A length of 0 yields ``[[]]``.
    """
    vectors = [[]]
    for _ in range(length):
        extended = []
        for prefix in vectors:
            # each existing prefix branches into a False and a True continuation
            extended.append(prefix + [False])
            extended.append(prefix + [True])
        vectors = extended
    return vectors

In [5]:
# Show the 2**3 = 8 vectors generated for length 3, one per row.
pd.DataFrame(all_binary_vectors(length=3))

Unnamed: 0,0,1,2
0,False,False,False
1,False,False,True
2,False,True,False
3,False,True,True
4,True,False,False
5,True,False,True
6,True,True,False
7,True,True,True


Calculate the standardized means for each level of A for a given dataset

In [6]:
def fit_phat_Y_given_L_A(L, A, Y):
    """Tabulate the mean of Y within every (L, A) cell and return a lookup function.

    Parameters
    ----------
    L : array-like, one row per sample (a 1-D input is treated as one column).
        Compared against every binary vector from ``all_binary_vectors``.
    A : array-like with one treatment value per sample, compared to False/True.
    Y : array-like with one outcome per sample.

    Returns
    -------
    phat_Y_given_L_A : callable ``(L, A) -> ndarray``
        For each row of ``L`` (reshaped to ``(-1, M)``) and treatment ``A``
        (scalar or one value per row), the tabulated mean of Y for that cell.
        Cells with no training samples hold NaN (mean of an empty selection);
        rows matching no binary level keep the initial value 0.
    """
    A = np.array(A)
    Y = np.array(Y)
    L = np.reshape(np.array(L), (len(Y), -1)) # force L to be a 2D array
    M = L.shape[1]
    A_levels = [False, True]
    L_levels = all_binary_vectors(M)
    phat_Y_given_L_A_table = np.empty((len(L_levels), len(A_levels)))

    for i, l in enumerate(L_levels):
        # np.all replaces np.product(...) != 0; np.product was removed in NumPy 2.0
        mask_l = np.all(L == l, axis=1)
        for j, a in enumerate(A_levels):
            phat_Y_given_L_A_table[i, j] = np.mean(Y[mask_l & (A == a)])

    def phat_Y_given_L_A(L, A):
        A = np.array(A)
        L = np.reshape(np.array(L), (-1, M)) # force L to be a 2D array
        result = np.zeros(L.shape[0])
        for i, l in enumerate(L_levels):
            mask_l = np.all(L == l, axis=1)
            for j, a in enumerate(A_levels):
                # assignment (not multiply-add) so a NaN cell only affects its own rows
                result[mask_l & (A == a)] = phat_Y_given_L_A_table[i, j]
        return result

    return phat_Y_given_L_A

In [7]:
def fit_phat_L(L):
    """Tabulate the empirical frequency of every binary level of L.

    Parameters
    ----------
    L : array-like, one row per sample (a 1-D input is treated as one column).

    Returns
    -------
    phat_L : callable ``(L) -> ndarray``
        For each row of ``L`` (reshaped to ``(-1, M)``), the fraction of
        training rows equal to that row. Rows matching no binary level get 0.
    """
    L = np.array(L)
    L = np.reshape(L, (L.shape[0], -1)) # force L to be a 2D array
    M = L.shape[1]
    L_levels = all_binary_vectors(M)
    phat_L_table = np.zeros((len(L_levels),))

    for i, l in enumerate(L_levels):
        # np.all replaces np.product(...) != 0; np.product was removed in NumPy 2.0
        mask_l = np.all(L == l, axis=1)
        phat_L_table[i] = np.mean(mask_l)

    def phat_L(L):
        L = np.reshape(np.array(L), (-1, M)) # force L to be a 2D array
        result = np.zeros((L.shape[0],))
        for i, l in enumerate(L_levels):
            mask_l = np.all(L == l, axis=1)
            result += mask_l * phat_L_table[i]
        return result

    return phat_L

In [8]:
def standardized_means_for_A(L, A, Y):
    """Standardized mean of Y for each treatment level.

    For each level ``a`` in ``[False, True]`` this computes
    ``sum_l phat(Y | L=l, A=a) * phat(L=l)`` over all binary levels ``l``.

    Parameters
    ----------
    L : array-like, one row per sample (a 1-D input is treated as one column).
    A : array-like with one treatment value per sample.
    Y : array-like with one outcome per sample.

    Returns
    -------
    dict with keys ``'A_level'`` (``[False, True]``) and ``'Y_mean'``
    (ndarray with one standardized mean per level). An entry is NaN when a
    stratum that does occur in the data has no samples with that treatment.
    """
    A = np.array(A)
    A_levels = [False, True]
    L = np.array(L)
    L = np.reshape(L, (L.shape[0], -1)) # force L to be a 2D array
    L_levels = all_binary_vectors(L.shape[1])
    Y_mean = np.zeros((len(A_levels),))

    phat_L = fit_phat_L(L)
    phat_Y_given_L_A = fit_phat_Y_given_L_A(L, A, Y)

    for l in L_levels:
        weight = phat_L(l)[0]
        if weight == 0:
            # A stratum that never occurs contributes nothing; its outcome mean
            # is NaN and NaN * 0 would otherwise poison the whole sum.
            continue
        for i, a in enumerate(A_levels):
            Y_mean[i] += phat_Y_given_L_A(l, a)[0] * weight

    return {
        'A_level': A_levels,
        'Y_mean': Y_mean,
    }

In [9]:
# Standardized means on the chapter 2 data.
pd.DataFrame(standardized_means_for_A(L=ch2data['L'], A=ch2data['A'], Y=ch2data['Y']))

Unnamed: 0,A_level,Y_mean
0,False,0.5
1,True,0.5


In [10]:
def outcome_regression_for_A(L, A, Y):
    """Outcome-regression mean of Y for each treatment level.

    Fits the tabulated outcome model on (L, A, Y), then for each level in
    ``[False, True]`` averages the model's prediction over the observed rows
    of L with the treatment set to that level.

    Returns a dict with keys ``'A_level'`` and ``'Y_mean'`` (ndarray).
    """
    treatment = np.array(A)
    raw_covariates = np.array(L)
    # force the covariates to be a 2D array, one row per sample
    covariates = np.reshape(raw_covariates, (raw_covariates.shape[0], -1))
    levels = [False, True]

    predict_outcome = fit_phat_Y_given_L_A(covariates, treatment, Y)

    means = np.zeros((len(levels),))
    for k, level in enumerate(levels):
        predicted = predict_outcome(covariates, level)
        means[k] += np.mean(predicted)

    return {'A_level': levels, 'Y_mean': means}

In [11]:
# Outcome-regression means on the chapter 2 data.
pd.DataFrame(outcome_regression_for_A(L=ch2data['L'], A=ch2data['A'], Y=ch2data['Y']))

Unnamed: 0,A_level,Y_mean
0,False,0.5
1,True,0.5


Calculate the inverse probability weighted means for each level of A for a given dataset

In [12]:
# Separately tabulate the frequency of A for each possible level of L
# (note: vectorized with respect to samples, so fast but a little ugly)
def fit_phat_A_given_L(L, A):
    """Tabulate the empirical frequency of each treatment level within each level of L.

    Parameters
    ----------
    L : array-like, one row per sample (a 1-D input is treated as one column).
    A : array-like with one treatment value per sample, compared to False/True.

    Returns
    -------
    phat_A_given_L : callable ``(L) -> ndarray`` of shape ``(n_rows, 2)``
        Column ``j`` holds the tabulated frequency of ``A == [False, True][j]``
        for each row's level of L. Rows whose level was never seen during
        fitting get NaN; rows matching no binary level keep 0.
    """
    A = np.array(A)
    L = np.reshape(np.array(L), (len(A), -1)) # force L to be a 2D array
    A_levels = [False, True]
    L_levels = all_binary_vectors(L.shape[1])
    # NaN marks strata with no training samples (conditional frequency undefined)
    phat_A_given_L_table = np.full((len(L_levels), len(A_levels)), np.nan)

    for i, l in enumerate(L_levels):
        # np.all replaces np.product(...) != 0; np.product was removed in NumPy 2.0
        mask_l = np.all(L == l, axis=1)
        if not mask_l.any():
            continue
        for j, a in enumerate(A_levels):
            phat_A_given_L_table[i, j] = np.mean(A[mask_l] == a)

    def phat_A_given_L(L):
        L = np.array(L)
        L = np.reshape(L, (L.shape[0], -1)) # force L to be a 2D array
        result = np.zeros((L.shape[0], len(A_levels)))
        for i, l in enumerate(L_levels):
            mask_l = np.all(L == l, axis=1)
            # Masked assignment instead of `result += mask * table`: with the
            # multiply-add form a NaN entry for an unobserved stratum turned
            # every row NaN, because False * NaN is NaN.
            result[mask_l, :] = phat_A_given_L_table[i, :]
        return result

    return phat_A_given_L

In [13]:
def ip_weighted_means_for_A(L, A, Y):
    """Inverse-probability-weighted mean of Y for each treatment level.

    For each level ``a`` in ``[False, True]`` this averages
    ``Y * (A == a) / phat(A=a | L)`` over all samples.

    Parameters
    ----------
    L : array-like, one row per sample (a 1-D input is treated as one column).
    A : array-like with one treatment value per sample.
    Y : array-like with one outcome per sample.

    Returns
    -------
    dict with keys ``'A_level'`` (``[False, True]``) and ``'Y_mean'``
    (list with one weighted mean per level).
    """
    A = np.array(A)
    Y = np.array(Y)  # coerce like the sibling estimators so list inputs behave uniformly
    A_levels = [False, True]
    phat_A_given_L = fit_phat_A_given_L(L, A)
    # The propensity lookup does not depend on the treatment level: evaluate it once.
    propensity = phat_A_given_L(L)
    Y_mean = [np.mean(Y * (A == a) / propensity[:, i]) for i, a in enumerate(A_levels)]

    return {
        'A_level': A_levels,
        'Y_mean': Y_mean,
    }

In [14]:
# Inverse-probability-weighted means on the chapter 2 data.
pd.DataFrame(ip_weighted_means_for_A(L=ch2data['L'], A=ch2data['A'], Y=ch2data['Y']))

Unnamed: 0,A_level,Y_mean
0,False,0.5
1,True,0.5


Calculate the doubly robust means for each level of A for a given dataset

In [15]:
def doubly_robust_means_for_A(L, A, Y):
    """Doubly robust mean of Y for each treatment level.

    Combines the tabulated outcome model ``q = phat(Y | L, A=a)`` with the
    tabulated treatment model ``f = phat(A=a | L)``: for each level ``a`` in
    ``[False, True]`` it averages ``q + (A == a) / f * (Y - q)`` over samples.

    Returns a dict with keys ``'A_level'`` and ``'Y_mean'`` (ndarray).
    """
    treatment = np.array(A)
    levels = [False, True]
    outcome = np.array(Y)

    propensity_model = fit_phat_A_given_L(L, treatment)
    outcome_model = fit_phat_Y_given_L_A(L, treatment, outcome)
    propensity = propensity_model(L)

    means = np.zeros((len(levels),))
    for k, level in enumerate(levels):
        predicted = outcome_model(L, level)
        received = (treatment == level)
        # outcome-model prediction plus the inverse-probability-weighted residual
        means[k] = np.mean(predicted + received / propensity[:, k] * (outcome - predicted))

    return {'A_level': levels, 'Y_mean': means}

In [16]:
# Doubly robust means on the chapter 2 data.
pd.DataFrame(doubly_robust_means_for_A(L=ch2data['L'], A=ch2data['A'], Y=ch2data['Y']))

Unnamed: 0,A_level,Y_mean
0,False,0.5
1,True,0.5


A simple utility function for summarizing results for a given model

In [17]:
def summarize_model(data, title='', description=''):
    """Display a report for one dataset: sample rows, summary statistics and all estimators.

    ``data`` is a mapping with keys ``'L'``, ``'A'`` and ``'Y'``; when it also
    contains ``'Y_a_0'`` (and then ``'Y_a_1'``), their means are added to the
    summary statistics. ``title`` and ``description`` are rendered as Markdown.
    Nothing is returned; everything is shown through ``display``.
    """
    treatment = data['A']
    # force L to be a 2D array, one row per sample
    covariates = np.reshape(np.array(data['L']), (len(treatment), -1))
    outcome = data['Y']

    def show_markdown(text):
        display(Markdown(text))

    show_markdown('### ' + title)
    show_markdown(description)

    show_markdown('#### Sample data')
    columns = [
        pd.DataFrame(covariates).add_prefix("L"),
        pd.Series(treatment, name='A'),
        pd.Series(outcome, name='Y'),
    ]
    # only the first 12 rows are shown
    display(pd.concat(columns, axis=1).iloc[:12])

    show_markdown('#### Summary statistics')
    summary = {
        '$\\bar L$': [np.mean(covariates[:, k]) for k in range(covariates.shape[1])],
        '$\\bar A$': np.mean(treatment),
        '$\\bar Y$': np.mean(outcome),
    }
    if 'Y_a_0' in data:
        summary['$\\bar Y^c_0$'] = np.mean(data['Y_a_0'])
        summary['$\\bar Y^c_1$'] = np.mean(data['Y_a_1'])
    display(pd.DataFrame({'statistic': summary.values()}, index=summary.keys()))

    # one section per estimator, in display order
    sections = [
        ('#### Standardized means', standardized_means_for_A),
        ('#### Outcome regression', outcome_regression_for_A),
        ('#### Inverse probability weighted means', ip_weighted_means_for_A),
        ('#### Doubly-robust means', doubly_robust_means_for_A),
    ]
    for heading, estimator in sections:
        show_markdown(heading)
        display(pd.DataFrame(estimator(covariates, treatment, outcome)))

In [18]:
# Sanity check: run the full report on the chapter 2 data.
summarize_model(
    ch2data,
    title='Analysis of data from chapter 2',
    description='Validating the summary function with data from chapter 2',
)

### Analysis of data from chapter 2

Validating the summary function with data from chapter 2

#### Sample data

Unnamed: 0,L0,A,Y
0,0,0,0
1,0,0,1
2,0,0,0
3,0,0,0
4,0,1,0
5,0,1,0
6,0,1,0
7,0,1,1
8,1,0,1
9,1,0,1


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,[0.6]
$\bar A$,0.65
$\bar Y$,0.5


#### Standardized means

Unnamed: 0,A_level,Y_mean
0,False,0.5
1,True,0.5


#### Outcome regression

Unnamed: 0,A_level,Y_mean
0,False,0.5
1,True,0.5


#### Inverse probability weighted means

Unnamed: 0,A_level,Y_mean
0,False,0.5
1,True,0.5


#### Doubly-robust means

Unnamed: 0,A_level,Y_mean
0,False,0.5
1,True,0.5


## Testing with generated data

Start with a simple parameterized model...

In [19]:
def generate_simple_logistic_model_data(N=1000000, P_L=0.5,
                                        beta_A_0=0, beta_A_L=0, 
                                        beta_Y_0=0, beta_Y_L=0, beta_Y_A=0, beta_Y_LA=0,
                                        debug_info=False, seed=None):
    """Simulate binary covariates L, treatment A and outcome Y from logistic models.

    Model (``sigmoid`` is the logistic CDF, M is the number of covariates):

    - ``L[:, m] ~ Bernoulli(P_L[m])``
    - ``A ~ Bernoulli(sigmoid(beta_A_0 + L . beta_A_L))``
    - ``Y_a_0 ~ Bernoulli(sigmoid(beta_Y_0 + L . beta_Y_L))``
    - ``Y_a_1 ~ Bernoulli(sigmoid(beta_Y_0 + L . beta_Y_L + beta_Y_A + L . beta_Y_LA))``
    - ``Y`` is ``Y_a_1`` where A is true and ``Y_a_0`` elsewhere.

    Parameters
    ----------
    N : number of samples.
    P_L : scalar or sequence of M probabilities, one per covariate.
    beta_A_0, beta_Y_0, beta_Y_A : scalar coefficients.
    beta_A_L, beta_Y_L, beta_Y_LA : scalar (repeated for every covariate) or
        sequence of M coefficients.
    debug_info : when true, also return ``'P_L'``, ``'P_A_given_l'`` and
        ``'P_Y_given_l_a'`` (the outcome probability under the treatment
        each sample actually received).
    seed : optional seed (or ``numpy.random.Generator``) for reproducible
        draws. With the default ``None`` the global ``np.random`` state is
        used, as before.

    Returns
    -------
    dict with ``'L'`` (bool, shape ``(N, M)``), ``'A'`` (bool), ``'Y'``,
    ``'Y_a_0'``, ``'Y_a_1'`` (one value per sample), plus the debug entries
    when requested.
    """
    P_L = np.array(P_L).reshape((-1,)) # flatten P_L to a 1D array of length M
    M = len(P_L)

    def as_coefficient_vector(beta):
        # a scalar applies to every covariate; anything else must hold M values
        if np.isscalar(beta):
            return np.array([beta] * M)
        return np.array([beta]).reshape((M,))

    beta_A_L = as_coefficient_vector(beta_A_L)
    beta_Y_L = as_coefficient_vector(beta_Y_L)
    beta_Y_LA = as_coefficient_vector(beta_Y_LA)

    rng = np.random if seed is None else np.random.default_rng(seed)

    # Draw order (L, A, Y_a_0, Y_a_1) is kept so results under a fixed global
    # np.random seed are unchanged.
    L = rng.uniform(size=(N, M)) < P_L
    P_A_given_l = sps.logistic.cdf(beta_A_0 + L.dot(beta_A_L))
    A = rng.uniform(size=N) < P_A_given_l

    P_Y_given_l_a_0 = sps.logistic.cdf(beta_Y_0 + L.dot(beta_Y_L))
    P_Y_given_l_a_1 = sps.logistic.cdf(beta_Y_0 + L.dot(beta_Y_L) +
                                       beta_Y_A +
                                       L.dot(beta_Y_LA))
    Y_a_0 = rng.uniform(size=N) < P_Y_given_l_a_0
    Y_a_1 = rng.uniform(size=N) < P_Y_given_l_a_1
    Y = Y_a_0 * (1 - A) + Y_a_1 * A

    result = {
        'L' : L,
        'A' : A,
        'Y' : Y,
        'Y_a_0': Y_a_0,
        'Y_a_1': Y_a_1,
    }
    if debug_info:
        # Previously this name was undefined, so debug_info=True raised NameError.
        P_Y_given_l_a = np.where(A, P_Y_given_l_a_1, P_Y_given_l_a_0)
        result.update({
            'P_L' : P_L,
            'P_A_given_l' : P_A_given_l,
            'P_Y_given_l_a' : P_Y_given_l_a,
        })
    return result

In [20]:
# Baseline scenario: every coefficient at its default of 0.
summarize_model(
    generate_simple_logistic_model_data(),
    title='Maximal entropy model',
    description='No interactions, 1:1 odds for L, A, and Y',
)

### Maximal entropy model

No interactions, 1:1 odds for L, A, and Y

#### Sample data

Unnamed: 0,L0,A,Y
0,False,False,1
1,True,True,0
2,False,True,0
3,False,True,1
4,True,True,0
5,True,False,1
6,False,True,0
7,True,False,1
8,False,True,1
9,True,True,0


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,[0.500091]
$\bar A$,0.500835
$\bar Y$,0.500096
$\bar Y^c_0$,0.500087
$\bar Y^c_1$,0.500161


#### Standardized means

Unnamed: 0,A_level,Y_mean
0,False,0.500053
1,True,0.500139


#### Outcome regression

Unnamed: 0,A_level,Y_mean
0,False,0.500053
1,True,0.500139


#### Inverse probability weighted means

Unnamed: 0,A_level,Y_mean
0,False,0.500053
1,True,0.500139


#### Doubly-robust means

Unnamed: 0,A_level,Y_mean
0,False,0.500053
1,True,0.500139


In [21]:
# Treatment lowers the log-odds of Y by 1; the covariate has no effect.
summarize_model(
    generate_simple_logistic_model_data(beta_Y_A=-1),
    title='Simple randomized protective intervention',
    description='Assumes no effect from covariate',
)

### Simple randomized protective intervention

Assumes no effect from covariate

#### Sample data

Unnamed: 0,L0,A,Y
0,True,True,0
1,True,False,1
2,False,True,0
3,True,False,1
4,True,True,1
5,True,True,0
6,False,False,1
7,True,False,0
8,True,False,1
9,False,False,1


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,[0.498906]
$\bar A$,0.499903
$\bar Y$,0.384103
$\bar Y^c_0$,0.499793
$\bar Y^c_1$,0.26913


#### Standardized means

Unnamed: 0,A_level,Y_mean
0,False,0.499427
1,True,0.268734


#### Outcome regression

Unnamed: 0,A_level,Y_mean
0,False,0.499427
1,True,0.268734


#### Inverse probability weighted means

Unnamed: 0,A_level,Y_mean
0,False,0.499427
1,True,0.268734


#### Doubly-robust means

Unnamed: 0,A_level,Y_mean
0,False,0.499427
1,True,0.268734


In [22]:
# Covariate raises the log-odds of Y by 1, treatment lowers it by 1.
summarize_model(
    generate_simple_logistic_model_data(beta_Y_A=-1, beta_Y_L=1),
    title='Randomized, covariate is risk, intervention is protective',
    description='1:1 odds of covariate',
)

### Randomized, covariate is risk, intervention is protective

1:1 odds of covariate

#### Sample data

Unnamed: 0,L0,A,Y
0,False,True,0
1,False,False,1
2,False,False,0
3,True,True,1
4,False,True,1
5,False,False,0
6,False,True,1
7,True,False,1
8,False,True,0
9,False,True,0


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,[0.499937]
$\bar A$,0.499998
$\bar Y$,0.499663
$\bar Y^c_0$,0.614837
$\bar Y^c_1$,0.384295


#### Standardized means

Unnamed: 0,A_level,Y_mean
0,False,0.614596
1,True,0.384728


#### Outcome regression

Unnamed: 0,A_level,Y_mean
0,False,0.614596
1,True,0.384728


#### Inverse probability weighted means

Unnamed: 0,A_level,Y_mean
0,False,0.614596
1,True,0.384728


#### Doubly-robust means

Unnamed: 0,A_level,Y_mean
0,False,0.614596
1,True,0.384728


Note that $Y_\mathrm{mean}$ in the first row corresponds to $P(Y | \mathrm{do}(A=\mathrm{False}))$ and the second row corresponds to $P(Y | \mathrm{do}(A=\mathrm{True}))$.

The first row is $\mathrm{sigmoid}(0\cdot \beta_A + \ldots)$

The second row is $\mathrm{sigmoid}(1 \cdot \beta_A + \ldots)$

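We can approximately recover $\beta_A$ (the `beta_Y_A` argument, $-1$ here) by inverting the sigmoid function for each row and taking the difference between them. The recovery is not exact: each row is an average of sigmoids over the levels of $L$ rather than a single sigmoid, so the difference of the logits of these marginal probabilities is somewhat closer to zero than the conditional coefficient: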

In [23]:
# Difference of logits: treated-row probability minus untreated-row probability.
# NOTE(review): both probabilities are hard-coded and do not match the Y_mean
# values displayed for the run above (0.384728 and 0.614596); presumably they
# were copied from an earlier run, so re-running the notebook will not update them.
sp.special.logit(0.383850)-sp.special.logit(0.616879)

-0.9495606173935895

In [24]:
# Same outcome model as before, but beta_A_0=-2 makes treatment uncommon.
summarize_model(
    generate_simple_logistic_model_data(beta_A_0=-2, beta_Y_A=-1, beta_Y_L=1),
    title='Randomized, covariate is risk, rare intervention is protective',
    description='1:1 odds of covariate',
)

### Randomized, covariate is risk, rare intervention is protective

1:1 odds of covariate

#### Sample data

Unnamed: 0,L0,A,Y
0,True,False,1
1,False,False,0
2,True,False,1
3,True,False,0
4,True,False,1
5,True,False,1
6,False,False,0
7,True,False,1
8,False,False,0
9,True,False,1


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,[0.500537]
$\bar A$,0.119652
$\bar Y$,0.587937
$\bar Y^c_0$,0.6157
$\bar Y^c_1$,0.384989


#### Standardized means

Unnamed: 0,A_level,Y_mean
0,False,0.615605
1,True,0.384368


#### Outcome regression

Unnamed: 0,A_level,Y_mean
0,False,0.615605
1,True,0.384368


#### Inverse probability weighted means

Unnamed: 0,A_level,Y_mean
0,False,0.615605
1,True,0.384368


#### Doubly-robust means

Unnamed: 0,A_level,Y_mean
0,False,0.615605
1,True,0.384368


In [25]:
# P_L=0.75 makes the covariate common (3:1 odds); the description previously
# still said "1:1 odds", left over from the preceding scenarios.
summarize_model(generate_simple_logistic_model_data(P_L=0.75, beta_A_0=-2, beta_Y_A=-1, beta_Y_L=1),
                'Randomized, common covariate is risk, rare intervention is protective',
                '3:1 odds of covariate')

### Randomized, common covariate is risk, rare intervention is protective

1:1 odds of covariate

#### Sample data

Unnamed: 0,L0,A,Y
0,True,False,0
1,True,False,1
2,False,False,1
3,False,True,0
4,True,False,1
5,True,False,1
6,True,False,0
7,True,False,1
8,True,False,1
9,True,False,0


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,[0.749748]
$\bar A$,0.119483
$\bar Y$,0.645685
$\bar Y^c_0$,0.673123
$\bar Y^c_1$,0.442166


#### Standardized means

Unnamed: 0,A_level,Y_mean
0,False,0.673209
1,True,0.442841


#### Outcome regression

Unnamed: 0,A_level,Y_mean
0,False,0.673209
1,True,0.442841


#### Inverse probability weighted means

Unnamed: 0,A_level,Y_mean
0,False,0.673209
1,True,0.442841


#### Doubly-robust means

Unnamed: 0,A_level,Y_mean
0,False,0.673209
1,True,0.442841


In [26]:
# Confounded scenario: beta_A_L=1 makes the covariate raise the odds of treatment as well.
summarize_model(
    generate_simple_logistic_model_data(
        P_L=0.75,
        beta_A_0=-2, beta_A_L=1,
        beta_Y_A=-1, beta_Y_L=1,
    ),
    title='Common covariate is risk for disease and intervention, intervention is protective',
    description='',
)

### Common covariate is risk for disease and intervention, intervention is protective



#### Sample data

Unnamed: 0,L0,A,Y
0,True,True,1
1,True,True,1
2,False,False,0
3,True,False,1
4,False,False,1
5,False,False,1
6,True,False,1
7,True,False,1
8,True,False,0
9,True,False,1


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,[0.749738]
$\bar A$,0.231778
$\bar Y$,0.619979
$\bar Y^c_0$,0.673696
$\bar Y^c_1$,0.442385


#### Standardized means

Unnamed: 0,A_level,Y_mean
0,False,0.673645
1,True,0.441654


#### Outcome regression

Unnamed: 0,A_level,Y_mean
0,False,0.673645
1,True,0.441654


#### Inverse probability weighted means

Unnamed: 0,A_level,Y_mean
0,False,0.673645
1,True,0.441654


#### Doubly-robust means

Unnamed: 0,A_level,Y_mean
0,False,0.673645
1,True,0.441654


In [27]:
# Interaction scenario: beta_Y_LA=-2 offsets beta_Y_A=1 and beta_Y_L=1 when both are present.
summarize_model(
    generate_simple_logistic_model_data(
        P_L=0.5,
        beta_A_0=0,
        beta_Y_A=1, beta_Y_L=1, beta_Y_LA=-2,
    ),
    title='Covariate and intervention are risks, but negate eachother',
    description='1:1 odds of covariate',
)

### Covariate and intervention are risks, but negate eachother

1:1 odds of covariate

#### Sample data

Unnamed: 0,L0,A,Y
0,True,True,1
1,False,True,0
2,True,False,0
3,True,False,1
4,True,False,1
5,True,False,1
6,True,True,0
7,True,True,1
8,False,True,1
9,False,True,1


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,[0.50024]
$\bar A$,0.500035
$\bar Y$,0.615763
$\bar Y^c_0$,0.615213
$\bar Y^c_1$,0.615476


#### Standardized means

Unnamed: 0,A_level,Y_mean
0,False,0.615576
1,True,0.61584


#### Outcome regression

Unnamed: 0,A_level,Y_mean
0,False,0.615576
1,True,0.61584


#### Inverse probability weighted means

Unnamed: 0,A_level,Y_mean
0,False,0.615576
1,True,0.61584


#### Doubly-robust means

Unnamed: 0,A_level,Y_mean
0,False,0.615576
1,True,0.61584


## Simulations with multi-dimensional $L$

In [28]:
# Two independent covariates, all coefficients at their default of 0.
summarize_model(
    generate_simple_logistic_model_data(P_L=(0.5, 0.5)),
    title='Multidimensional maximal entropy model',
    description='No interactions, 1:1 odds for L, A, and Y',
)

### Multidimensional maximal entropy model

No interactions, 1:1 odds for L, A, and Y

#### Sample data

Unnamed: 0,L0,L1,A,Y
0,True,False,False,1
1,True,False,True,0
2,True,True,True,0
3,True,True,False,1
4,True,False,False,1
5,True,False,True,1
6,True,True,True,0
7,False,False,True,1
8,True,False,False,1
9,False,True,True,0


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,"[0.49978, 0.499511]"
$\bar A$,0.500625
$\bar Y$,0.500521
$\bar Y^c_0$,0.500233
$\bar Y^c_1$,0.500461


#### Standardized means

Unnamed: 0,A_level,Y_mean
0,False,0.501143
1,True,0.499898


#### Outcome regression

Unnamed: 0,A_level,Y_mean
0,False,0.501143
1,True,0.499898


#### Inverse probability weighted means

Unnamed: 0,A_level,Y_mean
0,False,0.501143
1,True,0.499898


#### Doubly-robust means

Unnamed: 0,A_level,Y_mean
0,False,0.501143
1,True,0.499898


In [29]:
# L0 is the rare covariate (P=0.2) that drives treatment and benefits from it;
# L1 is the common covariate (P=0.5) for which treatment raises the risk.
summarize_model(
    generate_simple_logistic_model_data(
        P_L=(0.2, 0.5),
        beta_A_0=-2, beta_A_L=(4, 0),
        beta_Y_L=(2, 1), beta_Y_LA=(-2, 2),
    ),
    title='Treatment already targeted',
    description='Treatment only benefits rare condition, commonly causes harm. Common risk factor present.',
)

### Treatment already targeted

Treatment only benefits rare condition, commonly causes harm. Common risk factor present.

#### Sample data

Unnamed: 0,L0,L1,A,Y
0,False,True,False,0
1,False,False,False,1
2,False,True,False,1
3,False,False,False,0
4,False,False,False,1
5,False,False,False,0
6,False,True,False,0
7,False,False,False,0
8,False,False,False,0
9,False,True,True,1


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,"[0.19922, 0.500325]"
$\bar A$,0.271121
$\bar Y$,0.652781
$\bar Y^c_0$,0.675495
$\bar Y^c_1$,0.727247


#### Standardized means

Unnamed: 0,A_level,Y_mean
0,False,0.676046
1,True,0.728042


#### Outcome regression

Unnamed: 0,A_level,Y_mean
0,False,0.676046
1,True,0.728042


#### Inverse probability weighted means

Unnamed: 0,A_level,Y_mean
0,False,0.676046
1,True,0.728042


#### Doubly-robust means

Unnamed: 0,A_level,Y_mean
0,False,0.676046
1,True,0.728042


In [None]:
# Same scenario as the previous cell with the two covariates' roles swapped.
summarize_model(
    generate_simple_logistic_model_data(
        P_L=(0.5, 0.2),
        beta_A_0=-2, beta_A_L=(0, 4),
        beta_Y_L=(1, 2), beta_Y_LA=(2, -2),
    ),
    title='Treatment already targeted, swapped L values',
    description='Treatment only benefits rare condition, commonly causes harm. Common risk factor present.',
)

### Treatment already targeted, swapped L values

Treatment only benefits rare condition, commonly causes harm. Common risk factor present.

#### Sample data

Unnamed: 0,L0,L1,A,Y
0,False,False,False,1
1,True,True,True,1
2,True,False,True,1
3,False,True,True,0
4,True,False,False,1
5,True,False,False,1
6,False,False,False,1
7,True,True,True,1
8,True,False,True,1
9,True,False,False,1


#### Summary statistics

Unnamed: 0,statistic
$\bar L$,"[0.500816, 0.200272]"
$\bar A$,0.271804
$\bar Y$,0.653218
$\bar Y^c_0$,0.676374
$\bar Y^c_1$,0.726132


#### Standardized means

Unnamed: 0,A_level,Y_mean
0,False,0.677066
1,True,0.726523


#### Outcome regression

Unnamed: 0,A_level,Y_mean
0,False,0.677066
1,True,0.726523


#### Inverse probability weighted means

Unnamed: 0,A_level,Y_mean
0,False,0.677066
1,True,0.726523


#### Doubly-robust means

Unnamed: 0,A_level,Y_mean
0,False,0.677066
1,True,0.726523


## Notes for next time

- ...