In [1]:
import os
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from super_learner import*
import statsmodels.api as sm
from scipy.stats import norm, t
from scipy.special import logit, expit
import random
from TLP import TLP


In [2]:
def generate_data(N, outcome_type, treatment_type):
    """Simulate confounded data together with its known potential outcomes.

    Four covariates are drawn (W1, W2 Bernoulli; W3, W4 uniform, rounded to
    three decimals), a treatment is assigned with covariate-dependent
    probabilities, and one potential outcome per treatment level is
    generated. The observed outcome ``Y`` is the potential outcome of the
    level each unit actually received.

    Parameters
    ----------
    N : int
        Number of units to simulate.
    outcome_type : {'cls', 'reg'}
        'cls' draws binary outcomes from a logistic model; 'reg' produces
        deterministic continuous outcomes (observed ``Y`` clipped to [-1, 6]).
    treatment_type : {'binary', 'multigroup'}
        'binary' gives a 0/1 treatment; 'multigroup' gives four groups 0..3.

    Returns
    -------
    pandas.DataFrame
        binary: columns ``W1, W2, W3, W4, A, Y, Y1, Y0``.
        multigroup: columns ``W1, W2, W3, W4, Adummy, Y, Y0, Y1, Y2, Y3, A``
        where each ``Adummy`` cell holds the length-4 one-hot vector of the
        unit's group and ``A`` is the integer group index.

    Raises
    ------
    ValueError
        If ``outcome_type`` or ``treatment_type`` is not a supported value.
    """
    # Fail early with a clear message instead of an unbound-variable error
    # further down when no branch matches.
    if treatment_type not in ('binary', 'multigroup'):
        raise ValueError(
            "treatment_type must be 'binary' or 'multigroup', got %r" % (treatment_type,))
    if outcome_type not in ('cls', 'reg'):
        raise ValueError(
            "outcome_type must be 'cls' or 'reg', got %r" % (outcome_type,))

    # ---- covariates -------------------------------------------------------
    W1 = np.random.binomial(1, 0.5, N)
    W2 = np.random.binomial(1, 0.65, N)
    W3 = np.round(np.random.uniform(0, 4, N), decimals=3)
    W4 = np.round(np.random.uniform(0, 5, N), decimals=3)

    def outcome_score(shift):
        # Linear predictor shared by every potential outcome; only the
        # treatment-level shift differs between levels.
        return -1 + shift - 0.1*W1 + 0.3*W2 + 0.25*W3 + 0.2*W4 + 0.15*W2*W4

    # ---- treatment assignment ---------------------------------------------
    if treatment_type == 'binary':
        Ap = expit(-0.4 + 0.2*W2 + 0.15*W3 + 0.2*W4 + 0.15*W2*W4)
        A = np.random.binomial(1, Ap, N)
    else:
        Ap1 = expit(-0.4 + 0.2*W2 + 0.3*W3 + 0.1*W4 + 0.4*W2*W4)
        Ap2 = expit(-0.4 + 0.5*W2 + 0.1*W3 + 0.2*W4 + 0.1*W2*W4)
        Ap3 = expit(-0.4 + 0.7*W2 + 0.5*W3 + 0.3*W4 + 0.2*W2*W4)
        Ap4 = expit(-0.4 + 0.1*W2 + 0.2*W3 + 0.4*W4 + 0.1*W2*W4)
        Ap = np.array([Ap1, Ap2, Ap3, Ap4]).T
        # Normalise each row so the four group probabilities sum to one.
        Ap = Ap / Ap.sum(1).reshape(-1, 1)

        # NOTE: group sampling uses Python's `random` module, not np.random,
        # so both generators must be seeded for reproducible data.
        levels = [0, 1, 2, 3]
        groups = [random.choices(levels, ap)[0] for ap in Ap]
        # One-hot encode: row i is the indicator vector of unit i's group.
        A = np.eye(4)[np.asarray(groups, dtype=int)]

    # ---- potential outcomes -----------------------------------------------
    if outcome_type == 'cls':
        if treatment_type == 'binary':
            Y1 = np.random.binomial(1, expit(outcome_score(1)), N)
            Y0 = np.random.binomial(1, expit(outcome_score(0)), N)
        else:
            # NOTE(review): group 0 uses a shift of 8 here but 3 in the
            # regression branch -- confirm this is intended.
            Y3 = np.random.binomial(1, expit(outcome_score(1)), N)
            Y2 = np.random.binomial(1, expit(outcome_score(2)), N)
            Y1 = np.random.binomial(1, expit(outcome_score(0.5)), N)
            Y0 = np.random.binomial(1, expit(outcome_score(8)), N)
    else:
        if treatment_type == 'binary':
            # The treated potential outcome uses a constant shift of 2 for
            # every unit (it must not depend on the treatment received),
            # so Y1 - Y0 is the true effect for treated and untreated alike.
            Y1 = outcome_score(2)
            Y0 = outcome_score(0)
        else:
            Y3 = outcome_score(1)
            Y2 = outcome_score(2)
            Y1 = outcome_score(0.5)
            Y0 = outcome_score(3)

    # ---- observed outcome -------------------------------------------------
    if treatment_type == 'binary':
        Y = Y1*A + Y0*(1-A)
    else:
        Y = A[:, 3] * Y3 + A[:, 2] * Y2 + A[:, 1] * Y1 + A[:, 0] * Y0

    if outcome_type == 'reg':
        # Only the observed outcome is clipped; the potential-outcome
        # columns are returned unclipped.
        Y = np.clip(Y, -1, 6)

    # ---- assemble the frame -----------------------------------------------
    # Built as a list of rows and transposed (as before) so column dtypes
    # and the per-row one-hot arrays in 'Adummy' stay as callers expect.
    if treatment_type == 'multigroup':
        data = pd.DataFrame([W1, W2, W3, W4, A, Y, Y0, Y1, Y2, Y3]).T
        data.columns = ['W1', 'W2', 'W3', 'W4', 'Adummy', 'Y', 'Y0','Y1', 'Y2', 'Y3']
        # Each one-hot row has a single 1, so argmax recovers the group index.
        data['A'] = A.argmax(axis=1)
    else:
        data = pd.DataFrame([W1, W2, W3, W4, A, Y, Y1, Y0]).T
        data.columns = ['W1', 'W2', 'W3', 'W4', 'A', 'Y', 'Y1','Y0']
    return data
    

In [None]:
# Learner libraries for the outcome (Q) and treatment (G) models.
# Key to the learner names:
#   BR      = bayesian ridge (reg)
#   Elastic = elastic net (reg)
#   SV      = support vector (reg/cls)
#   LR      = linear/logistic regression (reg/cls)
#   RF      = random forest (reg/cls)
#   MLP     = multilayer perceptron (reg/cls)
#   AB      = adaboost (reg/cls)
#   poly    = polynomial linear/logistic regression (reg/cls)
#   NB      = Gaussian Naive Bayes (cls)
# 'Elastic' is currently left out of the Q library; prepend it to include it.
est_dict_Q = ['BR', 'SV', 'LR', 'RF', 'MLP', 'AB', 'poly']
est_dict_G = ['LR', 'NB', 'MLP','SV', 'poly', 'RF','AB']

# Seed both generators: the simulated data draws from numpy's RNG and from
# Python's `random` module, so seeding only one would not be reproducible.
SEED = 0
np.random.seed(SEED)
random.seed(SEED)

outcome_type = 'reg'   # cls or reg
treatment_type = 'multigroup' # binary or multigroup
N = 600
data = generate_data(N=N, outcome_type=outcome_type, treatment_type=treatment_type)

# Ground-truth effects from the simulated potential outcomes, each group
# versus reference group 0 (the Y2/Y3 columns exist only for 'multigroup').
true_psi_1_0 = data.Y1.mean() - data.Y0.mean()
true_psi_2_0 = data.Y2.mean() - data.Y0.mean()
true_psi_3_0 = data.Y3.mean() - data.Y0.mean()
print(true_psi_1_0, true_psi_2_0, true_psi_3_0)

k = 8
tlp = TLP(data, cause='A', outcome='Y', confs=['W1', 'W2', 'W3', 'W4'],
          precs=[], outcome_type=outcome_type, Q_learners=est_dict_Q, G_learners=est_dict_G)

group_comparisons =[[1,0],[2,0],[3,0]]  # comparison in list format with 'group A [vs] reference_group'

all_preds_Q, gts_Q, all_preds_G, gts_G = tlp.fit(k=k, standardized_outcome=False, calibration=True)
pre_update_effects, post_update_effects, ses, ps = tlp.target_multigroup(group_comparisons=group_comparisons)

# Show the targeted estimates next to the ground truth for comparison.
print(post_update_effects)
print(true_psi_1_0, true_psi_2_0, true_psi_3_0)


-2.5000000000000027 -1.0000000000000013 -2.000000000000003
Training G Learners...


 29%|████████████▊                                | 2/7 [00:00<00:01,  2.71it/s]