In [41]:
# experiment on synthetic data 

In [42]:
# No treatment effect
# size of treatment < < size of control 
# size of treatment ≈ size of control 
# different distribution of treatment / control 
# same dis of treatment / control 

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from scipy.stats import norm, sem
from scipy.interpolate import UnivariateSpline
from sklearn.calibration import calibration_curve
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats.stats import pearsonr
from numpy.random import default_rng
import random
pd.set_option('display.max_columns', 100)
from resample import * 
from metalearner import * 

####  Extension: For get_data_with_same_distribution , we always generate one dimensional feature x ∈ (0,1), for w = 0, y = x + 1. for w = 1, y = x + 2. So the ite and ate are both 1. In addition,  # treatment sample = # control sample

In [4]:
syn_data_class = resample_from_synthetic_data(n_sample= 1000000)
d = syn_data_class.get_data_with_same_distribution(ratio = 0.5)

In [5]:
s_learner = Slearner(baselearner=LinearRegression(), is_regressor=True)
s_learner.fit(X = np.array(d['X']).reshape(len(d['X']),1), treatment = np.array(d['W']), 
              y =  np.array(d['Y']))

In [6]:
ite, yhat_ts, yhat_cs, rmse = s_learner.get_ite(X =np.array(d['X']).reshape(len(d['X']),1), treatment = d['W'], y =  d['Y'])

In [7]:
print('ATE for S learner: ', np.mean(ite))

ATE for S learner:  0.9999999999999966


In [8]:
t_learner = Tlearner(LinearRegression(),LinearRegression(), is_regressor= True)
t_learner.fit(X = np.array(d['X']).reshape(-1,1), treatment = np.array(d['W']), 
              y =  np.array(d['Y']))

In [9]:
ite, yhat_ts, yhat_cs, rmse = t_learner.get_ite(X = np.array(d['X']).reshape(-1,1), treatment = d['W'], y =  d['Y'])

In [10]:
print('ATE for T learner: ', np.mean(ite))

ATE for T learner:  1.0000000000000002


In [11]:
x_learner = Xlearner(LinearRegression(),
                     propensity_model = LogisticRegression(),
                    is_regressor= True)
x_learner.fit(X = np.array(d['X']).reshape(-1,1), treatment = np.array(d['W']), 
              y =  np.array(d['Y']))

In [12]:
ite, yhat_ts, yhat_cs, rmse = x_learner.get_ite(X = np.array(d['X']).reshape(-1,1), treatment = d['W'], y =  d['Y'])

In [13]:
print('ATE for X learner: ', np.mean(ite))

ATE for X learner:  1.0


We can draw conclusion from above result that when control and treatment groups are from the same simple distribution 
and when the data sizes are equal, the three metalearners perform equally well. 

#### Paper claims:  X-learner performs particularly well when the treatment group sizes are very unbalanced

In [35]:
syn_data_class = resample_from_synthetic_data(n_sample= 1000000)
d = syn_data_class.get_data_with_same_distribution(ratio = 0.01)

In [36]:
d_test = syn_data_class.get_data_with_same_distribution(ratio = 0.01)

In [37]:
d[d['W'] == 1] 

Unnamed: 0,W,Y0,Y1,X,Y
990000,1,1.139203,2.139203,0.139203,2.139203
990001,1,1.540330,2.540330,0.540330,2.540330
990002,1,1.360063,2.360063,0.360063,2.360063
990003,1,1.323327,2.323327,0.323327,2.323327
990004,1,1.831549,2.831549,0.831549,2.831549
...,...,...,...,...,...
999995,1,1.812929,2.812929,0.812929,2.812929
999996,1,1.491826,2.491826,0.491826,2.491826
999997,1,1.701604,2.701604,0.701604,2.701604
999998,1,1.589628,2.589628,0.589628,2.589628


In [38]:
s_learner = Slearner(baselearner=LinearRegression(), is_regressor=True)
s_learner.fit(X = np.array(d['X']).reshape(len(d['X']),1), treatment = np.array(d['W']), 
              y =  np.array(d['Y']))
ite, yhat_ts, yhat_cs, rmse = s_learner.get_ite(X =np.array(d_test['X']).reshape(len(d_test['X']),1), treatment = d_test['W'], y =  d['Y'])
print('ATE for S learner: ', np.mean(ite))

ATE for S learner:  1.0


In [39]:
t_learner = Tlearner(LinearRegression(),LinearRegression(), is_regressor= True)
t_learner.fit(X = np.array(d['X']).reshape(-1,1), treatment = np.array(d['W']), 
              y =  np.array(d['Y']))

ite, yhat_ts, yhat_cs, rmse = t_learner.get_ite(X = np.array(d_test['X']).reshape(-1,1), treatment = d_test['W'], y =  d_test['Y'])

print('ATE for T learner: ', np.mean(ite))

ATE for T learner:  1.0


In [40]:
x_learner = Xlearner(LinearRegression(),
                     propensity_model = LogisticRegression(),
                    is_regressor= True)
x_learner.fit(X = np.array(d['X']).reshape(-1,1), treatment = np.array(d['W']), 
              y =  np.array(d['Y']))
ite, yhat_ts, yhat_cs, rmse = x_learner.get_ite(X = np.array(d_test['X']).reshape(-1,1), treatment = d_test['W'], y =  d_test['Y'])

print('ATE for X learner: ', np.mean(ite))

ATE for X learner:  1.0000000000000002


All three metalearners perform well 

#### Paper claims:  X-learner performs well for two extreme cases where: 1. CATE functions are very complex and 2. treatmnent effect is 0 

#####  complex function w = 1 

In [20]:
# complex CATE function (i.e.: when w= 1, true distribution is quite complex)

syn_data_class = resample_from_synthetic_data(n_sample= 1000)
d = syn_data_class.get_data_with_diff_distribution(ratio = 0.1)
d_test = syn_data_class.get_data_with_diff_distribution(ratio = 0.1)

In [21]:
# For every x, true ite is 3 + np.abs(x*x*x - x) - (x+1) = 2 - x + np.abs(x*x*x - x)
# calculate true ate first 
true_ite = [2 - x + np.abs(x*x*x - x) for x in d_test['X']]
true_ate = np.mean(true_ite)
print('True ate for this data is: ', true_ate)

True ate for this data is:  1.7322758574140962


In [22]:
s_learner = Slearner(baselearner=LinearRegression(), is_regressor=True)
s_learner.fit(X = np.array(d['X']).reshape(len(d['X']),1), treatment = np.array(d['W']), 
              y =  np.array(d['Y']))
ite, yhat_ts, yhat_cs, rmse = s_learner.get_ite(X =np.array(d_test['X']).reshape(len(d_test['X']),1), treatment = d_test['W'], y =  d['Y'])
print('ATE for S learner: ', np.mean(ite))

ATE for S learner:  1.7353553289558623


In [23]:
t_learner = Tlearner(LinearRegression(),LinearRegression(), is_regressor= True)
t_learner.fit(X = np.array(d['X']).reshape(-1,1), treatment = np.array(d['W']), 
              y =  np.array(d['Y']))

ite, yhat_ts, yhat_cs, rmse = t_learner.get_ite(X = np.array(d_test['X']).reshape(-1,1), treatment = d_test['W'], y =  d_test['Y'])

print('ATE for T learner: ', np.mean(ite))

ATE for T learner:  1.715321689046918


In [24]:
x_learner = Xlearner(LinearRegression(),
                     propensity_model = LogisticRegression(),
                    is_regressor= True)
x_learner.fit(X = np.array(d['X']).reshape(-1,1), treatment = np.array(d['W']), 
              y =  np.array(d['Y']))
ite, yhat_ts, yhat_cs, rmse = x_learner.get_ite(X = np.array(d_test['X']).reshape(-1,1), treatment = d_test['W'], y =  d_test['Y'])

print('ATE for X learner: ', np.mean(ite))

ATE for X learner:  1.7153216890469185


##### treatment effect = 0 

In [30]:
syn_data_class = resample_from_synthetic_data(n_sample= 1000)
d = syn_data_class.get_data_with_zero_treatment_effect(ratio = 0.5)
d_test = syn_data_class.get_data_with_zero_treatment_effect(ratio = 0.5)

In [31]:
s_learner = Slearner(baselearner=LinearRegression(), is_regressor=True)
s_learner.fit(X = np.array(d['X']).reshape(len(d['X']),1), treatment = np.array(d['W']), 
              y =  np.array(d['Y']))
ite, yhat_ts, yhat_cs, rmse = s_learner.get_ite(X =np.array(d_test['X']).reshape(len(d_test['X']),1), treatment = d_test['W'], y =  d['Y'])
print('ATE for S learner: ', np.mean(ite))

ATE for S learner:  -8.881784197001253e-19


In [32]:
t_learner = Tlearner(LinearRegression(),LinearRegression(), is_regressor= True)
t_learner.fit(X = np.array(d['X']).reshape(-1,1), treatment = np.array(d['W']), 
              y =  np.array(d['Y']))

ite, yhat_ts, yhat_cs, rmse = t_learner.get_ite(X = np.array(d_test['X']).reshape(-1,1), treatment = d_test['W'], y =  d_test['Y'])

print('ATE for T learner: ', np.mean(ite))

ATE for T learner:  -8.526512829121202e-17


In [33]:
x_learner = Xlearner(LinearRegression(),
                     propensity_model = LogisticRegression(),
                    is_regressor= True)
x_learner.fit(X = np.array(d['X']).reshape(-1,1), treatment = np.array(d['W']), 
              y =  np.array(d['Y']))
ite, yhat_ts, yhat_cs, rmse = x_learner.get_ite(X = np.array(d_test['X']).reshape(-1,1), treatment = d_test['W'], y =  d_test['Y'])

print('ATE for X learner: ', np.mean(ite))

ATE for X learner:  -4.234797938131524e-17


Conclusion: X-learner performs well on both tasks, but T and S learners perform very well too. 

#### Experiments with GOTV data (we do not know the true ate of data ) 

In [2]:
# data preprocessing 

df = pd.read_csv('GerberGreenLarimer_APSR_2008_social_pressure.csv')
df = df[df['treatment'].isin([' Control',' Neighbors'])]

# df = pd.read_csv('GerberGreenLarimer_APSR_2008_social_pressure.csv')
df['treatment'] = np.where(df.treatment == ' Control',0,1)
df['voted'] = np.where(df.voted == 'Yes', 1, 0)
df['sex'] = np.where(df.sex == 'male',1, 0)
df['g2000'] = np.where(df.g2000 == 'yes', 1, 0)
df['g2002'] = np.where(df.g2002 == 'yes', 1, 0)
df['g2004'] = np.where(df.g2004 == 'yes', 1, 0)
df['p2000'] = np.where(df.p2000 == 'yes', 1, 0)
df['p2002'] = np.where(df.p2002 == 'yes', 1, 0)
df['p2004'] = np.where(df.p2004 == 'Yes', 1, 0)

cts_variables_names = ["yob","treatment","cluster","hh_id","hh_size","numberofnames","p2004_mean","g2004_mean"]
binary_variables_names = ["sex","g2000", "g2002", "p2000", "p2002", "p2004"]
# for column in binary_variables_names:
#     if column == 'sex':
#         df[column] = np.where(df[column] == ' male',1,0)
#     else:
#         df[column] = df[column].str.lower()
#         df[column] = np.where(df[column] == ' yes',1,0)
scaled_cts_covariates = StandardScaler().fit_transform(df[cts_variables_names])
binary_covariates = df[binary_variables_names]
d = pd.DataFrame(np.concatenate((scaled_cts_covariates, binary_covariates), axis=1), 
                        columns=cts_variables_names+binary_variables_names, index=df.index)
d["W"] = df["treatment"]
d["Y"] = df["voted"]

gotv_data = resample_from_GOTV(d)

##### All dataset ate result 

In [8]:
s_learner = Slearner(baselearner=RandomForestClassifier(), is_regressor=False)
s_learner.fit(d.drop(columns = ['W','Y']), d['W'], d['Y'])
ite, yhat_ts, yhat_cs, rmse = s_learner.get_ite(d.drop(columns = ['W','Y']), d['W'], d['Y'])
print('ATE for S learner: ', np.mean(ite))
print('rmse:', rmse)

ATE for S learner:  0.04186450070605465
rmse: 0.008350676095496454


In [9]:
t_learner = Tlearner(RandomForestClassifier(),RandomForestClassifier(), is_regressor= False)
t_learner.fit(d.drop(columns = ['W','Y']), d['W'], d['Y'])
ite, yhat_ts, yhat_cs, rmse = t_learner.get_ite(d.drop(columns = ['W','Y']), d['W'], d['Y'])

print('ATE for T learner: ', np.mean(ite))
print('rmse:', rmse)

ATE for T learner:  0.08233968128751823
rmse: 0.008607679896763086


In [11]:
x_learner = Xlearner(RandomForestClassifier(),
                     propensity_model = LogisticRegression(),
                     control_effect_learner = LinearRegression(),
                     treatment_effect_learner = LinearRegression(),
                    is_regressor= False)
x_learner.fit(d.drop(columns = ['W','Y']), d['W'], d['Y'])
ite, yhat_ts, yhat_cs, rmse = x_learner.get_ite(d.drop(columns = ['W','Y']), d['W'], d['Y'])

print('ATE for X learner: ', np.mean(ite))
print('rmse:', rmse)

ATE for X learner:  1032.3032128106656
rmse: 0.007231897637475376


##### With equal size of treatment/ control

In [12]:
d[d['W'] == 1]

Unnamed: 0,yob,treatment,cluster,hh_id,hh_size,numberofnames,p2004_mean,g2004_mean,sex,g2000,g2002,p2000,p2002,p2004,W,Y
19,-1.188085,2.237461,-1.727258,-1.727229,-1.498182,0.139749,-1.443668,-2.865244,1.0,1.0,1.0,0.0,1.0,0.0,1,1
30,-0.703996,2.237461,-1.727258,-1.727114,-0.233199,0.139749,-1.443668,-1.562139,1.0,1.0,1.0,1.0,0.0,0.0,1,0
31,-1.672174,2.237461,-1.727258,-1.727114,-0.233199,0.139749,-1.443668,-1.562139,0.0,1.0,1.0,1.0,0.0,0.0,1,0
38,-0.012441,2.237461,-1.726912,-1.727018,1.031783,0.139749,-0.378716,-0.259034,1.0,1.0,1.0,0.0,0.0,1.0,1,1
39,0.609959,2.237461,-1.726912,-1.727018,1.031783,0.139749,-0.378716,-0.259034,0.0,1.0,1.0,0.0,0.0,1.0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
344035,-1.810485,2.237461,1.735860,1.735774,-0.233199,0.139749,2.017426,0.392520,1.0,1.0,0.0,0.0,0.0,1.0,1,1
344043,0.402492,2.237461,1.735860,1.735870,-1.498182,0.139749,0.153760,-0.259034,1.0,1.0,1.0,0.0,0.0,1.0,1,0
344059,-2.640351,2.237461,1.736207,1.736063,-1.498182,0.139749,0.686236,-0.259034,0.0,1.0,1.0,1.0,0.0,1.0,1,0
344076,-0.358218,2.237461,1.736207,1.736313,-0.233199,0.139749,0.952474,0.392520,0.0,1.0,1.0,0.0,1.0,0.0,1,0


In [13]:
d[d['W'] == 0]

Unnamed: 0,yob,treatment,cluster,hh_id,hh_size,numberofnames,p2004_mean,g2004_mean,sex,g2000,g2002,p2000,p2002,p2004,W,Y
5,1.716448,-0.446935,-1.727258,-1.727383,1.031783,0.139749,-1.443668,-0.259034,1.0,0.0,0.0,0.0,0.0,0.0,0,0
6,0.195026,-0.446935,-1.727258,-1.727383,1.031783,0.139749,-1.443668,-0.259034,0.0,1.0,1.0,0.0,1.0,0.0,0,1
7,-0.012441,-0.446935,-1.727258,-1.727383,1.031783,0.139749,-1.443668,-0.259034,1.0,1.0,1.0,0.0,1.0,0.0,0,1
8,0.817426,-0.446935,-1.727258,-1.727364,-0.233199,0.139749,-1.443668,-0.259034,0.0,0.0,0.0,0.0,1.0,0.0,0,0
9,0.748270,-0.446935,-1.727258,-1.727364,-0.233199,0.139749,-1.443668,-0.259034,1.0,1.0,1.0,0.0,1.0,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
344079,-0.911463,-0.446935,1.736207,1.736332,-0.233199,0.139749,0.952474,0.392520,1.0,1.0,1.0,0.0,1.0,1.0,0,1
344080,-0.081596,-0.446935,1.736207,1.736351,-0.233199,0.139749,-0.378716,-0.910586,1.0,1.0,1.0,1.0,0.0,1.0,0,0
344081,0.125870,-0.446935,1.736207,1.736351,-0.233199,0.139749,-0.378716,-0.910586,0.0,1.0,0.0,0.0,0.0,0.0,0,0
344082,-1.326396,-0.446935,1.736207,1.736371,-0.233199,0.139749,0.153760,-0.910586,1.0,1.0,1.0,1.0,1.0,1.0,0,1


In [14]:
d_w0, d_w1  = gotv_data.get_treat_control_equalsize()

In [15]:
d = pd.concat([d_w0,d_w1])

In [16]:
s_learner = Slearner(baselearner=RandomForestClassifier(), is_regressor=False)
s_learner.fit(d.drop(columns = ['W','Y']), d['W'], d['Y'])
ite, yhat_ts, yhat_cs, rmse = s_learner.get_ite(d.drop(columns = ['W','Y']), d['W'], d['Y'])
print('ATE for S learner: ', np.mean(ite))

ATE for S learner:  0.033004


In [17]:
t_learner = Tlearner(RandomForestClassifier(),RandomForestClassifier(), is_regressor= False)
t_learner.fit(d.drop(columns = ['W','Y']), d['W'], d['Y'])
ite, yhat_ts, yhat_cs, rmse = t_learner.get_ite(d.drop(columns = ['W','Y']), d['W'], d['Y'])

print('ATE for T learner: ', np.mean(ite))

ATE for T learner:  0.068063


In [18]:
x_learner = Xlearner(RandomForestClassifier(),
                     propensity_model = LogisticRegression(),
                     control_effect_learner = LinearRegression(),
                     treatment_effect_learner = LinearRegression(),
                    is_regressor= False)
x_learner.fit(d.drop(columns = ['W','Y']), d['W'], d['Y'])
ite, yhat_ts, yhat_cs, rmse = x_learner.get_ite(d.drop(columns = ['W','Y']), d['W'], d['Y'])

print('ATE for X learner: ', np.mean(ite))

ATE for X learner:  -7.695425515984699


In [19]:
#  to verify if X learner is the best estimator, lets run experiment 10 times, get the avg values of ate for three
# learnaers

s_ates = []
t_ates = []
x_ates = []
for _ in range(10):
    # resample 10000 data 
    d_w0, d_w1  = gotv_data.get_treat_control_equalsize(n_sample=10000)
    d = pd.concat([d_w0,d_w1])
    
    s_learner = Slearner(baselearner=RandomForestClassifier(), is_regressor=False)
    s_learner.fit(d.drop(columns = ['W','Y']), d['W'], d['Y'])
    ite, yhat_ts, yhat_cs, rmse = s_learner.get_ite(d.drop(columns = ['W','Y']), d['W'], d['Y'])
    s_ates.append(np.mean(ite))
    
    
    t_learner = Tlearner(RandomForestClassifier(),RandomForestClassifier(), is_regressor= False)
    t_learner.fit(d.drop(columns = ['W','Y']), d['W'], d['Y'])
    ite, yhat_ts, yhat_cs, rmse = t_learner.get_ite(d.drop(columns = ['W','Y']), d['W'], d['Y'])
    t_ates.append(np.mean(ite))


    x_learner = Xlearner(RandomForestClassifier(),
                     propensity_model = LogisticRegression(),
                     control_effect_learner = LinearRegression(),
                     treatment_effect_learner = LinearRegression(),
                    is_regressor= False)
    x_learner.fit(d.drop(columns = ['W','Y']), d['W'], d['Y'])
    ite, yhat_ts, yhat_cs, rmse = x_learner.get_ite(d.drop(columns = ['W','Y']), d['W'], d['Y'])
    x_ates.append(np.mean(ite))
    

In [20]:
result = pd.DataFrame()
result['Estimator'] = ['Slearner','Tlearner','Xlearner']
result['ATE_Mean'] = [np.mean(s_ates),np.mean(t_ates),np.mean(x_ates)]
result

Unnamed: 0,Estimator,ATE_Mean
0,Slearner,0.036314
1,Tlearner,0.080295
2,Xlearner,6.277162


##### With diff size of treatment/ control (0.01 treatment)

In [21]:
d_w0, d_w1  = gotv_data.get_treat_control_diffsize(n_sample= 10000,ratio= 0.01)

In [22]:
d = pd.concat([d_w0,d_w1])

In [23]:
s_learner = Slearner(baselearner=RandomForestClassifier(), is_regressor=False)
s_learner.fit(d.drop(columns = ['W','Y']), d['W'], d['Y'])
ite, yhat_ts, yhat_cs, rmse = s_learner.get_ite(d.drop(columns = ['W','Y']), d['W'], d['Y'])
print('ATE for S learner: ', np.mean(ite))

ATE for S learner:  0.024999


In [24]:
t_learner = Tlearner(RandomForestClassifier(),RandomForestClassifier(), is_regressor= False)
t_learner.fit(d.drop(columns = ['W','Y']), d['W'], d['Y'])
ite, yhat_ts, yhat_cs, rmse = t_learner.get_ite(d.drop(columns = ['W','Y']), d['W'], d['Y'])

print('ATE for T learner: ', np.mean(ite))

ATE for T learner:  0.060592999999999994


In [25]:
x_learner = Xlearner(RandomForestClassifier(),
                     propensity_model = LogisticRegression(),
                     control_effect_learner = LinearRegression(),
                     treatment_effect_learner = LinearRegression(),
                    is_regressor= False)
x_learner.fit(d.drop(columns = ['W','Y']), d['W'], d['Y'])
ite, yhat_ts, yhat_cs, rmse = x_learner.get_ite(d.drop(columns = ['W','Y']), d['W'], d['Y'])

print('ATE for X learner: ', np.mean(ite))

ATE for X learner:  29.582934821209165


In [26]:
#  to verify if X learner is the best estimator, lets run experiment 10 times, get the avg values of ate for three
# learnaers

s_ates = []
t_ates = []
x_ates = []
for _ in range(10):
    # resample 10000 data 
    d_w0, d_w1  = gotv_data.get_treat_control_diffsize(n_sample= 10000,ratio= 0.01)
    d = pd.concat([d_w0,d_w1])
    
    s_learner = Slearner(baselearner=RandomForestClassifier(), is_regressor=False)
    s_learner.fit(d.drop(columns = ['W','Y']), d['W'], d['Y'])
    ite, yhat_ts, yhat_cs, rmse = s_learner.get_ite(d.drop(columns = ['W','Y']), d['W'], d['Y'])
    s_ates.append(np.mean(ite))
    
    
    t_learner = Tlearner(RandomForestClassifier(),RandomForestClassifier(), is_regressor= False)
    t_learner.fit(d.drop(columns = ['W','Y']), d['W'], d['Y'])
    ite, yhat_ts, yhat_cs, rmse = t_learner.get_ite(d.drop(columns = ['W','Y']), d['W'], d['Y'])
    t_ates.append(np.mean(ite))


    x_learner = Xlearner(RandomForestClassifier(),
                     propensity_model = LogisticRegression(),
                     control_effect_learner = LinearRegression(),
                     treatment_effect_learner = LinearRegression(),
                    is_regressor= False)
    x_learner.fit(d.drop(columns = ['W','Y']), d['W'], d['Y'])
    ite, yhat_ts, yhat_cs, rmse = x_learner.get_ite(d.drop(columns = ['W','Y']), d['W'], d['Y'])
    x_ates.append(np.mean(ite))
    

In [27]:
result = pd.DataFrame()
result['Estimator'] = ['Slearner','Tlearner','Xlearner']
result['ATE_Mean'] = [np.mean(s_ates),np.mean(t_ates),np.mean(x_ates)]
result

Unnamed: 0,Estimator,ATE_Mean
0,Slearner,0.026003
1,Tlearner,0.064725
2,Xlearner,-120.00444
