In [1]:
%run CommonCodeForExperiments.ipynb

In [4]:
import dowhy
from dowhy import CausalModel
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv("https://raw.githubusercontent.com/AMLab-Amsterdam/CEVAE/master/datasets/IHDP/csv/ihdp_npci_1.csv", header = None)
col =  ["treatment", "y_factual", "y_cfactual", "mu0", "mu1" ,]
for i in range(1,26):
    col.append("x"+str(i))
data.columns = col
data = data.astype({"treatment":'bool'}, copy=False)

In [3]:
data['treatment'] = (data['treatment'] == True).astype(int)
data['treatment'] = (data['treatment'] == False).astype(int)
data.head()

Unnamed: 0,treatment,y_factual,y_cfactual,mu0,mu1,x1,x2,x3,x4,x5,...,x16,x17,x18,x19,x20,x21,x22,x23,x24,x25
0,0,5.599916,4.31878,3.268256,6.854457,-0.528603,-0.343455,1.128554,0.161703,-0.316603,...,1,1,1,1,0,0,0,0,0,0
1,1,6.875856,7.856495,6.636059,7.562718,-1.736945,-1.802002,0.383828,2.24432,-0.629189,...,1,1,1,1,0,0,0,0,0,0
2,1,2.996273,6.633952,1.570536,6.121617,-0.807451,-0.202946,-0.360898,-0.879606,0.808706,...,1,0,1,1,0,0,0,0,0,0
3,1,1.366206,5.697239,1.244738,5.889125,0.390083,0.596582,-1.85035,-0.879606,-0.004017,...,1,0,1,1,0,0,0,0,0,0
4,1,1.963538,6.202582,1.685048,6.191994,-1.045229,-0.60271,0.011465,0.161703,0.683672,...,1,1,1,1,0,0,0,0,0,0


In [6]:
# Create a causal model from the data and given common causes.
xs = ""
for i in range(1,26):
    xs += ("x"+str(i)+"+")
model=CausalModel(
        data = data,
        treatment='treatment',
        outcome='y_factual',
        common_causes=xs.split('+')
        )

INFO:dowhy.causal_model:Model to find the causal effect of treatment ['treatment'] on outcome ['y_factual']


In [7]:
#Identify the causal effect
identified_estimand = model.identify_effect()

INFO:dowhy.causal_identifier:Common causes of treatment and outcome:['', 'x4', 'x7', 'x18', 'x16', 'x19', 'x8', 'x21', 'x1', 'x17', 'x11', 'x12', 'x14', 'x13', 'x10', 'x5', 'x15', 'x2', 'x3', 'x6', 'x22', 'x25', 'x23', 'x24', 'x20', 'x9']


WARN: Do you want to continue by ignoring any unobserved confounders? (use proceed_when_unidentifiable=True to disable this prompt) [y/n] y


INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]


In [9]:

# Estimate the causal effect and compare it with Average Treatment Effect
estimate = model.estimate_effect(identified_estimand,
        method_name="backdoor.linear_regression", test_significance=True
)

print(estimate)

print("Causal Estimate is " + str(estimate.value))
data_1 = data[data["treatment"]==1]
data_0 = data[data["treatment"]==0]

print("ATE", np.mean(data_1["y_factual"])- np.mean(data_0["y_factual"]))

INFO:dowhy.causal_estimator:INFO: Using Linear Regression Estimator
INFO:dowhy.causal_estimator:b: y_factual~treatment+x4+x7+x18+x16+x19+x8+x21+x1+x17+x11+x12+x14+x13+x10+x5+x15+x2+x3+x6+x22+x25+x23+x24+x20+x9


                            OLS Regression Results                            
Dep. Variable:              y_factual   R-squared:                       0.740
Model:                            OLS   Adj. R-squared:                  0.731
Method:                 Least Squares   F-statistic:                     78.91
Date:                Fri, 26 Jun 2020   Prob (F-statistic):          6.10e-191
Time:                        09:21:36   Log-Likelihood:                -1138.1
No. Observations:                 747   AIC:                             2330.
Df Residuals:                     720   BIC:                             2455.
Df Model:                          26                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          6.1492      0.455     13.520      0.0

In [10]:
data = data.drop(['mu0', 'x1', 'x16', 'x20'], axis=1)

In [11]:
# Create a causal model from the data and given common causes.
xs = ""
for i in range(1,26):
    xs += ("x"+str(i)+"+")
model=CausalModel(
        data = data,
        treatment='treatment',
        outcome='y_factual',
        common_causes=xs.split('+')
        )

INFO:dowhy.causal_model:Model to find the causal effect of treatment ['treatment'] on outcome ['y_factual']


In [12]:
#Identify the causal effect
identified_estimand = model.identify_effect()

INFO:dowhy.causal_identifier:Common causes of treatment and outcome:['', 'x4', 'x7', 'x18', 'x16', 'x19', 'x8', 'x21', 'x1', 'x17', 'x11', 'x12', 'x14', 'x13', 'x10', 'x5', 'x15', 'x2', 'x3', 'x6', 'x22', 'x25', 'x23', 'x24', 'x20', 'x9']


WARN: Do you want to continue by ignoring any unobserved confounders? (use proceed_when_unidentifiable=True to disable this prompt) [y/n] y


INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]


In [13]:

# Estimate the causal effect and compare it with Average Treatment Effect
estimate = model.estimate_effect(identified_estimand,
        method_name="backdoor.linear_regression", test_significance=True
)

print(estimate)

print("Causal Estimate is " + str(estimate.value))
data_1 = data[data["treatment"]==1]
data_0 = data[data["treatment"]==0]

print("ATE", np.mean(data_1["y_factual"])- np.mean(data_0["y_factual"]))

INFO:dowhy.causal_estimator:INFO: Using Linear Regression Estimator
INFO:dowhy.causal_estimator:b: y_factual~treatment+x4+x7+x18+x19+x8+x21+x17+x11+x12+x14+x13+x10+x5+x15+x2+x3+x6+x22+x25+x23+x24+x9


                            OLS Regression Results                            
Dep. Variable:              y_factual   R-squared:                       0.738
Model:                            OLS   Adj. R-squared:                  0.730
Method:                 Least Squares   F-statistic:                     88.48
Date:                Fri, 26 Jun 2020   Prob (F-statistic):          1.90e-192
Time:                        09:36:02   Log-Likelihood:                -1141.5
No. Observations:                 747   AIC:                             2331.
Df Residuals:                     723   BIC:                             2442.
Df Model:                          23                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          5.9548      0.383     15.541      0.0