# DoWhy example on ihdp (Infant Health and Development Program) dataset

In [1]:
# importing required libraries
import os, sys
sys.path.append(os.path.abspath("../../"))
import dowhy
from dowhy.do_why import CausalModel
import pandas as pd
import numpy as np

### Loading Data

In [2]:
data= pd.read_csv("https://raw.githubusercontent.com/AMLab-Amsterdam/CEVAE/master/datasets/IHDP/csv/ihdp_npci_1.csv", header = None)
col =  ["treatment", "y_factual", "y_cfactual", "mu0", "mu1" ,]

for i in range(1,26):
    col.append("x"+str(i))
data.columns = col
data.head()

Unnamed: 0,treatment,y_factual,y_cfactual,mu0,mu1,x1,x2,x3,x4,x5,...,x16,x17,x18,x19,x20,x21,x22,x23,x24,x25
0,1,5.599916,4.31878,3.268256,6.854457,-0.528603,-0.343455,1.128554,0.161703,-0.316603,...,1,1,1,1,0,0,0,0,0,0
1,0,6.875856,7.856495,6.636059,7.562718,-1.736945,-1.802002,0.383828,2.24432,-0.629189,...,1,1,1,1,0,0,0,0,0,0
2,0,2.996273,6.633952,1.570536,6.121617,-0.807451,-0.202946,-0.360898,-0.879606,0.808706,...,1,0,1,1,0,0,0,0,0,0
3,0,1.366206,5.697239,1.244738,5.889125,0.390083,0.596582,-1.85035,-0.879606,-0.004017,...,1,0,1,1,0,0,0,0,0,0
4,0,1.963538,6.202582,1.685048,6.191994,-1.045229,-0.60271,0.011465,0.161703,0.683672,...,1,1,1,1,0,0,0,0,0,0


### 1.Model

In [3]:
# Create a causal model from the data and given common causes.
xs = ""
for i in range(1,26):
    xs += ("x"+str(i)+"+")
    
model=CausalModel(
        data = data,
        treatment='treatment',
        outcome='y_factual',
        common_causes=xs.split('+')
        )


INFO:dowhy.do_why:Model to find the causal effect of treatment ['treatment'] on outcome ['y_factual']


### 2.Identify

In [4]:
#Identify the causal effect
identified_estimand = model.identify_effect()

INFO:dowhy.causal_identifier:Common causes of treatment and outcome:['', 'x21', 'x22', 'x9', 'x8', 'x11', 'x16', 'x25', 'x4', 'x5', 'x20', 'x10', 'x17', 'x13', 'x7', 'x2', 'x23', 'x3', 'x24', 'x1', 'x15', 'x14', 'x6', 'x19', 'x18', 'x12']


WARN: Do you want to continue by ignoring these unobserved confounders? [y/n] y


INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]


### 3. Estimate (using different methods)

#### 3.1 Using Linear Regression

In [5]:
# Estimate the causal effect and compare it with Average Treatment Effect
estimate = model.estimate_effect(identified_estimand,
        method_name="backdoor.linear_regression", test_significance=True
)

print(estimate)

print("Causal Estimate is " + str(estimate.value))
data_1 = data[data["treatment"]==1]
data_0 = data[data["treatment"]==0]

print("ATE", np.mean(data_1["y_factual"])- np.mean(data_0["y_factual"]))


INFO:dowhy.causal_estimator:INFO: Using Linear Regression Estimator
INFO:dowhy.causal_estimator:b: y_factual~treatment+x21+x22+x9+x8+x11+x16+x25+x4+x5+x20+x10+x17+x13+x7+x2+x23+x3+x24+x1+x15+x14+x6+x19+x18+x12


*** Causal Estimate ***

## Target estimand
Estimand type: ate
### Estimand : 1
Estimand name: iv
No such variable found!
### Estimand : 2
Estimand name: backdoor
Estimand expression:
    d                                                                         
──────────(Expectation(y_factual|x21,x22,x9,x8,x11,x16,x25,x4,x5,x20,x10,x17,x
dtreatment                                                                    

                                               
13,x7,x2,x23,x3,x24,x1,x15,x14,x6,x19,x18,x12))
                                               
Estimand assumption 1, Unconfoundedness: If U→treatment and U→y_factual then P(y_factual|treatment,x21,x22,x9,x8,x11,x16,x25,x4,x5,x20,x10,x17,x13,x7,x2,x23,x3,x24,x1,x15,x14,x6,x19,x18,x12,U) = P(y_factual|treatment,x21,x22,x9,x8,x11,x16,x25,x4,x5,x20,x10,x17,x13,x7,x2,x23,x3,x24,x1,x15,x14,x6,x19,x18,x12)

## Realized estimand
b: y_factual~treatment+x21+x22+x9+x8+x11+x16+x25+x4+x5+x20+x10+x17+x13+x7+x2+x23+x3+x24+x1+x15+x14+x6+x

#### 3.2 Using Propensity Score Matching

In [6]:
estimate = model.estimate_effect(identified_estimand,
        method_name="backdoor.propensity_score_matching"
)

print("Causal Estimate is " + str(estimate.value))

print("ATE", np.mean(data_1["y_factual"])- np.mean(data_0["y_factual"]))


INFO:dowhy.causal_estimator:INFO: Using Propensity Score Matching Estimator
INFO:dowhy.causal_estimator:b: y_factual~treatment+x21+x22+x9+x8+x11+x16+x25+x4+x5+x20+x10+x17+x13+x7+x2+x23+x3+x24+x1+x15+x14+x6+x19+x18+x12


Causal Estimate is 3.8436503200364402
ATE 4.02112101243


#### 3.3 Using Propensity Score Stratification

In [13]:
estimate = model.estimate_effect(identified_estimand,
        method_name="backdoor.propensity_score_stratification", method_params={'num_strata'50, 'clipping_threshold':5}
)

print("Causal Estimate is " + str(estimate.value))
print("ATE", np.mean(data_1["y_factual"])- np.mean(data_0["y_factual"]))



INFO:dowhy.causal_estimator:INFO: Using Propensity Score Stratification Estimator
INFO:dowhy.causal_estimator:b: y_factual~treatment+x21+x22+x9+x8+x11+x16+x25+x4+x5+x20+x10+x17+x13+x7+x2+x23+x3+x24+x1+x15+x14+x6+x19+x18+x12


Causal Estimate is 4.0560672956
ATE 4.02112101243


#### 3.4 Using Propensity Score Weighting

In [8]:
estimate = model.estimate_effect(identified_estimand,
        method_name="backdoor.propensity_score_weighting"
)

print("Causal Estimate is " + str(estimate.value))

print("ATE", np.mean(data_1["y_factual"])- np.mean(data_0["y_factual"]))


INFO:dowhy.causal_estimator:INFO: Using Propensity Score Weighting Estimator
INFO:dowhy.causal_estimator:b: y_factual~treatment+x21+x22+x9+x8+x11+x16+x25+x4+x5+x20+x10+x17+x13+x7+x2+x23+x3+x24+x1+x15+x14+x6+x19+x18+x12


Causal Estimate is 4.04761815345
ATE 4.02112101243


### 4. Refute
##### Refute the obtained estimate using multiple robustness checks.
##### 4.1 Adding a random common cause

In [9]:
refute_results=model.refute_estimate(identified_estimand, estimate,
        method_name="random_common_cause")
print(refute_results)

INFO:dowhy.causal_estimator:INFO: Using Propensity Score Weighting Estimator
INFO:dowhy.causal_estimator:b: y_factual~treatment+x21+x22+x9+x8+x11+x16+x25+x4+x5+x20+x10+x17+x13+x7+x2+x23+x3+x24+x1+x15+x14+x6+x19+x18+x12+w_random


Refute: Add a Random Common Cause
Estimated effect:(4.0476181534545397,)
New effect:(4.0480367100453618,)



##### 4.2 Using a placebo treatment

In [10]:
res_placebo=model.refute_estimate(identified_estimand, estimate,
        method_name="placebo_treatment_refuter", placebo_type="permute")
print(res_placebo)

INFO:dowhy.causal_estimator:INFO: Using Propensity Score Weighting Estimator
INFO:dowhy.causal_estimator:b: y_factual~placebo+x21+x22+x9+x8+x11+x16+x25+x4+x5+x20+x10+x17+x13+x7+x2+x23+x3+x24+x1+x15+x14+x6+x19+x18+x12


Refute: Use a Placebo Treatment
Estimated effect:(4.0476181534545397,)
New effect:(0.057511331649253705,)



#### 4.3 Data Subset Refuter

In [11]:
res_subset=model.refute_estimate(identified_estimand, estimate,
        method_name="data_subset_refuter", subset_fraction=0.9)
print(res_subset)

INFO:dowhy.causal_estimator:INFO: Using Propensity Score Weighting Estimator
INFO:dowhy.causal_estimator:b: y_factual~treatment+x21+x22+x9+x8+x11+x16+x25+x4+x5+x20+x10+x17+x13+x7+x2+x23+x3+x24+x1+x15+x14+x6+x19+x18+x12


Refute: Use a subset of data
Estimated effect:(4.0476181534545397,)
New effect:(4.0274748385128563,)

