In [21]:
import pandas as pd
import numpy as np
from dowhy import CausalModel
import warnings

# --- 0. Setup: warning policy and analysis configuration ---
# Silence every warning category for the remainder of the session.
# NOTE(review): this also hides warnings raised by the estimators and refuters
# used later; consider narrowing the filter once the notebook is stable.
warnings.filterwarnings("ignore")

# Data source and the role each column plays in the causal analysis.
url = 'https://users.nber.org/~rdehejia/data/nsw_dw.dta'
treatment, outcome = 'treat', 're78'
common_causes = ['age', 'education', 'black', 'hispanic', 'married', 'nodegree', 're74', 're75']

# Every column the analysis reads: treatment first, then outcome, then the
# adjustment set, in that order.
required_columns = [treatment, outcome, *common_causes]


In [22]:
# 1. Load the data
# Read the Stata file at `url`. On failure, print the error and re-raise so the
# notebook stops here instead of continuing without `df`.
try:
    df = pd.read_stata(url)
except Exception as e:
    print(f"FATAL: Failed to load data from the NBER URL. Error: {e}")
    raise

# Fail early, with a readable message, if the downloaded file does not contain
# every column the analysis expects (otherwise the selection below raises a
# less specific KeyError).
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise KeyError(f"Downloaded data is missing required columns: {missing_columns}")

# Keep only the analysis columns and drop incomplete rows, so that the
# "Cleaned" message below describes what actually happened. `dropna` returns a
# new frame, so `df` itself is left untouched.
data = df[required_columns].dropna()
n_dropped = len(df) - len(data)
if n_dropped:
    # Only reported when something was removed, to keep the normal output short.
    print(f"Dropped {n_dropped} row(s) with missing values in the required columns.")

print("Data Loaded and Cleaned Successfully.")

# Echo the selected columns so the reader can verify the schema at a glance.
print(f"Columns used: {data.columns.tolist()}")

Data Loaded and Cleaned Successfully.
Columns used: ['treat', 're78', 'age', 'education', 'black', 'hispanic', 'married', 'nodegree', 're74', 're75']


In [23]:
# --- 1. Model the Causal Problem ---
# Build the DoWhy model from the selected frame: `treatment` and `outcome` name
# the effect of interest, and every column in `common_causes` is supplied as a
# common cause of both.
# NOTE(review): `observed_node_names` is presumably absorbed by CausalModel's
# extra keyword arguments — confirm it has any effect in the installed DoWhy
# version before relying on it.
model = CausalModel(
    data=data,
    treatment=treatment,
    outcome=outcome,
    common_causes=common_causes,
    observed_node_names=list(data.columns),
)

print("Step 1: Causal Model Initialized.")

Step 1: Causal Model Initialized.


In [24]:
# --- 2. Identify the Causal Estimand ---
# Ask DoWhy which estimands the model supports. `proceed_when_unidentifiable`
# is set so identification continues rather than stopping for confirmation
# (presumably about potential unobserved confounding — see DoWhy docs).
identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)

# Header and estimand summary, one per line.
print("\n--- Step 2: Identified Causal Estimand ---", identified_estimand, sep="\n")


--- Step 2: Identified Causal Estimand ---
Estimand type: EstimandType.NONPARAMETRIC_ATE

### Estimand : 1
Estimand name: backdoor
Estimand expression:
   d                                                                     
────────(E[re78|re75,nodegree,hispanic,education,age,re74,black,married])
d[treat]                                                                 
Estimand assumption 1, Unconfoundedness: If U→{treat} and U→re78 then P(re78|treat,re75,nodegree,hispanic,education,age,re74,black,married,U) = P(re78|treat,re75,nodegree,hispanic,education,age,re74,black,married)

### Estimand : 2
Estimand name: iv
No such variable(s) found!

### Estimand : 3
Estimand name: frontdoor
No such variable(s) found!

### Estimand : 4
Estimand name: general_adjustment
Estimand expression:
   d                                                                     
────────(E[re78|re75,nodegree,hispanic,education,age,re74,black,married])
d[treat]                                                 

In [25]:
print("\n--- Step 3: Estimation Results ---")


def _estimate_ate(method_name):
    """Run `model.estimate_effect` on `identified_estimand` with the given
    estimator name, targeting the average treatment effect ("ate")."""
    return model.estimate_effect(
        identified_estimand,
        method_name=method_name,
        target_units="ate",
    )


# Estimator 1: propensity-score weighting (IPW) on the backdoor estimand.
ipw_estimate = _estimate_ate("backdoor.propensity_score_weighting")
print(f"Causal Estimate (ATE) via IPW: ${ipw_estimate.value:.2f}")

# Estimator 2: linear regression on the same backdoor estimand, as a
# cross-check against the weighting estimate.
reg_estimate = _estimate_ate("backdoor.linear_regression")
print(f"Causal Estimate (ATE) via Regression: ${reg_estimate.value:.2f}")


--- Step 3: Estimation Results ---
Causal Estimate (ATE) via IPW: $1631.55
Causal Estimate (ATE) via Regression: $1676.34


In [26]:
print("\n--- Step 4: Refutation (Testing Robustness) ---")

# Both refuters below are randomised (one permutes the treatment column, the
# other adds a randomly generated covariate), so without a fixed seed the
# reported "New effect" and p-value change on every run.
# Seed NumPy's global RNG so Restart & Run All reproduces the same numbers.
# NOTE(review): assumes the refuters draw from NumPy's global RNG when no
# explicit random state is passed — confirm against the installed DoWhy version.
REFUTATION_SEED = 42
np.random.seed(REFUTATION_SEED)

# Refutation 1: replace the real treatment with a permuted (placebo) one and
# re-estimate with the IPW estimator; a sound estimate should collapse to ~0.
refute_placebo_treatment = model.refute_estimate(
    identified_estimand,
    ipw_estimate,
    method_name="placebo_treatment_refuter",
    placebo_type="permute"
)

print("\nRefutation 1 (Placebo Treatment - Expect near 0):")
print(refute_placebo_treatment)


# Refutation 2: add a random common cause and re-estimate; the IPW estimate
# should barely move if it is robust.
refute_random_common_cause = model.refute_estimate(
    identified_estimand,
    ipw_estimate,
    method_name="random_common_cause"
)

print("\nRefutation 2 (Random Common Cause - Expect small change from original):")
print(refute_random_common_cause)


--- Step 4: Refutation (Testing Robustness) ---

Refutation 1 (Placebo Treatment - Expect near 0):
Refute: Use a Placebo Treatment
Estimated effect:1631.5502820324882
New effect:-137.1307307394609
p value:0.78


Refutation 2 (Random Common Cause - Expect small change from original):
Refute: Add a random common cause
Estimated effect:1631.5502820324882
New effect:1631.550282032488
p value:1.0

