# ACIC Dataset Analysis

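This notebook loads the ACIC 2019 synthetic benchmark dataset, runs the causal-estimation pipeline with the IPW, PSM, DR and DML estimators, and, when the data includes a `true_ate` column, reports each estimator's ATE error (RMSE) against that ground truth.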


In [None]:
# Notebook setup: imports, import-path tweak, warning filter, and RNG seeding.

# Add the parent directory to the module search path; presumably this is the
# repository root, so that `src.*` imports in later cells resolve -- confirm.
import sys
sys.path.append('..')

import numpy as np
import pandas as pd
from pathlib import Path
import warnings
# Silences ALL warnings for the rest of the session (keeps notebook output
# clean, but also hides deprecation and convergence warnings).
warnings.filterwarnings('ignore')

# Fixed seed: used here to seed NumPy's global RNG and passed to the pipeline
# as `random_state` in a later cell.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)


In [None]:
# Run the estimation pipeline on ACIC 2019 and, if ground truth is available,
# write and print a per-estimator RMSE table.
from src.data_loader import download_acic
from src.dowhy_pipeline import run_full_pipeline
from src.metrics import compute_ate_rmse

# Single source of truth for where the pipeline output and RMSE table go.
results_dir = Path("../results")

data_path = download_acic(year=2019)

if data_path and Path(data_path).exists():
    data = pd.read_csv(data_path)
    print(f"Loaded {len(data)} rows")

    # NOTE(review): the pipeline receives the dataset *name*, not `data`;
    # presumably it loads the ACIC data itself -- confirm against
    # src.dowhy_pipeline.
    results = run_full_pipeline(
        dataset_name="acic",
        estimators=["ipw", "psm", "dr", "dml"],
        output_dir=results_dir,
        random_state=RANDOM_SEED,
    )

    if "true_ate" in data.columns:
        # The first row's value is taken as the ground truth for the dataset.
        true_ate = data["true_ate"].iloc[0]

        # Skip estimators with no estimate (NaN ATE).
        valid = results[results["ate"].notna()]
        rmse_rows = [
            {
                "estimator": estimator,
                "ate": ate,
                "true_ate": true_ate,
                "rmse": compute_ate_rmse(ate, true_ate),
            }
            for estimator, ate in zip(valid["estimator"], valid["ate"])
        ]

        # Explicit columns keep the CSV header stable even when there are no
        # valid estimates.
        rmse_df = pd.DataFrame(
            rmse_rows,
            columns=["estimator", "ate", "true_ate", "rmse"],
        )

        # Create the output directory if the pipeline did not, so to_csv
        # cannot fail on a missing folder.
        rmse_path = results_dir / "acic" / "ATE_rmse_table.csv"
        rmse_path.parent.mkdir(parents=True, exist_ok=True)
        rmse_df.to_csv(rmse_path, index=False)

        print("\nRMSE Results:")
        print(rmse_df.to_string())
else:
    print("ACIC dataset requires manual download. See data/acic2019/README.txt")
