# 04 Robustness Checks

This notebook runs robustness checks for the Double ML results. It currently re-estimates the model with alternative machine learning learners (Random Forest, Lasso, and XGBoost); subsampling by region or income group is planned but not yet implemented.


In [None]:

import pandas as pd
from doubleml import DoubleMLData, DoubleMLPLR
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso
from xgboost import XGBRegressor

# Robustness check: re-estimate the partially linear regression (PLR) Double ML
# model with several alternative nuisance learners and compare the summaries.

# Load merged dataset
merged = pd.read_csv('../data/merged_panel.csv')

# Define outcome, treatment, and controls.
# Every column that is not an identifier ('iso3c', 'date'), the outcome, or the
# treatment is used as a control.
y_col = 'bond_yield'
d_col = 'nd_gain'
excluded_cols = {'iso3c', 'date', y_col, d_col}
control_cols = [col for col in merged.columns if col not in excluded_cols]

# Drop rows with a missing value in any modelling column
model_data = merged.dropna(subset=[y_col, d_col, *control_cols])

# Create DoubleMLData object
dml_data = DoubleMLData(model_data, y_col=y_col, d_cols=d_col, x_cols=control_cols)

# Alternative ML models, keyed by the label used in the printed output
models = {
    'Random Forest': RandomForestRegressor(n_estimators=500, max_depth=5, random_state=42),
    'Lasso': Lasso(alpha=0.1),
    'XGBoost': XGBRegressor(n_estimators=300, learning_rate=0.05, max_depth=4),
}

# Fit one PLR model per learner; the same learner type is used for both the
# outcome and the treatment nuisance functions.
results = {}
for name, ml in models.items():
    # Learners are passed positionally: the outcome-learner keyword was renamed
    # from `ml_g` to `ml_l` in DoubleML 0.5, and the positional form is accepted
    # under both names.
    dml_model = DoubleMLPLR(dml_data, ml, ml)
    dml_model.fit()
    results[name] = dml_model.summary
    print(f"\n{name} Results:")
    print(dml_model.summary)

# TODO: Add subsampling by region/income group and include fixed effects if needed
