# 03 Double Machine Learning Main Analysis

This notebook implements the Double Machine Learning (DML) methodology using the `doubleml` package. The goal is to estimate the causal effect of the ND-GAIN vulnerability score on 10-year sovereign bond yields.


In [None]:

import pandas as pd
from doubleml import DoubleMLData, DoubleMLPLR
from sklearn.ensemble import RandomForestRegressor

# Load the merged panel produced by the earlier notebooks.
merged = pd.read_csv('../data/merged_panel.csv')

# Outcome (Y) and treatment (D) column names.
y_col = 'bond_yield'
d_col = 'nd_gain'

# Candidate controls: every remaining column except the outcome, the
# treatment, and 'iso3c' / 'date' (presumably the panel identifiers -- confirm).
# Only numeric/bool columns are kept, because the random-forest learners below
# cannot be fitted on string or datetime columns.
# TODO: replace with an explicit, economically motivated list of controls.
non_control_cols = {'iso3c', 'date', y_col, d_col}
candidate_cols = [col for col in merged.columns if col not in non_control_cols]
usable_cols = set(merged.select_dtypes(include=['number', 'bool']).columns)
control_cols = [col for col in candidate_cols if col in usable_cols]

skipped_cols = [col for col in candidate_cols if col not in usable_cols]
if skipped_cols:
    print(f"Skipping non-numeric columns as controls: {skipped_cols}")

# Keep only complete cases for the variables that enter the model.
model_data = merged.dropna(subset=[y_col, d_col] + control_cols)
if model_data.empty:
    # Fail here with a clear message instead of deep inside the estimator.
    raise ValueError(
        "No complete observations remain after dropping rows with missing "
        "values; check the control columns for all-NaN series."
    )

# Wrap the estimation sample for DoubleML.
dml_data = DoubleMLData(model_data, y_col=y_col, d_cols=d_col, x_cols=control_cols)

# Learner used for both nuisance functions.
ml_model = RandomForestRegressor(n_estimators=500, max_depth=5, random_state=42)

# Partially linear regression model. The learners are passed positionally on
# purpose: the outcome learner was renamed from `ml_g` to `ml_l` in
# DoubleML 0.5.0, so the keyword `ml_g=` alone raises a TypeError on current
# releases, while the positional order (outcome learner, treatment learner)
# is the same before and after the rename.
# NOTE(review): rows look like country-date panel observations; if so, the
# default standard errors ignore within-country dependence -- confirm whether
# a clustered variant is needed.
dml_plr = DoubleMLPLR(dml_data, ml_model, ml_model)

# Estimate the treatment effect via cross-fitting.
dml_plr.fit()

# Coefficient table: estimate, standard error, t-stat, p-value, CI.
print(dml_plr.summary)
