In [13]:
import pandas as pd
import numpy as np
from pathlib import Path

# Read the two derived CSV files (Merton first, then NIG) into DataFrames.
merton, nig = (
    pd.read_csv(csv_path)
    for csv_path in ("data/derived/merton_weekly.csv", "data/derived/NIG_weekly.csv")
)

# Print each frame's column index so the available fields are visible before merging.
print(merton.columns, nig.columns, sep="\n")

Index(['gvkey', 'date', 'sigma_hat', 'mu_hat', 'V_0', 'V_used', 'B_used',
       'PD_Q', 'PD_P', 'train_end_date', 'training_end'],
      dtype='object')
Index(['gvkey', 'date', 'A_hat', 'L', 'alpha', 'beta', 'delta', 'mu', 'PD_1y'], dtype='object')


In [15]:
# Align the Merton and NIG default-probability columns on (gvkey, date).


def _tidy_pd_frame(frame, pd_columns, label):
    """Return a new frame holding the merge keys and renamed, numeric PD columns.

    Parameters
    ----------
    frame : pd.DataFrame
        Source frame containing ``gvkey``, ``date`` and every key of ``pd_columns``.
    pd_columns : dict[str, str]
        Maps each source PD column name to the name it gets in the result.
    label : str
        Short name used only in the message printed when rows are dropped.

    Returns
    -------
    pd.DataFrame
        One row per (gvkey, date), sorted by those keys, with no null keys.
        ``frame`` itself is not modified.
    """
    keys = ["gvkey", "date"]
    tidy = frame.loc[:, keys + list(pd_columns)].rename(columns=pd_columns)

    tidy["date"] = pd.to_datetime(tidy["date"], errors="coerce")
    for col in pd_columns.values():
        # Non-numeric PD entries become NaN instead of raising.
        tidy[col] = pd.to_numeric(tidy[col], errors="coerce")

    # pandas.merge matches null keys to each other, so a row whose date failed to
    # parse (NaT) in one frame would be joined to an equally broken row in the
    # other. Such rows carry no usable key: drop them, and say so.
    has_keys = tidy[keys].notna().all(axis=1)
    n_dropped = int((~has_keys).sum())
    if n_dropped:
        print(f"{label}: dropped {n_dropped} row(s) with missing gvkey/date")
    tidy = tidy.dropna(subset=keys)

    # Guard against duplicate keys: keep the last row per (gvkey, date).
    return tidy.sort_values(keys).drop_duplicates(keys, keep="last")


# Keep the loaded frames' "date" column as datetime, as this cell has always done;
# the conversion is idempotent, so re-running the cell is safe.
for df in (merton, nig):
    df["date"] = pd.to_datetime(df["date"], errors="coerce")

merton_keep = _tidy_pd_frame(
    merton,
    {"PD_Q": "PD_1y_Merton_Q", "PD_P": "PD_1y_Merton_P"},
    label="merton",
)
nig_keep = _tidy_pd_frame(nig, {"PD_1y": "PD_1y_NIG"}, label="nig")

# Outer join keeps firm-dates present in only one model (the other model's PDs are NaN).
# validate= makes pandas raise if either side still has duplicate keys.
merged = (
    merton_keep
    .merge(nig_keep, on=["gvkey", "date"], how="outer", validate="one_to_one")
    .sort_values(["gvkey", "date"])
    .reset_index(drop=True)
)

print(merged.columns)
print(merged.head())


Index(['gvkey', 'date', 'PD_1y_Merton_Q', 'PD_1y_Merton_P', 'PD_1y_NIG'], dtype='object')
   gvkey       date  PD_1y_Merton_Q  PD_1y_Merton_P     PD_1y_NIG
0  14447 2014-01-03             NaN             NaN  2.298318e-09
1  14447 2014-01-10             NaN             NaN  3.589342e-09
2  14447 2014-01-17             NaN             NaN  3.319471e-09
3  14447 2014-01-24             NaN             NaN  4.126655e-09
4  14447 2014-01-31             NaN             NaN  2.078421e-09
