In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from data_import import load_data
from model_dfs import prepare_nig_inputs
#from nig_em_paper import EM_algo, one_year_pd_timeseries
from nig_gibbs import gibbs_sampler

In [2]:
# Load the Accenture dataset; load_data hands back the pair (ret_daily, bs).
ret_daily, bs = load_data(xlsx_path=None, verbose=True)

# Preview each frame, closing every preview with a 40-dash rule.
print(ret_daily.head(), "-" * 40, sep="\n")
print(bs.head(), "-" * 40, sep="\n")

# Load ECB 1Y risk-free yield data.
# The CSV is read from <cwd>/data/derived; on a first run the data has to be
# fetched from the API beforehand so that this file exists.
path = Path.cwd().joinpath("data", "derived", "ecb_riskfree_1y_daily.csv")
df_rf = pd.read_csv(path, parse_dates=["date"])
print(df_rf.head())

      country_iso          isin       date                       company  \
41651         DEU  DE0005190003 2010-01-05  BAYERISCHE MOTOREN WERKE AKT   
41652         DEU  DE0005190003 2010-01-06  BAYERISCHE MOTOREN WERKE AKT   
41653         DEU  DE0005190003 2010-01-07  BAYERISCHE MOTOREN WERKE AKT   
41654         DEU  DE0005190003 2010-01-08  BAYERISCHE MOTOREN WERKE AKT   
41655         DEU  DE0005190003 2010-01-11  BAYERISCHE MOTOREN WERKE AKT   

        gvkey   shares_out   close  mcap_reported  shares_out_filled  \
41651  100022  601995196.0  32.310   1.945046e+10        601995196.0   
41652  100022  601995196.0  32.810   1.975146e+10        601995196.0   
41653  100022  601995196.0  33.100   1.992604e+10        601995196.0   
41654  100022  601995196.0  32.655   1.965815e+10        601995196.0   
41655  100022  601995196.0  32.170   1.936619e+10        601995196.0   

               mcap  bad_day  logret_close  logret_mcap  
41651  1.945046e+10    False      0.008080     0.008

In [3]:
# Assemble the NIG model inputs (this step also fills missing liabilities),
# then print the first 50 rows of the resulting panel.
df_nig_panel, nig_em_data = prepare_nig_inputs(ret_daily, bs, df_rf)
print(df_nig_panel.iloc[:50])


     gvkey       date             E          isin  \
0   100022 2010-01-05  1.945046e+10  DE0005190003   
1   100080 2010-01-05  4.578810e+10  DE000BAY0017   
2   100312 2010-01-05  1.765719e+09  DE0007030009   
3   100581 2010-01-05  4.701386e+10  FR0000120321   
4   100957 2010-01-05  3.539016e+10  ES0144580Y14   
5   101202 2010-01-05  2.188290e+10  FR0000120073   
6   101204 2010-01-05  7.372136e+10  FR0000120578   
7   101248 2010-01-05  8.674785e+09  FR0000121667   
8   101305 2010-01-05  6.061525e+09  FR0000073272   
9   101336 2010-01-05  2.149174e+10  FR0000121972   
10  101361 2010-01-05  4.566206e+09  NL0000395903   
11  102296 2010-01-05  2.096499e+10  FR0000125486   
12  103487 2010-01-05  4.002267e+10  DE0007164600   
13  132740 2010-01-05  4.605613e+09  DE0006231004   
14   14140 2010-01-05  9.784074e+10  ES0113900J37   
15   14447 2010-01-05  3.839475e+10  FR0000121014   
16   15181 2010-01-05  4.973802e+10  ES0113211835   
17   15532 2010-01-05  6.765080e+10  FR0000131

In [4]:
import numpy as np
import pandas as pd

import nig_base, nig_em, nig_pd


def nig_firm_timeseries_em_every_15d(
    *,
    dates: np.ndarray,
    E: np.ndarray,
    L: np.ndarray,
    r: np.ndarray,
    start_params: dict,
    daycount: int = 250,
    train_min: int = 250,         # first PD at end of first year
    window_max: int = 500,        # expand until 2y, then roll 2y
    em_every: int = 15,           # recalibrate every 15 trading days
    tau_mode: str = "one_year",
    liability_mode: str = "timevarying",
    max_iter: int = 10,
    min_iter: int = 3,
    tol: float = 1e-3,
) -> pd.DataFrame:
    """
    One-firm full-history driver.

    EM is re-run every `em_every` observations, starting at index
    `train_min - 1`. Between two calibrations the most recently fitted
    parameters are held fixed and used to compute the daily asset value,
    theta and the physical / risk-neutral PDs (horizon T=1.0).

    Parameters
    ----------
    dates, E, L, r :
        Equal-length 1-D series for one firm. `E`, `L` and `r` are cast to
        float arrays; all four are passed whole to `nig_em.EM_algo` and
        `nig_base.get_asset_path`, which select the window by date.
    start_params :
        Initial parameter dict for the first EM run. Must contain whatever
        `nig_em.EM_algo` expects; this function itself reads the keys
        "alpha", "beta1", "delta" and "beta0" from the fitted result.
        The dict is copied, not mutated.
    daycount, tau_mode, liability_mode :
        Forwarded unchanged to `nig_em.EM_algo` and `nig_base.get_asset_path`.
    train_min :
        Number of observations in the first calibration window (>= 1).
    window_max :
        Maximum window length (>= 1). The window starts at index 0 while
        `t_cal < window_max`, afterwards it covers the last `window_max`
        observations ending at the calibration index.
    em_every :
        Spacing, in observations, between calibration indices (>= 1).
    max_iter, min_iter, tol :
        Forwarded unchanged to `nig_em.EM_algo`.

    Returns
    -------
    pd.DataFrame
        One row per input observation with columns: date, E, L, r, A_hat,
        theta, PD_physical, PD_risk_neutral, alpha, beta1, delta, beta0_mu,
        is_recalib, em_converged, em_n_iter. Rows before the first
        calibration index keep NaN / False in the model columns. When the
        history is shorter than `train_min` no calibration is run and the
        frame is returned with the same columns, all model values NaN / False.

    Raises
    ------
    ValueError
        If the input series differ in length, or if `train_min`,
        `window_max` or `em_every` is smaller than 1.

    Notes
    -----
    Side effect: rebinds `nig_em.get_asset_path` and `nig_em.update_theta`
    to the `nig_base` implementations on every call.
    """

    # Ensure EM uses the same implementations that live in nig_base
    # (module-level rebinding; persists after this function returns).
    nig_em.get_asset_path = nig_base.get_asset_path
    nig_em.update_theta = nig_base.update_theta

    dates = np.asarray(dates)
    E = np.asarray(E, dtype=float)
    L = np.asarray(L, dtype=float)
    r = np.asarray(r, dtype=float)

    n = len(dates)
    if not (len(E) == len(L) == len(r) == n):
        raise ValueError("dates, E, L, r must have the same length")

    # A non-positive train_min would yield a negative calibration index (which
    # numpy would silently wrap around); a non-positive em_every would either
    # break range() or produce an empty schedule without any warning.
    if train_min < 1 or window_max < 1 or em_every < 1:
        raise ValueError("train_min, window_max and em_every must be >= 1")

    # outputs (daily)
    A_hat = np.full(n, np.nan, dtype=float)
    theta = np.full(n, np.nan, dtype=float)
    pd_p = np.full(n, np.nan, dtype=float)
    pd_q = np.full(n, np.nan, dtype=float)

    alpha_s = np.full(n, np.nan, dtype=float)
    beta1_s = np.full(n, np.nan, dtype=float)
    delta_s = np.full(n, np.nan, dtype=float)
    mu_s = np.full(n, np.nan, dtype=float)
    is_recalib = np.zeros(n, dtype=bool)
    em_converged = np.zeros(n, dtype=bool)
    em_n_iter = np.full(n, np.nan, dtype=float)

    # Schedule of EM end-indices (first one at train_min-1). When the history
    # is shorter than train_min the range is empty, the loop below is skipped
    # and the all-NaN frame is returned with the regular column set.
    first_end = train_min - 1
    em_end_idxs = list(range(first_end, n, em_every))

    params_cur = dict(start_params)

    # We compute in blocks: [t_cal, t_next) share the SAME params_cur
    for k, t_cal in enumerate(em_end_idxs):
        # window start index: expanding until window_max, then rolling
        w0 = 0 if t_cal < window_max else (t_cal - window_max + 1)

        # --- EM recalibration on [w0, t_cal] ---
        out = nig_em.EM_algo(
            E_series=E,
            L_face_series=L,
            rf_series=r,
            dates=dates,
            start_params=params_cur,          # warm-start params from last calibration
            start_date=dates[w0],
            end_date=dates[t_cal],
            tau_mode=tau_mode,
            liability_mode=liability_mode,
            daycount=daycount,
            max_iter=max_iter,
            min_iter=min_iter,
            tol=tol,
        )

        params_cur = dict(out["params"])
        is_recalib[t_cal] = True
        em_converged[t_cal] = bool(out.get("converged", False))
        em_n_iter[t_cal] = float(out.get("n_iter", np.nan))

        # on EM day, the asset value is the last element of the EM window path
        A_cal = float(out["A_win"][-1])

        # block end (exclusive): next calibration index, or end of history.
        # t_next > t_cal always holds, so the block is never empty.
        t_next = em_end_idxs[k + 1] if (k + 1) < len(em_end_idxs) else n

        # --- A_hat for [t_cal, t_next-1] in ONE call (warm-started at A_cal),
        # using the fixed params_cur ---
        A_block = nig_base.get_asset_path(
            params=params_cur,
            rf_series=r,
            dates=dates,
            E_series=E,
            L_face_series=L,
            start_date=dates[t_cal],
            end_date=dates[t_next - 1],
            tau_mode=tau_mode,
            liability_mode=liability_mode,
            daycount=daycount,
            warm_start=True,
            A0=A_cal,
            warn=False,
        )
        A_hat[t_cal:t_next] = A_block
        A_hat[t_cal] = A_cal  # keep the exact EM value on the calibration day

        # --- daily theta + PDs on the same block ---
        # theta is computed from r_t and params_cur (Esscher tilt, per the
        # original author's note); PD_Q uses beta1+theta.
        theta_block = nig_base.update_theta(params_cur, r[t_cal:t_next], enforce_pricing=True, warn=False)
        theta[t_cal:t_next] = theta_block

        # record the (piecewise-constant) parameters used on every day of the block
        alpha_s[t_cal:t_next] = float(params_cur["alpha"])
        beta1_s[t_cal:t_next] = float(params_cur["beta1"])
        delta_s[t_cal:t_next] = float(params_cur["delta"])
        mu_s[t_cal:t_next] = float(params_cur["beta0"])

        for t in range(t_cal, t_next):
            A0 = float(A_hat[t])
            Lt = float(L[t])

            # skip days without a usable (finite, strictly positive) A or L
            if not (np.isfinite(A0) and np.isfinite(Lt) and A0 > 0.0 and Lt > 0.0):
                continue

            # Physical PD: beta=beta1
            pd_p[t] = nig_pd.compute_pd_physical(A0=A0, L=Lt, T=1.0, params=params_cur)

            # Risk-neutral PD: beta=beta1+theta_t; theta is passed via a
            # per-day copy so params_cur itself is never modified.
            th = float(theta[t])
            if np.isfinite(th):
                params_t = dict(params_cur)
                params_t["theta"] = th
                pd_q[t] = nig_pd.compute_pd_risk_neutral(A0=A0, L=Lt, T=1.0, params=params_t)

    # Assemble output
    df_out = pd.DataFrame({
        "date": dates,
        "E": E,
        "L": L,
        "r": r,
        "A_hat": A_hat,
        "theta": theta,
        "PD_physical": pd_p,
        "PD_risk_neutral": pd_q,
        "alpha": alpha_s,
        "beta1": beta1_s,
        "delta": delta_s,
        "beta0_mu": mu_s,
        "is_recalib": is_recalib,
        "em_converged": em_converged,
        "em_n_iter": em_n_iter,
    })

    return df_out


In [9]:
# Single-firm run of the EM-every-15-days driver on the first gvkey of the panel.
# Works on a copy so df_nig_panel itself is left untouched.
df = df_nig_panel.copy()

df["date"] = pd.to_datetime(df["date"])
df = df.sort_values(["gvkey", "date"])

gv = df["gvkey"].iloc[0]          # pick any firm id (here: first one after sorting)
g = df[df["gvkey"] == gv].copy().sort_values("date")

# NOTE: this re-runs EM over the firm's full history and can take a long time.
res = nig_firm_timeseries_em_every_15d(
    dates=g["date"].to_numpy(),
    E=g["E"].to_numpy(float),
    L=g["L"].to_numpy(float),
    r=g["r"].to_numpy(float),
    start_params={"alpha": 1.0, "beta1": 0.0, "delta": 0.2, "beta0": 0.0},
    em_every=15,
)

# Only the last bare expression of a cell is rendered, so both tables are
# displayed explicitly (display is provided by the IPython/Jupyter kernel).
display(res.loc[res["is_recalib"], ["date", "alpha", "beta1", "delta", "beta0_mu", "em_converged", "em_n_iter"]].head())
display(res[["date", "A_hat", "PD_physical", "PD_risk_neutral"]].dropna().head())


KeyboardInterrupt: 