The Kalman filter estimates time-varying exposures from noisy returns using a linear Gaussian state-space model. Let:

---

**Model Dimensions and Notation**

- $\beta_t \in \mathbb{R}^{K \times 1}$ — latent exposures to $K$ risk factors at time $t$
- $y_t \in \mathbb{R}^{1 \times 1}$ — observed fund return at time $t$
- $H_t \in \mathbb{R}^{1 \times K}$ — row vector of factor/benchmark returns at time $t$
- $T \in \mathbb{R}^{K \times K}$ — transition matrix (often $T = I_K$ for a random walk)
- $Q \in \mathbb{R}^{K \times K}$ — covariance of state (exposure) noise
- $R \in \mathbb{R}^{1 \times 1}$ — variance of observation noise
- $P_{t|s} \in \mathbb{R}^{K \times K}$ — covariance of state at $t$ given observations up to $s$
- $\hat{\beta}_{t|s} \in \mathbb{R}^{K \times 1}$ — estimate of $\beta_t$ given data up to $s$


**State Equation**  
The exposure vector evolves as a linear Gaussian process:

$$
\beta_t = T \beta_{t-1} + \eta_t, \quad \eta_t \sim \mathcal{N}(0, Q)
$$

**Observation Equation**  
The return is modeled as a noisy linear combination of the exposures:

$$
y_t = H_t \beta_t + \epsilon_t, \quad \epsilon_t \sim \mathcal{N}(0, R)
$$


**Prediction Step**

$$
\hat{\beta}_{t|t-1} = T \hat{\beta}_{t-1|t-1}
$$

$$
P_{t|t-1} = T P_{t-1|t-1} T^\top + Q
$$

These equations propagate the state estimate and uncertainty one step forward using the state dynamics.


**Update Step**

Residual (innovation):

$$
\tilde{y}_t = y_t - H_t \hat{\beta}_{t|t-1}
$$

Innovation covariance:

$$
S_t = H_t P_{t|t-1} H_t^\top + R
$$

Kalman gain:

$$
K_t = P_{t|t-1} H_t^\top S_t^{-1}
$$

---

Posterior mean:

$$
\hat{\beta}_{t|t} = \hat{\beta}_{t|t-1} + K_t \tilde{y}_t
$$

Posterior covariance:

$$
P_{t|t} = (I_K - K_t H_t) P_{t|t-1}
$$


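The Kalman filter balances model-driven prediction with observation-driven correction. The innovation $\tilde{y}_t$ measures how far the observed return deviates from its one-step-ahead prediction, and the Kalman gain $K_t$ controls how strongly that deviation corrects the state. Because the observation is scalar, the posterior mean is the prior mean shifted along the gain vector $K_t$ in proportion to the innovation, and the covariance receives a rank-one reduction $K_t H_t P_{t|t-1}$, so uncertainty shrinks along the direction informed by the new data.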


In [None]:
import pandas as pd
import numpy as np
import os
import sys
import argparse
import logging
import time
import pymc as pm
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

import utils.utils as utils
import plotly.graph_objects as go
from viz.viz_tools import get_sci_template, attach_line_end_labels
from filters.kalman import KalmanSpec, KalmanEngine
from viz.kalman_viz import (ModelDiagnosticsPlotter, 
                            summarize_model_diagnostics, 
                            summarize_factor_dynamics, 
                            plot_beta_grid, 
                            plot_factor_contributions)
import numpy as np
import pandas as pd

: 

In [None]:
# Load the factor-return panel and the yield series; both use their first
# CSV column as a parsed date index.
bond_return_data = pd.read_csv("data/bond_factor_returns.csv", index_col=0, parse_dates=True)
yield_data = pd.read_csv("data/yield_data.csv", index_col=0, parse_dates=True)

# Replace the yield level (first column) with its period-over-period change.
yield_data = yield_data.rename(columns={yield_data.columns[0]: "yield_level"})
yield_delta = yield_data["yield_level"].diff()
yield_data = yield_data.drop(columns=["yield_level"])
yield_data["yield_delta"] = yield_delta

# Left-join on the date index so every factor-return row is kept.
merged_data = bond_return_data.merge(yield_data, how="left", left_index=True, right_index=True)

# NOTE(review): `additive_cols` presumably marks columns that are summed within
# a week rather than compounded — confirm in utils.aggregate_weekly_data.
data_weekly = utils.aggregate_weekly_data(merged_data, additive_cols=["yield_delta"])

# Remove helper columns when they are present in the weekly frame.
unwanted = [name for name in ("duration_t", "yield_level") if name in data_weekly.columns]
data_weekly = data_weekly.drop(columns=unwanted)
data_weekly.head(10)

In [None]:
# --- Single-model run: fit one Kalman spec and inspect its diagnostics ---

# --- Define Target and Factors (PFF is used as a factor here) ---
target_col = "Preferred Income (FPE)"  # Example target
factor_cols = ["Preferred Stock (PFF)", "Convertibles (ICVT)"]  # PFF as factor

y = data_weekly[target_col]
H = data_weekly[factor_cols]
index = data_weekly.index

# --- Build Kalman Spec ---
# Only R is set explicitly here. The commented-out setters are alternative
# initial-state / Q / R choices kept as toggles for experimentation.
spec = (
    KalmanSpec(K=len(factor_cols), name='Simple Model')
    #.set_initial_state_from_ols(H, y)
    #.set_Q_from_factor_vols(H)

    #.set_Q_from_rolling_beta_var(df=data_weekly, target_col=target_col, factor_cols=factor_cols, window=12)
    #.set_Q_from_rolling_residual_vol(df=data_weekly, target_col=target_col, factor_cols=factor_cols, window=12)
    .set_R_from_ols(H=H, y=y)
    #.set_R_from_rolling_ols_residuals(df=data_weekly, target_col=target_col, factor_cols=factor_cols, window=12)
    #.set_R_from_rolling_factor_vols(H, window=12, )
)

# --- Run Kalman Filter ---
# Build the engine once and reuse it; pass `target_col` rather than repeating
# the literal so the target cannot drift from the `y` series defined above.
# NOTE(review): `burn` presumably excludes the first observations from the
# diagnostics — confirm in KalmanEngine.run.
engine = KalmanEngine(spec)
results = engine.run(
    data_weekly,
    target_col=target_col,
    factor_cols=factor_cols,
    burn=8,
)

# --- Inspect Results ---
viz = ModelDiagnosticsPlotter(results)
fig = viz.plot(include=None)  # uses default diagnostics: residuals, log_likelihood, gain_norm
display(fig)
df_factors = summarize_model_diagnostics(results)
display(df_factors.style.format(precision=4))  # control formatting here, not in the function
#display(plot_factor_contributions(results, ))

display(summarize_factor_dynamics(results).style.format(precision=4))

#fig = plot_beta_grid(results)
#display(fig)

In [None]:
import numpy as np
import pandas as pd
from itertools import product
from typing import Callable

from filters.kalman import KalmanEngine, KalmanSpec
from viz.kalman_viz import summarize_model_diagnostics

# --- Default Q/R/Init Setter Registries ---
# Every setter shares one call shape, (spec, df, y, X), and returns whatever the
# underlying spec method returns. `y` and `X` are column selectors into `df`:
# some setters forward them as-is, others forward df[X] / df[y].

def _q_from_mle(spec, df, y, X):
    """Set Q through ``spec.set_Q_from_mle(df, y, X)``."""
    return spec.set_Q_from_mle(df, y, X)


def _q_from_rolling_residual_vol(spec, df, y, X):
    """Set Q through ``spec.set_Q_from_rolling_residual_vol(df, y, X)``."""
    return spec.set_Q_from_rolling_residual_vol(df, y, X)


def _q_from_rolling_beta_var(spec, df, y, X):
    """Set Q through ``spec.set_Q_from_rolling_beta_var(df, y, X)``."""
    return spec.set_Q_from_rolling_beta_var(df, y, X)


def _r_from_ols(spec, df, y, X):
    """Set R through ``spec.set_R_from_ols(df[X], df[y])``."""
    return spec.set_R_from_ols(df[X], df[y])


def _r_from_rolling_factor_vols(spec, df, y, X):
    """Set R through ``spec.set_R_from_rolling_factor_vols(df[X])``."""
    return spec.set_R_from_rolling_factor_vols(df[X])


def _r_from_mle(spec, df, y, X):
    """Set R through ``spec.set_R_from_mle(df, y, X)``."""
    return spec.set_R_from_mle(df, y, X)


def _init_from_ols(spec, df, y, X):
    """Set the initial state through ``spec.set_initial_state_from_ols(df[X], df[y])``."""
    return spec.set_initial_state_from_ols(df[X], df[y])


DEFAULT_Q_SETTERS = {
    "const": _q_from_mle,
    "resid_var": _q_from_rolling_residual_vol,
    "beta_var": _q_from_rolling_beta_var,
}

DEFAULT_R_SETTERS = {
    "ols": _r_from_ols,
    "rolling_vol": _r_from_rolling_factor_vols,
    "mle": _r_from_mle,
}

DEFAULT_INIT_SETTERS = {
    "ols": _init_from_ols,
}


def run_kalman_grid_search(
    df: pd.DataFrame,
    target_col: str,
    factor_cols: list[str],
    base_spec: KalmanSpec,
    q_setters: dict[str, Callable] = None,
    r_setters: dict[str, Callable] = None,
    init_setters: dict[str, Callable] = None,
    burn: int = 0
) -> pd.DataFrame:
    """
    Run every combination of (Q, R, init) setters on a KalmanSpec,
    and return a summary of model diagnostics.

    Args:
        df: Data passed to every setter and to ``KalmanEngine.run``.
        target_col: Name of the target column in ``df``.
        factor_cols: Names of the factor columns in ``df``.
        base_spec: Spec that is copied (``base_spec.copy()``) for each combination.
        q_setters: Mapping of label -> callable ``(spec, df, target_col,
            factor_cols) -> spec``. Defaults to ``DEFAULT_Q_SETTERS``.
        r_setters: Same shape as ``q_setters``. Defaults to ``DEFAULT_R_SETTERS``.
        init_setters: Same shape as ``q_setters``. Defaults to
            ``DEFAULT_INIT_SETTERS``.
        burn: Forwarded unchanged to ``KalmanEngine.run``.

    Returns:
        One row per combination. Successful runs carry the first row of
        ``summarize_model_diagnostics`` plus "Q Mode", "R Mode" and "Init Mode";
        runs that raised carry "Model Name", "Target", "Error" and the three
        mode labels. Rows are sorted ascending by "Mean RMSE" when that column
        exists (failed runs have NaN there and sort last); if no run produced
        it, rows are returned in grid order.
    """
    if q_setters is None:
        q_setters = DEFAULT_Q_SETTERS
    if r_setters is None:
        r_setters = DEFAULT_R_SETTERS
    if init_setters is None:
        init_setters = DEFAULT_INIT_SETTERS

    records = []

    for q_key, r_key, init_key in product(q_setters, r_setters, init_setters):
        # Start each combination from a fresh copy of the base spec.
        spec = base_spec.copy()
        spec = init_setters[init_key](spec, df, target_col, factor_cols)
        spec = q_setters[q_key](spec, df, target_col, factor_cols)
        spec = r_setters[r_key](spec, df, target_col, factor_cols)

        model_name = f"Q={q_key}, R={r_key}, Init={init_key}"
        spec.name = model_name

        engine = KalmanEngine(spec)
        try:
            results = engine.run(df, target_col, factor_cols, burn=burn)
            summary = summarize_model_diagnostics(results).iloc[0].to_dict()
            # Prefer the mode recorded on the spec; fall back to the grid label.
            summary["Q Mode"] = spec.meta.get("Q_mode", q_key)
            summary["R Mode"] = spec.meta.get("R_mode", r_key)
            summary["Init Mode"] = init_key

            records.append(summary)
        except Exception as e:
            # Deliberate best-effort: one failing combination is recorded as a
            # row with its error message instead of aborting the whole grid.
            records.append({
                "Model Name": model_name,
                "Target": target_col,
                "Error": str(e),
                "Q Mode": q_key,
                "R Mode": r_key,
                "Init Mode": init_key
            })

    out = pd.DataFrame(records)
    # "Mean RMSE" only exists if at least one run succeeded; sorting on a
    # missing column would raise KeyError and hide the recorded errors.
    if "Mean RMSE" in out.columns:
        out = out.sort_values(by="Mean RMSE", ascending=True)
    # Return the sorted frame (previously a fresh, unsorted frame was returned).
    return out


In [None]:
# Show the "meta" entry of `results` (the output of the single-model run).
results['meta']

In [None]:
# Run the full default (Q, R, init) grid on the same target and factors.
# K is derived from the factor list so the state dimension stays in sync
# with `factor_cols` instead of being hard-coded.
grid_results = run_kalman_grid_search(
    df=data_weekly,
    target_col=target_col,
    factor_cols=factor_cols,
    base_spec=KalmanSpec(K=len(factor_cols)),
    burn=24
)


In [None]:
# Render the grid summary with in-cell bars on the two headline metrics,
# showing numbers at four digits of precision.
(
    grid_results.style
    .bar(subset=['Mean RMSE'])
    .bar(subset='Cumulative Log-Likelihood')
    .format(precision=4)
)

In [None]:
# Describe the notebook-level `spec`. run_kalman_grid_search assigns its specs
# to a local variable, so calling it does not rebind this name.
spec.describe(target_col=target_col, factor_cols=factor_cols)