In [2]:
import pandas as pd
import numpy as np
import os
from typing import Tuple

In [3]:
# portfolio predicted volatility
def portfolio_vol_pred(
    omega_dir: str,
    returns_df: pd.DataFrame,
    w: np.ndarray,
    horizon: int=1
)-> pd.Series:
    """
    Compute predicted portfolio volatility for each date using
    cumulative horizon-day covariance matrices.

    Files named ``omega_<YYYYMMDD>.csv`` in ``omega_dir`` are processed in
    sorted file-name order. For the i-th file, the prediction is
    ``sqrt(w' (Omega_i + ... + Omega_{i+horizon-1}) w)`` and is labelled with
    the date parsed from the i-th file name. Trailing dates that do not have
    ``horizon`` files available are omitted.

    Args:
        omega_dir (str): directory holding the covariance CSV files; the first
            CSV column is read as the row labels.
        returns_df (pd.DataFrame): only its columns are used, to order the
            rows/columns of every covariance matrix. Labels absent from a
            file become NaN and propagate into that date's result.
        w (np.ndarray): portfolio weights, one per column of ``returns_df``;
            flattened to 1-D before use.
        horizon (int): number of consecutive files summed per prediction.

    Returns:
        pd.Series: Predicted portfolio volatilities indexed by date.

    Raises:
        ValueError: if ``horizon`` is smaller than 1.
    """
    if horizon < 1:
        raise ValueError(f"horizon must be a positive integer, got {horizon!r}")

    omega_files = sorted(
        f for f in os.listdir(omega_dir)
        if f.startswith('omega_') and f.endswith('.csv')
    )
    dates = [pd.to_datetime(f.split('_')[1].split('.')[0], format='%Y%m%d') for f in omega_files]

    assets = returns_df.columns
    weights = np.asarray(w, dtype=float).ravel()

    # The quadratic form is linear in the matrix, so
    # w' (sum_j Omega_j) w == sum_j (w' Omega_j w).
    # Each file is therefore read once and reduced to a scalar, instead of
    # being re-read for every window that contains it.
    daily_var = np.empty(len(omega_files), dtype=float)
    for k, fname in enumerate(omega_files):
        omega_df = pd.read_csv(os.path.join(omega_dir, fname), index_col=0)
        omega_mat = omega_df.reindex(index=assets, columns=assets).to_numpy(dtype=float)
        daily_var[k] = weights @ omega_mat @ weights

    # Only start positions with a full window of `horizon` files are kept.
    n_windows = max(len(dates) - horizon + 1, 0)
    vol_by_date = {
        dates[i]: np.sqrt(daily_var[i:i + horizon].sum())
        for i in range(n_windows)
    }

    # An explicit dtype keeps an empty result numeric.
    return pd.Series(vol_by_date, dtype=float).sort_index()

In [4]:
def portfolio_returns(
    returns_df: pd.DataFrame,
    w: np.ndarray
) -> pd.Series:
    """
    Combine per-asset returns into a single portfolio return series.

    Args:
        returns_df (pd.DataFrame): one column per asset, one row per observation.
        w (np.ndarray): weights, one per column of ``returns_df``.

    Returns:
        pd.Series: the matrix product ``returns_df . w``, carrying the row
        index of ``returns_df``.
    """
    # `@` on a DataFrame dispatches to DataFrame.dot.
    return returns_df @ w

In [5]:
def compute_zscore(
    portfolio_returns: pd.Series,
    portfolio_vol_pred: pd.Series,
    horizon: int=1
) -> pd.Series:
    """
    Standardize realized horizon-period returns by lagged predicted volatility.

    For each label t:
        z_t = (sum of returns over rows t .. t+horizon-1)
              / (prediction stored `horizon` rows before t in portfolio_vol_pred)

    Args:
        portfolio_returns (pd.Series): daily portfolio returns indexed by date.
        portfolio_vol_pred (pd.Series): predicted volatilities indexed by date.
        horizon (int): holding period (days); used both as the summation
            window and as the number of rows the predictions are lagged.

    Returns:
        pd.Series: z-scores labelled by the first date of each holding
        period, restricted to dates present in both inputs, with NaN and
        infinite values removed.

    NOTE(review): predictions are lagged by `horizon` rows, which equals a
    one-row lag ("volatility at t-1") only when horizon == 1 - confirm this is
    the intended alignment for longer horizons.
    """
    # Trailing rolling sum, re-labelled so each value sits on the first row
    # of its window (i.e. a forward-looking sum).
    realized = portfolio_returns.rolling(horizon).sum().shift(1 - horizon)

    # Positional lag on the prediction series' own index: the value paired
    # with row t is the one originally stored `horizon` rows earlier.
    lagged_vol = portfolio_vol_pred.shift(horizon)

    # Keep only labels that exist in both series.
    shared = realized.index.intersection(lagged_vol.index)
    ratio = realized.loc[shared] / lagged_vol.loc[shared]

    # Division by a zero prediction gives +/-inf; treat it like missing data.
    return ratio.replace([np.inf, -np.inf], np.nan).dropna()

In [6]:
def compute_bias_stat(
    zscore: pd.Series
) -> float:
    """
    Bias statistic: the sample standard deviation of the z-scores.

    Args:
        zscore (pd.Series): standardized returns.

    Returns:
        float: standard deviation with one delta degree of freedom
        (the pandas default, spelled out here).
    """
    return zscore.std(ddof=1)

In [7]:
def compute_q_stat(
    zscore: pd.Series
) -> float:
    """
    Q statistic: the mean over all observations of ``z**2 - ln(z**2)``.

    Args:
        zscore (pd.Series): standardized returns.

    Returns:
        float: average of the per-observation values. An exact zero in
        ``zscore`` makes ``ln(z**2)`` equal to -inf and the result +inf.
    """
    z_sq = zscore**2
    return (z_sq - np.log(z_sq)).mean()

In [8]:
def compute_stat_pipeline(
    omega_dir: str,
    returns_df: pd.DataFrame,
    w: np.ndarray,
    horizon: int
) -> Tuple[float, float]:
    """
    Run the full evaluation for one covariance directory and one horizon.

    Steps: predicted portfolio volatility -> realized portfolio returns ->
    z-scores -> summary statistics.

    Args:
        omega_dir (str): directory passed to ``portfolio_vol_pred``.
        returns_df (pd.DataFrame): asset returns, one column per asset.
        w (np.ndarray): portfolio weights.
        horizon (int): horizon forwarded to both the volatility prediction
            and the z-score computation.

    Returns:
        Tuple[float, float]: ``(bias statistic, Q statistic)``.
    """
    predicted_vol = portfolio_vol_pred(omega_dir, returns_df, w, horizon)
    realized = portfolio_returns(returns_df, w)

    z = compute_zscore(realized, predicted_vol, horizon)

    return compute_bias_stat(z), compute_q_stat(z)

In [9]:
# Parameters for the evaluation run.

# Covariance-forecast directory for each model under comparison.
omega_dirs = {
    'modelA': "synthetic_data/models/modelA/omega",
    'modelB': "synthetic_data/models/modelB/omega",
}

# Realized asset returns: first CSV column is the date index, sorted ascending.
returns_df = pd.read_csv(
    'synthetic_data/data/returns.csv', index_col=0, parse_dates=True
).sort_index()

# Horizons (in days) at which the forecasts are evaluated.
horizons = [1, 5, 21, 63]

# Equally weighted portfolio: one weight per column of returns_df.
nAssets = len(returns_df.columns)
w = np.ones(nAssets) / nAssets

In [10]:
# One record per (model, horizon) pair; gathered in a list so the table can
# be built in a single step afterwards.
results = []

for model_name, omega_dir in omega_dirs.items():
    for horizon in horizons:
        bias_stat, q_stat = compute_stat_pipeline(omega_dir, returns_df, w, horizon)

        # Progress line for this combination.
        print(f"Model: {model_name}, Horizon: {horizon} days, Bias Stat: {bias_stat:.4f}, Q Stat: {q_stat:.4f}")

        results.append({
            'Model': model_name,
            'Horizon': horizon,
            'Bias Statistic': bias_stat,
            'Q Statistic': q_stat,
        })

Model: modelA, Horizon: 1 days, Bias Stat: 1.0669, Q Stat: 2.3873
Model: modelA, Horizon: 5 days, Bias Stat: 1.0565, Q Stat: 2.3813
Model: modelA, Horizon: 21 days, Bias Stat: 1.1099, Q Stat: 2.2647
Model: modelA, Horizon: 63 days, Bias Stat: 1.0883, Q Stat: 1.9620
Model: modelB, Horizon: 1 days, Bias Stat: 1.0403, Q Stat: 2.3818
Model: modelB, Horizon: 5 days, Bias Stat: 1.0306, Q Stat: 2.3778
Model: modelB, Horizon: 21 days, Bias Stat: 1.0835, Q Stat: 2.2568
Model: modelB, Horizon: 63 days, Bias Stat: 1.0635, Q Stat: 1.9586


In [11]:
# Tabulate the collected records and persist them without the row index.
results_df = pd.DataFrame(data=results)
results_df.to_csv(
    'volatility_forecast_bias_q_stats.csv',
    index=False,
)
print("Results saved to volatility_forecast_bias_q_stats.csv")

Results saved to volatility_forecast_bias_q_stats.csv
