In [5]:
import numpy as np
import pandas as pd
import os
from scipy.io import savemat

# ============================================================
# Synthetic Risk Model Data Generator (Python)
# ------------------------------------------------------------
# Creates realistic synthetic returns from a TRUE factor model,
# then builds two competing factor-risk models:
#   - Model A: well-specified (close to true factor structure)
#   - Model B: misaligned (perturbed/missing factors)
#
# Output files:
#   ./data/returns.mat
#   ./shared/{alpha_vector, constraints, horizon_map}.mat
#   ./models/modelA/*  (good model)
#   ./models/modelB/*  (bad model)
# ============================================================

np.random.seed(42)

BASE = os.path.expanduser("~/synthetic_risk_output")  # <–– writable location
os.makedirs(os.path.join(BASE, "data"),   exist_ok=True)
os.makedirs(os.path.join(BASE, "shared"), exist_ok=True)
os.makedirs(os.path.join(BASE, "models"), exist_ok=True)

# Parameters 
nAssets = 50
nDays = 252 * 5           # 5 years daily
nFactors = 6
lookback = 60             # rolling window
horizons = np.array([1, 5, 21])
hlabels = ['h1d', 'h1w', 'h1m']

dates = pd.date_range(start="2020-01-01", periods=nDays, freq="B")  # business days
assetIDs = np.array([f"Asset{i+1}" for i in range(nAssets)], dtype=object)

# True underlying factor model 
true_X = np.random.randn(nAssets, nFactors)
true_X = (true_X - true_X.mean()) / true_X.std()  # normalize exposures

# True factor covariance Σ_F (some correlation structure)
Sigma_F_true = 0.0001 * (0.7 * np.eye(nFactors) + 0.3 * np.ones((nFactors, nFactors)))

# True specific variances Δ (idiosyncratic risk)
Delta_diag = 0.0002 * (0.5 + np.random.rand(nAssets))
Delta_true = np.diag(Delta_diag)

# Simulate factor returns ~ N(0, Σ_F_true)
factorRets_true = np.random.multivariate_normal(mean=np.zeros(nFactors), cov=Sigma_F_true, size=nDays)

# Simulate idiosyncratic noise ~ N(0, Δ_true)
residuals_true = np.random.multivariate_normal(mean=np.zeros(nAssets), cov=Delta_true, size=nDays)

# Generate TRUE returns: R_t = true_X * f_t + eps_t
returns = factorRets_true @ true_X.T + residuals_true

# choose a writable base directory (e.g., Desktop or current dir)
BASE = os.path.expanduser("~/synthetic_risk_output")   # or "./synthetic_output"
os.makedirs(BASE, exist_ok=True)


savemat(os.path.join(BASE, "data", "returns.mat"), {
    "dates": dates.astype("O"),
    "assetIDs": assetIDs,
    "returns": returns
})



os.makedirs("shared", exist_ok=True)

alpha = 0.05 * np.random.randn(nAssets, 1)  # toy alpha (can align with a factor)
savemat("shared/alpha_vector.mat", {"alpha": alpha})
savemat(os.path.join(BASE, "shared", "alpha_vector.mat"), {"alpha": alpha})

constraints = {
    "Rebalance": "Monthly",
    "MaxGrossExposure": 1.0,
    "LongOnly": True,
    "TurnoverLimit": 0.25
}

savemat(os.path.join(BASE, "shared", "constraints.mat"), {"constraints": constraints})


savemat(os.path.join(BASE, "shared", "horizon_map.mat"), {"horizon_map": horizons})


def build_and_save_model(model_name, X_exposures):
    mdlDir = os.path.join(BASE, "models", model_name)
    os.makedirs(mdlDir, exist_ok=True)

    # Normalize exposures for stability
    X = (X_exposures - X_exposures.mean()) / X_exposures.std()

    # Estimate factor returns & residuals via cross-sectional regression
    pinvXX = np.linalg.pinv(X.T @ X) @ X.T
    factorRets_est = returns @ pinvXX.T                 # nDays × nFactors
    residuals_est = returns - (factorRets_est @ X.T)    # nDays × nAssets

    # Rolling window estimates for Omega_t
    nH = len(horizons)
    sigmaStruct = {hl: np.full((nDays, nAssets), np.nan) for hl in hlabels}
    omega = np.full((nDays, nAssets, nAssets), np.nan)

    for t in range(lookback, nDays):
        idx = slice(t - lookback, t)
        F_win = np.cov(factorRets_est[idx, :].T)      # factor cov
        Delta_win = np.var(residuals_est[idx, :], axis=0)  # specific var
        omega_t = X @ F_win @ X.T + np.diag(Delta_win)
        omega[t] = omega_t

        assetVars = np.diag(omega_t)
        for hIdx, h in enumerate(horizons):
            sigmaStruct[hlabels[hIdx]][t, :] = np.sqrt(h * assetVars)

    # Save .mat files for this model
    savemat(os.path.join(mdlDir, "sigma_forecast.mat"), {
        "sigmaStruct": sigmaStruct,
        "horizons": horizons,
        "dates": dates.astype("O")
    })
    savemat(os.path.join(mdlDir, "omega.mat"), {
        "omega": omega,
        "dates": dates.astype("O")
    })
    savemat(os.path.join(mdlDir, "X_F_Delta.mat"), {
        "X": X,
        "F_win": F_win,
        "Delta_win": Delta_win
    })


# Model A
X_A = true_X + 0.05 * np.random.randn(*true_X.shape)
build_and_save_model("modelA", X_A)

# Model B: misaligned 
X_B = true_X[:, :-2] * 0.5 + 0.5 * np.random.randn(nAssets, nFactors)
build_and_save_model("modelB", X_B)

print("Synthetic data and two models generated!")

OSError: [Errno 30] Read-only file system: 'shared'