In [1]:
import numpy as np
import pandas as pd
import os
from datetime import datetime, timedelta

np.random.seed(42)

# 1. Parameter Setting
# Sizes of the simulated universe and of the rolling-estimation set-up.
nAssets = 50
nDays = 252 * 5  # five years at 252 trading days per year
nFactors = 6
lookback = 60  # number of past observations in each rolling window
horizons = [1, 5, 21]  # 1d/1w/1m
hlabels = ['h1d', 'h1w', 'h1m']
# Labels and horizons are zipped together downstream; a length mismatch would
# silently drop a horizon, so fail loudly here instead.
assert len(horizons) == len(hlabels), "each horizon needs exactly one label"

baseDate = datetime(2020, 1, 1)
# Business-day calendar: 252 observations per year and 5/21-day week/month
# horizons are trading-day conventions, so the index must skip weekends.
# (A calendar-day index would cover only ~3.5 years and include Sat/Sun.)
dates = pd.date_range(baseDate, periods=nDays, freq='B')
assetIDs = [f"Asset{i+1}" for i in range(nAssets)]

# 2. Step 1: Simulate returns
# Draw independent Gaussian daily returns: one row per date, one column per asset.
dailySigma = 0.02
returns = np.random.randn(nDays, nAssets) * dailySigma
returns_df = pd.DataFrame(data=returns, columns=assetIDs, index=dates)

os.makedirs('data', exist_ok=True)
returns_df.to_csv('data/returns.csv')

# The date axis and the asset identifiers are also written out on their own,
# each as a single-column file with a header row and no index column.
pd.Series(dates, name='dates').to_csv('data/dates.csv', index=False)
pd.Series(assetIDs, name='assetIDs').to_csv('data/assetIDs.csv', index=False)

# 3. Step 2: Shared artefacts
# Files written once under shared/ (not per model): a random alpha vector,
# a one-row table of portfolio constraints, and the horizon label map.
os.makedirs('shared', exist_ok=True)

# One random alpha per asset, saved with the asset ID as the row label.
alpha = np.random.randn(nAssets) * 0.05
pd.Series(alpha, index=assetIDs).rename('alpha').to_frame().to_csv('shared/alpha_vector.csv')

# Single-record constraints table.
constraints = pd.DataFrame([{
    'Rebalance': 'Monthly',
    'MaxGrossExposure': 1.0,
    'LongOnly': True,
    'TurnoverLimit': 0.25,
}])
constraints.to_csv('shared/constraints.csv', index=False)

# Maps a short horizon label to its length in days.
# NOTE(review): these labels ('1d', ...) differ from hlabels ('h1d', ...) used
# for the sigma forecast file names -- confirm which form consumers expect.
horizonLabels = pd.DataFrame(
    list(zip(['1d', '1w', '1m'], horizons)),
    columns=['horizonLabels', 'horizons'],
)
horizonLabels.to_csv('shared/horizon_map.csv', index=False)

# 4. Step 3: Build two factor models
# For each model: draw random exposures, back out factor returns by least
# squares, then produce rolling covariance estimates (factor covariance F,
# specific variance Delta, asset covariance Omega) and per-horizon volatility
# forecasts. Everything is written under models/<model_name>/.
factorIDs = [f"Factor{j+1}" for j in range(nFactors)]
# Rolling outputs start once a full look-back window of history is available.
rollDates = dates[lookback:]

for model_name in ['modelA', 'modelB']:
    mdlDir = os.path.join('models', model_name)
    os.makedirs(mdlDir, exist_ok=True)

    # 3a. Exposures
    # Standardise each factor (column) across assets to zero mean / unit std.
    # axis=0 is essential: without it NumPy uses the grand mean/std of the
    # whole matrix, leaving individual factors off-centre and unevenly scaled.
    X = np.random.randn(nAssets, nFactors)
    X = (X - X.mean(axis=0)) / X.std(axis=0)
    X_df = pd.DataFrame(X, index=assetIDs, columns=factorIDs)
    X_df.to_csv(os.path.join(mdlDir, 'exposures_X.csv'))

    # 3b. Factor returns & residuals
    # Cross-sectional least squares for every day at once. pinv(X) equals
    # pinv(X.T @ X) @ X.T but avoids squaring the condition number of X.
    pinvXX = np.linalg.pinv(X)  # nFactors × nAssets
    factorRets = returns @ pinvXX.T  # nDays × nFactors
    factorRets_df = pd.DataFrame(factorRets, index=dates, columns=factorIDs)
    factorRets_df.to_csv(os.path.join(mdlDir, 'factor_returns.csv'))

    residuals = returns - factorRets @ X.T  # nDays × nAssets
    residuals_df = pd.DataFrame(residuals, index=dates, columns=assetIDs)
    residuals_df.to_csv(os.path.join(mdlDir, 'residuals.csv'))

    # 3c. Rolling estimates
    nH = len(horizons)
    sigma_dict = {hlabel: [] for hlabel in hlabels}
    F_list, Delta_list = [], []

    omega_dir = os.path.join(mdlDir, 'omega')
    os.makedirs(omega_dir, exist_ok=True)

    for t in range(lookback, nDays):
        # Window covers the `lookback` observations strictly before day t,
        # so the estimate stamped dates[t] uses no same-day information.
        window = slice(t - lookback, t)
        # atleast_2d keeps F a matrix even when there is a single factor.
        F_win = np.atleast_2d(np.cov(factorRets[window], rowvar=False))
        # ddof=1 matches np.cov's default (sample) normalisation, so the
        # factor and specific parts of Omega use the same estimator.
        Delta_win = np.var(residuals[window], axis=0, ddof=1)
        F_list.append(F_win.flatten())
        Delta_list.append(Delta_win)

        # asset covariance: Omega = X F X' + diag(Delta)
        omega_t = X @ F_win @ X.T + np.diag(Delta_win)
        omega_df = pd.DataFrame(omega_t, index=assetIDs, columns=assetIDs)
        omega_df.to_csv(os.path.join(omega_dir, f'omega_{dates[t].strftime("%Y%m%d")}.csv'))

        # volatility forecasts for each horizon (per asset):
        # daily variance scaled linearly by the horizon length, then sqrt.
        assetVars = np.diag(omega_t)
        for h, hlabel in zip(horizons, hlabels):
            sigmas = np.sqrt(h * assetVars)
            sigma_dict[hlabel].append(sigmas)

    # Save sigma forecasts as DataFrames (one file per horizon label)
    for hlabel in hlabels:
        sigmas_array = np.array(sigma_dict[hlabel])
        idx_dates = rollDates
        sigma_df = pd.DataFrame(sigmas_array, index=idx_dates, columns=assetIDs)
        sigma_df.to_csv(os.path.join(mdlDir, f'sigma_forecast_{hlabel}.csv'))

    # Save rolling F and Delta as DataFrame.
    # Each F row is the nFactors × nFactors matrix flattened row-major, so the
    # columns are the default integer positions 0 .. nFactors**2 - 1.
    F_df = pd.DataFrame(F_list, index=rollDates)
    Delta_df = pd.DataFrame(Delta_list, index=rollDates, columns=assetIDs)
    F_df.to_csv(os.path.join(mdlDir, 'F_rolling.csv'))
    Delta_df.to_csv(os.path.join(mdlDir, 'Delta_rolling.csv'))

print("Synthetic data generation complete. All files are saved as .csv.")

Synthetic data generation complete. All files are saved as .csv.


In [2]:
import os
import shutil
import zipfile

# Bundle only the directories this notebook generates. Zipping '.' wholesale
# would also pick up anything else in the working directory and, because the
# archive is created inside the directory being walked, the partially written
# zip file itself.
archive_name = 'synthetic_data.zip'
output_dirs = ['data', 'shared', 'models']

with zipfile.ZipFile(archive_name, 'w', zipfile.ZIP_DEFLATED) as archive:
    for top_dir in output_dirs:
        # os.walk yields nothing for a missing directory, so an output folder
        # that was never generated is simply skipped.
        for root, _subdirs, filenames in os.walk(top_dir):
            for filename in sorted(filenames):
                member_path = os.path.join(root, filename)
                # Store paths relative to the working directory (data/..., models/...).
                archive.write(member_path, arcname=member_path)

# The browser download helper only exists on Google Colab; elsewhere the
# archive is left on disk and its location is reported instead of crashing.
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    files.download(archive_name)
else:
    print(f"Archive written to {os.path.abspath(archive_name)}")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>