In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

from torch.utils.data import TensorDataset, DataLoader

from volsense_core.data.feature_engineering import build_features
from volsense_core.data.fetch import build_dataset
from volsense_core.evaluation.feature_selection import (compute_feature_correlations, compute_mutual_information, 
perform_recursive_feature_elimination, plot_feature_heatmap, rank_features)

In [2]:
# Assemble the raw dataset for the four tickers over 2005-01-01 .. 2025-11-01.
# NOTE(review): `df` is rebound to `multi_df` in a later cell, so this result
# is only live until that cell runs.
df = build_dataset(
    tickers=["AAPL", "MSFT", "GOOG", "AMZN"],
    start="2005-01-01",
    end="2025-11-01",
)

🌍 Fetching market data: 100%|██████████| 4/4 [00:02<00:00,  1.50ticker/s]


In [2]:
# Load the pre-computed multi-ticker feature table from disk.
# `date` is parsed to datetime here, the same way the quick-test cell further
# down loads its CSV, so date-based operations downstream see real timestamps
# rather than strings.
multi_df = pd.read_csv(
    "../data/processed/volnetx_multi_ticker_data.csv",
    parse_dates=["date"],
)
multi_df

Unnamed: 0,date,realized_vol_log,realized_vol,ticker,return,vol_vol,return_sharpe_20d,macd_diff,vol_3d,market_stress_1d_lag,...,vol_60d,vol_entropy,skew_5d,vol_kurt_20d,vol_ratio,ewma_vol_10d,vol_stress,rsi_14,vol_skew_20d,abs_return
0,2005-01-25,-0.706510,0.493362,AAPL,0.018231,0.000000,0.000000,0.000000,0.493362,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.999998,0.493362,0.017709,0.000000,0.000000,0.018231
1,2005-01-26,-0.705208,0.494005,AAPL,0.002776,0.000455,0.000000,-0.000986,0.493684,0.017709,...,0.000000,0.000000,0.000000,0.000000,0.999998,0.493479,0.032465,0.000000,0.000000,0.002776
2,2005-01-27,-0.704852,0.494181,AAPL,0.005398,0.000431,0.000000,-0.001383,0.493849,0.032465,...,0.000000,0.000000,0.628032,0.000000,0.999998,0.493607,0.012322,0.000000,0.000000,0.005398
3,2005-01-28,-0.704045,0.494579,AAPL,0.018447,0.000507,0.000000,-0.000721,0.494255,0.012322,...,0.000000,0.000000,-0.049140,0.000000,1.000450,0.493783,0.009239,0.000000,0.000000,0.018447
4,2005-01-31,-0.837282,0.432884,AAPL,0.039470,0.027349,0.000000,0.001080,0.473882,0.009235,...,0.000000,0.000000,0.660010,0.000000,0.983558,0.482711,0.014591,0.000000,0.000000,0.039470
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20903,2025-10-27,-2.020124,0.132638,MSFT,0.015107,0.006880,0.185402,0.001274,0.142235,0.008716,...,0.167869,-1315.194138,0.848502,1.632760,0.950492,0.147616,0.010147,60.651309,-0.860415,0.015107
20904,2025-10-28,-1.909825,0.148105,MSFT,0.019849,0.006466,0.237702,0.001929,0.142801,0.010675,...,0.166809,-1320.893085,0.269403,1.735806,0.959574,0.147705,0.010649,58.571901,-0.889229,0.019849
20905,2025-10-29,-1.905825,0.148699,MSFT,-0.000959,0.005909,0.214903,0.000902,0.143147,0.011097,...,0.165532,-1339.605424,0.290157,1.902887,0.966849,0.147886,0.011354,51.533138,-0.936720,0.000959
20906,2025-10-30,-1.632705,0.195399,MSFT,-0.029157,0.016145,0.086663,-0.001599,0.164068,0.011743,...,0.164996,-600.297462,-0.895050,7.665364,1.078512,0.156524,0.029837,47.234082,2.032174,0.029157


In [None]:
# 📦 VolNetX Training Cell (CPU Test Run on 4-Ticker Set)
#
# Defines the feature list, the VolNetX config and the train/validation
# loaders that the following cell passes to `train_volnetx`.

from volsense_core.models.volnetx import VolNetXConfig, train_volnetx, evaluate_volnetx
from volsense_core.models.volnetx import build_volnetx_dataset

# Load preprocessed dataset (ensure it has the selected features + realized_vol_log)
df = multi_df

# --- Feature set: finalized after analysis
# 17 explicit features. 'return' and 'realized_vol' are NOT part of this list.
EXTRA_FEATURES = [
    # --- Core Trends (Autoregressive) ---
    "vol_20d", "vol_60d", "vol_3d",  # Medium, long and short term memory

    # --- Volatility Dynamics (2nd Order) ---
    "vol_vol",       # Vol of Vol (Tail risk proxy)
    "vol_entropy",   # Regime stability
    "vol_chg",       # Velocity of vol changes
    "vol_ratio",     # Mean reversion signal

    # --- Price & Momentum ---
    "abs_return",    # Magnitude of moves
    "macd_diff",     # Trend shifts
    "rsi_14",        # Overbought/Oversold conditions

    # --- Macro & Stress Regimes (Sector Drivers) ---
    "macro_VIX",     # Global fear gauge
    "macro_Oil",     # Energy sector driver
    "macro_BTC",     # Risk-on/Liquidity proxy (Tech corr)
    "macro_Rates",   # Valuation driver (Tech discount rate)
    "market_stress", # Cross-sectional dispersion
    "vol_stress",    # Interaction term (Idiosyncratic vs Market)

    # --- Distribution ---
    "skew_scaled_return" # Asymmetry/Crash risk
]

# `features` is the name consumed by the config and the dataset builder below.
# It was previously referenced without ever being assigned (NameError).
# NOTE(review): 'return' and 'realized_vol' may be added automatically by the
# data loader. If they are not, add them here; if they are, confirm that
# `input_size` below still matches the width the model actually receives.
features = list(EXTRA_FEATURES)

# Fail fast with a readable message if the CSV lacks a requested column,
# instead of failing deep inside the dataset builder.
# NOTE(review): 'realized_vol_log' is assumed to be the target column, per the
# load comment above — confirm against build_volnetx_dataset.
missing_columns = [
    col for col in [*features, "realized_vol_log"] if col not in df.columns
]
if missing_columns:
    raise KeyError(f"Columns missing from dataset: {missing_columns}")

# --- Config + Train
cfg = VolNetXConfig(
    window=65,
    input_size=len(features),
    horizons=[1, 5, 10],
    device="cpu",
    epochs=2,
    batch_size=64,
    early_stop=True,
    patience=3
)

ticker_to_id, train_loader, val_loader, train_ds, val_ds = build_volnetx_dataset(
    df=df,
    features=features,
    window=cfg.window,
    horizons=cfg.horizons,
    batch_size=cfg.batch_size,
    device=cfg.device
)

In [4]:
# Train VolNetX with the config and loaders from the previous cell; the number
# of tickers is taken from the ticker -> id mapping.
model = train_volnetx(
    cfg,
    train_loader,
    val_loader,
    n_tickers=len(ticker_to_id),
)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1024x81 and 32x32)

In [None]:
# @title 3. VolNetX Configuration
# Declares the feature list, run tag, device and hyperparameters that the
# training cell unpacks into VolSenseForecaster.
from volsense_core.forecaster_core import VolSenseForecaster
import torch

# --- 1. Feature Set (17 extra features; 'return' and 'realized_vol' are not listed here) ---
EXTRA_FEATURES = [
    # --- Core Trends ---
    "vol_20d", "vol_60d", "vol_3d",
    # --- Volatility Dynamics ---
    "vol_vol", "vol_entropy", "vol_chg", "vol_ratio",
    # --- Price & Momentum ---
    "abs_return", "macd_diff", "rsi_14",
    # --- Macro & Stress ---
    "macro_VIX", "macro_Oil", "macro_BTC", "macro_Rates",
    "market_stress", "vol_stress",
    # --- Distribution ---
    "skew_scaled_return"
]

# --- 2. Hyperparameters ---
MODEL_VERSION = "v701_volnetx"  # Unique tag for this run
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # Prefer GPU when present

TRAIN_CONFIG = {
    "window": 65,                # 3 months context for Transformer
    "horizons": [1, 5, 10],      # Multi-horizon target
    "loss_horizon_weights": [0.5, 0.3, 0.2], # Prioritize 1-day accuracy (one weight per horizon)
    "hidden_dim": 160,           # d_model size
    "num_layers": 3,             # Depth (LSTM + Transformer blocks)
    "epochs": 50,                # Max epochs (early stopping will handle overtraining)
    "batch_size": 128,           # Larger batch for stable gradients
    "lr": 8e-4,                  # Slightly conservative learning rate
    "dropout": 0.2,              # Regularization
    "val_start": "2023-06-01",   # Validation cutoff
    "use_transformer": True,     # Enable VolNetX hybrid mode
    "use_feature_attention": True, # Enable dynamic feature selection
    "global_ckpt_path": f"models/{MODEL_VERSION}" # Save path relative to repo root
}

# Status line; the leading emoji was previously mis-encoded.
print(f"⚙️ configured VolNetX ({MODEL_VERSION}) on {DEVICE}")
print(f"   Features: {len(EXTRA_FEATURES)} explicit + base features")

In [None]:
# @title 4. Train VolNetX
# Builds the forecaster from the configuration cell's settings, fits it on
# `df`, and reports the elapsed time.
import time

# Initialize Forecaster with "volnetx" method
forecaster = VolSenseForecaster(
    method="volnetx",
    device=DEVICE,
    extra_features=EXTRA_FEATURES,
    **TRAIN_CONFIG
)

print("🚀 Starting Training Run...")
# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments during a long run (time.time() can be).
start_time = time.perf_counter()

# Run Fit (Handles Dataset Build -> Train -> Save internally)
forecaster.fit(df)

end_time = time.perf_counter()
print(f"\n✅ Training Complete in {(end_time - start_time)/60:.1f} minutes.")

In [1]:
# @title üß™ Quick Training Test (2 Epochs)
import pandas as pd
import torch
from volsense_core.forecaster_core import VolSenseForecaster

# 1. Load Small Data Subset
DATA_PATH = "../data/processed/master_lstm_dataset_v2.csv"
print(f"üìÇ Loading subset from {DATA_PATH}...")

# Read just 10k rows to be fast
df_test = pd.read_csv(DATA_PATH, parse_dates=["date"]).iloc[-10000:].copy() 
print(f"   Loaded {len(df_test)} rows for testing.")

# 2. Define Minimal Config
TEST_CONFIG = {
    "window": 30,               # Short window for speed
    "horizons": [1, 5],         # Reduced horizons
    "hidden_dim": 32,           # Tiny model
    "num_layers": 1,
    "epochs": 2,                # Just 2 epochs
    "batch_size": 16,
    "lr": 1e-3,
    "val_mode": "causal",
    "val_start": "2024-01-01",  # Ensure this date exists in your subset!
    "loss_horizon_weights": [0.7, 0.3],
    "use_transformer": True,
    "use_feature_attention": True
}

# 3. Run Training
print("\nüöÄ Starting Test Run...")
try:
    forecaster = VolSenseForecaster(
        method="volnetx",
        device="cuda" if torch.cuda.is_available() else "cpu",
        extra_features=["vol_20d", "vol_vol"], # Minimal features
        **TEST_CONFIG
    )
    
    forecaster.fit(df_test)
    print("\n‚úÖ Test Run Complete! Training loop is functional.")
    
except Exception as e:
    print(f"\n‚ùå Test Failed: {e}")

📂 Loading subset from ../data/processed/master_lstm_dataset_v2.csv...
   Loaded 10000 rows for testing.

🚀 Starting Test Run...
🧠 Training VolNetX Hybrid Model...
   ↳ Building VolNetX dataset (causal mode)...
   ⚖️ Fitting new global scaler (Train split only)...
   ↳ Starting training loop...




Epoch 1/2 - Train Loss: 0.1195 | Val Loss: 0.1219
Epoch 2/2 - Train Loss: 0.0568 | Val Loss: 0.0913

✅ Test Run Complete! Training loop is functional.
