# Forecasting Pipeline Notebook

This notebook runs the end-to-end forecasting pipeline for the competition.

In [None]:
import sys
import os

# The project modules below are imported as the `src` package
# (`from src.utils import ...`), so the directory that CONTAINS `src/` must be
# importable -- presumably the parent of the notebook's working directory,
# given the relative '../src' path. '../src' itself is kept on the path in
# case modules inside the package rely on flat imports of their siblings.
# The membership test keeps sys.path free of duplicates when the cell is
# re-run.
for _extra_path in ('..', '../src'):
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from src.utils import setup_logging, set_seeds
from src.data import get_cached_data
from src.features import create_features, save_features
from src.train import train_stacked_model, train_volatility_model
from src.backtest import compute_weights, calibrate_k, simulate_portfolio, calculate_sharpe, apply_ema_smoothing
from src.models import save_model

# One-time session initialisation provided by the project's utils module.
setup_logging()
set_seeds()

In [None]:
# Load data
# Configuration for the cached-data loader. The three values are handed to
# get_cached_data positionally as (cache_path, rebuild_cache, data_root).
data_root = '/data'  # presumably the raw input directory -- confirm in src.data
cache_path = '../cache/processed_data.parquet'
# Set to True to rebuild. The same flag also forces the feature table to be
# recomputed in the feature-creation cell.
rebuild_cache = False  # Set to True to rebuild

df = get_cached_data(cache_path, rebuild_cache, data_root)
print(f"Data shape: {df.shape}")
# Last expression of the cell, so the notebook renders the preview.
df.head()

In [None]:
# Create features
# Build the feature table from `df`, or reuse the copy cached on disk.
from src.features import load_features

features_path = '../cache/features.parquet'
if rebuild_cache or not os.path.exists(features_path):
    # Fresh computation: create_features returns the feature table together
    # with normalisation parameters; only the table is written to the cache.
    df_feat, norm_params = create_features(df)
    save_features(df_feat, features_path)
else:
    df_feat = load_features(features_path)
    # Only the feature table was saved, so no normalisation parameters are
    # available on this path. Bind the name explicitly so both branches leave
    # the notebook with the same set of variables (None = "not available").
    norm_params = None

print(f"Features shape: {df_feat.shape}")
df_feat.head()

In [None]:
# Train models
# Label columns: one for the stacked (return) model, one for the volatility model.
target_col = 'market_forward_excess_returns'
vol_target = 'volatility'

# Every column that is not a label is used as a model input, in the order
# in which it appears in df_feat.
label_cols = (target_col, vol_target)
feature_cols = [col for col in df_feat.columns if col not in label_cols]

# Both models are fitted on the same frame and the same feature list.
stacked_model = train_stacked_model(df_feat, target_col, feature_cols)
vol_model = train_volatility_model(df_feat, vol_target, feature_cols)

print("Models trained")

In [None]:
# Predict and compute weights
# NOTE(review): predictions are made on the same df_feat that was passed to
# the training helpers, so unless those helpers hold data out internally the
# weights (and any metric derived from them) are in-sample.
model_inputs = df_feat[feature_cols]
mu = stacked_model.predict(model_inputs)
sigma = vol_model.predict(model_inputs)

# Sizing parameters forwarded to compute_weights.
k = 1.0  # placeholder scaling value; calibrate as needed
variant = 'mid'  # one of: 'conservative', 'aggressive', 'mid'
raw_weights = compute_weights(mu, sigma, k, variant, df_feat)

# Smooth the raw weights with the project's EMA helper (alpha=0.1).
weights = apply_ema_smoothing(raw_weights, alpha=0.1)

print(f"Weights range: {weights.min()} to {weights.max()}")

In [None]:
# Backtest
# Run the portfolio simulation on the target column with the final weights,
# then summarise the resulting return series with the Sharpe helper.
target_returns = df_feat[target_col]
port_returns = simulate_portfolio(target_returns, weights)
sharpe = calculate_sharpe(port_returns)

print(f"Sharpe ratio: {sharpe}")

# Plot the compounded growth of one unit invested.
equity_curve = (1 + port_returns).cumprod()
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(equity_curve)
ax.set_title('Cumulative Portfolio Returns')
plt.show()

In [None]:
# Save submission
# One row per entry of df_feat's index, paired with its final weight.
submission = pd.DataFrame({'date': df_feat.index, 'weight': weights})
submission_path = '../submissions/submission.csv'
# to_csv does not create missing parent directories; make sure the target
# folder exists so a fresh checkout does not fail on the final step.
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
submission.to_csv(submission_path, index=False)
print(f"Submission saved to {submission_path}")

In [None]:
# Profiling
import time

# perf_counter is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() follows the system clock and can jump
# (NTP sync, manual changes), which would corrupt the measured duration.
start_time = time.perf_counter()
# Assume pipeline runs here
# NOTE: nothing executes between the two readings yet, so the reported
# runtime stays at ~0 until the pipeline steps are placed here.
end_time = time.perf_counter()
print(f"Total runtime: {end_time - start_time:.2f} seconds")