# CUDA HRP Multi-Variant ML Super Agent

## Key Issues to Address:
1. **Label Leakage**: Features and labels must be properly time-aligned to avoid look-ahead bias
2. **Sharpe Calculation**: Need consistent return frequency for annualization
3. **Label Encoding**: Ensure y_train contains class indices (0-5, one per entry in `windows`), not window values
4. **Missing Data**: Handle NaN values in economic features

## Recommended Approach:
- Shift features back by one period (use t-1 features to predict t performance)
- Standardize feature scaling
- Add proper handling for missing economic data

In [None]:
# Cell 0: Install Required Packages (Run if not already installed)

# Note: ta (not ta-lib) is easier to install on Windows
!pip install cupy-cuda12x scikit-learn scipy tqdm pandas numpy matplotlib xgboost ta joblib

Mode: CPU
Output Directory: Super_Agent_Output


In [None]:
# Cell 1: Imports and Setup

import pandas as pd
import numpy as np
from tqdm import tqdm
import os
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from sklearn.decomposition import PCA
import ta  # For technical indicators like MACD, RSI
import random
import joblib
import sys

# Make the project's utils directory importable so `hrp_functions` can be found
sys.path.append(r'C:/Users/lucas/OneDrive/Bureau/HRP/utils')

# HRP weight computation lives in an external script (not shown in this notebook)
from hrp_functions import compute_hrp_weights

# GPU check: the flag only records whether cupy can be imported.
# NOTE(review): a successful import does not prove a CUDA device is usable — confirm before relying on it.
try:
    import cupy as cp
    GPU_AVAILABLE = True
except ImportError:
    GPU_AVAILABLE = False

# Input data, output directory (created below if missing) and the folder holding the economic series
data_path = r'C:/Users/lucas/OneDrive/Bureau/HRP/DATA (CRSP)/PREPROCESSED DATA/ADA-HRP-Preprocessed-DATA.csv'
super_agent_dir = r'C:/Users/lucas/OneDrive/Bureau/HRP/Super_Agent_Output'
prep_path = r'C:/Users/lucas/OneDrive/Bureau/HRP/DATA (CRSP)/PREPROCESSED DATA'
os.makedirs(super_agent_dir, exist_ok=True)

windows = [1, 3, 6, 12, 24, 36]  # Candidate HRP look-back windows in months (includes the 3-month variant)
rebalance_freq = '3M'  # NOTE(review): not referenced by any cell shown in this notebook
min_stocks = 20  # Minimum number of assets a window must contain before HRP weights are computed
market_index = '84398.0'  # PERMNO for SPY as market proxy

# Economic series to load; USEPUINDXD was removed because it is not in the preprocessed data
series_list = ['CPIAUCSL', 'GDPC1', 'M2SL', 'PPIACO', 'T10Y2Y', 'UNRATE', 'VIXCLS']

print(f"Mode: {'GPU (CUDA)' if GPU_AVAILABLE else 'CPU'}")
print(f"Output Directory: {super_agent_dir}")

Mode: CPU
Output Directory: C:/Users/lucas/OneDrive/Bureau/HRP/Super_Agent_Output


In [None]:
# Cell 2: Load Data and Prepare Dates

df = pd.read_csv(data_path)
print(f"Loaded data: {df.shape[0]} rows, {df.shape[1]} columns")

# Every column other than the two identifier columns is expected to be a date header.
# Underscores are swapped for colons before parsing
# (presumably the headers carry "HH_MM_SS" time parts — TODO confirm).
date_cols = [col for col in df.columns if col not in ['PERMNO', 'Company_Ticker']]
parsed_dates = pd.to_datetime([col.replace('_', ':') for col in date_cols], errors='coerce')

# A header that fails to parse becomes NaT. Keeping it would place NaT in the
# returns index and break every date-based slice later on, so drop it loudly.
is_parsed = ~parsed_dates.isna()
if not is_parsed.all():
    unparsed = [col for col, ok in zip(date_cols, is_parsed) if not ok]
    print(f"WARNING: ignoring {len(unparsed)} non-date columns: {unparsed[:10]}")
    date_cols = [col for col, ok in zip(date_cols, is_parsed) if ok]
    parsed_dates = parsed_dates[is_parsed]

# Order the date columns chronologically
sort_order = np.argsort(parsed_dates)
date_cols = [date_cols[i] for i in sort_order]
dates = parsed_dates[sort_order]

# Non-numeric cells become NaN
for col in date_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Keep only rows that carry a PERMNO identifier
stocks_df = df[df['PERMNO'].notna()].copy()
print(f"Stocks: {len(stocks_df)} securities")

def get_quarterly_dates(dates):
    """Return the latest date found in each calendar quarter of `dates`.

    Args:
        dates: Datetime-like collection (e.g. a DatetimeIndex).

    Returns:
        List with one timestamp per (year, quarter) present in `dates`,
        ordered by year and then quarter.
    """
    stamps = pd.Series(dates)
    year_key = stamps.dt.year.rename('year')
    quarter_key = stamps.dt.quarter.rename('quarter')
    last_per_quarter = stamps.groupby([year_key, quarter_key]).max()
    return last_per_quarter.tolist()

# One candidate rebalance date per calendar quarter (the last date present in that quarter)
quarterly_rebalance_dates = get_quarterly_dates(dates)
print(f"Quarterly rebalance dates: {len(quarterly_rebalance_dates)}")

# Global returns matrix (rows: dates, columns: PERMNO as strings)
returns_all = stocks_df[date_cols].T
returns_all.columns = stocks_df['PERMNO'].astype(str)
returns_all.index = dates

# SAFETY CHECK: the market proxy column is required by the feature extraction, so fail early if absent
if market_index not in returns_all.columns:
    print(f"WARNING: Market index {market_index} not found!")
    print(f"Available PERMNOs: {sorted(returns_all.columns)[:10]}...")
    raise ValueError(f"Market index PERMNO {market_index} not in data. Please verify.")
else:
    print(f"‚úì Market index {market_index} found in data")

# Keep only rebalance dates with enough history for the longest window.
# Elapsed months are approximated as (elapsed days / 30).
max_win = max(windows)
valid_rebal_dates = [d for d in quarterly_rebalance_dates if (d - dates.min()).days / 30 >= max_win]
print(f"Valid rebal dates for all windows: {len(valid_rebal_dates)}")

# Load economic data: one '<series>_quarterly.csv' per series, indexed by its DATE column.
# Missing files are reported and skipped rather than treated as an error.
econ_dict = {}
for series in series_list:
    file = os.path.join(prep_path, f'{series}_quarterly.csv')
    if os.path.exists(file):
        df_econ = pd.read_csv(file, index_col='DATE', parse_dates=True)
        econ_dict[series] = df_econ[series]
        print(f"‚úì Loaded {series}: {len(df_econ)} observations")
    else:
        print(f"‚úó Missing: {series}")
        
print(f"\nLoaded {len(econ_dict)}/{len(series_list)} economic series")

Loaded data: 7670 rows, 1190 columns
Stocks: 7668 securities
Quarterly rebalance dates: 396
Valid rebal dates for all windows: 384


In [None]:
# Cell 3: Define Feature Extraction Function

def extract_features(returns_all, rebal_date, econ_dict=None, market_col=market_index, roll_days_vol=20, roll_days_corr=63):
    """Build the feature vector describing market conditions as of `rebal_date`.

    Only rows of `returns_all` up to and including `rebal_date` are read.

    Args:
        returns_all: DataFrame of returns, one row per date and one column per asset.
        rebal_date: Rebalance date; inclusive upper bound of the data used.
        econ_dict: Optional mapping of series name -> date-indexed Series of values.
        market_col: Column of `returns_all` used as the market proxy.
        roll_days_vol: Number of trailing observations (rows) used for volatility.
        roll_days_corr: Number of trailing observations (rows) used for mean correlation.

    Returns:
        Series with 'vol', 'macd', 'corr_mean', 'rsi', 'drawdown' and 'lag_return',
        plus one entry per economic series. An empty float Series is returned when
        the market proxy has fewer than `roll_days_vol` observations.
    """
    market_returns = returns_all[market_col].loc[:rebal_date].dropna()
    if len(market_returns) < roll_days_vol:
        # Explicit dtype: a bare pd.Series() is object-typed (and warns on older pandas)
        return pd.Series(dtype=float)

    # Volatility over the most recent observations
    vol = market_returns.tail(roll_days_vol).std()

    # NOTE(review): MACD and RSI are fed the return series rather than a price
    # series — confirm this is the intended input for these indicators.
    macd_val = ta.trend.MACD(market_returns).macd().iloc[-1]
    rsi = ta.momentum.RSIIndicator(market_returns).rsi().iloc[-1]

    # Mean pairwise correlation across all assets with any data in the recent window
    recent_returns = returns_all.loc[:rebal_date].tail(roll_days_corr).dropna(axis=1, how='all')
    if recent_returns.shape[1] > 1:
        corr_mean = recent_returns.corr().mean().mean()
    else:
        corr_mean = np.nan

    # Max drawdown and summed return over the trailing 63 observations
    trailing = market_returns.tail(63)
    cum_ret = (1 + trailing).cumprod()
    running_peak = cum_ret.cummax()
    # Each loss is measured against the peak in force at that moment; dividing by
    # the final peak (as before) understated drawdowns that preceded a new high.
    dd = ((running_peak - cum_ret) / running_peak).max()
    lag_ret = trailing.sum()

    row_features = pd.Series({
        'vol': vol,
        'macd': macd_val,
        'corr_mean': corr_mean,
        'rsi': rsi,
        'drawdown': dd,
        'lag_return': lag_ret
    })

    # Economic features: latest observation dated on or before the rebalance date
    if econ_dict:
        for name, series in econ_dict.items():
            prev_data = series[series.index <= rebal_date]
            if not prev_data.empty:
                row_features[name] = prev_data.iloc[-1]
            elif not series.empty:
                # NOTE(review): no observation exists yet at rebal_date, so this
                # takes the first row of the series, which (assuming the series is
                # sorted by date) is dated AFTER rebal_date — a look-ahead value.
                # Left unchanged because switching to NaN would drop these rows downstream.
                row_features[name] = series.iloc[0]
            else:
                row_features[name] = 0.0  # Default to 0 instead of NaN

    return row_features

In [None]:
# Cell 4: Precompute Variant Performances and Features (FIXED: No label leakage)

# For every rebalance date this cell:
#   1. scores the previous rebalance's HRP weights over the period that just
#      ended (lagged Sharpes, used as features),
#   2. extracts the market / economic features available at the rebalance date,
#   3. builds HRP weights for every look-back window and scores them over the
#      following three months (forward Sharpes, from which labels are derived).


def _infer_periods_per_year(index):
    """Guess the number of return observations per year from the median spacing of `index`."""
    if len(index) < 2:
        return 252
    median_days = pd.Series(index).diff().dt.days.median()
    if not median_days > 0:  # also catches NaN
        return 252
    for max_gap_days, periods in ((4, 252), (10, 52), (45, 12), (135, 4)):
        if median_days <= max_gap_days:
            return periods
    return 1


def _annualized_sharpe(port_returns, periods_per_year):
    """Return mean/std of `port_returns` scaled by sqrt(periods_per_year), or NaN if undefined."""
    if len(port_returns) > 1:
        spread = port_returns.std()
        if spread > 0:
            return (port_returns.mean() / spread) * np.sqrt(periods_per_year)
    return np.nan


def _portfolio_returns(returns_slice, weights):
    """Weighted portfolio return per row; an asset with no return on a date contributes 0."""
    # reindex(fill_value=0) only covers columns that are absent; NaN cells inside
    # existing columns would otherwise turn the whole row's dot product into NaN.
    aligned = returns_slice.reindex(columns=weights.index, fill_value=0).fillna(0)
    return aligned.dot(weights)


# Annualize with the data's own frequency instead of assuming daily returns
periods_per_year = _infer_periods_per_year(returns_all.index)
print(f"Annualizing Sharpe ratios with {periods_per_year} periods per year")

# float dtype up front so idxmax / shift / dropna operate on numbers, not objects
sharpes = pd.DataFrame(index=valid_rebal_dates, columns=windows, dtype=float)
past_sharpes = pd.DataFrame(index=valid_rebal_dates, columns=windows, dtype=float)
features_df = pd.DataFrame(index=valid_rebal_dates)

prev_rebal = None
prev_weights = {}

# Checkpoint: Check if we can resume from saved state
checkpoint_file = os.path.join(super_agent_dir, 'cell4_checkpoint.pkl')
if os.path.exists(checkpoint_file):
    print(f"Found checkpoint file. Loading previous progress...")
    checkpoint = joblib.load(checkpoint_file)
    # Checkpoints written by earlier versions may hold object-dtype frames
    sharpes = checkpoint['sharpes'].astype(float)
    past_sharpes = checkpoint['past_sharpes'].astype(float)
    features_df = checkpoint['features_df']
    prev_weights = checkpoint['prev_weights']
    prev_rebal = checkpoint['prev_rebal']
    start_idx = checkpoint['last_completed_idx'] + 1
    print(f"Resuming from rebalance {start_idx}/{len(valid_rebal_dates)}")
else:
    start_idx = 0
    print("Starting from scratch...")

for idx, rebal_date in enumerate(tqdm(valid_rebal_dates[start_idx:], desc="Processing rebalances", initial=start_idx, total=len(valid_rebal_dates))):
    current_weights = {}

    # Lagged Sharpes: how the previous rebalance's weights did up to this date
    if prev_rebal is not None:
        past_start = prev_rebal + pd.DateOffset(days=1)
        past_returns_df = returns_all.loc[past_start:rebal_date]
        for win in windows:
            if win in prev_weights:
                port_past = _portfolio_returns(past_returns_df, prev_weights[win])
                past_sharpes.loc[rebal_date, win] = _annualized_sharpe(port_past, periods_per_year)

    # Extract features including economic data, then append the lagged Sharpes
    row_features = extract_features(returns_all, rebal_date, econ_dict=econ_dict)
    lagged_sharpes = past_sharpes.loc[rebal_date].rename(lambda w: f'lagged_sharpe_{w}')
    row_features = pd.concat([row_features, lagged_sharpes]).astype(float)

    # features_df starts without columns, and assigning a Series to a row aligns on
    # the existing columns — so the columns must exist before the row is written.
    for col in row_features.index:
        if col not in features_df.columns:
            features_df[col] = np.nan
    features_df.loc[rebal_date, row_features.index] = row_features.to_numpy()

    # Forward Sharpes: weights estimated on each window, scored over the next 3 months
    for win in windows:
        start = rebal_date - pd.DateOffset(months=win)
        window_returns = returns_all.loc[start:rebal_date].dropna(axis=1, how='all')
        if window_returns.shape[1] < min_stocks:
            continue

        weights = compute_hrp_weights(window_returns)
        current_weights[win] = weights

        next_start = rebal_date + pd.DateOffset(days=1)
        next_end = rebal_date + pd.DateOffset(months=3)
        port_returns = _portfolio_returns(returns_all.loc[next_start:next_end], weights)
        sharpes.loc[rebal_date, win] = _annualized_sharpe(port_returns, periods_per_year)

    prev_weights = current_weights
    prev_rebal = rebal_date

    # Save checkpoint every 10 iterations
    if (start_idx + idx) % 10 == 0:
        checkpoint = {
            'sharpes': sharpes,
            'past_sharpes': past_sharpes,
            'features_df': features_df,
            'prev_weights': prev_weights,
            'prev_rebal': prev_rebal,
            'last_completed_idx': start_idx + idx
        }
        joblib.dump(checkpoint, checkpoint_file)

# Labels: best window per date (these are the FORWARD Sharpes).
# Rows without any valid Sharpe are excluded from idxmax and re-inserted as NaN
# so the quarterly grid stays intact for the shift below.
labels_raw = sharpes.dropna(how='all').idxmax(axis=1).reindex(sharpes.index)

# Shift labels by one period: features at time t are paired with the best window
# of the following rebalance.
# NOTE(review): the Sharpe stored at t is already measured strictly AFTER t, and
# the backtest applies the predicted window at t itself. Confirm that targeting
# the next rebalance's best window (rather than this one's) is intended.
labels = labels_raw.shift(-1).dropna().astype(int)

# Also shift sharpes to align
sharpes_aligned = sharpes.shift(-1).dropna()

print(f"Total samples after alignment: {len(labels)}")
print(f"Label distribution:\n{labels.value_counts().sort_index()}")

# SAVE FINAL RESULTS
print("\nSaving Cell 4 outputs...")
sharpes.to_csv(os.path.join(super_agent_dir, 'forward_sharpes_by_window.csv'))
past_sharpes.to_csv(os.path.join(super_agent_dir, 'lagged_sharpes_by_window.csv'))
features_df.to_csv(os.path.join(super_agent_dir, 'features_all_rebalances.csv'))
labels.to_csv(os.path.join(super_agent_dir, 'labels_best_window.csv'))
joblib.dump(prev_weights, os.path.join(super_agent_dir, 'final_weights_dict.pkl'))

# Clean up checkpoint
if os.path.exists(checkpoint_file):
    os.remove(checkpoint_file)
    
print("‚úì All Cell 4 outputs saved successfully")

Processing rebalances:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 262/384 [00:07<00:03, 34.44it/s] 


KeyboardInterrupt: 

In [None]:
# Cell 5: Train Classifier (FIXED: No look-ahead bias in scaling)

# Combine features and labels; rows with any missing feature or label are dropped
data = pd.concat([features_df, labels.rename('label')], axis=1).dropna()

# Convert window values to class indices 0..len(windows)-1
window_to_class = {win: idx for idx, win in enumerate(windows)}
class_to_window = {idx: win for idx, win in enumerate(windows)}
# A label that is not one of `windows` maps to NaN; drop it instead of feeding NaN
# to the classifier, and store the classes as integers.
data = data.assign(label=data['label'].map(window_to_class)).dropna(subset=['label'])
data = data.astype({'label': int})

print(f"Label encoding: {window_to_class}")
print(f"Class distribution:\n{data['label'].value_counts().sort_index()}")

# Chronological 80/20 train/test split
train_end_idx = int(len(data) * 0.8)
if train_end_idx == 0:
    raise ValueError(
        f"Need at least 2 labelled samples for a train/test split, got {len(data)}. "
        "Check that Cell 4 produced features and labels."
    )
# Positional slicing keeps the two sets disjoint; label-based slicing on both
# sides of the same date put the boundary sample in train AND test.
train = data.iloc[:train_end_idx]
test = data.iloc[train_end_idx:]
train_end = train.index[-1]

X_train = train.drop('label', axis=1)
y_train = train['label']
X_test = test.drop('label', axis=1)
y_test = test['label']

# Fit the scaler on training data only so no test-period statistics leak in
initial_scaler = StandardScaler()
X_train_scaled = pd.DataFrame(
    initial_scaler.fit_transform(X_train),
    index=X_train.index,
    columns=X_train.columns
)

# For test set, we'll scale dynamically during backtest (see Cell 6)
# Here we just create a placeholder using the initial scaler for grid search
X_test_scaled = pd.DataFrame(
    initial_scaler.transform(X_test),
    index=X_test.index,
    columns=X_test.columns
)

# Save initial scaler
joblib.dump(initial_scaler, os.path.join(super_agent_dir, 'initial_scaler.pkl'))

# Train model on training data; hyper-parameters are chosen with time-ordered CV folds
model = xgb.XGBClassifier(
    n_estimators=200,
    tree_method='gpu_hist' if GPU_AVAILABLE else 'hist',
    random_state=42
)
params = {'max_depth': [3, 5, 7], 'learning_rate': [0.01, 0.1, 0.3]}
cv = TimeSeriesSplit(n_splits=5)
grid = GridSearchCV(model, params, cv=cv, scoring='accuracy')
grid.fit(X_train_scaled, y_train)
best_model = grid.best_estimator_

# Save model and mapping
joblib.dump(best_model, os.path.join(super_agent_dir, 'super_agent_model.pkl'))
joblib.dump(class_to_window, os.path.join(super_agent_dir, 'class_to_window.pkl'))

print(f"Best params: {grid.best_params_}")
print(f"Train score: {best_model.score(X_train_scaled, y_train):.2f}")
print(f"Test score (with initial scaler): {best_model.score(X_test_scaled, y_test):.2f}")
print("\nNote: Test score here uses initial scaler. Backtest will use expanding window scaling.")

# SAVE TRAINING DATA AND METADATA
print("\nSaving Cell 5 outputs...")
X_train.to_csv(os.path.join(super_agent_dir, 'X_train.csv'))
y_train.to_csv(os.path.join(super_agent_dir, 'y_train.csv'))
X_test.to_csv(os.path.join(super_agent_dir, 'X_test.csv'))
y_test.to_csv(os.path.join(super_agent_dir, 'y_test.csv'))

# Save grid search results
cv_results_df = pd.DataFrame(grid.cv_results_)
cv_results_df.to_csv(os.path.join(super_agent_dir, 'grid_search_cv_results.csv'), index=False)

# Save metadata
metadata = {
    'windows': windows,
    'window_to_class': window_to_class,
    'class_to_window': class_to_window,
    'best_params': grid.best_params_,
    'train_end_date': train_end,
    'test_start_date': test.index[0],
    'n_train_samples': len(train),
    'n_test_samples': len(test),
    'n_features': X_train.shape[1],
    'feature_names': list(X_train.columns)
}
joblib.dump(metadata, os.path.join(super_agent_dir, 'training_metadata.pkl'))

print("‚úì All Cell 5 outputs saved successfully")

Labeled 0 dates


In [None]:
# Cell 6: Backtest Super Agent (FIXED: True expanding window - strictly past data only)

def _forward_portfolio_returns(as_of, lookback_months, hold_months=3):
    """Return per-period portfolio returns for the `hold_months` after `as_of`.

    HRP weights are estimated on the `lookback_months` ending at `as_of`.
    Returns None when fewer than `min_stocks` assets have data in that window.
    """
    start = as_of - pd.DateOffset(months=lookback_months)
    window_returns = returns_all.loc[start:as_of].dropna(axis=1, how='all')
    if window_returns.shape[1] < min_stocks:
        return None
    weights = compute_hrp_weights(window_returns)
    next_start = as_of + pd.DateOffset(days=1)
    next_end = as_of + pd.DateOffset(months=hold_months)
    held = returns_all.loc[next_start:next_end].reindex(columns=weights.index, fill_value=0)
    # A held asset with no return on a date contributes 0; leaving the NaN in
    # place would make the whole portfolio return for that date NaN.
    return held.fillna(0).dot(weights)


super_returns = []
baseline_returns = []
super_dates = []
baseline_dates = []
predicted_windows_log = []  # Track which window was selected at each date

win_fixed = 12

# Combine train and test features for expanding window scaling
all_features = pd.concat([X_train, X_test])

print("Starting backtest with expanding window scaling (strictly past data)...")
for idx, t in enumerate(tqdm(test.index, desc="Backtesting")):
    # Only use feature rows dated strictly before t
    historical_data = all_features[all_features.index < t]

    # If we don't have enough historical data, skip
    if len(historical_data) < 10:
        continue

    # NOTE(review): best_model was trained on features scaled by the training-set
    # scaler, while this scaler is refit on a longer history at every step, so the
    # model sees inputs on a slightly different scale than it was trained on.
    # Confirm this is intended; the training-set scaler is also free of look-ahead.
    scaler_t = StandardScaler()
    scaler_t.fit(historical_data)

    # Scale only the current observation
    features_t_scaled = scaler_t.transform(X_test.loc[[t]])

    # Make prediction with scaled features
    pred_class = int(best_model.predict(features_t_scaled)[0])
    win_pred = class_to_window[pred_class]

    # Log the prediction
    predicted_windows_log.append({'date': t, 'predicted_window': win_pred})

    # Super agent: hold the HRP portfolio built on the predicted window
    super_leg = _forward_portfolio_returns(t, win_pred)
    if super_leg is not None:
        super_returns.extend(super_leg.tolist())
        super_dates.extend(super_leg.index)

    # Baseline: same procedure with the fixed 12-month window
    baseline_leg = _forward_portfolio_returns(t, win_fixed)
    if baseline_leg is not None:
        baseline_returns.extend(baseline_leg.tolist())
        baseline_dates.extend(baseline_leg.index)

# Convert to Series with proper dates
super_returns = pd.Series(super_returns, index=super_dates, dtype=float)
baseline_returns = pd.Series(baseline_returns, index=baseline_dates, dtype=float)

# Where holding periods overlap, keep the return from the earlier rebalance.
# De-duplicating before sorting makes "first" well defined (the default sort is not stable).
super_returns = super_returns[~super_returns.index.duplicated(keep='first')].sort_index()
baseline_returns = baseline_returns[~baseline_returns.index.duplicated(keep='first')].sort_index()

# Cumulative returns
cum_super = (1 + super_returns).cumprod()
cum_baseline = (1 + baseline_returns).cumprod()

# Plot
plt.figure(figsize=(12, 6))
plt.plot(cum_super, label='Super Agent (No Look-Ahead)', linewidth=2)
plt.plot(cum_baseline, label='Baseline HRP 12m', linewidth=2)
plt.title('Backtest Equity Curves (Out-of-Sample, Zero Look-Ahead Bias)', fontsize=14)
plt.xlabel('Date')
plt.ylabel('Cumulative Return')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig(os.path.join(super_agent_dir, 'backtest_equity_curves.png'), dpi=300, bbox_inches='tight')
plt.show()

# Compute annualized Sharpe ratio from daily returns
def compute_sharpe(returns, periods_per_year=None):
    """Compute the annualized Sharpe ratio (zero risk-free rate) of a return series.

    Args:
        returns: Series of periodic simple returns.
        periods_per_year: Observations per year used for annualization. When None
            it is inferred from the median spacing of a DatetimeIndex and falls
            back to 252 (daily) for any other index.

    Returns:
        mean / std * sqrt(periods_per_year), or NaN when `returns` is empty or
        its volatility is zero or undefined.
    """
    if len(returns) == 0:
        return np.nan
    volatility = returns.std()
    if not volatility > 0:  # zero, or NaN for a single observation
        return np.nan

    if periods_per_year is None:
        periods_per_year = 252
        if isinstance(returns.index, pd.DatetimeIndex) and len(returns) > 1:
            median_gap_days = pd.Series(returns.index).diff().dt.days.median()
            if median_gap_days > 135:
                periods_per_year = 1
            elif median_gap_days > 45:
                periods_per_year = 4
            elif median_gap_days > 10:
                periods_per_year = 12
            elif median_gap_days > 4:
                periods_per_year = 52

    return (returns.mean() / volatility) * np.sqrt(periods_per_year)

sharpe_super = compute_sharpe(super_returns)
sharpe_baseline = compute_sharpe(baseline_returns)


def _max_drawdown(cum):
    """Largest peak-to-trough loss of a cumulative-return curve, as a fraction of the peak."""
    if len(cum) == 0:
        return np.nan
    running_peak = cum.cummax()
    # Measure each loss against the peak in force at that time, not the overall maximum
    return ((running_peak - cum) / running_peak).max()


# Additional metrics (NaN when the backtest produced no returns)
total_return_super = cum_super.iloc[-1] - 1 if len(cum_super) else np.nan
total_return_baseline = cum_baseline.iloc[-1] - 1 if len(cum_baseline) else np.nan
max_dd_super = _max_drawdown(cum_super)
max_dd_baseline = _max_drawdown(cum_baseline)

# Compute additional statistics
def compute_metrics(returns, name, periods_per_year=None):
    """Compute performance metrics for a series of periodic returns.

    Args:
        returns: Series of periodic simple returns.
        name: Strategy label stored under the 'Strategy' key.
        periods_per_year: Observations per year used for annualization. When None
            it is inferred from the median spacing of a DatetimeIndex and falls
            back to 252 (daily) for any other index.

    Returns:
        Dict of metrics keyed by display name. For an empty `returns` every
        numeric metric is NaN and 'Total Days' is 0.
    """
    metric_names = [
        'Total Return', 'Annualized Return', 'Sharpe Ratio', 'Volatility (Ann.)',
        'Max Drawdown', 'Calmar Ratio', 'Win Rate', 'Avg Win', 'Avg Loss',
        'Best Day', 'Worst Day',
    ]
    n_obs = len(returns)
    if n_obs == 0:
        # Nothing was traded: report NaNs instead of failing on cum.iloc[-1]
        return {'Strategy': name, **{key: np.nan for key in metric_names}, 'Total Days': 0}

    if periods_per_year is None:
        periods_per_year = 252
        if isinstance(returns.index, pd.DatetimeIndex) and n_obs > 1:
            median_gap_days = pd.Series(returns.index).diff().dt.days.median()
            if median_gap_days > 135:
                periods_per_year = 1
            elif median_gap_days > 45:
                periods_per_year = 4
            elif median_gap_days > 10:
                periods_per_year = 12
            elif median_gap_days > 4:
                periods_per_year = 52

    cum = (1 + returns).cumprod()
    running_peak = cum.cummax()
    # Each loss is measured against the peak in force at that time
    max_drawdown = ((running_peak - cum) / running_peak).max()
    annualized_return = (cum.iloc[-1] ** (periods_per_year / n_obs)) - 1
    volatility = returns.std()
    wins = returns[returns > 0]
    losses = returns[returns < 0]

    metrics = {
        'Strategy': name,
        'Total Return': cum.iloc[-1] - 1,
        'Annualized Return': annualized_return,
        'Sharpe Ratio': (returns.mean() / volatility) * np.sqrt(periods_per_year) if volatility > 0 else 0,
        'Volatility (Ann.)': volatility * np.sqrt(periods_per_year),
        'Max Drawdown': max_drawdown,
        'Calmar Ratio': annualized_return / max_drawdown if max_drawdown > 0 else 0,
        'Win Rate': len(wins) / n_obs,
        'Avg Win': wins.mean() if len(wins) else 0,
        'Avg Loss': losses.mean() if len(losses) else 0,
        'Best Day': returns.max(),
        'Worst Day': returns.min(),
        'Total Days': n_obs
    }
    return metrics

# Build one metrics dict per strategy and tabulate them side by side (one row per strategy)
metrics_super = compute_metrics(super_returns, 'Super Agent')
metrics_baseline = compute_metrics(baseline_returns, 'Baseline HRP 12m')

metrics_df = pd.DataFrame([metrics_super, metrics_baseline]).set_index('Strategy')

print("\n" + "="*70)
print("BACKTEST RESULTS (Out-of-Sample, Zero Look-Ahead Bias)")
print("="*70)
print(metrics_df.to_string())
print("="*70)

# SAVE BACKTEST RESULTS: per-period returns, cumulative curves and the metrics table
print("\nSaving Cell 6 outputs...")
super_returns.to_csv(os.path.join(super_agent_dir, 'super_agent_returns.csv'))
baseline_returns.to_csv(os.path.join(super_agent_dir, 'baseline_returns.csv'))
cum_super.to_csv(os.path.join(super_agent_dir, 'super_agent_cumulative.csv'))
cum_baseline.to_csv(os.path.join(super_agent_dir, 'baseline_cumulative.csv'))
metrics_df.to_csv(os.path.join(super_agent_dir, 'performance_metrics.csv'))

# Save predicted windows log (one row per backtest date with the window chosen by the model)
predicted_windows_df = pd.DataFrame(predicted_windows_log)
predicted_windows_df.to_csv(os.path.join(super_agent_dir, 'predicted_windows_log.csv'), index=False)

print("‚úì All Cell 6 outputs saved successfully")

IndexError: index 0 is out of bounds for axis 0 with size 0

In [None]:
# Cell 7: Validation and Iteration

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# For validation, recompute one prediction per test date using the same
# expanding-window scaling as the backtest cell.
print("Computing predictions with expanding window scaling (strictly past data)...")
all_features = pd.concat([X_train, X_test])
preds = []

for t in tqdm(test.index, desc="Scaling and predicting"):
    # Only feature rows dated strictly before t are used to fit the scaler
    historical_data = all_features[all_features.index < t]
    
    if len(historical_data) < 10:
        # Not enough history to fit a scaler: fall back to the most frequent
        # training class (or class 0 if y_train has no mode) so that preds
        # stays aligned one-to-one with test.index.
        preds.append(y_train.mode()[0] if len(y_train.mode()) > 0 else 0)
        continue
    
    # Fit on the history, then transform just the single row for date t
    scaler_t = StandardScaler()
    scaler_t.fit(historical_data)
    features_t_scaled = scaler_t.transform(X_test.loc[[t]])
    pred = best_model.predict(features_t_scaled)[0]
    preds.append(pred)

# Predicted class indices, in the same order as y_test
preds = np.array(preds)
accuracy = accuracy_score(y_test, preds)
print(f"\nOut-of-sample accuracy (zero look-ahead bias): {accuracy:.2%}")

# Pin the class list so the matrix always has one row/column per window, even
# when a window never occurs in y_test or preds. Without it the matrix shrinks
# and no longer matches the tick labels, target names or the CSV export below.
class_labels = list(range(len(windows)))

# Confusion matrix
cm = confusion_matrix(y_test, preds, labels=class_labels)
print("\nConfusion Matrix:")
print("Rows: Actual | Columns: Predicted")
print(f"Windows: {windows}")
print(cm)

# Plot confusion matrix
plt.figure(figsize=(8, 6))
import seaborn as sns
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=[f'{w}m' for w in windows],
            yticklabels=[f'{w}m' for w in windows])
plt.title('Confusion Matrix - Window Selection')
plt.ylabel('Actual Best Window')
plt.xlabel('Predicted Window')
plt.tight_layout()
plt.savefig(os.path.join(super_agent_dir, 'confusion_matrix.png'), dpi=300, bbox_inches='tight')
plt.show()

# Detailed classification report (dict form is kept for saving, text form is printed)
print("\nClassification Report:")
target_names = [f"{w}m" for w in windows]
class_report = classification_report(y_test, preds, labels=class_labels, target_names=target_names, zero_division=0, output_dict=True)
print(classification_report(y_test, preds, labels=class_labels, target_names=target_names, zero_division=0))

# Feature importance: only available when the fitted model exposes feature_importances_
if hasattr(best_model, 'feature_importances_'):
    importances = pd.Series(best_model.feature_importances_, index=X_train.columns)
    top_features = importances.sort_values(ascending=False).head(15)
    
    plt.figure(figsize=(10, 6))
    top_features.plot(kind='barh')
    plt.title('Top 15 Feature Importances')
    plt.xlabel('Importance')
    plt.tight_layout()
    plt.savefig(os.path.join(super_agent_dir, 'feature_importances.png'), dpi=300, bbox_inches='tight')
    plt.show()
    
    print("\nTop 10 Most Important Features:")
    print(top_features.head(10))
    
    # Save all feature importances (note the 'all_' prefix in the file name)
    importances.sort_values(ascending=False).to_csv(os.path.join(super_agent_dir, 'all_feature_importances.csv'))

# Window selection distribution: translate class indices back to window lengths in months
pred_windows = pd.Series([class_to_window[p] for p in preds])
actual_windows = pd.Series([class_to_window[a] for a in y_test])

print(f"\nPredicted Window Distribution:")
print(pred_windows.value_counts().sort_index())
print(f"\nActual Best Window Distribution:")
print(actual_windows.value_counts().sort_index())

# Plot window distributions side by side: actual on the left, predicted on the right
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

actual_windows.value_counts().sort_index().plot(kind='bar', ax=axes[0], color='steelblue')
axes[0].set_title('Actual Best Windows (Test Set)')
axes[0].set_xlabel('Window (months)')
axes[0].set_ylabel('Frequency')
axes[0].grid(True, alpha=0.3)

pred_windows.value_counts().sort_index().plot(kind='bar', ax=axes[1], color='coral')
axes[1].set_title('Predicted Windows (Test Set)')
axes[1].set_xlabel('Window (months)')
axes[1].set_ylabel('Frequency')
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(os.path.join(super_agent_dir, 'window_distributions.png'), dpi=300, bbox_inches='tight')
plt.show()

# SAVE VALIDATION RESULTS
print("\nSaving Cell 7 outputs...")
pd.DataFrame({'actual': y_test, 'predicted': preds}, index=y_test.index).to_csv(
    os.path.join(super_agent_dir, 'predictions_vs_actual.csv'))

# The row/column labels below assume cm has exactly one row and column per entry in `windows`
pd.DataFrame(cm, 
             index=[f'{w}m_actual' for w in windows],
             columns=[f'{w}m_pred' for w in windows]).to_csv(
    os.path.join(super_agent_dir, 'confusion_matrix.csv'))

pd.DataFrame(class_report).T.to_csv(os.path.join(super_agent_dir, 'classification_report.csv'))

print("‚úì All Cell 7 outputs saved successfully")

In [None]:
# Cell 8: Generate Final Summary Report

import json
from datetime import datetime

print("="*70)
print("GENERATING FINAL SUMMARY REPORT")
print("="*70)


def _strategy_summary(metrics):
    """Pick one strategy's headline metrics and cast them to plain floats for JSON."""
    field_map = {
        'sharpe_ratio': 'Sharpe Ratio',
        'total_return': 'Total Return',
        'annualized_return': 'Annualized Return',
        'max_drawdown': 'Max Drawdown',
        'volatility': 'Volatility (Ann.)',
        'calmar_ratio': 'Calmar Ratio',
        'win_rate': 'Win Rate',
    }
    return {key: float(metrics[source]) for key, source in field_map.items()}


# Create comprehensive summary
summary_report = {
    'execution_info': {
        'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'gpu_used': GPU_AVAILABLE,
        'total_rebalance_dates': len(valid_rebal_dates),
        'train_test_split': '80/20'
    },
    'model_config': {
        'windows': windows,
        'model_type': 'XGBoost Classifier',
        'best_hyperparameters': metadata['best_params'],
        'n_features': metadata['n_features'],
        'feature_names': metadata['feature_names']
    },
    'training_performance': {
        'train_accuracy': float(best_model.score(X_train_scaled, y_train)),
        'n_train_samples': metadata['n_train_samples'],
        'train_period': f"{metadata['train_end_date'].strftime('%Y-%m-%d')}"
    },
    'test_performance': {
        'test_accuracy': float(accuracy),
        'n_test_samples': metadata['n_test_samples'],
        'test_period': f"{metadata['test_start_date'].strftime('%Y-%m-%d')} onwards"
    },
    'backtest_results': {
        'super_agent': _strategy_summary(metrics_super),
        'baseline_12m': _strategy_summary(metrics_baseline)
    },
    'improvement_vs_baseline': {
        'sharpe_delta': float(metrics_super['Sharpe Ratio'] - metrics_baseline['Sharpe Ratio']),
        'return_delta': float(metrics_super['Total Return'] - metrics_baseline['Total Return']),
        # Relative improvement is undefined when the baseline Sharpe is exactly 0
        'sharpe_improvement_pct': float((metrics_super['Sharpe Ratio'] / metrics_baseline['Sharpe Ratio'] - 1) * 100) if metrics_baseline['Sharpe Ratio'] != 0 else None
    },
    'saved_files': [
        'super_agent_model.pkl',
        'class_to_window.pkl',
        'initial_scaler.pkl',
        'training_metadata.pkl',
        'forward_sharpes_by_window.csv',
        'features_all_rebalances.csv',
        'X_train.csv', 'X_test.csv',
        'y_train.csv', 'y_test.csv',
        'super_agent_returns.csv',
        'baseline_returns.csv',
        'performance_metrics.csv',
        'predicted_windows_log.csv',
        # Matches the name actually written by the validation cell
        'all_feature_importances.csv',
        'classification_report.csv',
        'confusion_matrix.csv',
        'backtest_equity_curves.png',
        'feature_importances.png',
        'confusion_matrix.png',
        'window_distributions.png'
    ]
}

# Save as JSON
with open(os.path.join(super_agent_dir, 'SUMMARY_REPORT.json'), 'w', encoding='utf-8') as f:
    json.dump(summary_report, f, indent=2)

# Save as readable text.
# The report contains non-ASCII symbols, so the encoding is set explicitly
# instead of depending on the platform's default locale encoding.
with open(os.path.join(super_agent_dir, 'SUMMARY_REPORT.txt'), 'w', encoding='utf-8') as f:
    f.write("="*70 + "\n")
    f.write("SUPER AGENT ML - FINAL SUMMARY REPORT\n")
    f.write("="*70 + "\n\n")

    f.write(f"Generated: {summary_report['execution_info']['timestamp']}\n")
    f.write(f"GPU Acceleration: {'Yes' if GPU_AVAILABLE else 'No'}\n\n")

    f.write("-"*70 + "\n")
    f.write("MODEL CONFIGURATION\n")
    f.write("-"*70 + "\n")
    f.write(f"Windows: {windows}\n")
    f.write(f"Model: XGBoost Classifier\n")
    f.write(f"Best Params: {metadata['best_params']}\n")
    f.write(f"Features: {metadata['n_features']}\n\n")

    f.write("-"*70 + "\n")
    f.write("BACKTEST PERFORMANCE (Out-of-Sample)\n")
    f.write("-"*70 + "\n")
    # Same six-line layout for both strategies
    for strategy_label, strategy_metrics in (("Super Agent", metrics_super), ("Baseline HRP 12m", metrics_baseline)):
        f.write(f"{strategy_label}:\n")
        f.write(f"  Sharpe Ratio:       {strategy_metrics['Sharpe Ratio']:.3f}\n")
        f.write(f"  Total Return:       {strategy_metrics['Total Return']:.2%}\n")
        f.write(f"  Annualized Return:  {strategy_metrics['Annualized Return']:.2%}\n")
        f.write(f"  Max Drawdown:       {strategy_metrics['Max Drawdown']:.2%}\n")
        f.write(f"  Volatility (Ann.):  {strategy_metrics['Volatility (Ann.)']:.2%}\n")
        f.write(f"  Calmar Ratio:       {strategy_metrics['Calmar Ratio']:.3f}\n\n")

    # sharpe_imp stays a notebook global: the console summary after this block reads it
    sharpe_imp = summary_report['improvement_vs_baseline']['sharpe_improvement_pct']
    f.write(f"Improvement:\n")
    f.write(f"  Sharpe Œî:           {summary_report['improvement_vs_baseline']['sharpe_delta']:+.3f}\n")
    f.write(f"  Return Œî:           {summary_report['improvement_vs_baseline']['return_delta']:+.2%}\n")
    if sharpe_imp is not None:
        f.write(f"  Sharpe Improvement: {sharpe_imp:+.1f}%\n")

    f.write("\n" + "-"*70 + "\n")
    f.write("SAVED FILES\n")
    f.write("-"*70 + "\n")
    for file in summary_report['saved_files']:
        f.write(f"  ‚úì {file}\n")

    f.write("\n" + "="*70 + "\n")

# Console recap of the headline comparison between the super agent and the baseline
print("\n" + "="*70)
print("SUMMARY REPORT")
print("="*70)
print(f"\nüìä BACKTEST RESULTS:")
print(f"   Super Agent Sharpe: {metrics_super['Sharpe Ratio']:.3f}")
print(f"   Baseline Sharpe:    {metrics_baseline['Sharpe Ratio']:.3f}")
print(f"   Improvement:        {summary_report['improvement_vs_baseline']['sharpe_delta']:+.3f}")

# sharpe_imp is None when the baseline Sharpe is exactly 0; nothing is printed in that case
if sharpe_imp is not None and sharpe_imp > 0:
    print(f"   üéØ Super Agent outperforms by {sharpe_imp:.1f}%!")
elif sharpe_imp is not None:
    print(f"   ‚ö†Ô∏è  Super Agent underperforms by {abs(sharpe_imp):.1f}%")

print(f"\nüíæ OUTPUT DIRECTORY: {super_agent_dir}")
# The count is the length of the hard-coded 'saved_files' list, not a scan of the directory
print(f"üìÅ Total files saved: {len(summary_report['saved_files'])}")
print(f"\n‚úÖ All results saved successfully!")
print("="*70)