<a href="https://colab.research.google.com/github/kesanir/ML-AI-TRADING/blob/main/XGB_LSTM_TOP10_NASDAQ.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
NASDAQ-100 MOMENTUM: XGBoost + LSTM ENSEMBLE
=============================================

Combines the best of both worlds:
- XGBoost: Cross-sectional stock ranking (which stocks?)
- LSTM: Time-series momentum prediction (when to buy?)
- Ensemble: Combined signal with confidence scoring

Fixes:
1. No data leakage (walk-forward validation)
2. Proper train/test split
3. Performance metrics on unseen data
4. Ensemble predictions with uncertainty
5. Risk management and position sizing
"""

import warnings
warnings.filterwarnings("ignore")

import yfinance as yf
import pandas as pd
import numpy as np
import xgboost as xgb
import torch
import torch.nn as nn
from datetime import datetime
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

np.random.seed(42)
torch.manual_seed(42)

print("\n" + "="*80)
print("NASDAQ-100 MOMENTUM: XGBoost + LSTM ENSEMBLE")
print("="*80 + "\n")

# ============================================================================
# CONFIGURATION
# ============================================================================

class Config:
    # Data
    TRAIN_YEARS = 4
    LOOKBACK_LSTM = 30  # Days for LSTM
    LOOKBACK_XGBOOST = 1  # Forward return target for XGBoost

    # Model selection
    USE_XGBOOST = True
    USE_LSTM = True
    ENSEMBLE_WEIGHT_XGB = 0.6  # 60% XGBoost, 40% LSTM
    ENSEMBLE_WEIGHT_LSTM = 0.4

    # Portfolio
    TOP_N = 10
    #MIN_CONFIDENCE = 0.55
    MIN_CONFIDENCE = 0.50
    # Validation
    TEST_DAYS = 60  # Hold-out period

config = Config()

# ============================================================================
# TICKERS
# ============================================================================

TICKERS = [
    'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'NVDA', 'META', 'TSLA', 'AVGO',
    'COST', 'NFLX', 'ADBE', 'PEP', 'CSCO', 'TMUS', 'AMD', 'INTC',
    'CMCSA', 'INTU', 'QCOM', 'TXN', 'AMGN', 'HON', 'AMAT', 'SBUX',
    'ISRG', 'BKNG', 'GILD', 'ADI', 'ADP', 'VRTX', 'MDLZ', 'LRCX',
    'REGN', 'MU', 'PANW', 'PYPL', 'SNPS', 'CDNS', 'KLAC', 'ASML',
    'MELI', 'CRWD', 'ABNB', 'FTNT', 'WDAY', 'MRNA', 'CTAS', 'DXCM',
    'ORLY', 'AEP', 'NXPI', 'CHTR', 'MAR', 'ADSK', 'MCHP', 'KDP',
    'MNST', 'EXC', 'ROST', 'CSX', 'KHC', 'PCAR', 'PAYX', 'CPRT',
    'AZN', 'CSGP', 'ODFL', 'DDOG', 'FAST', 'BKR', 'TTD', 'CTSH',
    'EA', 'GEHC', 'VRSK', 'LULU', 'ON', 'XEL', 'IDXX', 'ZS','S',
    'CCEP', 'TEAM', 'FANG', 'BIIB', 'CDW', 'ILMN', 'DASH',
    'GFS', 'WBD', 'MRVL', 'TTWO', 'EBAY', 'ZM', 'ALGN', 'ENPH'
    ]

print(f"Universe: {len(TICKERS)} stocks")

# ============================================================================
# 1. DOWNLOAD DATA
# ============================================================================

print("\nDownloading data...")
data = yf.download(
    TICKERS + ["QQQ", "^VIX"],
    period=f"{config.TRAIN_YEARS}y",
    auto_adjust=True,
    progress=False
)

close = data["Close"]
high = data["High"]
low = data["Low"]
volume = data["Volume"]

qqq = close["QQQ"]
vix = close["^VIX"]

print(f"‚úÖ Downloaded {len(close)} days of data")

# ============================================================================
# 2. REGIME DETECTION
# ============================================================================

qqq_200 = qqq.rolling(200).mean()
ma_distance = (qqq - qqq_200) / qqq_200
ma_distance_smooth = ma_distance.rolling(5).mean()
regime = (ma_distance_smooth > 0).astype(int).shift(1)

current_regime = "üöÄ BULL" if regime.iloc[-1] == 1 else "üêª BEAR"
print(f"Current Regime: {current_regime}")

# ============================================================================
# 3. FEATURE ENGINEERING FOR XGBOOST
# ============================================================================

print("\nBuilding feature panel for XGBoost...")

feature_list = []

for t in TICKERS:
    df = pd.DataFrame(index=close.index)
    df["Ticker"] = t
    df["Close"] = close[t]

    # Price features
    df["Returns"] = close[t].pct_change()
    df["ROC_5"] = close[t].pct_change(5)
    df["ROC_20"] = close[t].pct_change(20)

    # Volatility
    tr = pd.concat([
        high[t] - low[t],
        (high[t] - close[t].shift()).abs(),
        (low[t] - close[t].shift()).abs()
    ], axis=1).max(axis=1)
    df["ATR"] = tr.rolling(14).mean() / close[t]

    # RSI
    delta = close[t].diff()
    gain = (delta.where(delta > 0, 0)).rolling(14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
    rs = gain / (loss + 1e-10)
    df["RSI"] = 100 - (100 / (1 + rs))

    # Volume
    df["Volume_MA"] = volume[t].rolling(20).mean()
    df["Volume_Ratio"] = volume[t] / df["Volume_MA"]

    # Market features
    df["VIX"] = vix
    df["VIX_Change"] = vix.pct_change(10)
    df["Regime"] = regime

    # TARGET: Forward 21-day return (for XGBoost)
    df["Fwd_Return_21d"] = close[t].pct_change(config.LOOKBACK_XGBOOST).shift(-config.LOOKBACK_XGBOOST)

    feature_list.append(df)

panel = pd.concat(feature_list)
panel = panel.dropna()

print(f"‚úÖ Panel created: {len(panel)} observations")

# ============================================================================
# 4. CROSS-SECTIONAL Z-SCORING (KEY FOR XGBOOST)
# ============================================================================

print("Applying cross-sectional z-scoring...")

feature_cols = ["Returns", "ROC_5", "ROC_20", "ATR", "RSI", "Volume_Ratio", "VIX", "VIX_Change"]

def zscore_transform(x):
    mean = x.mean()
    std = x.std() if x.std() != 0 else 1
    return (x - mean) / std

panel[feature_cols] = panel.groupby(level=0)[feature_cols].transform(zscore_transform).clip(-3, 3)
panel = panel.dropna()

print(f"‚úÖ Features normalized: {len(panel)} clean observations")

# ============================================================================
# 5. TRAIN-TEST SPLIT (NO DATA LEAKAGE!)
# ============================================================================

print("\nSplitting data (no data leakage)...")

# Hold out last 60 days for testing
split_date = panel.index.unique()[-config.TEST_DAYS]
train_panel = panel[panel.index < split_date]
test_panel = panel[panel.index >= split_date]

print(f"Training: {len(train_panel)} obs ({train_panel.index.min()} to {train_panel.index.max()})")
print(f"Testing:  {len(test_panel)} obs ({test_panel.index.min()} to {test_panel.index.max()})")

# ============================================================================
# 6. XGBOOST MODEL
# ============================================================================

if config.USE_XGBOOST:
    print("\n" + "="*80)
    print("TRAINING XGBOOST (Cross-Sectional Ranking)")
    print("="*80)

    X_train_xgb = train_panel[feature_cols]
    y_train_xgb = train_panel["Fwd_Return_21d"]
    X_test_xgb = test_panel[feature_cols]
    y_test_xgb = test_panel["Fwd_Return_21d"]

    xgb_model = xgb.XGBRegressor(
        n_estimators=300,
        max_depth=4,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        verbosity=0
    )

    xgb_model.fit(X_train_xgb, y_train_xgb)

    # Validate
    xgb_train_pred = xgb_model.predict(X_train_xgb)
    xgb_test_pred = xgb_model.predict(X_test_xgb)

    train_mae_xgb = mean_absolute_error(y_train_xgb, xgb_train_pred)
    test_mae_xgb = mean_absolute_error(y_test_xgb, xgb_test_pred)

    train_dir_xgb = (np.sign(xgb_train_pred) == np.sign(y_train_xgb)).mean()
    test_dir_xgb = (np.sign(xgb_test_pred) == np.sign(y_test_xgb)).mean()

    print(f"\nXGBoost Performance:")
    print(f"  Train MAE: {train_mae_xgb:.4f} | Test MAE: {test_mae_xgb:.4f}")
    print(f"  Train Dir Acc: {train_dir_xgb:.1%} | Test Dir Acc: {test_dir_xgb:.1%}")

    # Feature importance
    importance = pd.DataFrame({
        'Feature': feature_cols,
        'Importance': xgb_model.feature_importances_
    }).sort_values('Importance', ascending=False)

    print(f"\nTop 5 Features:")
    print(importance.head().to_string(index=False))

# ============================================================================
# 7. LSTM MODEL (Time-Series)
# ============================================================================

class SimpleLSTM(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(1, 32, batch_first=True, dropout=0.2)
        self.fc = nn.Linear(32, 1)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = torch.tanh(self.fc(out[:, -1, :])) * 0.1
        return out

if config.USE_LSTM:
    print("\n" + "="*80)
    print("TRAINING LSTM (Time-Series Prediction)")
    print("="*80)

    lstm_results = {}

    for ticker in TICKERS: # Limit to 20 for speed
        try:
            ticker_data = close[ticker].dropna()

            if len(ticker_data) < 200:
                continue

            # Returns
            returns = ticker_data.pct_change().dropna().values.reshape(-1, 1)

            # Scale
            scaler = StandardScaler()
            scaled = scaler.fit_transform(returns)

            # Sequences
            X, y = [], []
            for i in range(config.LOOKBACK_LSTM, len(scaled)):
                X.append(scaled[i-config.LOOKBACK_LSTM:i])
                y.append(scaled[i])

            X = torch.FloatTensor(X)
            y = torch.FloatTensor(y)

            # Split (last 60 days for test)
            split = len(X) - config.TEST_DAYS
            X_train = X[:split]
            y_train = y[:split]
            X_test = X[split:]
            y_test = y[split:]

            # Train
            model = SimpleLSTM()
            criterion = nn.MSELoss()
            optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

            for epoch in range(30):  # Quick training
                model.train()
                optimizer.zero_grad()
                output = model(X_train)
                loss = criterion(output, y_train)
                loss.backward()
                optimizer.step()

            # Test
            model.eval()
            with torch.no_grad():
                test_pred = model(X_test).numpy()
                test_actual = y_test.numpy()

            test_acc = (np.sign(test_pred.flatten()) == np.sign(test_actual.flatten())).mean()

            # Predict next day
            last_seq = torch.FloatTensor(scaled[-config.LOOKBACK_LSTM:]).unsqueeze(0)
            with torch.no_grad():
                next_pred_scaled = model(last_seq).item()

            next_pred = scaler.inverse_transform([[next_pred_scaled]])[0, 0]

            lstm_results[ticker] = {
                'prediction': next_pred,
                'accuracy': test_acc
            }

        except Exception as e:
            continue

    print(f"\n‚úÖ LSTM trained on {len(lstm_results)} stocks")
    avg_acc = np.mean([r['accuracy'] for r in lstm_results.values()])
    print(f"   Average test accuracy: {avg_acc:.1%}")

# ============================================================================
# 8. ENSEMBLE PREDICTIONS
# ============================================================================

print("\n" + "="*80)
print("GENERATING ENSEMBLE PREDICTIONS")
print("="*80)

# Get latest date predictions
latest_date = panel.index.max()
today_panel = panel.loc[latest_date].copy()

# XGBoost predictions (cross-sectional ranking)
if config.USE_XGBOOST:
    xgb_scores = xgb_model.predict(today_panel[feature_cols])
    today_panel["XGB_Score"] = xgb_scores
    today_panel["XGB_Rank"] = today_panel["XGB_Score"].rank(ascending=False)
else:
    today_panel["XGB_Score"] = 0
    today_panel["XGB_Rank"] = 0

# LSTM predictions (time-series)
if config.USE_LSTM:
    lstm_scores = []
    lstm_confidences = []

    for ticker in today_panel["Ticker"]:
        if ticker in lstm_results:
            lstm_scores.append(lstm_results[ticker]['prediction'])
            lstm_confidences.append(lstm_results[ticker]['accuracy'])
        else:
            lstm_scores.append(0)
            lstm_confidences.append(0.5)

    today_panel["LSTM_Score"] = lstm_scores
    today_panel["LSTM_Confidence"] = lstm_confidences
else:
    today_panel["LSTM_Score"] = 0
    today_panel["LSTM_Confidence"] = 0.5

# ENSEMBLE
if config.USE_XGBOOST and config.USE_LSTM:
    # Normalize scores to [0, 1]
    xgb_norm = (today_panel["XGB_Score"] - today_panel["XGB_Score"].min()) / (today_panel["XGB_Score"].max() - today_panel["XGB_Score"].min())
    lstm_norm = (today_panel["LSTM_Score"] - today_panel["LSTM_Score"].min()) / (today_panel["LSTM_Score"].max() - today_panel["LSTM_Score"].min())

    today_panel["Ensemble_Score"] = (
        config.ENSEMBLE_WEIGHT_XGB * xgb_norm +
        config.ENSEMBLE_WEIGHT_LSTM * lstm_norm
    )
    today_panel["Confidence"] = today_panel["LSTM_Confidence"]

elif config.USE_XGBOOST:
    today_panel["Ensemble_Score"] = today_panel["XGB_Score"]
    today_panel["Confidence"] = 0.6
else:
    today_panel["Ensemble_Score"] = today_panel["LSTM_Score"]
    today_panel["Confidence"] = today_panel["LSTM_Confidence"]

# ============================================================================
# 9. FINAL SELECTION
# ============================================================================

# Filter by regime
if regime.iloc[-1] == 1:
    candidates = today_panel.copy()
    print("\nüöÄ Bull Regime: Long positions enabled")
else:
    candidates = today_panel.copy()
    print("\nüêª Bear Regime: Proceed with caution")

# Filter by confidence
candidates = candidates[candidates["Confidence"] > config.MIN_CONFIDENCE]

# Sort and select top N
top_picks = candidates.sort_values("Ensemble_Score", ascending=False).head(config.TOP_N)

# ============================================================================
# 10. DISPLAY RESULTS
# ============================================================================

print("\n" + "="*80)
print(f"TOP {config.TOP_N} MOMENTUM PICKS - {datetime.now().strftime('%Y-%m-%d')}")
print("="*80 + "\n")

display = top_picks[["Ticker", "Close", "Ensemble_Score", "XGB_Rank", "LSTM_Score", "Confidence"]].copy()
display["Ensemble_Score"] = display["Ensemble_Score"].apply(lambda x: f"{x:.3f}")
display["LSTM_Score"] = display["LSTM_Score"].apply(lambda x: f"{x*100:+.2f}%")
display["Confidence"] = display["Confidence"].apply(lambda x: f"{x:.1%}")
display.columns = ["Ticker", "Price", "Ensemble", "XGB_Rank", "LSTM_Pred", "Confidence"]

print(display.to_string(index=False))

print("\n" + "="*80)
print("MODEL CONTRIBUTIONS")
print("="*80)
print(f"XGBoost weight:  {config.ENSEMBLE_WEIGHT_XGB:.0%} (cross-sectional ranking)")
print(f"LSTM weight:     {config.ENSEMBLE_WEIGHT_LSTM:.0%} (time-series prediction)")
print(f"Ensemble method: Weighted average of normalized scores")

if config.USE_XGBOOST and config.USE_LSTM:
    print(f"\nüí° Strategy: XGBoost selects stocks, LSTM confirms timing")
elif config.USE_XGBOOST:
    print(f"\nüí° Strategy: Pure XGBoost cross-sectional momentum")
else:
    print(f"\nüí° Strategy: Pure LSTM time-series prediction")

print("\n" + "="*80 + "\n")

# ============================================================================
# 11. SAVE RESULTS
# ============================================================================

output_file = 'ensemble_predictions.csv'
top_picks.to_csv(output_file)
print(f"üìÅ Results saved to: {output_file}\n")



NASDAQ-100 MOMENTUM: XGBoost + LSTM ENSEMBLE

Universe: 96 stocks

Downloading data...
‚úÖ Downloaded 1005 days of data
Current Regime: üöÄ BULL

Building feature panel for XGBoost...
‚úÖ Panel created: 94253 observations
Applying cross-sectional z-scoring...
‚úÖ Features normalized: 94253 clean observations

Splitting data (no data leakage)...
Training: 88493 obs (2022-03-15 00:00:00 to 2025-11-17 00:00:00)
Testing:  5760 obs (2025-11-18 00:00:00 to 2026-02-13 00:00:00)

TRAINING XGBOOST (Cross-Sectional Ranking)

XGBoost Performance:
  Train MAE: 0.0158 | Test MAE: 0.0160
  Train Dir Acc: 56.1% | Test Dir Acc: 50.0%

Top 5 Features:
   Feature  Importance
VIX_Change    0.300366
       ATR    0.142652
       RSI    0.129771
     ROC_5    0.108725
    ROC_20    0.107133

TRAINING LSTM (Time-Series Prediction)


In [6]:
# -*- coding: utf-8 -*-
"""
NASDAQ-100 MOMENTUM: XGBoost + LSTM ENSEMBLE (Optional Sequential Mode)
=======================================================================

Features:
- Parallel: XGBoost (cross-sectional) + LSTM (time-series) combined with weights
- Sequential: XGBoost score fed into LSTM for time-series refinement
- Walk-forward validation, cross-sectional z-scoring, risk-aware top-N selection
- Ensemble predictions saved daily
"""

# =========================
# INSTALL & IMPORT
# =========================
!pip install yfinance xgboost torch --quiet

import warnings
warnings.filterwarnings("ignore")

import yfinance as yf
import pandas as pd
import numpy as np
import xgboost as xgb
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from datetime import datetime

np.random.seed(42)
torch.manual_seed(42)

# =========================
# CONFIGURATION
# =========================
class Config:
    TRAIN_YEARS = 3
    LOOKBACK_LSTM = 30      # days of returns for LSTM
    LOOKBACK_XGBOOST = 21   # forward return for XGB target
    USE_XGBOOST = True
    USE_LSTM = True
    SEQUENTIAL = True       # If True, feed XGB score into LSTM
    ENSEMBLE_WEIGHT_XGB = 0.6
    ENSEMBLE_WEIGHT_LSTM = 0.4
    TOP_N = 10
    MIN_CONFIDENCE = 0.55
    TEST_DAYS = 60

config = Config()

# =========================
# NASDAQ-100 TICKERS
# =========================
TICKERS = [
    'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'NVDA', 'META', 'TSLA', 'AVGO',
    'COST', 'NFLX', 'ADBE', 'PEP', 'CSCO', 'AMD', 'INTC', 'CMCSA',
    'INTU', 'QCOM', 'TXN', 'AMGN', 'HON', 'AMAT', 'SBUX', 'ISRG',
    'BKNG', 'GILD', 'ADI', 'ADP', 'VRTX', 'MDLZ', 'REGN', 'MU',
    'PANW', 'SNPS', 'CDNS', 'MELI', 'CRWD', 'ABNB', 'FTNT', 'WDAY'
]

# =========================
# DOWNLOAD DATA
# =========================
data = yf.download(
    TICKERS + ["QQQ", "^VIX"],
    period=f"{config.TRAIN_YEARS}y",
    auto_adjust=True, progress=False
)

close = data["Close"]
high = data["High"]
low = data["Low"]
volume = data["Volume"]
qqq = close["QQQ"]
vix = close["^VIX"]

# =========================
# REGIME DETECTION
# =========================
qqq_200 = qqq.rolling(200).mean()
regime = (qqq > qqq_200).astype(int)
current_regime = "üöÄ BULL" if regime.iloc[-1]==1 else "üêª BEAR"
print(f"Current Regime: {current_regime}")

# =========================
# FEATURE PANEL (XGBoost)
# =========================
feature_list = []
for t in TICKERS:
    df = pd.DataFrame(index=close.index)
    df["Ticker"] = t
    df["Close"] = close[t]
    df["Returns"] = close[t].pct_change()
    df["ROC_5"] = close[t].pct_change(5)
    df["ROC_20"] = close[t].pct_change(20)

    # ATR
    tr = pd.concat([
        high[t]-low[t],
        (high[t]-close[t].shift()).abs(),
        (low[t]-close[t].shift()).abs()
    ], axis=1).max(axis=1)
    df["ATR"] = tr.rolling(14).mean()/close[t]

    # RSI
    delta = close[t].diff()
    gain = (delta.where(delta>0,0)).rolling(14).mean()
    loss = (-delta.where(delta<0,0)).rolling(14).mean()
    rs = gain/(loss+1e-10)
    df["RSI"] = 100-(100/(1+rs))

    # Volume
    df["Volume_MA"] = volume[t].rolling(20).mean()
    df["Volume_Ratio"] = volume[t]/df["Volume_MA"]

    # Market
    df["VIX"] = vix
    df["VIX_Change"] = vix.pct_change(10)
    df["Regime"] = regime

    # XGB target
    df["Fwd_Return_21d"] = close[t].pct_change(config.LOOKBACK_XGBOOST).shift(-config.LOOKBACK_XGBOOST)

    feature_list.append(df)

panel = pd.concat(feature_list).dropna()

# =========================
# CROSS-SECTIONAL Z-SCORING
# =========================
feature_cols = ["Returns","ROC_5","ROC_20","ATR","RSI","Volume_Ratio","VIX","VIX_Change"]
panel[feature_cols] = panel.groupby(level=0)[feature_cols].transform(lambda x: ((x-x.mean())/(x.std() if x.std()!=0 else 1)).clip(-3,3))
panel = panel.dropna()

# =========================
# TRAIN-TEST SPLIT
# =========================
split_date = panel.index.unique()[-config.TEST_DAYS]
train_panel = panel[panel.index<split_date]
test_panel = panel[panel.index>=split_date]

# =========================
# XGBOOST MODEL
# =========================
if config.USE_XGBOOST:
    X_train = train_panel[feature_cols]
    y_train = train_panel["Fwd_Return_21d"]
    X_test = test_panel[feature_cols]
    y_test = test_panel["Fwd_Return_21d"]

    xgb_model = xgb.XGBRegressor(n_estimators=300, max_depth=4, learning_rate=0.05, subsample=0.8, colsample_bytree=0.8, random_state=42)
    xgb_model.fit(X_train, y_train)

    train_pred_xgb = xgb_model.predict(X_train)
    test_pred_xgb = xgb_model.predict(X_test)

    print(f"XGB Train MAE: {mean_absolute_error(y_train, train_pred_xgb):.4f} | Test MAE: {mean_absolute_error(y_test, test_pred_xgb):.4f}")

# =========================
# LSTM MODEL
# =========================
class LSTMModel(nn.Module):
    def __init__(self, input_dim=1):
        super().__init__()
        self.lstm = nn.LSTM(input_dim,32,batch_first=True,dropout=0.2)
        self.fc = nn.Linear(32,1)
    def forward(self,x):
        out,_ = self.lstm(x)
        return self.fc(out[:,-1,:])

lstm_results = {}
for ticker in TICKERS[:20]:  # limit for speed
    try:
        series = close[ticker].dropna()
        returns = series.pct_change().dropna().values.reshape(-1,1)
        scaler = StandardScaler()
        returns_scaled = scaler.fit_transform(returns)

        # Optionally add XGB score as feature
        xgb_seq = None
        if config.SEQUENTIAL and config.USE_XGBOOST:
            xgb_full = pd.DataFrame({"XGB": xgb_model.predict(panel[panel["Ticker"]==ticker][feature_cols])}, index=panel[panel["Ticker"]==ticker].index)
            xgb_seq = xgb_full.reindex(series.index).fillna(0).values
            returns_scaled = np.hstack([returns_scaled, xgb_seq[-len(returns_scaled):]])

        # Build sequences
        X_lstm, y_lstm = [], []
        for i in range(config.LOOKBACK_LSTM, len(returns_scaled)):
            X_lstm.append(returns_scaled[i-config.LOOKBACK_LSTM:i])
            y_lstm.append(returns_scaled[i,0])
        X_lstm = torch.FloatTensor(X_lstm)
        y_lstm = torch.FloatTensor(y_lstm).unsqueeze(1)

        # Train/test split
        split = len(X_lstm)-config.TEST_DAYS
        X_train_lstm = X_lstm[:split]
        y_train_lstm = y_lstm[:split]
        X_test_lstm = X_lstm[split:]
        y_test_lstm = y_lstm[split:]

        model = LSTMModel(input_dim=X_train_lstm.shape[2])
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        criterion = nn.MSELoss()

        for epoch in range(30):
            model.train()
            optimizer.zero_grad()
            loss = criterion(model(X_train_lstm), y_train_lstm)
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            next_seq = torch.FloatTensor(returns_scaled[-config.LOOKBACK_LSTM:]).unsqueeze(0)
            pred_next = model(next_seq).numpy()[0,0]
        pred_next = scaler.inverse_transform([[pred_next]])[0,0]
        lstm_results[ticker] = pred_next
    except:
        continue

# =========================
# ENSEMBLE PREDICTIONS
# =========================
latest_date = panel.index.max()
today_panel = panel.loc[latest_date].copy()
if config.USE_XGBOOST:
    today_panel["XGB_Score"] = xgb_model.predict(today_panel[feature_cols])
    today_panel["XGB_Rank"] = today_panel["XGB_Score"].rank(ascending=False)
else:
    today_panel["XGB_Score"]=0
    today_panel["XGB_Rank"]=0

today_panel["LSTM_Score"] = today_panel["Ticker"].map(lambda x: lstm_results.get(x,0))
today_panel["Ensemble_Score"] = (
    config.ENSEMBLE_WEIGHT_XGB * ((today_panel["XGB_Score"]-today_panel["XGB_Score"].min())/(today_panel["XGB_Score"].max()-today_panel["XGB_Score"].min())) +
    config.ENSEMBLE_WEIGHT_LSTM * ((today_panel["LSTM_Score"]-today_panel["LSTM_Score"].min())/(today_panel["LSTM_Score"].max()-today_panel["LSTM_Score"].min()))
)

# =========================
# TOP PICKS
# =========================
top = today_panel.sort_values("Ensemble_Score", ascending=False).head(config.TOP_N)
top = top[top["Ensemble_Score"]>config.MIN_CONFIDENCE]
print(f"\nTOP {config.TOP_N} NASDAQ-100 MOMENTUM PICKS ({datetime.now().date()})\n")
display = top[["Ticker","Close","Ensemble_Score","XGB_Rank","LSTM_Score"]].copy()
display.columns=["Ticker","Price","Ensemble","XGB_Rank","LSTM_Pred"]
print(display.to_string(index=False))

# =========================
# SAVE
# =========================
top.to_csv("ensemble_predictions.csv")
print("\n‚úÖ Ensemble predictions saved to ensemble_predictions.csv")


Current Regime: üêª BEAR
XGB Train MAE: 0.0680 | Test MAE: 0.0834

TOP 10 NASDAQ-100 MOMENTUM PICKS (2026-02-17)

Ticker      Price  Ensemble  XGB_Rank  LSTM_Pred
    MU 336.630005   0.63498       1.0        0.0

‚úÖ Ensemble predictions saved to ensemble_predictions.csv


In [7]:
top_picks.head()

Unnamed: 0_level_0,Ticker,Close,Returns,ROC_5,ROC_20,ATR,RSI,Volume_MA,Volume_Ratio,VIX,VIX_Change,Regime,Fwd_Return_21d,XGB_Score,XGB_Rank,LSTM_Score,LSTM_Confidence,Ensemble_Score,Confidence
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2026-01-15,AVGO,343.019989,0.447751,0.53551,-0.186609,0.710934,-0.330055,33330375.0,-1.416741,0.0,0.0,1.0,-0.052038,0.019398,17.0,0.002326,0.516667,0.789821,0.516667
2026-01-15,TSLA,438.570007,-0.111331,0.102879,-1.228814,0.592256,-1.187299,68680750.0,-1.235501,0.0,0.0,1.0,-0.048179,0.017411,23.0,0.000806,0.566667,0.587381,0.566667
2026-01-15,COST,955.444702,0.282739,0.767201,0.793369,-0.987083,1.607871,2828415.0,-1.057634,0.0,0.0,1.0,0.065975,0.010998,64.0,0.000605,0.516667,0.508346,0.516667
2026-01-15,QCOM,161.389999,-1.041616,-1.927805,-1.031912,0.401795,-0.932764,8262160.0,0.111219,0.0,0.0,1.0,-0.128199,0.014184,38.0,0.000375,0.533333,0.507404,0.533333
2026-01-15,ADBE,304.089996,-0.096372,-1.765256,-1.426259,0.220004,-2.036638,3842890.0,0.529881,0.0,0.0,1.0,-0.131935,0.018664,18.0,-3.6e-05,0.55,0.49515,0.55


In [11]:
close['QQQ'].tail(21)

Unnamed: 0_level_0,QQQ
Date,Unnamed: 1_level_1
2026-01-16,621.26001
2026-01-20,608.059998
2026-01-21,616.280029
2026-01-22,620.76001
2026-01-23,622.719971
2026-01-26,625.460022
2026-01-27,631.130005
2026-01-28,633.219971
2026-01-29,629.429993
2026-01-30,621.869995
