# Tomorrow's SPY Price Prediction

This notebook fetches the latest market data, calculates technical indicators, and uses the trained model to predict whether the SPY price will go UP or DOWN tomorrow (i.e., on the next trading day).

**Note**: This notebook uses yfinance for data fetching, as a macOS-compatible alternative to MetaTrader 5 (MT5). Windows users with MT5 can easily adapt the code.


In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from typing import Optional, Tuple, Any
import joblib
from datetime import datetime, timedelta
import sys

# Add src to path for utilities
sys.path.append(str(Path.cwd().parent))
from src.utils.model_loader import load_trained_model
from src.utils.config import load_config, get_config_value

# Try to import xgboost (may fail on macOS without libomp)
try:
    import xgboost as xgb
    XGBOOST_AVAILABLE = True
except Exception:
    XGBOOST_AVAILABLE = False
    print("⚠ XGBoost not available - will only load sklearn models (.pkl)")

# Try to import yfinance for data fetching
try:
    import yfinance as yf
    YFINANCE_AVAILABLE = True
except ImportError:
    YFINANCE_AVAILABLE = False
    print("⚠ yfinance not available")

# Import technical analysis library
import ta


⚠ XGBoost not available - will only load sklearn models (.pkl)


In [None]:
def fetch_latest_data(symbol: str = "SPY", num_bars: int = 100) -> pd.DataFrame:
    """
    Fetch the most recent daily OHLCV bars for a symbol via yfinance.
    
    The full available daily history is downloaded and then trimmed to the
    last ``num_bars`` rows.
    
    Note: Uses yfinance (MT5 alternative for macOS compatibility).
    
    For Windows users with MT5, you can replace this with:
    ```python
    import MetaTrader5 as mt5
    mt5.initialize()
    rates = mt5.copy_rates_from_pos(symbol, mt5.TIMEFRAME_D1, 0, num_bars)
    df = pd.DataFrame(rates)
    df['time'] = pd.to_datetime(df['time'], unit='s')
    ```
    
    Args:
        symbol: Trading symbol (default: 'SPY')
        num_bars: Number of most recent bars to keep; must be positive
            (default: 100 for moving averages). Fewer rows are returned
            when less history is available.
    
    Returns:
        DataFrame with a 'time' column plus whichever of 'open', 'high',
        'low', 'close', 'volume' the download provided.
    
    Raises:
        ValueError: If ``num_bars`` is not positive, or if no data came back
            for ``symbol``.
        ImportError: If yfinance could not be imported.
    """
    # Reject non-positive sizes up front: tail(0) yields an empty frame and a
    # negative value makes tail() return "everything except the first N rows".
    if num_bars <= 0:
        raise ValueError(f"num_bars must be a positive integer, got {num_bars}")
    
    if not YFINANCE_AVAILABLE:
        raise ImportError("yfinance is required for data fetching. Install with: pip install yfinance")
    
    print(f"Fetching latest {num_bars} bars for {symbol}...")
    
    # Fetch historical data (need enough for moving averages)
    ticker = yf.Ticker(symbol)
    hist = ticker.history(period="max", interval="1d")
    
    if hist is None or hist.empty:
        raise ValueError(f"Failed to fetch data for {symbol}")
    
    # Keep the last num_bars rows and name the index 'time' *before* resetting
    # it, so the timestamp column is produced whatever the index was called
    # (previously only an index named 'Date' was handled).
    df = hist.tail(num_bars).rename_axis("time").reset_index()
    
    # Ensure time is datetime
    df['time'] = pd.to_datetime(df['time'])
    
    # Lowercase the OHLCV column names in one pass; labels that are absent
    # are simply ignored by rename().
    df = df.rename(columns={
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
        'Volume': 'volume'
    })
    
    # Select only needed columns
    required_cols = ['time', 'open', 'high', 'low', 'close', 'volume']
    df = df[[col for col in required_cols if col in df.columns]].copy()
    
    print(f"✓ Fetched {len(df)} bars")
    print(f"  Date range: {df['time'].min()} to {df['time'].max()}")
    
    return df


def calculate_features_for_prediction(df: pd.DataFrame) -> pd.Series:
    """
    Calculate technical indicators for the most recent bar only.
    
    Intended to mirror build_features.py (not visible from this notebook;
    confirm the two stay in sync):
    - RSI (14 periods)
    - EMA (20 periods)
    - EMA (50 periods)
    - ATR (14 periods)
    
    Args:
        df: DataFrame with 'open', 'high', 'low', 'close' and 'volume'
            columns and at least 50 rows. A 'time' column is optional and is
            only used for the progress message.
    
    Returns:
        Series with keys 'open', 'high', 'low', 'close', 'volume', 'rsi_14',
        'ema_20', 'ema_50', 'atr' holding the values for the last row of df.
    
    Raises:
        ValueError: If df has fewer than 50 rows or lacks a required column.
    """
    if len(df) < 50:  # Need at least 50 bars for EMA(50)
        raise ValueError(f"Insufficient data: need at least 50 bars, got {len(df)}")
    
    # Verify required columns. 'volume' is part of the feature vector below,
    # so it is validated here instead of surfacing later as a bare KeyError.
    required_cols = ['open', 'high', 'low', 'close', 'volume']
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")
    
    print("Calculating technical indicators...")
    
    # Each indicator is computed over the whole frame; only the last value
    # (the most recent bar) is kept.
    
    # Calculate RSI (14 periods)
    rsi_indicator = ta.momentum.RSIIndicator(close=df['close'], window=14)
    rsi_14 = rsi_indicator.rsi().iloc[-1]
    
    # Calculate EMA (20 periods)
    ema_20_indicator = ta.trend.EMAIndicator(close=df['close'], window=20)
    ema_20 = ema_20_indicator.ema_indicator().iloc[-1]
    
    # Calculate EMA (50 periods)
    ema_50_indicator = ta.trend.EMAIndicator(close=df['close'], window=50)
    ema_50 = ema_50_indicator.ema_indicator().iloc[-1]
    
    # Calculate ATR (14 periods)
    atr_indicator = ta.volatility.AverageTrueRange(
        high=df['high'],
        low=df['low'],
        close=df['close'],
        window=14
    )
    atr = atr_indicator.average_true_range().iloc[-1]
    
    # Get the most recent OHLCV values
    latest = df.iloc[-1]
    
    # Create feature vector (key order is presumably the training column
    # order; verify against the training pipeline)
    features = pd.Series({
        'open': latest['open'],
        'high': latest['high'],
        'low': latest['low'],
        'close': latest['close'],
        'volume': latest['volume'],
        'rsi_14': rsi_14,
        'ema_20': ema_20,
        'ema_50': ema_50,
        'atr': atr
    })
    
    # 'time' only labels the log line; fall back to the row's index label so a
    # frame without that column does not fail after all the work is done.
    bar_label = latest['time'] if 'time' in df.columns else latest.name
    print(f"✓ Calculated features for {bar_label}")
    
    return features


def load_model(model_path: Optional[str] = None) -> Tuple[Any, str]:
    """
    Load a trained model through the shared ``load_trained_model`` helper.
    
    Args:
        model_path: Optional path to a model file. When given, the file stem
            is used as the model name; when omitted (or empty), the name
            "xgboost_spy_v1" is used and no explicit path is passed on.
    
    Returns:
        Tuple of (model, model_type) as produced by ``load_trained_model``.
    """
    # A falsy model_path (None or "") selects the default model name and lets
    # the helper resolve the location itself.
    resolved_path = Path(model_path) if model_path else None
    name = "xgboost_spy_v1" if resolved_path is None else resolved_path.stem
    
    model, model_type = load_trained_model(model_name=name, model_path=resolved_path)
    print(f"✓ Loaded {model_type} model")
    
    return model, model_type


## Fetch Latest Data and Calculate Features


In [3]:
# Pull the 100 most recent daily bars, more than the 50 that EMA(50) needs
symbol = "SPY"
df = fetch_latest_data(symbol, 100)

# Summarise the newest bar (the last row of the frame)
latest = df.iloc[-1]
print(
    f"\nLatest bar information:",
    f"  Date: {latest['time']}",
    f"  Close: ${latest['close']:.2f}",
    f"  Volume: {latest['volume']:,.0f}",
    sep="\n",
)


Fetching latest 100 bars for SPY...
✓ Fetched 100 bars
  Date range: 2025-08-06 00:00:00-04:00 to 2025-12-26 00:00:00-05:00

Latest bar information:
  Date: 2025-12-26 00:00:00-05:00
  Close: $690.31
  Volume: 41,588,400


In [4]:
# Build the indicator snapshot for the latest bar
features = calculate_features_for_prediction(df)

# Report the indicator values that were just computed
print(
    "\nCalculated Features:",
    f"  RSI(14): {features['rsi_14']:.2f}",
    f"  EMA(20): ${features['ema_20']:.2f}",
    f"  EMA(50): ${features['ema_50']:.2f}",
    f"  ATR(14): ${features['atr']:.2f}",
    sep="\n",
)

# Arrange the values as a single-row 2-D array in a fixed column order
# (presumably the training order; verify against the training pipeline)
feature_columns = [
    'open', 'high', 'low', 'close', 'volume',
    'rsi_14', 'ema_20', 'ema_50', 'atr',
]
X_pred = features.loc[feature_columns].values.reshape(1, -1)


Calculating technical indicators...
✓ Calculated features for 2025-12-26 00:00:00-05:00

Calculated Features:
  RSI(14): 61.76
  EMA(20): $680.92
  EMA(50): $674.50
  ATR(14): $6.48


## Load Model and Make Prediction


In [5]:
# Restore the trained model; with no path given, load_model uses its default model name
model, model_type = load_model(model_path=None)

print(f"\nModel type: {model_type}")


✓ Loaded sklearn model from: /Users/rakehsaleem/ai-trading-lab/src/models/xgboost_spy_v1.pkl

Model type: sklearn


In [6]:
# predict_proba gives one row per sample; take the only row.
# Column 0 is read as the DOWN class and column 1 as the UP class.
probabilities = model.predict_proba(X_pred)[0]
prob_down, prob_up = probabilities[0], probabilities[1]

# Hard class label for the same sample
prediction = model.predict(X_pred)[0]

# Map the label to a direction and to the probability of that class
if prediction == 1:
    direction = "UP"
    confidence = prob_up * 100
else:
    direction = "DOWN"
    confidence = prob_down * 100

# Emit the report in a single call; each argument is one output line
print(
    "\n" + "=" * 60,
    "TOMORROW'S PREDICTION",
    "=" * 60,
    f"Symbol: {symbol}",
    f"Current Close: ${latest['close']:.2f}",
    f"Prediction Date: {latest['time'].date()}",
    f"\nTomorrow's Prediction: {direction}",
    f"Confidence: {confidence:.1f}%",
    f"\nProbability Breakdown:",
    f"  UP:   {prob_up * 100:.1f}%",
    f"  DOWN: {prob_down * 100:.1f}%",
    "=" * 60,
    sep="\n",
)



TOMORROW'S PREDICTION
Symbol: SPY
Current Close: $690.31
Prediction Date: 2025-12-26

Tomorrow's Prediction: UP
Confidence: 68.3%

Probability Breakdown:
  UP:   68.3%
  DOWN: 31.7%


