In [1]:
# Stock Prediction Decision Making Notebook
# Interactive notebook for making daily trading decisions

import os
import sys
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import yfinance as yf

# Configuration
TOP_N_PICKS = 10
INVESTMENT_AMOUNT = 1000

# Path configuration - adjust for notebook location
# Since notebook is in eda-notebooks/, go up one level to project root
PROJECT_ROOT = Path("..").resolve()  # Go up one level from eda-notebooks/
ARTIFACTS_DIR = PROJECT_ROOT / "artifacts"
DATA_DIR = PROJECT_ROOT / "data"
RESULTS_DIR = PROJECT_ROOT / "results"
APP_DIR = PROJECT_ROOT / "app"

print("📊 Stock Prediction Decision Making Notebook")
print("=" * 50)

# Setup paths
if str(APP_DIR) not in sys.path:
    sys.path.append(str(APP_DIR))

print(f"✅ Paths configured:")
print(f"   Project root: {PROJECT_ROOT}")
print(f"   App code: {APP_DIR}")
print(f"   Artifacts: {ARTIFACTS_DIR}")
print(f"   Data: {DATA_DIR}")

📊 Stock Prediction Decision Making Notebook
✅ Paths configured:
   Project root: /Users/sagardhal/Desktop/Practice/personal-stock
   App code: /Users/sagardhal/Desktop/Practice/personal-stock/app
   Artifacts: /Users/sagardhal/Desktop/Practice/personal-stock/artifacts
   Data: /Users/sagardhal/Desktop/Practice/personal-stock/data


In [2]:
# Stock Prediction Decision Making Notebook
# Interactive notebook for making daily trading decisions

import os
import sys
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Configuration
TOP_N_PICKS = 10
INVESTMENT_AMOUNT = 1000

# Path configuration - adjust for notebook location
# Since notebook is in eda-notebooks/, go up one level to project root
PROJECT_ROOT = Path("..").resolve()  # Go up one level from eda-notebooks/
ARTIFACTS_DIR = PROJECT_ROOT / "artifacts"
DATA_DIR = PROJECT_ROOT / "data"
RESULTS_DIR = PROJECT_ROOT / "results"
APP_DIR = PROJECT_ROOT / "app"

print("📊 Stock Prediction Decision Making Notebook")
print("=" * 50)

# Setup paths
if str(APP_DIR) not in sys.path:
    sys.path.append(str(APP_DIR))

print(f"✅ Paths configured:")
print(f"   Project root: {PROJECT_ROOT}")
print(f"   App code: {APP_DIR}")
print(f"   Artifacts: {ARTIFACTS_DIR}")
print(f"   Data: {DATA_DIR}")

📊 Stock Prediction Decision Making Notebook
✅ Paths configured:
   Project root: /Users/sagardhal/Desktop/Practice/personal-stock
   App code: /Users/sagardhal/Desktop/Practice/personal-stock/app
   Artifacts: /Users/sagardhal/Desktop/Practice/personal-stock/artifacts
   Data: /Users/sagardhal/Desktop/Practice/personal-stock/data


In [24]:
# go up one level to the root directory
sys.path.append(os.path.abspath(".."))



# # add project root (parent of notebooks/) to sys.path
# project_root = Path.cwd().parent
# sys.path.insert(0, str(project_root))

from app.predictions import (
    load_latest_data,
    load_model_and_features,
    PredictionComparator,
    _TransformAdapter,
)

# If you also need TrainModel directly in the notebook:
from app.train_model_new import TrainModel   # ✅ absolute package import

from app.stock_pipeline import StockDataPipeline


# Import modules
try:
    from app.predictions import load_model_and_features, _TransformAdapter
    from app.train_model_new import TrainModel
    from app.stock_pipeline import StockDataPipeline
    print("✅ All modules imported successfully")
except ImportError as e:
    print(f"❌ Import error: {e}")
    print("Make sure your app directory contains all required modules")

✅ All modules imported successfully


In [46]:
# Step 1: Load Tickers
print("\n" + "=" * 50)
print("STEP 1: LOAD TICKERS")
print("=" * 50)


tickers = (pd.read_csv("/Users/sagardhal/Desktop/Practice/personal-stock/ticker/spx_ndx_liq_top250_latest.csv")['Ticker']
        #.head(5)
        .tolist())
tickers
len(tickers)





STEP 1: LOAD TICKERS


250

In [47]:
# Step 2: Check Requirements
print("\n" + "=" * 50)
print("STEP 2: CHECK REQUIREMENTS")
print("=" * 50)

# Check for trained models
artifacts_files = list(ARTIFACTS_DIR.glob("*.joblib")) + list(ARTIFACTS_DIR.glob("*.pkl"))
print(f"🤖 Models found: {len(artifacts_files)}")
for f in artifacts_files:
    print(f"   - {f.name}")

# Check for data files
data_files = list(DATA_DIR.glob("*.parquet")) if DATA_DIR.exists() else []
print(f"📊 Data files found: {len(data_files)}")
for f in data_files[:3]:
    print(f"   - {f.name}")
if len(data_files) > 3:
    print(f"   ... and {len(data_files) - 3} more files")


STEP 2: CHECK REQUIREMENTS
🤖 Models found: 5
   - best_rf_model.joblib
   - random_forest_train_valid_20250904_112953.joblib
   - random_forest_train_only_20250904_000839.joblib
   - random_forest_train_only_20250904_112906.joblib
   - random_forest_train_valid_20250904_000856.joblib
📊 Data files found: 1
   - stock_data_combined_20250904_071145.parquet


In [51]:
# =============================================================================
# STEP 3: TRANSFORM DATA USING YOUR STOCKDATAPIPELINE
# =============================================================================

print("\n" + "=" * 60)
print("STEP 3: TRANSFORM DATA USING STOCKDATAPIPELINE")
print("=" * 60)

def run_stock_pipeline_for_predictions(tickers):
    """Run your StockDataPipeline to get fully transformed data"""
    print("🚀 Running StockDataPipeline for complete feature engineering...")
    
    # Configuration (same as your run_data_extraction.py)
    config = {
        "LOOKBACKS": [1, 3, 7, 30, 90, 252, 365],
        "HORIZONS": [30],
        "BINARY_THRESHOLDS": {30: 1.00},  # 0% gain threshold
    }
    
    print(f"   Configuration:")
    print(f"   - Lookbacks: {config['LOOKBACKS']}")
    print(f"   - Horizons: {config['HORIZONS']} days")  
    print(f"   - Binary threshold: {config['BINARY_THRESHOLDS'][30]:.0%}")
    
    try:
        # Initialize StockDataPipeline (same as extraction script)
        pipeline = StockDataPipeline(
            tickers=tickers,
            lookbacks=config["LOOKBACKS"],
            horizons=config["HORIZONS"],
            binarize_thresholds=config["BINARY_THRESHOLDS"],
        )
        
        # Run complete pipeline: stock data + technical indicators + macro data
        print("   📊 Running complete pipeline...")
        print("      - Step 1: Fetching stock data")
        print("      - Step 2: Adding technical indicators (TA-Lib)")  
        print("      - Step 3: Adding macro indicators")
        print("      - Step 4: Final validation and cleanup")
        
        transformed_data = pipeline.run_complete_pipeline()
        
        print(f"   ✅ StockDataPipeline complete!")
        print(f"      Final shape: {transformed_data.shape}")
        
        # Get latest data for predictions
        latest_date = transformed_data['Date'].max()
        prediction_data = transformed_data[transformed_data['Date'] == latest_date].copy()
        
        print(f"   📅 Prediction data ready:")
        print(f"      Latest date: {latest_date.date()}")
        print(f"      Stocks: {len(prediction_data)}")
        
        # Show feature categories
        feature_categories = {
            "Basic OHLCV": [c for c in transformed_data.columns if c in ["Date", "Ticker", "Open", "High", "Low", "Close", "Volume"]],
            "Growth Features": [c for c in transformed_data.columns if c.startswith("growth_") and "future" not in c],
            "Technical Indicators": [c for c in transformed_data.columns if any(x in c.lower() for x in ["rsi", "macd", "sma", "adx", "cci"])],
            "Candlestick Patterns": [c for c in transformed_data.columns if c.startswith("cdl")],
            "Macro Features": [c for c in transformed_data.columns if c.endswith(("_yoy", "_qoq")) or "btc" in c.lower() or "vix" in c.lower()],
            "Target Variables": [c for c in transformed_data.columns if "future" in c and ("positive" in c or "growth" in c)],
        }
        
        print(f"\n   📈 Features created:")
        total_features = 0
        for category, features in feature_categories.items():
            print(f"      {category}: {len(features)}")
            total_features += len(features)
        print(f"      Total: {total_features} features")
        
        return transformed_data, prediction_data
        
    except Exception as e:
        print(f"   ❌ StockDataPipeline failed: {e}")
        print(f"   This could be due to:")
        print(f"   - API rate limits from yfinance/FRED")
        print(f"   - Missing TA-Lib dependencies") 
        print(f"   - Network connectivity issues")
        print(f"   - Insufficient historical data")
        raise

# Run StockDataPipeline
try:
    print("🔄 Starting StockDataPipeline transformation...")
    full_data, prediction_data = run_stock_pipeline_for_predictions(tickers)
    
    print("✅ StockDataPipeline transformation complete!")
    print(f"   Ready for model predictions: {len(prediction_data)} stocks")
    
except Exception as e:
    print(f"❌ StockDataPipeline failed: {e}")
    print("\nTroubleshooting:")
    print("1. Check internet connection")
    print("2. Verify TA-Lib is installed: pip install TA-Lib")  
    print("3. Try with fewer tickers (reduce TOP_N_PICKS)")
    print("4. Check yfinance/FRED API limits")
    raise



STEP 3: TRANSFORM DATA USING STOCKDATAPIPELINE
🔄 Starting StockDataPipeline transformation...
🚀 Running StockDataPipeline for complete feature engineering...
   Configuration:
   - Lookbacks: [1, 3, 7, 30, 90, 252, 365]
   - Horizons: [30] days
   - Binary threshold: 100%
   📊 Running complete pipeline...
      - Step 1: Fetching stock data
      - Step 2: Adding technical indicators (TA-Lib)
      - Step 3: Adding macro indicators
      - Step 4: Final validation and cleanup
STOCK MARKET DATA PIPELINE
Processing 250 tickers: NVDA, TSLA, AAPL, PLTR, MSFT, AMD, AMZN, META, GOOGL, UNH, AVGO, GOOG, MSTR, LLY, COIN, NFLX, INTC, APP, ORCL, BRK-B, JPM, COST, V, MU, PANW, CRM, SMCI, GEV, WMT, NOW, BAC, XOM, UBER, BA, MA, INTU, MRVL, JNJ, IBM, CSCO, CRWD, TXN, HD, C, CVX, AMAT, CAT, GS, QCOM, ADBE, PEP, TMO, LRCX, ANET, UNP, GE, ASML, SHOP, WFC, ACN, BKNG, PG, ISRG, MCD, PFE, TMUS, PDD, KO, MRK, PM, SBUX, TTD, ABBV, ADI, DIS, CMG, APH, T, KLAC, VST, DASH, VRTX, ETN, CSX, MELI, COF, PYPL, WDAY

[*********************100%***********************]  250 of 250 completed


Created binary target: is_positive_growth_30d_future (threshold=1.0)
Raw stock data: (2109525, 30)
Date range: 1962-01-02 00:00:00 to 2025-09-12 00:00:00
Tickers: AAPL, ABBV, ABNB, ABT, ACN, ADBE, ADI, ADP, ADSK, AEP, AJG, ALB, AMAT, AMD, AMGN, AMT, AMZN, ANET, AON, APH, APO, APP, ARM, ASML, AVGO, AXON, AXP, AZN, AZO, BA, BAC, BDX, BKNG, BLK, BMY, BRK-B, BSX, BX, C, CAH, CARR, CAT, CB, CCI, CCL, CDNS, CEG, CHTR, CI, CL, CMCSA, CME, CMG, CNC, COF, COIN, COP, COR, COST, CRM, CRWD, CSCO, CSX, CTAS, CVS, CVX, DAL, DASH, DDOG, DE, DECK, DELL, DHI, DHR, DIS, DLTR, DOW, DUK, EA, EBAY, EL, ELV, EMR, EQIX, EQT, ETN, EW, EXPE, F, FCX, FDX, FI, FICO, FSLR, FTNT, GE, GEV, GILD, GLW, GM, GOOG, GOOGL, GS, HBAN, HCA, HD, HLT, HON, HPE, HUM, HWM, IBKR, IBM, ICE, IDXX, INTC, INTU, ISRG, IT, JCI, JNJ, JPM, KDP, KEY, KHC, KKR, KLAC, KMI, KO, KR, KVUE, LEN, LHX, LIN, LLY, LMT, LOW, LRCX, LULU, MA, MAR, MCD, MCHP, MCK, MCO, MDLZ, MDT, MELI, META, MMC, MMM, MNST, MO, MPWR, MRK, MRVL, MS, MSCI, MSFT, MSI, MS

In [None]:
# =============================================================================
# STEP 4: PREPARE FOR MODEL INFERENCE USING TRAINMODEL
# =============================================================================

print("\n" + "=" * 60)
print("STEP 4: PREPARE FOR MODEL INFERENCE")
print("=" * 60)

def prepare_for_model_inference(pipeline_data,window_days=7):
    """Use TrainModel to prepare StockDataPipeline output for inference"""
    print("🤖 Using TrainModel for inference preparation...")
    
    try:
        # Create adapter and TrainModel (consistent with predictions.py)
        adapter = _TransformAdapter(pipeline_data)
        tm = TrainModel(adapter)
        
        # Prepare for inference (creates dummy variables, etc.)
        tm.prepare_dataframe(start_date="2000-01-01")
        
        print(f"   ✅ TrainModel preparation complete")
        print(f"      Shape: {tm.df_full.shape}")
        
        # Get latest data for predictions
        latest_date = tm.df_full['Date'].max()
        inference_data = tm.df_full[tm.df_full['Date'] == latest_date].copy()
        
        print(f"   📊 Inference ready: {len(inference_data)} stocks")
        
        return tm, inference_data
        
    except Exception as e:
        print(f"   ❌ TrainModel preparation failed: {e}")
        raise

try:
    # Prepare data for inference
    tm, inference_data = prepare_for_model_inference(full_data)
    
    # Load trained model
    print("\n📂 Loading trained model...")
    model, feature_cols, target_col = load_model_and_features(str(ARTIFACTS_DIR))
    
    print(f"✅ Model loaded:")
    print(f"   Type: {type(model).__name__}")
    print(f"   Features expected: {len(feature_cols)}")
    print(f"   Target: {target_col}")
    
    # Set up TrainModel for inference (same as predictions.py)
    tm.model = model
    tm._inference_feature_columns = feature_cols
    if target_col:
        tm.target_col = target_col
    
    print("✅ Model setup complete")
    
except Exception as e:
    print(f"❌ Model setup failed: {e}")
    print("Make sure you have a trained model in the artifacts directory")
    print("Run: python run_model_training.py --mode basic")
    raise


STEP 4: PREPARE FOR MODEL INFERENCE
🤖 Using TrainModel for inference preparation...
Preparing dataframe for modeling...
Defining feature sets...
Feature Set Summary:
  Growth features: 70
  Technical indicators: 56
  Technical patterns: 61
  Custom numerical: 7
  Macro features: 75
  Categorical (for dummies): 7
  Target columns: 2
  Total numerical features: 206
  Unused columns: 0
Creating dummy variables...
Created 397 dummy variables
Sample dummies: ['month_1', 'month_10', 'month_11', 'month_12', 'month_2']
Filtered data from 2000-01-01
Date range: 2000-01-03 00:00:00 to 2025-09-12 00:00:00
Temporal split created:
  train: 909,539 samples
  validation: 232,029 samples
  test: 238,866 samples
Creating ML datasets...
Total features before filtering: 603
  - Numerical: 206
  - Dummies: 397
Features after removing 'future': 603
Selected target: is_positive_growth_30d_future
ML Dataset Summary:
  Features used: 603
  Train: 909,539 samples
  Validation: 232,029 samples
  Test: 238,866 

In [60]:
# =============================================================================
# STEP 4: PREPARE FOR MODEL INFERENCE USING TRAINMODEL
# =============================================================================

print("\n" + "=" * 60)
print("STEP 4: PREPARE FOR MODEL INFERENCE")
print("=" * 60)

import pandas as pd  # ensure available for Timedelta/to_datetime

def prepare_for_model_inference(pipeline_data, window_days=7):
    """Use TrainModel to prepare StockDataPipeline output for inference."""
    print("🤖 Using TrainModel for inference preparation...")
    try:
        # Create adapter and TrainModel (consistent with predictions.py)
        adapter = _TransformAdapter(pipeline_data)
        tm = TrainModel(adapter)

        # Prepare for inference (creates dummy variables, etc.)
        tm.prepare_dataframe(start_date="2000-01-01")

        print(f"   ✅ TrainModel preparation complete")
        print(f"      Shape: {tm.df_full.shape}")

        # ---- Slice to last `window_days` calendar days (with hardening) ----
        # Safety: ensure Date is datetime
        tm.df_full['Date'] = pd.to_datetime(tm.df_full['Date'])

        latest_date = tm.df_full['Date'].max()
        week_start = latest_date - pd.Timedelta(days=window_days - 1)

        inference_data = tm.df_full[
            (tm.df_full['Date'] >= week_start) &
            (tm.df_full['Date'] <= latest_date)
        ].copy()

        if inference_data.empty:
            raise ValueError(
                f"No inference rows between {week_start.date()} and {latest_date.date()}. "
                "Check upstream dates/timezones or reduce window_days."
            )
        # --------------------------------------------------------------------

        print(f"   📊 Inference window: {week_start.date()} → {latest_date.date()}  |  Rows: {len(inference_data)}")

        return tm, inference_data

    except Exception as e:
        print(f"   ❌ TrainModel preparation failed: {e}")
        raise

try:
    # Prepare data for inference (last 7 calendar days)
    tm, inference_data = prepare_for_model_inference(full_data, window_days=7)

    # Load trained model
    print("\n📂 Loading trained model...")
    model, feature_cols, target_col = load_model_and_features(str(ARTIFACTS_DIR))

    print(f"✅ Model loaded:")
    print(f"   Type: {type(model).__name__}")
    print(f"   Features expected: {len(feature_cols)}")
    print(f"   Target: {target_col}")

    # Set up TrainModel for inference (same as predictions.py)
    tm.model = model
    tm._inference_feature_columns = feature_cols
    if target_col:
        tm.target_col = target_col

    print("✅ Model setup complete")

except Exception as e:
    print(f"❌ Model setup or prediction failed: {e}")
    print("Make sure you have a trained model in the artifacts directory")
    print("Run: python run_model_training.py --mode basic")
    raise



STEP 4: PREPARE FOR MODEL INFERENCE
🤖 Using TrainModel for inference preparation...
Preparing dataframe for modeling...
Defining feature sets...
Feature Set Summary:
  Growth features: 70
  Technical indicators: 56
  Technical patterns: 61
  Custom numerical: 7
  Macro features: 75
  Categorical (for dummies): 7
  Target columns: 2
  Total numerical features: 206
  Unused columns: 0
Creating dummy variables...
Created 397 dummy variables
Sample dummies: ['month_1', 'month_10', 'month_11', 'month_12', 'month_2']
Filtered data from 2000-01-01
Date range: 2000-01-03 00:00:00 to 2025-09-12 00:00:00
Temporal split created:
  train: 909,539 samples
  validation: 232,029 samples
  test: 238,866 samples
Creating ML datasets...
Total features before filtering: 603
  - Numerical: 206
  - Dummies: 397
Features after removing 'future': 603
Selected target: is_positive_growth_30d_future
ML Dataset Summary:
  Features used: 603
  Train: 909,539 samples
  Validation: 232,029 samples
  Test: 238,866 

In [62]:
print(f"Debug info:")
print(f"   Data shape: {tm.df_full.shape if hasattr(tm, 'df_full') else 'N/A'}")
if 'feature_cols' in locals() and hasattr(tm, 'df_full'):
    available = [f for f in feature_cols if f in tm.df_full.columns]
    missing = [f for f in feature_cols if f not in tm.df_full.columns]
    print(f"   Available features (sample): {available}")
    print(f"   Missing features (sample): {missing}")


Debug info:
   Data shape: (1380434, 620)
   Available features (sample): ['growth_1d', 'growth_3d', 'growth_7d', 'growth_30d', 'growth_90d', 'growth_252d', 'growth_365d', 'growth_btc_1d', 'growth_btc_3d', 'growth_btc_7d', 'growth_btc_30d', 'growth_btc_90d', 'growth_btc_252d', 'growth_btc_365d', 'growth_vix_1d', 'growth_vix_3d', 'growth_vix_7d', 'growth_vix_30d', 'growth_vix_90d', 'growth_vix_252d', 'growth_vix_365d', 'growth_dax_1d', 'growth_dax_3d', 'growth_dax_7d', 'growth_dax_30d', 'growth_dax_90d', 'growth_dax_252d', 'growth_dax_365d', 'growth_snp500_1d', 'growth_snp500_3d', 'growth_snp500_7d', 'growth_snp500_30d', 'growth_snp500_90d', 'growth_snp500_252d', 'growth_snp500_365d', 'growth_dji_1d', 'growth_dji_3d', 'growth_dji_7d', 'growth_dji_30d', 'growth_dji_90d', 'growth_dji_252d', 'growth_dji_365d', 'growth_epi_1d', 'growth_epi_3d', 'growth_epi_7d', 'growth_epi_30d', 'growth_epi_90d', 'growth_epi_252d', 'growth_epi_365d', 'growth_gold_1d', 'growth_gold_3d', 'growth_gold_7d', 'gr

In [63]:
# =============================================================================
# STEP 5: GENERATE PREDICTIONS
# =============================================================================

print("\n" + "=" * 60)
print("STEP 5: GENERATE PREDICTIONS")
print("=" * 60)

try:
    # Generate predictions using TrainModel.make_inference (same as predictions.py)
    print("🎯 Generating predictions using TrainModel.make_inference()...")
    prediction_results = tm.make_inference("realtime_probs")
    print(f"✅ Predictions generated!")

    # Use the same 7-day inference window prepared in STEP 4
    # (tm.df_full already contains the column added by make_inference)
    if 'realtime_probs' not in tm.df_full.columns:
        raise ValueError("Prediction column 'realtime_probs' not found after make_inference().")

    # Limit to the window you prepared earlier
    latest_date = inference_data['Date'].max()
    week_start = inference_data['Date'].min()
    prediction_data = tm.df_full[
        (tm.df_full['Date'] >= week_start) &
        (tm.df_full['Date'] <= latest_date)
    ].copy()

    # Keep only rows that have probabilities
    prediction_data = prediction_data.loc[prediction_data['realtime_probs'].notna()].copy()

    if prediction_data.empty:
        raise ValueError(
            f"No rows with 'realtime_probs' between {week_start.date()} and {latest_date.date()}."
        )

    # Add derived columns
    probs = prediction_data['realtime_probs'].astype(float)
    prediction_data['probability'] = probs
    prediction_data['prediction'] = (probs >= 0.5).astype(int)

    # Rank within each Date (highest prob = rank 1)
    prediction_data['rank'] = prediction_data.groupby('Date')['probability'].rank(ascending=False, method='first')
    prediction_data['percentile'] = prediction_data.groupby('Date')['probability'].rank(pct=True)

    # --- Summary prints ---
    print("   📊 Window summary:")
    print(f"      Dates: {week_start.date()} → {latest_date.date()} "
          f"({prediction_data['Date'].nunique()} trading day(s))")
    print(f"      Rows with probs: {len(prediction_data)}")
    print(f"      Probability range: {probs.min():.3f} to {probs.max():.3f}")
    print(f"      Mean probability: {probs.mean():.3f}")
    print(f"      Std deviation: {probs.std():.3f}")

    # Latest-day quick view (preserves your old behavior)
    latest_slice = prediction_data[prediction_data['Date'] == latest_date].copy()
    positive_preds = latest_slice['prediction'].sum()
    print(f"   📅 Latest day: {latest_date.date()} | rows: {len(latest_slice)}")
    print(f"      Positive predictions (>50%): {positive_preds}/{len(latest_slice)}")

    # Feature alignment info (same as before)
    available_features = [f for f in feature_cols if f in tm.df_full.columns]
    print(f"      Feature alignment: {len(available_features)}/{len(feature_cols)} "
          f"({len(available_features)/len(feature_cols):.1%})")

    # If you need these dataframes elsewhere, they are:
    # - prediction_data: full 7-day window with probs/preds/ranks
    # - latest_slice   : latest day only
    # (Return or save as needed in your workflow.)

except Exception as e:
    print(f"❌ Prediction generation failed: {e}")
    # Debug info
    if 'tm' in locals():
        print("Debug info:")
        print(f"   Data shape: {tm.df_full.shape if hasattr(tm, 'df_full') else 'N/A'}")
        if 'feature_cols' in locals() and hasattr(tm, 'df_full'):
            available = [f for f in feature_cols[:10] if f in tm.df_full.columns]
            missing = [f for f in feature_cols[:10] if f not in tm.df_full.columns]
            print(f"   Available features (sample): {available}")
            print(f"   Missing features (sample): {missing}")
    raise



STEP 5: GENERATE PREDICTIONS
🎯 Generating predictions using TrainModel.make_inference()...
Generated predictions 'realtime_probs' and 'realtime_probs_rank'
✅ Predictions generated!
   📊 Window summary:
      Dates: 2025-09-08 → 2025-09-12 (5 trading day(s))
      Rows with probs: 1250
      Probability range: 0.490 to 0.544
      Mean probability: 0.519
      Std deviation: 0.014
   📅 Latest day: 2025-09-12 | rows: 250
      Positive predictions (>50%): 250/250
      Feature alignment: 603/603 (100.0%)


In [65]:
prediction_data.head()

Unnamed: 0,Date,Open,High,Low,Close,adj_close,Volume,Ticker,year,month,...,month_wom_September_w3,month_wom_September_w4,month_wom_September_w5,split,realtime_probs,realtime_probs_rank,probability,prediction,rank,percentile
11268,2025-09-08,239.300003,240.149994,236.339996,237.880005,237.880005,48999500.0,AAPL,2025,9,...,0,0,0,test,0.529511,240.0,0.529511,1,240.0,0.044
11269,2025-09-09,237.0,238.779999,233.360001,234.350006,234.350006,66313900.0,AAPL,2025,9,...,0,0,0,test,0.531044,248.0,0.531044,1,248.0,0.012
11270,2025-09-10,232.190002,232.419998,225.949997,226.789993,226.789993,83440800.0,AAPL,2025,9,...,0,0,0,test,0.5116,207.0,0.5116,1,207.0,0.176
11271,2025-09-11,226.880005,230.449997,226.649994,230.029999,230.029999,50208600.0,AAPL,2025,9,...,0,0,0,test,0.494642,227.0,0.494642,0,227.0,0.096
11272,2025-09-12,229.220001,234.509995,229.020004,234.070007,234.070007,55776500.0,AAPL,2025,9,...,0,0,0,test,0.509812,242.0,0.509812,1,242.0,0.036


In [66]:
# =============================================================================
# STEP 6: CREATE TRADING SIGNALS AND DECISIONS
# =============================================================================

print("\n" + "=" * 60)
print("STEP 6: TRADING SIGNALS & DECISIONS")
print("=" * 60)

def create_trading_signals(data):
    """Create trading signals based on prediction probabilities"""
    df = data.copy()
    
    # Calculate dynamic thresholds
    high_conf = df['probability'].quantile(0.90)  # Top 10%
    med_conf = df['probability'].quantile(0.75)   # Top 25%
    low_conf = df['probability'].quantile(0.60)   # Top 40%
    
    print(f"📊 Dynamic thresholds:")
    print(f"   High confidence (top 10%): {high_conf:.3f}")
    print(f"   Medium confidence (top 25%): {med_conf:.3f}")
    print(f"   Low confidence (top 40%): {low_conf:.3f}")
    
    # Create signals
    conditions = [
        (df['probability'] >= high_conf),
        (df['probability'] >= med_conf),
        (df['probability'] >= low_conf),
        (df['probability'] >= 0.5)
    ]
    
    choices = ['🟢 STRONG BUY', '🟡 BUY', '🟠 CONSIDER', '🔵 WEAK BUY']
    df['signal'] = np.select(conditions, choices, default='🔴 PASS')
    
    # Investment recommendations
    df['investment'] = 0
    df.loc[df['signal'].str.contains('STRONG'), 'investment'] = INVESTMENT_AMOUNT * 1.5
    df.loc[df['signal'] == '🟡 BUY', 'investment'] = INVESTMENT_AMOUNT
    df.loc[df['signal'].str.contains('CONSIDER'), 'investment'] = INVESTMENT_AMOUNT * 0.5
    
    return df

def create_top_picks_analysis(data, top_n=TOP_N_PICKS):
    """Analyze top picks and create actionable recommendations"""
    
    # Sort by probability
    top_picks = data.nlargest(top_n, 'probability')
    
    print(f"\n🎯 TOP {top_n} PICKS FOR {data['Date'].iloc[0].strftime('%Y-%m-%d')}:")
    print("=" * 80)
    
    # Create display
    display_data = []
    for i, (_, stock) in enumerate(top_picks.iterrows(), 1):
        
        display_data.append({
            'Rank': i,
            'Ticker': stock['Ticker'],
            'Probability': f"{stock['probability']:.1%}",
            'Signal': stock['signal'],
            'Investment': f"${int(stock['investment']):,}" if stock['investment'] > 0 else "-",
            'Price': f"${stock.get('Close', 0):.2f}" if 'Close' in stock and stock.get('Close', 0) > 0 else "N/A"
        })
    
    display_df = pd.DataFrame(display_data)
    print(display_df.to_string(index=False))
    
    return top_picks

def create_action_plan(top_picks):
    """Create executable trading action plan"""
    
    print(f"\n📋 ACTION PLAN:")
    print("=" * 60)
    
    # Group by signal type
    strong_buys = top_picks[top_picks['signal'].str.contains('STRONG')]
    buys = top_picks[top_picks['signal'] == '🟡 BUY']
    considers = top_picks[top_picks['signal'].str.contains('CONSIDER')]
    
    total_investment = 0
    
    if len(strong_buys) > 0:
        print(f"🟢 IMMEDIATE ACTION ({len(strong_buys)} stocks):")
        for _, stock in strong_buys.iterrows():
            investment = int(stock['investment'])
            total_investment += investment
            print(f"   • {stock['Ticker']:6s} - ${investment:,} (prob: {stock['probability']:.1%})")
    
    if len(buys) > 0:
        print(f"\n🟡 SECONDARY TARGETS ({len(buys)} stocks):")
        for _, stock in buys.iterrows():
            investment = int(stock['investment'])
            total_investment += investment
            print(f"   • {stock['Ticker']:6s} - ${investment:,} (prob: {stock['probability']:.1%})")
    
    if len(considers) > 0:
        print(f"\n🟠 WATCH LIST ({len(considers)} stocks):")
        for _, stock in considers.iterrows():
            print(f"   • {stock['Ticker']:6s} - Monitor (prob: {stock['probability']:.1%})")
    
    print(f"\n💰 TOTAL INVESTMENT RECOMMENDED: ${total_investment:,}")
    
    return {
        'strong_buys': strong_buys,
        'buys': buys, 
        'considers': considers,
        'total_investment': total_investment
    }

# Generate trading signals and analysis
try:
    # Create signals
    prediction_data = create_trading_signals(prediction_data)
    
    # Analyze top picks
    top_picks = create_top_picks_analysis(prediction_data, TOP_N_PICKS)
    
    # Create action plan
    action_plan = create_action_plan(top_picks)
    
    # Summary statistics
    print(f"\n📊 SUMMARY:")
    print("=" * 40)
    print(f"   Analysis date: {prediction_data['Date'].iloc[0].strftime('%Y-%m-%d')}")
    print(f"   Stocks analyzed: {len(prediction_data)}")
    print(f"   Strong buy signals: {len(action_plan['strong_buys'])}")
    print(f"   Buy signals: {len(action_plan['buys'])}")
    print(f"   Watch list: {len(action_plan['considers'])}")
    print(f"   Total capital needed: ${action_plan['total_investment']:,}")
    
except Exception as e:
    print(f"❌ Signal generation failed: {e}")
    raise


STEP 6: TRADING SIGNALS & DECISIONS
📊 Dynamic thresholds:
   High confidence (top 10%): 0.537
   Medium confidence (top 25%): 0.534
   Low confidence (top 40%): 0.524

🎯 TOP 10 PICKS FOR 2025-09-08:
 Rank Ticker Probability       Signal Investment    Price
    1    DOW       54.4% 🟢 STRONG BUY     $1,500   $23.98
    2   KVUE       54.3% 🟢 STRONG BUY     $1,500   $18.87
    3   KVUE       54.2% 🟢 STRONG BUY     $1,500   $18.43
    4    DOW       54.1% 🟢 STRONG BUY     $1,500   $24.14
    5    PFE       54.1% 🟢 STRONG BUY     $1,500   $24.71
    6    BLK       54.1% 🟢 STRONG BUY     $1,500 $1105.67
    7   DELL       54.1% 🟢 STRONG BUY     $1,500  $121.29
    8    COR       54.1% 🟢 STRONG BUY     $1,500  $297.86
    9     PM       54.0% 🟢 STRONG BUY     $1,500  $164.74
   10   AXON       54.0% 🟢 STRONG BUY     $1,500  $731.98

📋 ACTION PLAN:
🟢 IMMEDIATE ACTION (10 stocks):
   • DOW    - $1,500 (prob: 54.4%)
   • KVUE   - $1,500 (prob: 54.3%)
   • KVUE   - $1,500 (prob: 54.2%)
   • DOW 

In [57]:
# =============================================================================
# SAVE RESULTS AND FINAL SUMMARY
# =============================================================================

print("\n" + "=" * 60)
print("SAVE RESULTS")
print("=" * 60)

try:
    # Create results directory
    RESULTS_DIR.mkdir(exist_ok=True)
    
    # Timestamp for files
    timestamp = datetime.now().strftime("%Y%m%d_%H%M")
    
    # Save predictions
    predictions_file = RESULTS_DIR / f"realtime_predictions_{timestamp}.csv"
    save_cols = ['Date', 'Ticker', 'probability', 'signal', 'investment']
    if 'Close' in prediction_data.columns:
        save_cols.append('Close')
    
    prediction_data[save_cols].to_csv(predictions_file, index=False)
    
    # Save action plan
    action_file = RESULTS_DIR / f"action_plan_{timestamp}.txt"
    with open(action_file, 'w') as f:
        f.write(f"Trading Action Plan - {datetime.now().strftime('%Y-%m-%d %H:%M')}\n")
        f.write("=" * 60 + "\n\n")
        
        f.write("STRONG BUY:\n")
        for _, stock in action_plan['strong_buys'].iterrows():
            f.write(f"  {stock['Ticker']} - ${int(stock['investment']):,} ({stock['probability']:.1%})\n")
        
        f.write("\nBUY:\n")
        for _, stock in action_plan['buys'].iterrows():
            f.write(f"  {stock['Ticker']} - ${int(stock['investment']):,} ({stock['probability']:.1%})\n")
        
        f.write(f"\nTotal Investment: ${action_plan['total_investment']:,}\n")
    
    print(f"💾 Results saved:")
    print(f"   Predictions: {predictions_file}")
    print(f"   Action plan: {action_file}")
    
except Exception as e:
    print(f"⚠️ Could not save results: {e}")

# =============================================================================
# FINAL SUMMARY
# =============================================================================

print("\n" + "=" * 60)
print("🚀 REAL-TIME ANALYSIS COMPLETE")
print("=" * 60)

if 'action_plan' in locals():
    print(f"✅ Analysis successful!")
    print(f"📅 Data date: {prediction_data['Date'].iloc[0].strftime('%Y-%m-%d')}")
    print(f"🎯 Stocks analyzed: {len(prediction_data)}")
    print(f"💰 Total recommendations: ${action_plan['total_investment']:,}")
    
    if len(action_plan['strong_buys']) > 0:
        best_pick = action_plan['strong_buys'].iloc[0]
        print(f"\n🏆 TOP RECOMMENDATION:")
        print(f"   {best_pick['Ticker']} - {best_pick['probability']:.1%} confidence")
        print(f"   Investment: ${int(best_pick['investment']):,}")
    
    print(f"\n⚠️ IMPORTANT REMINDERS:")
    print("• Set stop losses at -15% to -20%")
    print("• Don't invest more than 5-10% per position")
    print("• Monitor positions daily")
    print("• This is based on historical patterns only")
    
    print(f"\n🔄 NEXT STEPS:")
    print("1. Execute strong buy signals")
    print("2. Set stop loss orders")
    print("3. Monitor throughout trading day")
    print("4. Re-run notebook daily for fresh signals")
    
else:
    print("❌ Analysis incomplete - check errors above")

print(f"\n📊 Performance tracking:")
print(f"   Results saved in: {RESULTS_DIR}")
print(f"   Track actual vs predicted outcomes")
print(f"   Adjust model/thresholds based on results")


SAVE RESULTS
💾 Results saved:
   Predictions: /Users/sagardhal/Desktop/Practice/personal-stock/results/realtime_predictions_20250913_1338.csv
   Action plan: /Users/sagardhal/Desktop/Practice/personal-stock/results/action_plan_20250913_1338.txt

🚀 REAL-TIME ANALYSIS COMPLETE
✅ Analysis successful!
📅 Data date: 2025-09-12
🎯 Stocks analyzed: 250
💰 Total recommendations: $15,000

🏆 TOP RECOMMENDATION:
   KVUE - 52.3% confidence
   Investment: $1,500

⚠️ IMPORTANT REMINDERS:
• Set stop losses at -15% to -20%
• Don't invest more than 5-10% per position
• Monitor positions daily
• This is based on historical patterns only

🔄 NEXT STEPS:
1. Execute strong buy signals
2. Set stop loss orders
3. Monitor throughout trading day
4. Re-run notebook daily for fresh signals

📊 Performance tracking:
   Results saved in: /Users/sagardhal/Desktop/Practice/personal-stock/results
   Track actual vs predicted outcomes
   Adjust model/thresholds based on results


In [14]:
# Load model
try:
    model, feature_cols, target_col = load_model_and_features(str(ARTIFACTS_DIR))
    tm.model = model
    tm._inference_feature_columns = feature_cols
    if target_col:
        tm.target_col = target_col
    
    print(f"✅ Model loaded successfully")
    print(f"   Features: {len(feature_cols)}")
    print(f"   Target: {tm.target_col}")
    
    # Check feature availability
    available_features = [f for f in feature_cols if f in tm.df_full.columns]
    missing_features = [f for f in feature_cols if f not in tm.df_full.columns]
    
    feature_coverage = len(available_features) / len(feature_cols)
    print(f"   Feature coverage: {feature_coverage:.1%} ({len(available_features)}/{len(feature_cols)})")
    
    if feature_coverage < 0.8:
        print(f"⚠️ Warning: Low feature coverage ({feature_coverage:.1%})")
        print("Model and data may be incompatible")
    
except Exception as e:
    print(f"❌ Model loading failed: {e}")
    raise


[load_model_and_features] Using model file: best_rf_model.joblib
[load_model_and_features] feature_names_in_: 603 features
✅ Model loaded successfully
   Features: 603
   Target: is_positive_growth_30d_future
   Feature coverage: 100.0% (603/603)


In [16]:
# Step 4: Generate Predictions (only if everything loaded)
#if not missing_requirements and 'tm' in locals() and 'model' in locals():
print("\n" + "=" * 50)
print("STEP 4: GENERATE PREDICTIONS")
print("=" * 50)

# Create prediction comparator
comparator = PredictionComparator(tm.df_full, tm.target_col)

# Add manual rule-based predictions
try:
    comparator.add_manual_predictions()
    print("✅ Manual predictions created")
except Exception as e:
    print(f"⚠️ Manual predictions failed: {e}")

# Add ML predictions
try:
    comparator.add_ml_predictions(model, feature_cols)
    print("✅ ML predictions created")
    ml_success = True
except Exception as e:
    print(f"⚠️ ML predictions failed: {e}")
    # Create fallback probability column
    comparator.df['rf_prob_30d'] = 0.5
    ml_success = False

# Add additional strategies if ML worked
if ml_success and 'rf_prob_30d' in comparator.df.columns:
    try:
        comparator.add_ml_thresholds_from_validation("rf_prob_30d")
        print("✅ Adaptive thresholds created")
    except Exception as e:
        print(f"⚠️ Adaptive thresholds failed: {e}")
    
    try:
        comparator.add_daily_topn(proba_col="rf_prob_30d", n=3)
        comparator.add_daily_topn(proba_col="rf_prob_30d", n=5)
        print("✅ Top-K strategies created")
    except Exception as e:
        print(f"⚠️ Top-K strategies failed: {e}")

print(f"📊 Total strategies created: {len(comparator.prediction_cols)}")


STEP 4: GENERATE PREDICTIONS
Creating manual rule-based predictions...
Manual prediction summary:
  pred0_manual_cci: 2.5% positive predictions
  pred1_manual_prev_g1: 58.2% positive predictions
  pred2_manual_prev_g1_and_snp: 0.0% positive predictions
  pred3_manual_declining_rates: 47.6% positive predictions
  pred4_manual_fed_easing: 40.5% positive predictions
  pred5_manual_vix_contrarian: 18.9% positive predictions
  pred6_manual_stock_btc_momentum: 0.2% positive predictions
✅ Manual predictions created
[add_ml_predictions] Non-finite detected in: ['growth_1d', 'growth_3d', 'growth_7d', 'growth_30d', 'growth_90d', 'growth_252d', 'growth_365d', 'growth_btc_1d', 'growth_btc_3d', 'growth_btc_7d', 'growth_btc_30d', 'growth_btc_90d']...
ML prediction summary:
  pred10_rf_thresh_21: 100.0% positive predictions
  pred11_rf_thresh_50: 92.6% positive predictions
  pred12_rf_thresh_65: 20.4% positive predictions
  pred13_rf_thresh_80: 2.1% positive predictions
  pred14_rf_thresh_90: 0.1% p

In [17]:
print("\n" + "=" * 50)
print("STEP 5: ANALYZE YOUR TICKERS")
print("=" * 50)

# Get latest date and filter to your tickers
latest_date = comparator.df['Date'].max()
print(f"📅 Latest data date: {latest_date}")

# Filter to your tickers and recent data
your_data = comparator.df[
    (comparator.df['Ticker'].isin(tickers)) & 
    (comparator.df['Date'] >= latest_date - timedelta(days=7))
].copy()

print(f"📊 Your tickers in recent data: {your_data['Ticker'].nunique()}/{len(tickers)}")

if len(your_data) == 0:
    print("❌ No data found for your tickers in recent period")
    print("Your tickers might not be in the processed dataset")
    available_tickers = comparator.df['Ticker'].unique()[:20]
    print(f"Available tickers (sample): {', '.join(available_tickers)}")
else:
    # Get most recent data for each ticker
    latest_by_ticker = your_data.loc[your_data.groupby('Ticker')['Date'].idxmax()]
    
    # Sort by prediction probability
    prob_col = 'rf_prob_30d' if 'rf_prob_30d' in latest_by_ticker.columns else None
    if prob_col and latest_by_ticker[prob_col].std() > 0:
        latest_by_ticker = latest_by_ticker.sort_values(prob_col, ascending=False)
        prob_source = "ML Model"
    else:
        # Fallback to manual prediction
        manual_cols = [c for c in latest_by_ticker.columns if c.startswith('pred') and 'manual' in c]
        if manual_cols:
            prob_col = manual_cols[0]
            latest_by_ticker = latest_by_ticker.sort_values(prob_col, ascending=False)
            prob_source = "Manual Rules"
        else:
            prob_col = None
            prob_source = "None"
    
    # Top picks
    top_picks = latest_by_ticker.head(TOP_N_PICKS)
    
    print(f"\n🎯 TOP {len(top_picks)} PICKS (sorted by {prob_source}):")
    print("-" * 60)
    
    for i, (_, stock) in enumerate(top_picks.iterrows(), 1):
        ticker = stock['Ticker']
        date = stock['Date'].strftime('%Y-%m-%d')
        
        if prob_col and prob_col == 'rf_prob_30d':
            prob_value = f"{stock[prob_col]*100:.1f}%"
        elif prob_col:
            prob_value = f"{stock[prob_col]:.0f}"
        else:
            prob_value = "N/A"
        
        print(f"{i:2d}. {ticker:6s} | Prob: {prob_value:6s} | Date: {date} | Investment: ${INVESTMENT_AMOUNT:,}")



STEP 5: ANALYZE YOUR TICKERS
📅 Latest data date: 2025-09-03 00:00:00
📊 Your tickers in recent data: 0/250
❌ No data found for your tickers in recent period
Your tickers might not be in the processed dataset
Available tickers (sample): AAPL, ADBE, AMAT, AMD, AMZN, APP, AVGO, BA, BAC, BRK-B, C, CAT, COIN, COST, CRM, CRWD, CSCO, CVX, GEV, GOOG


In [22]:
# Step 5: Analyze Your Tickers
print("\n" + "=" * 50)
print("STEP 5: ANALYZE YOUR TICKERS")
print("=" * 50)

# Get latest date and filter to your tickers
latest_date = comparator.df['Date'].max()
print(f"📅 Latest data date: {latest_date}")

# Filter to your tickers and recent data
your_data = comparator.df[
    (comparator.df['Ticker'].isin(tickers)) & 
    (comparator.df['Date'] >= latest_date - timedelta(days=7))
].copy()

print(f"📊 Your tickers in recent data: {your_data['Ticker'].nunique()}/{len(tickers)}")

if len(your_data) == 0:
    print("❌ No data found for your tickers in recent period")
    print("Your tickers might not be in the processed dataset")
    available_tickers = comparator.df['Ticker'].unique()[:20]
    print(f"Available tickers (sample): {', '.join(available_tickers)}")
else:
    # Get most recent data for each ticker
    latest_by_ticker = your_data.loc[your_data.groupby('Ticker')['Date'].idxmax()]
    
    # Sort by prediction probability
    prob_col = 'rf_prob_30d' if 'rf_prob_30d' in latest_by_ticker.columns else None
    if prob_col and latest_by_ticker[prob_col].std() > 0:
        latest_by_ticker = latest_by_ticker.sort_values(prob_col, ascending=False)
        prob_source = "ML Model"
    else:
        # Fallback to manual prediction
        manual_cols = [c for c in latest_by_ticker.columns if c.startswith('pred') and 'manual' in c]
        if manual_cols:
            prob_col = manual_cols[0]
            latest_by_ticker = latest_by_ticker.sort_values(prob_col, ascending=False)
            prob_source = "Manual Rules"
        else:
            prob_col = None
            prob_source = "None"
    
    # Top picks
    top_picks = latest_by_ticker.head(TOP_N_PICKS)
    
    print(f"\n🎯 TOP {len(top_picks)} PICKS (sorted by {prob_source}):")
    print("-" * 60)
    
    for i, (_, stock) in enumerate(top_picks.iterrows(), 1):
        ticker = stock['Ticker']
        date = stock['Date'].strftime('%Y-%m-%d')
        
        if prob_col and prob_col == 'rf_prob_30d':
            prob_value = f"{stock[prob_col]*100:.1f}%"
        elif prob_col:
            prob_value = f"{stock[prob_col]:.0f}"
        else:
            prob_value = "N/A"
        
        print(f"{i:2d}. {ticker:6s} | Prob: {prob_value:6s} | Date: {date} | Investment: ${INVESTMENT_AMOUNT:,}")



STEP 5: ANALYZE YOUR TICKERS
📅 Latest data date: 2025-09-03 00:00:00
📊 Your tickers in recent data: 0/250
❌ No data found for your tickers in recent period
Your tickers might not be in the processed dataset
Available tickers (sample): AAPL, ADBE, AMAT, AMD, AMZN, APP, AVGO, BA, BAC, BRK-B, C, CAT, COIN, COST, CRM, CRWD, CSCO, CVX, GEV, GOOG


In [23]:
# Step 6: Decision Matrix
print("\n" + "=" * 50)
print("STEP 6: DECISION MATRIX")
print("=" * 50)

decisions = []
total_strong_buy = 0
total_buy = 0

for _, stock in top_picks.iterrows():
    ticker = stock['Ticker']
    
    # Determine probability and action
    if prob_col == 'rf_prob_30d':
        prob = stock[prob_col]
        prob_display = f"{prob*100:.1f}%"
        
        if prob >= 0.8:
            action = "🟢 STRONG BUY"
            total_strong_buy += 1
        elif prob >= 0.7:
            action = "🟡 BUY"
            total_buy += 1
        elif prob >= 0.6:
            action = "🟠 CONSIDER"
        else:
            action = "🔴 WAIT"
    else:
        prob_display = "Manual"
        action = "🟡 BUY" if stock.get(prob_col, 0) > 0 else "🔴 WAIT"
        if action == "🟡 BUY":
            total_buy += 1
    
    decisions.append({
        'Rank': len(decisions) + 1,
        'Ticker': ticker,
        'Signal': prob_display,
        'Action': action,
        'Investment': f"${INVESTMENT_AMOUNT:,}"
    })

# Display decision table
decision_df = pd.DataFrame(decisions)
print(decision_df.to_string(index=False))

# Summary
print(f"\n💰 INVESTMENT SUMMARY:")
print(f"   Strong Buy signals: {total_strong_buy}")
print(f"   Buy signals: {total_buy}")
total_positions = total_strong_buy + total_buy
total_investment = total_positions * INVESTMENT_AMOUNT
print(f"   Total positions: {total_positions}")
print(f"   Total investment: ${total_investment:,}")

# Action plan
print(f"\n📋 ACTION PLAN:")
strong_buys = [d['Ticker'] for d in decisions if 'STRONG' in d['Action']]
buys = [d['Ticker'] for d in decisions if d['Action'] == '🟡 BUY']

if strong_buys:
    print(f"🟢 IMMEDIATE: Buy {', '.join(strong_buys)}")
if buys:
    print(f"🟡 SECONDARY: Consider {', '.join(buys)}")
if not strong_buys and not buys:
    print("🔴 WAIT: No strong signals today")


STEP 6: DECISION MATRIX


NameError: name 'top_picks' is not defined

In [None]:
# Step 7: Save Results
print("\n" + "=" * 50)
print("STEP 7: SAVE RESULTS")
print("=" * 50)

# Create results directory
RESULTS_DIR.mkdir(exist_ok=True)

# Save with timestamp
timestamp = datetime.now().strftime("%Y%m%d_%H%M")
results_file = RESULTS_DIR / f"notebook_decisions_{timestamp}.csv"

# Create tracking dataframe
tracking_df = pd.DataFrame(decisions)
tracking_df['Analysis_Date'] = datetime.now()
tracking_df['Data_Date'] = latest_date
tracking_df['Model_Source'] = prob_source
tracking_df['Ticker_Count'] = len(tickers)

# Save
tracking_df.to_csv(results_file, index=False)
print(f"💾 Results saved: {results_file}")

# Also save just the buy signals for easy reference
buy_signals = tracking_df[tracking_df['Action'].str.contains('BUY')]
if len(buy_signals) > 0:
    buy_file = RESULTS_DIR / f"buy_signals_{timestamp}.csv"
    buy_signals[['Ticker', 'Action', 'Investment']].to_csv(buy_file, index=False)
    print(f"💾 Buy signals saved: {buy_file}")

# Final Summary
print("\n" + "=" * 50)
print("ANALYSIS COMPLETE")
print("=" * 50)

if missing_requirements:
print("❌ Analysis incomplete due to missing requirements")
print("Complete the setup steps above and restart")
elif 'decisions' in locals():
print("✅ Analysis complete!")
print(f"📊 Analyzed {len(top_picks)} stocks from {len(tickers)} tickers")
print(f"💰 Investment recommendations: ${total_investment:,}")
print(f"📅 Based on data through: {latest_date}")

print(f"\n🔄 To refresh analysis:")
print("1. Update data: python run_data_extraction.py")
print("2. Retrain model (optional): python run_model_training.py")
print("3. Re-run this notebook")
else:
print("⚠️ Analysis incomplete - check errors above")

print(f"\n📝 Remember:")
print("• Set stop losses at -15% to -20%")
print("• Monitor positions daily")
print("• Diversify - don't put more than 5-10% in any single position")
print("• Past performance doesn't guarantee future results")