# AI Stock Investment Tool - Colab Training

Train and compare all model types (including Hybrid Multi-Modal) on GPU, then serve the API for your frontend.

**Before starting:**
1. Go to **Runtime > Change runtime type > T4 GPU**
2. Run cells **in order** from top to bottom
3. If the repo is private, you'll need a [GitHub Personal Access Token](https://github.com/settings/tokens) with `repo` scope

**Sections:**
- 1-2: Setup & environment
- 3: Fetch data & build features (50-ticker universe)
- 4-6: Configure, train, and compare all models (+ dashboard)
- 7: Save results to Drive
- 8: Optuna hyperparameter search (optional)
- 9: Export production artifacts (best model)
- 9b: Prediction visualizations & model scorecard
- 10: Serve API via ngrok (connect to frontend)
- 11: Download artifacts to local machine

## 1. Setup

In [None]:
# Install the third-party packages imported by the cells below (-q = quiet output).
!pip install -q yfinance lightgbm torch optuna pyarrow scikit-learn scipy pandas numpy matplotlib feedparser pyngrok

In [None]:
import os
os.chdir("/content")

# Clean previous clone if any
!rm -rf AI-stock-investment-tool

# Try public clone first, fall back to token auth
REPO = "https://github.com/kevin6598/AI-stock-investment-tool.git"
ret = os.system("git clone %s 2>/dev/null" % REPO)

if ret != 0:
    from getpass import getpass
    print("Public clone failed -- repo is private.")
    print("Create a token at: https://github.com/settings/tokens (repo scope)")
    token = getpass("Paste your GitHub token: ")
    os.system("git clone https://%s@github.com/kevin6598/AI-stock-investment-tool.git" % token)
    del token

os.chdir("/content/AI-stock-investment-tool")
print("Working dir: %s" % os.getcwd())
!git log --oneline -3

In [None]:
import torch, sys
# Report the interpreter / PyTorch versions and whether a CUDA device is visible.
cuda_ok = torch.cuda.is_available()
for label, value in (
    ("Python", sys.version),
    ("PyTorch", torch.__version__),
    ("CUDA available", cuda_ok),
):
    print(f"{label}: {value}")

# Either name the GPU or tell the user how to enable one.
if not torch.cuda.is_available():
    print("WARNING: No GPU detected. Go to Runtime > Change runtime type > T4 GPU")
else:
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 2. Mount Google Drive & Load Data

In [None]:
# Mount Google Drive at /content/drive; the path configuration in the next cell
# places the dataset and all artifacts under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os

# Configure paths -- everything lives under DRIVE_DIR so it persists on
# Google Drive across Colab sessions.
DRIVE_DIR = "/content/drive/MyDrive/ai_stock_tool"
DATA_PATH = os.path.join(DRIVE_DIR, "dataset.parquet")
OUTPUT_DIR = os.path.join(DRIVE_DIR, "models_registry")
ARTIFACT_DIR = os.path.join(DRIVE_DIR, "artifacts")

# Create the root first, then the two sub-directories.
for _dir in (DRIVE_DIR, OUTPUT_DIR, ARTIFACT_DIR):
    os.makedirs(_dir, exist_ok=True)

# Echo the resolved locations.
for _fmt, _path in (
    ("Drive dir:    %s", DRIVE_DIR),
    ("Data path:    %s", DATA_PATH),
    ("Output dir:   %s", OUTPUT_DIR),
    ("Artifact dir: %s", ARTIFACT_DIR),
):
    print(_fmt % _path)

## 3. Fetch Data & Build Features

Fetches 50-ticker universe with ticker embeddings (needed for Hybrid Multi-Modal).
If you already built the dataset, skip Option A and use Option B to load from Drive.

In [None]:
# === Option A: Build dataset from scratch (50-ticker universe) ===

TICKERS = [
    "AAPL", "MSFT", "GOOGL", "AMZN", "META", "NVDA", "TSLA", "BRK-B", "JPM", "JNJ",
    "V", "PG", "UNH", "HD", "MA", "DIS", "PYPL", "BAC", "NFLX", "ADBE",
    "CRM", "CMCSA", "XOM", "VZ", "KO", "INTC", "PEP", "ABT", "CSCO", "TMO",
    "COST", "MRK", "WMT", "AVGO", "ACN", "CVX", "NKE", "LLY", "MCD", "TXN",
    "QCOM", "DHR", "UPS", "BMY", "PM", "LIN", "NEE", "ORCL", "RTX", "HON",
]
PERIOD = "5y"
FORWARD_HORIZONS = [21, 63, 126]  # 1M, 3M, 6M
MIN_ROWS = 300  # tickers with this many rows or fewer are skipped

from data.stock_api import get_historical_data, get_stock_info
from training.feature_engineering import (
    build_panel_dataset, cross_sectional_normalize, add_ticker_embedding_column,
)

print("Fetching stock data for %d tickers..." % len(TICKERS))
stock_dfs = {}
stock_infos = {}
for ticker in TICKERS:
    # Fetch BOTH the price history and the info before recording either, so a
    # failure in get_stock_info cannot leave a ticker in stock_dfs that is
    # missing from stock_infos.
    try:
        df = get_historical_data(ticker, period=PERIOD)
        if df.empty or len(df) <= MIN_ROWS:
            print("  SKIP: %s has %d rows (need > %d)" % (ticker, len(df), MIN_ROWS))
            continue
        info = get_stock_info(ticker) or {}
    except Exception as e:
        # Best-effort download: one bad ticker must not abort the whole universe.
        print("  WARN: %s failed: %s" % (ticker, e))
        continue
    stock_dfs[ticker] = df
    stock_infos[ticker] = info
    print("  %s: %d rows" % (ticker, len(df)))

market_df = get_historical_data("SPY", period=PERIOD)
if market_df is None or market_df.empty:
    # The market frame is passed to build_panel_dataset; stop before building
    # features from an empty one.
    raise RuntimeError("Could not fetch SPY market data for period %s" % PERIOD)
print("  SPY (market): %d rows" % len(market_df))

valid_tickers = sorted(stock_dfs.keys())
print("\nValid tickers: %d / %d" % (len(valid_tickers), len(TICKERS)))
if not valid_tickers:
    raise RuntimeError("No ticker returned enough history; cannot build the panel")

print("Building features...")
panel = build_panel_dataset(stock_dfs, stock_infos, market_df, FORWARD_HORIZONS)
panel = cross_sectional_normalize(panel)
panel, ticker_to_id = add_ticker_embedding_column(panel, valid_tickers)
print("Panel shape: %s" % str(panel.shape))

# Save to Drive
panel.to_parquet(DATA_PATH)
print("\nSaved to %s" % DATA_PATH)

In [None]:
# === Option B: Load existing dataset from Drive ===

import pandas as pd

# Reload the panel written by Option A; the ticker list is taken from index
# level 1 and the date range from index level 0.
panel = pd.read_parquet(DATA_PATH)
_ticker_level = panel.index.get_level_values(1)
valid_tickers = _ticker_level.unique().tolist()

print("Loaded panel: %s" % str(panel.shape))
print("Tickers (%d): %s" % (len(valid_tickers), valid_tickers))

_first_date = panel.index.get_level_values(0).min()
_last_date = panel.index.get_level_values(0).max()
print("Date range: %s to %s" % (_first_date, _last_date))

In [None]:
# --- Dual Sentiment Engine Demo ---
# Shows both FinBERT and Sentence Embedding outputs + IC validation

from training.feature_engineering import validate_sentiment_ic
import pandas as pd

# Collect the panel's NLP feature columns (prefix "nlp_") and list them.
nlp_cols = [c for c in panel.columns if c.startswith("nlp_")]
print("NLP sentiment features (%d):" % len(nlp_cols))
for c in nlp_cols:
    print("  %s" % c)

# Validate IC against 1M forward returns
TARGET_COL_IC = "fwd_return_21d"
_can_validate = TARGET_COL_IC in panel.columns and bool(nlp_cols)

if not _can_validate:
    print("Skipping IC validation (target or NLP cols not available)")
else:
    # Only rows with a known target; cap at the first 10000 rows.
    sample_panel = panel.dropna(subset=[TARGET_COL_IC])
    _ic_features = sample_panel[nlp_cols].head(10000)
    _ic_target = sample_panel[TARGET_COL_IC].head(10000)
    validated_panel, ic_report = validate_sentiment_ic(
        _ic_features,
        _ic_target,
        ic_threshold=0.01,
    )

    print("\nPer-feature IC values:")
    print("-" * 50)
    # Strongest |IC| first.
    _ranked = sorted(ic_report.items(), key=lambda x: abs(x[1]), reverse=True)
    for feat, ic_val in _ranked:
        if abs(ic_val) >= 0.01:
            status = "PASS"
        else:
            status = "FAIL"
        print("  %s: IC=%.4f [%s]" % (feat.ljust(30), ic_val, status))

    n_pass = len([v for v in ic_report.values() if abs(v) >= 0.01])
    print("\nSummary: %d/%d features passed IC threshold" % (n_pass, len(ic_report)))

## 3b. Dual Sentiment Engine & IC Validation

Run the dual sentiment engine (FinBERT + Sentence Embedding) on a sample ticker
and validate per-feature IC against forward returns.

## 4. Configure Models

In [None]:
from training.model_config import ModelConfig, ConfigGrid

# All 5 model types in one comparison (LightGBM is run with two settings).
_hybrid_params = {
    "n_tickers": len(valid_tickers),
    "hidden_dim": 128,
    "fusion_dim": 128,
    "vae_latent_dim": 16,
    "patience": 10,
}

configs = [ModelConfig(model_type="elastic_net", learning_rate=0.1, epochs=1)]

# Two LightGBM variants: (learning rate, num_leaves, max_depth).
for _lr, _leaves, _depth in ((0.05, 31, 6), (0.01, 63, 8)):
    configs.append(
        ModelConfig(model_type="lightgbm", learning_rate=_lr, epochs=500,
                    extra_params={"num_leaves": _leaves, "max_depth": _depth})
    )

# The two sequence models share epochs, dropout and window length.
for _seq_type, _lr in (("lstm_attention", 1e-3), ("transformer", 3e-4)):
    configs.append(
        ModelConfig(model_type=_seq_type, learning_rate=_lr, epochs=100,
                    dropout=0.2, sequence_length=60)
    )

configs.append(
    ModelConfig(model_type="hybrid_multimodal", learning_rate=1e-3, epochs=50,
                dropout=0.2, batch_size=64,
                extra_params=_hybrid_params)
)

print("Total configs to train: %d" % len(configs))
for i, c in enumerate(configs):
    _summary = (i, c.model_type, c.learning_rate, c.dropout, c.epochs)
    print("  [%d] %s | lr=%s | dropout=%s | epochs=%s" % _summary)

## 5. Train with Walk-Forward Validation

In [None]:
from training.model_config import MultiConfigRunner
from training.model_selection import WalkForwardConfig

# Walk-forward settings. TARGET_COL must correspond to HORIZON:
# 21d=1M, 63d=3M, 126d=6M.
HORIZON = "1M"  # Change to "3M" or "6M" as needed
TARGET_COL = "fwd_return_21d"  # Must match horizon: 21d=1M, 63d=3M, 126d=6M

_wf_settings = dict(
    train_start="2015-01-01",
    test_end="2025-01-01",
    train_min_months=36,
    val_months=6,
    test_months=6,
    step_months=6,
    embargo_days=21,
    expanding=True,
)
wf_config = WalkForwardConfig(**_wf_settings)

# Feature columns: everything except target-like columns (by prefix) and the
# bookkeeping columns _close / ticker_id.
_TARGET_PREFIXES = ("fwd_return_", "residual_return_", "ranked_target_")
_NON_FEATURE_COLS = ("_close", "ticker_id")
feature_cols = [
    c for c in panel.columns
    if not (c.startswith(_TARGET_PREFIXES) or c in _NON_FEATURE_COLS)
]

for _fmt, _value in (
    ("Features: %d", len(feature_cols)),
    ("Target: %s", TARGET_COL),
    ("Horizon: %s", HORIZON),
):
    print(_fmt % _value)

In [None]:
# Optional: Prune low-importance features to speed up training
from training.feature_engineering import prune_features

# Feature selection runs on the first 5000 panel rows only.
sample = panel.head(5000)  # Use a sample for feature selection
_prune_inputs = sample[feature_cols]
_prune_target = sample[TARGET_COL]

# Only the second return value (the retained column names) is used here.
_, selected_cols = prune_features(
    _prune_inputs, _prune_target, importance_threshold=0.005
)
print(f"Pruned: {len(feature_cols)} -> {len(selected_cols)} features")

# Uncomment to use pruned features:
# feature_cols = selected_cols

In [None]:
import time

# Train every config with walk-forward validation and time the whole run.
runner = MultiConfigRunner(save_to_registry=False)
_rule = "=" * 60

print("Starting training...")
print(_rule)

_run_kwargs = dict(
    configs=configs,
    panel=panel,
    target_col=TARGET_COL,
    feature_cols=feature_cols,
    wf_config=wf_config,
    horizon=HORIZON,
)
t0 = time.time()
results = runner.run(**_run_kwargs)
total_time = time.time() - t0

print(_rule)
print(f"Training complete in {total_time:.1f}s")
print(f"Configs evaluated: {len(results)}")

## 6. Compare Results

In [None]:
import pandas as pd

def _comparison_row(run):
    """Flatten one training result into a rounded row for the comparison table."""
    summary = run.evaluation
    cfg = run.config
    return {
        "Model": cfg.model_type,
        "LR": cfg.learning_rate,
        "Dropout": cfg.dropout,
        "IC": round(summary.mean_ic, 4),
        "ICIR": round(summary.icir, 2),
        "Sharpe": round(summary.mean_sharpe, 2),
        "Max DD": round(summary.mean_mdd, 4),
        "Calmar": round(summary.mean_calmar, 2),
        "Hit Ratio": round(summary.mean_hit_ratio, 4),
        "Folds": len(summary.fold_results),
        "Time (s)": round(run.training_time, 1),
    }


# Build results table: one row per trained config, best IC on top.
rows = [_comparison_row(r) for r in results]
df_results = (
    pd.DataFrame(rows)
    .sort_values("IC", ascending=False)
    .reset_index(drop=True)
)

print("\nModel Comparison (sorted by IC):")
print("=" * 80)
display(df_results)

In [None]:
# Statistical comparison
from training.model_comparison import ModelComparisonEngine

# Run the comparison engine over every model's evaluation and print its report.
evaluations = [run.evaluation for run in results]
engine = ModelComparisonEngine()
report = engine.compare(evaluations)

print("Rankings by IC:")
for name, val in report.rankings.get("ic", []):
    print(f"  {name}: {val:.4f}")

print("\nStability scores:")
for name, score in report.stability_scores.items():
    print(f"  {name}: {score:.4f}")

print(f"\nBest per horizon: {report.best_per_horizon}")

# Pairwise significance tests are printed only when the report has any.
if report.significance_tests:
    print("\nSignificance tests:")
    for key, test in report.significance_tests.items():
        if test["ttest_significant_5pct"]:
            sig = "YES"
        else:
            sig = "no"
        print(f"  {key}: p={test['ttest_p_value']:.4f} (significant: {sig})")

In [None]:
# Visualize results
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# Same y-axis label on all three panels: "<model> (lr=<learning rate>)".
_bar_labels = df_results["Model"] + " (lr=" + df_results["LR"].astype(str) + ")"

# (results column, x-axis label, panel title) -- IC, Sharpe, training time.
_bar_panels = (
    ("IC", "Mean IC", "Information Coefficient"),
    ("Sharpe", "Mean Sharpe", "Sharpe Ratio"),
    ("Time (s)", "Seconds", "Training Time"),
)
for _ax, (_column, _xlabel, _title) in zip(axes, _bar_panels):
    _ax.barh(_bar_labels, df_results[_column])
    _ax.set_xlabel(_xlabel)
    _ax.set_title(_title)

plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "comparison.png"), dpi=150, bbox_inches="tight")
plt.show()

In [None]:
# === Model Comparison Dashboard ===
# 4-panel view: radar chart, grouped bars, color table, winner summary

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib.patches import FancyBboxPatch
import numpy as np

# --- Prepare data ---
# One label per df_results row, e.g. "lightgbm (lr=0.05)".
model_labels = [
    "%s (lr=%s)" % (row["Model"], row["LR"])
    for _, row in df_results.iterrows()
]

metric_names = ["IC", "ICIR", "Sharpe", "Hit Ratio", "Calmar"]
raw_values = df_results[metric_names].values.astype(float)

# Normalize each metric to 0-1 for radar chart
mins = raw_values.min(axis=0)
maxs = raw_values.max(axis=0)
ranges = maxs - mins
ranges[ranges == 0] = 1.0  # constant column: avoid division by zero
norm_values = (raw_values - mins) / ranges

n_models = len(model_labels)
n_metrics = len(metric_names)
# plt.cm.get_cmap was removed in Matplotlib 3.9; pyplot.get_cmap is still available.
cmap = plt.get_cmap("tab10")
# Wrap around the palette so more than cmap.N models still get distinct-ish
# colours instead of all clipping to the last entry.
colors = [cmap(i % cmap.N) for i in range(n_models)]

fig = plt.figure(figsize=(18, 14))
gs = gridspec.GridSpec(2, 2, hspace=0.35, wspace=0.3)

# --- Panel 1: Radar chart ---
# One spoke per metric; the first angle is repeated to close each polygon.
ax_radar = fig.add_subplot(gs[0, 0], polar=True)
angles = np.linspace(0, 2 * np.pi, n_metrics, endpoint=False).tolist()
angles.append(angles[0])  # close the polygon

for _color, _label, _profile in zip(colors, model_labels, norm_values):
    vals = _profile.tolist()
    vals.append(vals[0])
    ax_radar.plot(angles, vals, "o-", color=_color, linewidth=2, label=_label)
    ax_radar.fill(angles, vals, alpha=0.08, color=_color)

ax_radar.set_xticks(angles[:-1])
ax_radar.set_xticklabels(metric_names, fontsize=10)
ax_radar.set_ylim(0, 1.1)
ax_radar.set_title("Normalized Metric Profiles", fontsize=13, fontweight="bold", pad=20)
ax_radar.legend(loc="upper right", bbox_to_anchor=(1.35, 1.15), fontsize=8)

# --- Panel 2: Grouped bar chart ---
# One group per metric, one bar per model inside each group.
ax_bar = fig.add_subplot(gs[0, 1])
bar_metrics = ["IC", "ICIR", "Sharpe"]
x = np.arange(len(bar_metrics))
width = 0.8 / n_models

for i, (_color, _label) in enumerate(zip(colors, model_labels)):
    # Centre the n_models bars around each group position.
    offsets = x + (i - n_models / 2.0 + 0.5) * width
    vals = [df_results.iloc[i][m] for m in bar_metrics]
    ax_bar.bar(offsets, vals, width, color=_color, label=_label, edgecolor="white")

ax_bar.set_xticks(x)
ax_bar.set_xticklabels(bar_metrics, fontsize=11)
ax_bar.set_title("Key Metrics Comparison", fontsize=13, fontweight="bold")
ax_bar.legend(fontsize=7, loc="upper right")
ax_bar.axhline(y=0, color="gray", linewidth=0.5, linestyle="--")
ax_bar.grid(axis="y", alpha=0.3)

# --- Panel 3: Color-coded metrics table ---
ax_table = fig.add_subplot(gs[1, 0])
ax_table.axis("off")

table_metrics = ["IC", "ICIR", "Sharpe", "Hit Ratio", "Calmar", "Max DD", "Time (s)"]
_four_decimals = ("IC", "Hit Ratio", "Max DD")
_lower_is_better = ("Max DD", "Time (s)")
_GREEN, _RED, _WHITE = "#c6efce", "#ffc7ce", "#ffffff"

cell_text = []
cell_colors = []

for i in range(n_models):
    row_text = []
    row_colors = []
    for m in table_metrics:
        val = df_results.iloc[i][m]
        if m in _four_decimals:
            row_text.append("%.4f" % val)
        else:
            row_text.append("%.2f" % val)

        # Color: green for best, red for worst per column. For Max DD and
        # Time (s) the column minimum is treated as best.
        col_vals = df_results[m].values
        lo, hi = col_vals.min(), col_vals.max()
        if m in _lower_is_better:
            is_best, is_worst = (val == lo), (val == hi)
        else:
            is_best, is_worst = (val == hi), (val == lo)

        # With a single model there is nothing to compare, so leave it white.
        if n_models != 1 and is_best:
            row_colors.append(_GREEN)
        elif n_models != 1 and is_worst:
            row_colors.append(_RED)
        else:
            row_colors.append(_WHITE)
    cell_text.append(row_text)
    cell_colors.append(row_colors)

tbl = ax_table.table(
    cellText=cell_text,
    rowLabels=[lbl[:25] for lbl in model_labels],
    colLabels=table_metrics,
    cellColours=cell_colors,
    loc="center",
    cellLoc="center",
)
tbl.auto_set_font_size(False)
tbl.set_fontsize(9)
tbl.scale(1.0, 1.4)
ax_table.set_title("Metrics Table (green=best, red=worst)", fontsize=13, fontweight="bold", pad=15)

# --- Panel 4: Winner summary ---
ax_winner = fig.add_subplot(gs[1, 1])
ax_winner.axis("off")

# Composite score: IC + 0.5*ICIR + 0.3*Sharpe + 0.2*HitRatio + 0.1*Calmar,
# computed on the 0-1 normalized columns (weights follow metric_names order).
_weights = (1.0, 0.5, 0.3, 0.2, 0.1)
composite = norm_values[:, 0] * _weights[0]
for _col in range(1, len(_weights)):
    composite = composite + norm_values[:, _col] * _weights[_col]
overall_idx = int(np.argmax(composite))

lines = [
    "OVERALL WINNER",
    "  %s" % model_labels[overall_idx],
    "  Composite score: %.3f / %.3f" % (composite[overall_idx], 2.1),
    "",
]

# Category winners
for j, m in enumerate(metric_names):
    _pick = np.argmin if m == "Max DD" else np.argmax
    best_idx = int(_pick(raw_values[:, j]))
    lines.append("Best %s: %s (%.4f)" % (m, model_labels[best_idx], raw_values[best_idx, j]))

_times = df_results["Time (s)"]
lines.append("")
lines.append("Fastest: %s (%.1fs)" % (
    model_labels[int(_times.values.argmin())],
    _times.min(),
))

text = "\n".join(lines)
ax_winner.text(
    0.05, 0.95, text, transform=ax_winner.transAxes,
    fontsize=11, verticalalignment="top", fontfamily="monospace",
    bbox=dict(boxstyle="round,pad=0.5", facecolor="#f0f0f0", edgecolor="#cccccc"),
)
ax_winner.set_title("Winner Summary", fontsize=13, fontweight="bold", pad=15)

fig.suptitle("Model Comparison Dashboard", fontsize=16, fontweight="bold", y=1.01)
plt.savefig(os.path.join(OUTPUT_DIR, "dashboard.png"), dpi=150, bbox_inches="tight")
plt.show()
print("Dashboard saved to %s/dashboard.png" % OUTPUT_DIR)

## 7. Save Results to Drive

In [None]:
import json

def _write_json(path, payload):
    """Write *payload* to *path* as indented JSON."""
    with open(path, "w") as fh:
        json.dump(payload, fh, indent=2)


# Save all results: one folder per run (config.json + metrics.json), plus a
# combined results.json at the top of OUTPUT_DIR.
results_data = []
for i, r in enumerate(results):
    model_dir = os.path.join(OUTPUT_DIR, f"{r.config.model_type}_v{i}")
    os.makedirs(model_dir, exist_ok=True)

    _config_json = r.config.to_json()
    _evaluation = r.evaluation
    metrics = {
        "mean_ic": _evaluation.mean_ic,
        "icir": _evaluation.icir,
        "mean_sharpe": _evaluation.mean_sharpe,
        "mean_mdd": _evaluation.mean_mdd,
        "mean_calmar": _evaluation.mean_calmar,
        "mean_hit_ratio": _evaluation.mean_hit_ratio,
        "n_folds": len(_evaluation.fold_results),
    }

    _write_json(os.path.join(model_dir, "config.json"), _config_json)
    _write_json(os.path.join(model_dir, "metrics.json"), metrics)

    results_data.append({
        "config": _config_json,
        "training_time": r.training_time,
        "best_params": r.best_params,
        "metrics": metrics,
    })

    print(f"Saved: {model_dir}")

# Save combined results JSON (for local import)
results_path = os.path.join(OUTPUT_DIR, "results.json")
_write_json(results_path, results_data)

print(f"\nAll results saved to {OUTPUT_DIR}")
print(f"Import locally with: DataExporter.import_results('{results_path}')")

## 8. (Optional) Hyperparameter Search with Optuna

In [None]:
# Run Optuna HP search for the best model type
from training.hyperparameter_search import HyperparameterSearcher

def _mean_ic_or_worst(run):
    """Sort key: the run's walk-forward mean IC, with NaN ranked below everything."""
    ic = run.evaluation.mean_ic
    return ic if ic == ic else float("-inf")  # NaN != NaN


# Pick the model type with the highest walk-forward IC explicitly. This notebook
# only sorts the display table (df_results), not `results`, so results[0] is not
# guaranteed to be the best run. If the runner already returns IC-sorted results,
# max() selects the same first element.
best_model_type = max(results, key=_mean_ic_or_worst).config.model_type
print("Running Optuna HP search for: %s" % best_model_type)

searcher = HyperparameterSearcher(
    model_type=best_model_type,
    panel=panel,
    target_col=TARGET_COL,
    feature_cols=feature_cols,
    outer_config=wf_config,
    n_trials=20,
    inner_folds_count=3,
)

search_results = searcher.search()

print("\nBest params per fold:")
for entry in search_results.get("best_params_per_fold", []):
    print("  Fold %s (inner IC=%.4f): %s" % (entry["fold"], entry["inner_ic"], entry["params"]))

# The aggregated evaluation is optional in the returned mapping.
eval_result = search_results.get("evaluation")
if eval_result:
    print("\nEvaluation after HP search:")
    print("  IC: %.4f" % eval_result.mean_ic)
    print("  ICIR: %.2f" % eval_result.icir)
    print("  Sharpe: %.2f" % eval_result.mean_sharpe)

## 9. Export Production Artifacts

Export the best model from section 6 as production artifacts for the FastAPI backend.

In [None]:
import json
import pickle
import numpy as np
from datetime import datetime
from training.models import create_model
from training.model_selection import compute_prediction_metrics

# Use the best model from training results (highest walk-forward mean IC).
# This notebook only sorts the display table (df_results), not `results`, so
# select explicitly instead of assuming results[0]; if the runner already
# returns IC-sorted results, max() picks the same first element.
best_result = max(
    results,
    key=lambda r: r.evaluation.mean_ic if np.isfinite(r.evaluation.mean_ic) else -np.inf,
)
best_config = best_result.config
print("Best model: %s (IC=%.4f, Sharpe=%.2f)" % (
    best_config.model_type, best_result.evaluation.mean_ic, best_result.evaluation.mean_sharpe))

# Retrain best model on full data for production
print("\nRetraining %s on full dataset for production..." % best_config.model_type)

# Rows without a finite target are dropped rather than zero-filled: a fabricated
# 0.0 return would be learned as a real label and would also be scored as a real
# outcome in the held-out IC / hit-ratio below.
_y_all = panel[TARGET_COL].values.astype(np.float32)
_has_target = np.isfinite(_y_all)
_n_dropped = int((~_has_target).sum())
if _n_dropped:
    print("Dropping %d rows without a finite %s" % (_n_dropped, TARGET_COL))

X = panel[feature_cols].values.astype(np.float32)[_has_target]
y = _y_all[_has_target]
# Missing / infinite FEATURE values are still neutralised to 0.
np.nan_to_num(X, copy=False, nan=0.0, posinf=0.0, neginf=0.0)

# Row-order split: first 85% train, next 10% validation, last 5% test.
# NOTE(review): this is only chronological if the panel rows are sorted by date -- confirm.
split = int(len(X) * 0.85)
val_split = int(len(X) * 0.95)

model = create_model(best_config.model_type, best_config.to_dict())
model.fit(X[:split], y[:split], X[split:val_split], y[split:val_split], feature_names=feature_cols)

# Evaluate on held-out test set (predictions that come back NaN are ignored)
test_preds = model.predict(X[val_split:])
valid_mask = ~np.isnan(test_preds)
test_metrics = compute_prediction_metrics(y[val_split:][valid_mask], test_preds[valid_mask])
print("Production model test IC: %.4f, Hit ratio: %.4f" % (test_metrics.ic, test_metrics.hit_ratio))

In [None]:
# Save all artifacts to Drive and repo
os.makedirs(ARTIFACT_DIR, exist_ok=True)

# 1. Model
model_path = os.path.join(ARTIFACT_DIR, "model.pkl")
with open(model_path, "wb") as f:
    pickle.dump(model, f)
print("Model saved: %s" % model_path)

if hasattr(model, 'net'):
    import torch
    model.net.eval()
    torch.save(model.net.state_dict(), os.path.join(ARTIFACT_DIR, "model.pt"))
    print("State dict saved")

# 2. Feature scaler
if hasattr(model, 'scaler'):
    with open(os.path.join(ARTIFACT_DIR, "feature_scaler.pkl"), "wb") as f:
        pickle.dump(model.scaler, f)
    print("Scaler saved")

# 3. Config
config_data = {
    "model_type": best_config.model_type,
    "horizons": ["1M", "3M", "6M"],
    "horizon_days": [21, 63, 126],
    "n_features": len(feature_cols),
    "n_tickers": len(valid_tickers),
}
config_data.update(best_config.extra_params)
with open(os.path.join(ARTIFACT_DIR, "config.json"), "w") as f:
    json.dump(config_data, f, indent=2)

# 4. Feature columns
with open(os.path.join(ARTIFACT_DIR, "feature_columns.json"), "w") as f:
    json.dump(feature_cols, f)

# 5. Ticker list
with open(os.path.join(ARTIFACT_DIR, "ticker_list.json"), "w") as f:
    json.dump(valid_tickers, f)

# 6. Training metadata
metadata = {
    "version": "%s_v%s" % (best_config.model_type, datetime.now().strftime("%Y%m%d_%H%M%S")),
    "trained_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    "model_type": best_config.model_type,
    "n_tickers": len(valid_tickers),
    "n_features": len(feature_cols),
    "n_samples": len(X),
    "train_size": split,
    "test_ic": float(test_metrics.ic),
    "test_hit_ratio": float(test_metrics.hit_ratio),
    "walkforward_ic": float(best_result.evaluation.mean_ic),
    "walkforward_sharpe": float(best_result.evaluation.mean_sharpe),
    "tickers": valid_tickers,
}
with open(os.path.join(ARTIFACT_DIR, "training_metadata.json"), "w") as f:
    json.dump(metadata, f, indent=2)

print("\nAll artifacts exported to: %s" % ARTIFACT_DIR)
print("Version: %s" % metadata["version"])

# Copy to repo for API serving
LOCAL_ARTIFACTS = "/content/AI-stock-investment-tool/artifacts"
os.makedirs(LOCAL_ARTIFACTS, exist_ok=True)
!cp -r {ARTIFACT_DIR}/* {LOCAL_ARTIFACTS}/
print("Copied to repo artifacts/ for API serving")

## 9a. Top 10 Stock Picks

Run the Top 10 engine on the trained model to generate market-specific picks.

In [None]:
# --- Top 10 Engine Demo ---
# Uses the trained production model to generate Top 10 picks

from engine.top10 import Top10Engine
from training.feature_engineering import build_feature_matrix
from data.stock_api import get_historical_data, get_stock_info
import numpy as np

# Build a predict function using the trained model
# SPY history is fetched once here and reused by every colab_predict call below.
_market_df = get_historical_data("SPY", period="2y")

def colab_predict(ticker, horizon="1M"):
    """Prediction wrapper for Colab Top 10 demo.

    Fetches two years of history for *ticker*, builds its feature matrix and
    scores the most recent row with the trained production ``model``.

    Args:
        ticker: Symbol to score.
        horizon: Accepted for interface compatibility with the engine; the
            features are always built for the 21-day horizon here.

    Returns:
        A dict with the point estimate and derived fields. Only
        ``point_estimate``, ``probability_up``/``p_up``, ``direction`` and
        ``quantiles`` depend on the model; ``confidence``, ``risk_score``,
        ``meta_trade_probability`` and ``uncertainty`` are fixed placeholders.

    Raises:
        ValueError: If no price data or no features are available for *ticker*.
    """
    stock_df = get_historical_data(ticker, period="2y")
    if stock_df.empty:
        raise ValueError("No data for %s" % ticker)
    info = get_stock_info(ticker) or {}
    feat = build_feature_matrix(stock_df, info, _market_df, [21], ticker=ticker)
    if feat.empty:
        raise ValueError("No features for %s" % ticker)

    # Score with exactly the columns, in exactly the order, the model was
    # trained on (feature_cols). Re-deriving a column list from this frame can
    # yield a different set or order than training used. Columns absent from
    # this single-ticker frame become NaN and are zero-filled below.
    X_pred = feat.reindex(columns=feature_cols).values[-1:].astype(np.float32)
    # Same NaN/inf handling as the training matrix.
    np.nan_to_num(X_pred, copy=False, nan=0.0, posinf=0.0, neginf=0.0)
    point = float(model.predict(X_pred)[0])

    # Heuristic mapping of the point estimate to a probability, clamped to
    # [0, 1]; both probability keys carry the same clamped value.
    p_up = max(0.0, min(1.0, 0.5 + point * 5))
    return {
        "ticker": ticker,
        "point_estimate": point,
        "probability_up": p_up,
        "p_up": p_up,
        "confidence": 0.5,
        "risk_score": 0.4,
        "direction": "UP" if point > 0 else "DOWN",
        "meta_trade_probability": 0.5,
        "uncertainty": 0.3,
        "quantiles": {"q10": point - 0.05, "p10": point - 0.05},
    }

# Wire the prediction wrapper into the Top 10 engine, tagged with the exported
# model version.
engine = Top10Engine(
    predict_fn=colab_predict,
    model_version=metadata.get("version", "unknown"),
)

# Generate Top 10 for US market
print("Generating Top 10 picks for US market (1M horizon)...")
result = engine.select(market="US", horizon="1M")

if not result.stocks:
    print("No stocks passed the filter criteria.")
else:
    _heavy_rule = "=" * 80
    _light_rule = "-" * 80
    _header = ("Rank", "Ticker", "Dir", "Score", "P(Up)", "Return", "Conf", "Weight")

    print("\nTop 10 US Picks:")
    print(_heavy_rule)
    print("%-4s %-8s %-5s %7s %8s %8s %7s %8s" % _header)
    print(_light_rule)
    for s in result.stocks:
        # Fractions are shown as percentages.
        _row = (
            s.rank, s.ticker, s.direction, s.score,
            s.p_up * 100, s.expected_return * 100,
            s.confidence * 100, s.allocation_weight * 100,
        )
        print("%-4d %-8s %-5s %7.3f %7.1f%% %+7.2f%% %6.1f%% %7.1f%%" % _row)
    print(_light_rule)
    print("Candidates: %d | Pass rate: %.1f%%" % (
        result.total_candidates, result.pass_rate * 100))

## 9b. Prediction Visualizations

Visualize the production model's predictions on a specific ticker.
Change `TICKER` below to analyze a different stock.

In [None]:
# === Prediction Analysis for a Specific Ticker ===
# 4-panel visualization: scatter, cumulative PnL, confidence bands, rolling hit ratio

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
from scipy import stats

TICKER = "AAPL"  # <-- Change this to analyze a different stock

# --- Extract ticker data from panel ---
# Select every date for TICKER from the 2-level index and drop the ticker level.
try:
    ticker_data = panel.loc[(slice(None), TICKER), :].droplevel(1)
except KeyError:
    print("Ticker %s not found in panel. Available: %s" % (TICKER, valid_tickers[:10]))
    raise

# Keep only rows whose forward return is actually known. Zero-filling a missing
# target would plot fabricated 0.0 "actuals" and count them as misses in the
# hit ratio and as flat days in the cumulative PnL.
ticker_y = ticker_data[TARGET_COL].values.astype(np.float32)
_ticker_has_target = np.isfinite(ticker_y)
ticker_X = ticker_data[feature_cols].values.astype(np.float32)[_ticker_has_target]
ticker_y = ticker_y[_ticker_has_target]
# Missing / infinite FEATURE values are still neutralised to 0.
np.nan_to_num(ticker_X, copy=False, nan=0.0, posinf=0.0, neginf=0.0)
dates = ticker_data.index[_ticker_has_target]

# Use only the test portion (last 5% mirrors val_split ratio from cell above)
n_total = len(ticker_X)
t_split = int(n_total * 0.95)
test_X = ticker_X[t_split:]
test_y = ticker_y[t_split:]
test_dates = dates[t_split:]

# Predictions that come back NaN are excluded from every panel below.
preds = model.predict(test_X)
valid = ~np.isnan(preds)
preds_v = preds[valid]
actual_v = test_y[valid]
dates_v = test_dates[valid]

print("Ticker: %s | Test samples: %d | Valid predictions: %d" % (TICKER, len(test_y), int(valid.sum())))

# --- Figure ---
fig = plt.figure(figsize=(18, 14))
gs = gridspec.GridSpec(2, 2, hspace=0.32, wspace=0.28)

# ============================================================
# Panel 1: Predicted vs Actual scatter with regression line
# ============================================================
ax1 = fig.add_subplot(gs[0, 0])

# Points are coloured by absolute prediction error (reversed RdYlGn map).
errors = np.abs(preds_v - actual_v)
scatter = ax1.scatter(actual_v, preds_v, c=errors, cmap="RdYlGn_r", alpha=0.6, s=20, edgecolors="none")
plt.colorbar(scatter, ax=ax1, label="Absolute Error", shrink=0.8)

# Regression line (drawn only when there are more than two valid predictions)
if len(preds_v) > 2:
    slope, intercept, r_val, p_val, _ = stats.linregress(actual_v, preds_v)
    x_line = np.linspace(actual_v.min(), actual_v.max(), 100)
    ax1.plot(x_line, slope * x_line + intercept, "r--", linewidth=2, label="Regression")
    # Perfect prediction line
    ax1.plot(x_line, x_line, "k:", linewidth=1, alpha=0.5, label="Perfect")

    # IC (Spearman)
    ic_val, _ = stats.spearmanr(actual_v, preds_v)
    # Summary box in the top-left corner of the axes.
    ax1.text(0.05, 0.95,
             "IC=%.3f  R2=%.3f\nSlope=%.3f  n=%d" % (ic_val, r_val ** 2, slope, len(preds_v)),
             transform=ax1.transAxes, fontsize=9, verticalalignment="top",
             bbox=dict(boxstyle="round", facecolor="wheat", alpha=0.8))

ax1.set_xlabel("Actual %s" % TARGET_COL, fontsize=10)
ax1.set_ylabel("Predicted", fontsize=10)
ax1.set_title("Predicted vs Actual (%s)" % TICKER, fontsize=13, fontweight="bold")
ax1.legend(fontsize=8)
ax1.grid(alpha=0.3)

# ============================================================
# Panel 2: Cumulative PnL -- model signal vs buy-and-hold
# ============================================================
ax2 = fig.add_subplot(gs[0, 1])

# Signal returns: +1 (long) when the prediction is positive, -1 (short) otherwise.
signal = np.where(preds_v > 0, 1.0, -1.0)
signal_returns = signal * actual_v
cum_signal = np.cumsum(signal_returns)
cum_bh = np.cumsum(actual_v)

ax2.plot(dates_v, cum_signal, color="#2196F3", linewidth=2, label="Model Signal")
ax2.plot(dates_v, cum_bh, color="#9E9E9E", linewidth=1.5, linestyle="--", label="Buy & Hold")

# Green/red fill for excess return (green where the signal is ahead).
excess = cum_signal - cum_bh
for _mask, _fill, _fill_label in (
    (excess >= 0, "#4CAF50", "Excess > 0"),
    (excess < 0, "#F44336", "Excess < 0"),
):
    ax2.fill_between(dates_v, cum_bh, cum_signal,
                     where=_mask, color=_fill, alpha=0.15, label=_fill_label)

# Final cumulative values; 0 when there are no valid predictions.
total_signal = cum_signal[-1] if len(cum_signal) > 0 else 0
total_bh = cum_bh[-1] if len(cum_bh) > 0 else 0
_pnl_summary = "Signal: %.2f%%\nBuy&Hold: %.2f%%\nExcess: %.2f%%" % (
    total_signal * 100, total_bh * 100, (total_signal - total_bh) * 100)
ax2.text(0.05, 0.95, _pnl_summary,
         transform=ax2.transAxes, fontsize=9, verticalalignment="top",
         bbox=dict(boxstyle="round", facecolor="lightyellow", alpha=0.8))

ax2.set_xlabel("Date", fontsize=10)
ax2.set_ylabel("Cumulative Return", fontsize=10)
ax2.set_title("Cumulative PnL (%s)" % TICKER, fontsize=13, fontweight="bold")
ax2.legend(fontsize=8, loc="lower right")
ax2.grid(alpha=0.3)
fig.autofmt_xdate()

# ============================================================
# Panel 3: Confidence bands (quantile predictions)
# ============================================================
ax3 = fig.add_subplot(gs[1, 0])

# Quantile prediction is attempted on the model; any failure (including a model
# without predict_quantiles) falls back to the placeholder text below.
has_quantiles = False
try:
    q_preds = model.predict_quantiles(test_X, [0.10, 0.50, 0.90])
    # Apply the same valid-prediction mask used for preds_v / actual_v.
    q10 = q_preds[0.10][valid]
    q50 = q_preds[0.50][valid]
    q90 = q_preds[0.90][valid]
    has_quantiles = True
except Exception as e:
    print("Quantile prediction not available: %s" % e)

if has_quantiles:
    ax3.fill_between(dates_v, q10, q90, alpha=0.2, color="#2196F3", label="10th-90th percentile")
    ax3.plot(dates_v, q50, color="#2196F3", linewidth=1.5, label="Median prediction")
    ax3.scatter(dates_v, actual_v, color="#F44336", s=12, zorder=5, alpha=0.7, label="Actual")

    # Coverage: what fraction of actuals fall within the band?
    inside = np.sum((actual_v >= q10) & (actual_v <= q90))
    coverage = inside / len(actual_v) * 100 if len(actual_v) > 0 else 0
    band_width = np.mean(q90 - q10)
    ax3.text(0.05, 0.95,
             "Coverage: %.1f%% (target 80%%)\nMean band width: %.4f" % (coverage, band_width),
             transform=ax3.transAxes, fontsize=9, verticalalignment="top",
             bbox=dict(boxstyle="round", facecolor="lightcyan", alpha=0.8))
    ax3.legend(fontsize=8)
else:
    # Placeholder centred in the axes when no quantiles could be produced.
    ax3.text(0.5, 0.5, "Quantile predictions\nnot available for\nthis model type",
             transform=ax3.transAxes, fontsize=14, ha="center", va="center", color="gray")

ax3.set_xlabel("Date", fontsize=10)
ax3.set_ylabel("Return", fontsize=10)
ax3.set_title("Confidence Bands (%s)" % TICKER, fontsize=13, fontweight="bold")
ax3.grid(alpha=0.3)

# ============================================================
# Panel 4: Rolling hit ratio (63-day window)
# ============================================================
ax4 = fig.add_subplot(gs[1, 1])

# Window is a third of the sample, kept between 10 and 63 observations.
window = min(63, max(10, len(preds_v) // 3))
# A "hit" is a prediction whose sign matches the realised return's sign.
hits = (np.sign(preds_v) == np.sign(actual_v)).astype(float)

if len(hits) < window:
    ax4.text(0.5, 0.5, "Not enough data\nfor rolling window\n(%d < %d)" % (len(hits), window),
             transform=ax4.transAxes, fontsize=14, ha="center", va="center", color="gray")
else:
    # Moving average of the hit indicator; the first value lines up with the
    # (window - 1)-th date.
    rolling_hit = np.convolve(hits, np.ones(window) / window, mode="valid")
    roll_dates = dates_v[window - 1:]

    ax4.plot(roll_dates, rolling_hit, color="#2196F3", linewidth=2, label="%d-day rolling" % window)
    ax4.axhline(y=0.5, color="gray", linewidth=1, linestyle="--", label="50% baseline")

    # Green/red fill around 50%
    for _mask, _fill in (
        (rolling_hit >= 0.5, "#4CAF50"),
        (rolling_hit < 0.5, "#F44336"),
    ):
        ax4.fill_between(roll_dates, 0.5, rolling_hit,
                         where=_mask, color=_fill, alpha=0.2)

    avg_hit = np.mean(hits)
    _hit_summary = "Overall hit ratio: %.1f%%\nWindow: %d days" % (avg_hit * 100, window)
    ax4.text(0.05, 0.95, _hit_summary,
             transform=ax4.transAxes, fontsize=9, verticalalignment="top",
             bbox=dict(boxstyle="round", facecolor="lightyellow", alpha=0.8))
    ax4.legend(fontsize=8)

ax4.set_xlabel("Date", fontsize=10)
ax4.set_ylabel("Hit Ratio", fontsize=10)
ax4.set_title("Rolling Hit Ratio (%s)" % TICKER, fontsize=13, fontweight="bold")
ax4.set_ylim(0.2, 0.8)
ax4.grid(alpha=0.3)

# Title, save and show the whole 4-panel figure.
_figure_title = "Prediction Analysis: %s (%s)" % (TICKER, best_config.model_type)
fig.suptitle(_figure_title, fontsize=16, fontweight="bold", y=1.01)
plt.savefig(os.path.join(OUTPUT_DIR, "prediction_%s.png" % TICKER), dpi=150, bbox_inches="tight")
plt.show()
print("Prediction analysis saved to %s/prediction_%s.png" % (OUTPUT_DIR, TICKER))

In [None]:
# === Model Scorecard ===
# Two outputs: a plain-text report on stdout, then a 3x3 grid of PASS/WARN tiles.

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np


def _print_rows(source, rows):
    """Print each (template, attribute) pair, reading the attribute from source on demand."""
    for template, attr in rows:
        print(template % getattr(source, attr))


def _value_format(value):
    """Return a printf format with fewer decimals for larger magnitudes."""
    magnitude = abs(value)
    if magnitude >= 10:
        return "%.1f"
    if magnitude >= 1:
        return "%.2f"
    return "%.4f"


def _draw_tile(ax, name, value, threshold, higher_is_better):
    """Render one scorecard tile: metric name, value, threshold hint and PASS/WARN badge."""
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.axis("off")

    # Threshold comparisons are inclusive in both directions.
    is_good = (value >= threshold) if higher_is_better else (value <= threshold)
    if is_good:
        bg_color, indicator, ind_color = "#c6efce", "PASS", "#006100"
    else:
        bg_color, indicator, ind_color = "#ffc7ce", "WARN", "#9c0006"

    # Background box
    ax.add_patch(plt.Rectangle((0.05, 0.05), 0.9, 0.9, linewidth=2,
                               edgecolor=ind_color, facecolor=bg_color, alpha=0.3))

    # Metric name
    ax.text(0.5, 0.78, name, ha="center", va="center", fontsize=13, fontweight="bold")

    # Value (the same format is reused for the threshold below)
    fmt = _value_format(value)
    ax.text(0.5, 0.48, fmt % value, ha="center", va="center", fontsize=20,
            fontweight="bold", color=ind_color)

    # Threshold reference
    ref_template = "(> %s)" if higher_is_better else "(< %s)"
    ax.text(0.5, 0.25, ref_template % (fmt % threshold),
            ha="center", va="center", fontsize=9, color="gray")

    # Status badge
    badge_box = dict(boxstyle="round,pad=0.2", facecolor=bg_color,
                     edgecolor=ind_color, alpha=0.6)
    ax.text(0.5, 0.1, indicator, ha="center", va="center", fontsize=10,
            fontweight="bold", color=ind_color, bbox=badge_box)


# --- Text Summary ---
ev = best_result.evaluation
_rule = "=" * 60

print(_rule)
print("MODEL SCORECARD")
print(_rule)
print("")
print("Model Configuration:")
_print_rows(best_config, [
    ("  Type:          %s", "model_type"),
    ("  Learning rate: %s", "learning_rate"),
    ("  Dropout:       %s", "dropout"),
    ("  Epochs:        %s", "epochs"),
])
# Model-specific extras, if any were configured.
for _param, _setting in (best_config.extra_params or {}).items():
    print("  %s: %s" % (_param, _setting))

print("")
print("Walk-Forward Validation (%d folds):" % len(ev.fold_results))
_print_rows(ev, [
    ("  Mean IC:       %.4f", "mean_ic"),
    ("  ICIR:          %.2f", "icir"),
    ("  Mean Sharpe:   %.2f", "mean_sharpe"),
    ("  Mean Max DD:   %.4f", "mean_mdd"),
    ("  Mean Calmar:   %.2f", "mean_calmar"),
    ("  Mean Hit Ratio:%.4f", "mean_hit_ratio"),
    ("  Overfit Ratio: %.2f", "overfit_ratio"),
])

print("")
print("Production Test Set:")
_print_rows(test_metrics, [
    ("  Test IC:       %.4f", "ic"),
    ("  Test Hit Ratio:%.4f", "hit_ratio"),
    ("  Test RMSE:     %.6f", "rmse"),
    ("  Test MAE:      %.6f", "mae"),
    ("  Test Samples:  %d", "n_samples"),
])

# Uncertainty estimate
# Best effort: any failure here is reported and the scorecard carries on.
has_uncertainty = False
try:
    sample_X = X[val_split:val_split + 100]
    unc_mean, unc_var = model.predict_with_uncertainty(sample_X, n_mc_passes=20)
    mean_uncertainty = float(np.mean(np.sqrt(unc_var)))
except Exception as e:
    print("")
    print("Uncertainty estimation not available: %s" % e)
else:
    has_uncertainty = True
    print("")
    print("Uncertainty Estimate (MC dropout, 100 samples):")
    print("  Mean StdDev:   %.6f" % mean_uncertainty)

print("")
print(_rule)

# --- Visual Scorecard: 3x3 grid ---
# Each entry: (label, value, threshold, higher_is_better)
scorecard_items = [
    ("IC", ev.mean_ic, 0.03, True),                    # good if > 0.03
    ("ICIR", ev.icir, 0.5, True),                      # good if > 0.5
    ("Sharpe", ev.mean_sharpe, 0.5, True),             # good if > 0.5
    ("Hit Ratio", ev.mean_hit_ratio, 0.52, True),      # good if > 52%
    ("Calmar", ev.mean_calmar, 0.5, True),             # good if > 0.5
    ("Max DD", ev.mean_mdd, 0.15, False),              # good if < 15%
    ("Overfit", ev.overfit_ratio, 3.0, False),         # good if < 3.0
    ("Test IC", test_metrics.ic, 0.02, True),          # good if > 0.02
    ("Test Hit", test_metrics.hit_ratio, 0.50, True),  # good if > 50%
]

fig, axes = plt.subplots(3, 3, figsize=(12, 10))
fig.suptitle("Model Scorecard: %s" % best_config.model_type,
             fontsize=16, fontweight="bold", y=1.02)

# axes.flat walks the grid row by row, matching the order of scorecard_items.
for _tile_ax, _item in zip(axes.flat, scorecard_items):
    _draw_tile(_tile_ax, *_item)

plt.tight_layout()
scorecard_png = os.path.join(OUTPUT_DIR, "scorecard.png")
plt.savefig(scorecard_png, dpi=150, bbox_inches="tight")
plt.show()
print("Scorecard saved to %s/scorecard.png" % OUTPUT_DIR)

## 10. Serve API via ngrok (Connect to Frontend)

Start the FastAPI backend on Colab and expose it via ngrok so your local Next.js frontend can call it.

**Prerequisites:** You need a free [ngrok auth token](https://dashboard.ngrok.com/get-started/your-authtoken). Free tier gives 1 tunnel.

In [None]:
import subprocess
import time
from pyngrok import ngrok

# --- Configure ngrok ---
# Get your free auth token at: https://dashboard.ngrok.com/get-started/your-authtoken
NGROK_AUTH_TOKEN = ""  # <-- Paste your token here

if NGROK_AUTH_TOKEN:
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
else:
    print("WARNING: No ngrok auth token set. Tunnel may not work.")
    print("Get one at: https://dashboard.ngrok.com/get-started/your-authtoken")

# Make sure artifacts are in the repo dir
LOCAL_ARTIFACTS = "/content/AI-stock-investment-tool/artifacts"
assert os.path.exists(os.path.join(LOCAL_ARTIFACTS, "model.pkl")), \
    "No model.pkl found! Run sections 9-10 first to train and export."

# Install uvicorn if not present
!pip install -q uvicorn fastapi pydantic python-multipart

# Start FastAPI in background
os.chdir("/content/AI-stock-investment-tool")
server_proc = subprocess.Popen(
    ["python", "-m", "uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8000"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)
time.sleep(3)

# Check if server started successfully
if server_proc.poll() is not None:
    print("ERROR: Server failed to start!")
    print(server_proc.stderr.read().decode())
else:
    print("FastAPI server started (PID: %d)" % server_proc.pid)

    # Open ngrok tunnel
    public_url = ngrok.connect(8000)
    print("\n" + "=" * 60)
    print("PUBLIC API URL: %s" % public_url)
    print("=" * 60)
    print("\nAPI Docs:  %s/docs" % public_url)
    print("Health:    %s/api/v1/health" % public_url)
    print("Predict:   %s/api/v1/predict" % public_url)
    print("\n--- To connect your local frontend ---")
    print("Option A (PowerShell): set env var before npm run dev:")
    print('  $env:NEXT_PUBLIC_API_URL="%s"' % public_url)
    print("  cd frontend; npm run dev")
    print("\nOption B: Create frontend/.env.local with:")
    print("  NEXT_PUBLIC_API_URL=%s" % public_url)
    print("\nKeep this cell running! The tunnel closes when the runtime stops.")

In [None]:
# Quick test: verify API is responding
# Calls the local health endpoint and prints the returned JSON fields. On
# failure it reports the error and, if the server process has exited, the
# tail of its stderr.
import urllib.request
import json

test_url = "http://localhost:8000/api/v1/health"
try:
    # The context manager closes the HTTP response even if parsing fails.
    with urllib.request.urlopen(test_url, timeout=5) as resp:
        data = json.loads(resp.read().decode())
    print("Health check OK:")
    for k, v in data.items():
        print("  %s: %s" % (k, v))
except Exception as e:
    print("Health check failed: %s" % e)
    print("Check server logs:")
    # Look the handle up defensively: this cell may be run before the server cell.
    _proc = globals().get("server_proc")
    if _proc is None:
        print("  (no server process found -- run the server cell in section 10 first)")
    elif _proc.poll() is not None:
        # Process has exited, so reading its stderr pipe cannot block.
        print(_proc.stderr.read().decode(errors="replace")[-500:])
    else:
        # Reading stderr of a live process would block until it exits.
        print("  (server process is still running; it may still be starting up -- retry shortly)")

## 11. Download Artifacts to Local Machine

If you prefer to run the API locally instead of via ngrok, download the trained artifacts.

In [None]:
# Zip artifacts and download to your local machine
!cd {ARTIFACT_DIR} && zip -r /content/artifacts.zip .

print("Artifact contents:")
!ls -lh {ARTIFACT_DIR}

print("\nTotal zip size:")
!ls -lh /content/artifacts.zip

# Download via browser
from google.colab import files
files.download("/content/artifacts.zip")

print("\nAfter downloading, on your local machine:")
print("  1. Unzip into your project root:")
print("     unzip artifacts.zip -d artifacts/")
print("  2. Start the API:")
print("     python -m api.main")
print("  3. Start the frontend:")
print("     cd frontend && npm run dev")

## 12. Cleanup

Stop the API server and ngrok tunnel when done.

In [None]:
# Stop server and tunnel
# Each step runs independently, so a failure in one (or a cell that was never
# run) cannot prevent the others from being attempted. Failures are reported
# instead of being silently swallowed.
import subprocess

# 1) Disconnect the tunnel opened earlier, if there is one.
_url = globals().get("public_url")
if _url is not None:
    try:
        # `public_url` may hold a tunnel object rather than a string,
        # depending on how it was assigned; disconnect() wants the URL.
        ngrok.disconnect(getattr(_url, "public_url", _url))
    except Exception as exc:
        print("Could not disconnect tunnel: %s" % exc)

# 2) Always try to stop the ngrok process itself.
try:
    ngrok.kill()
    print("ngrok tunnel closed")
except Exception as exc:
    print("Could not stop ngrok: %s" % exc)

# 3) Stop the FastAPI server process.
_proc = globals().get("server_proc")
if _proc is None:
    print("No FastAPI server process to stop")
else:
    try:
        if _proc.poll() is None:
            _proc.terminate()
            try:
                _proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                # Did not exit on terminate(): force it so the port is freed.
                _proc.kill()
                _proc.wait()
        # Release the stderr pipe held open since startup.
        if _proc.stderr is not None:
            _proc.stderr.close()
        print("FastAPI server stopped")
    except Exception as exc:
        print("Could not stop FastAPI server: %s" % exc)