{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "setup_instructions"
   },
   "source": [
    "# 🚀 Trading Bot ML Training - Google Colab Edition
",
    "
",
    "## 📋 SETUP INSTRUCTIONS (REQUIRED):
",
    "
",
    "### 1. Replace Repository URL
",
    "```python
",
    "GITHUB_REPO_URL = "<YOUR_REPO_URL>"  # ← REPLACE THIS!
",
    "```
",
    "
",
    "### 2. Choose Training Mode
",
    "- **`fast_test=True`** (default): Quick test run with synthetic data (5 minutes)
",
    "- **`fast_test=False`**: Full training with real data (30-60 minutes)
",
    "
",
    "### 3. Private Repository?
",
    "If your repo is private, use:
",
    "```python
",
    "# GITHUB_REPO_URL = "https://<TOKEN>@github.com/owner/repo.git"
",
    "```
",
    "Replace `<TOKEN>` with your GitHub personal access token.
",
    "
",
    "### 4. Alternative: Upload ZIP
",
    "Instead of cloning, you can upload your repo as a ZIP file and uncomment the ZIP upload section.
",
    "
",
    "---
",
    "
",
    "## 🎯 What This Notebook Does:
",
    "1. **Clone** your trading bot repository
",
    "2. **Install** all dependencies automatically
",
    "3. **Mount** Google Drive for artifact storage
",
    "4. **Train** LightGBM and XGBoost models using your existing modules
",
    "5. **Save** trained models to repo and Google Drive
",
    "6. **Validate** model artifacts
",
    "7. **Download** results to your local machine
",
    "
",
    "## 📦 Output Artifacts:
",
    "- `models/{symbol}/{timestamp}/{model_id}/` - Model files (pkl, meta.json)
",
    "- `runs/colab-{timestamp}/manifest.json` - Training manifest
",
    "- Google Drive backup (if mounted)
",
    "- ZIP download for local machine
",
    "
",
    "**Ready? Let's start! 👇**"
   ]
  },

# ⚙️ Configuration Section

**IMPORTANT: Modify these variables before running!**

In [None]:
# =============================================================================
# 🔧 USER CONFIGURATION - MODIFY THESE VALUES!
# =============================================================================

# TODO: Replace with your GitHub repository URL
GITHUB_REPO_URL = "<YOUR_REPO_URL>"  # Example: "https://github.com/username/trading-bot.git"

# Training Configuration
SYMBOL = "BTC-USD"
INTERVAL = "1m"

CFG = {
    "fast_test": True,        # Set to False for full training
    "horizon": 5,             # Future periods for prediction
    "pos_thresh": 0.002,      # Positive class threshold (0.2%)
    "n_splits": 2,            # Cross-validation splits (fast_test)
    "seed": 42,               # Random seed
}

# Dataset size is derived from fast_test here and nowhere else, so flipping
# the flag above is the only edit needed to switch between the two sizes.
CFG["n_periods"] = 1000 if CFG["fast_test"] else 5000

# Paths (will be set after repo clone)
REPO_NAME = None  # Will be extracted from GITHUB_REPO_URL
REPO_PATH = None  # Will be set to /content/{REPO_NAME}
MODEL_SAVE_REPO_PATH = None  # Will be set to {REPO_PATH}/models/
MODEL_SAVE_DRIVE_PATH = "/content/drive/MyDrive/models/"

# Status flags
DRIVE_MOUNTED = False
REPO_CLONED = False

print("✅ Configuration loaded")
print(f"🎯 Training Mode: {'Fast Test' if CFG['fast_test'] else 'Full Training'}")
print(f"📊 Symbol: {SYMBOL} | Interval: {INTERVAL}")
print(f"🔢 Dataset Size: {CFG['n_periods']} periods")

# The placeholder is still in place: warn now rather than failing later at clone time.
if GITHUB_REPO_URL == "<YOUR_REPO_URL>":
    print("⚠️  WARNING: Please replace GITHUB_REPO_URL with your actual repository URL!")
    print("   Example: GITHUB_REPO_URL = 'https://github.com/username/trading-bot.git'")

# 📥 Repository Setup

Clone your trading bot repository into `/content/` and add it to `sys.path` so its modules can be imported.

In [None]:
import importlib
import json
import os
import shutil
import subprocess
import sys
from datetime import datetime
from pathlib import Path

def extract_repo_name(url):
    """Extract the repository name from a Git remote URL.

    The name is the last ``/``-separated segment of the URL after removing
    surrounding whitespace, trailing slashes and an optional ``.git`` suffix.

    Args:
        url: Remote URL, e.g. ``https://github.com/owner/repo.git``.

    Returns:
        The repository name, or an empty string when the URL has no usable
        last segment.
    """
    # Trailing slashes would otherwise make the last segment empty, and the
    # caller builds a filesystem path from this value.
    cleaned = url.strip().rstrip('/')
    if cleaned.endswith('.git'):
        cleaned = cleaned[:-4].rstrip('/')
    return cleaned.split('/')[-1]

def _redact_credentials(url):
    """Return `url` with any ``user[:token]@`` part of the host replaced by ``<TOKEN>@``."""
    scheme, sep, rest = url.partition("://")
    if not sep:
        return url
    authority, slash, path = rest.partition("/")
    if "@" not in authority:
        return url
    host = authority.rsplit("@", 1)[1]
    return f"{scheme}://<TOKEN>@{host}{slash}{path}"


def clone_repository(repo_url):
    """Clone `repo_url` into ``/content/<repo name>`` and make it importable.

    Any existing directory at the target path is removed first. On success the
    checkout is prepended to ``sys.path`` and a shallow listing of the
    repository (three levels, at most five files per directory) is printed.

    Side effects on module globals: sets REPO_NAME, REPO_PATH and
    MODEL_SAVE_REPO_PATH once a valid name has been derived, and sets
    REPO_CLONED to True only when the clone succeeded.

    Args:
        repo_url: Git remote URL. May embed a personal access token; the
            token is redacted from everything this function prints.

    Returns:
        bool: True when the repository was cloned, False otherwise (placeholder
        URL, unusable repository name, git failure, or any other exception).
    """
    global REPO_NAME, REPO_PATH, MODEL_SAVE_REPO_PATH, REPO_CLONED

    if repo_url == "<YOUR_REPO_URL>":
        print("❌ ERROR: Please replace GITHUB_REPO_URL with your actual repository URL!")
        return False

    # Cell output is saved with the notebook, so never echo an embedded token.
    safe_url = _redact_credentials(repo_url)

    try:
        print(f"🔄 Cloning repository: {safe_url}")

        # Extract repo name
        repo_name = extract_repo_name(repo_url)
        # An empty or dot name would make the target "/content/" (or its
        # parent), which the rmtree below must never be pointed at.
        if not repo_name or repo_name in (".", ".."):
            print(f"❌ ERROR: Could not determine a repository name from: {safe_url}")
            return False

        REPO_NAME = repo_name
        REPO_PATH = f"/content/{REPO_NAME}"
        MODEL_SAVE_REPO_PATH = f"{REPO_PATH}/models/"

        # The previous checkout (if any) is about to be replaced, so a success
        # flag left over from an earlier run is no longer valid.
        REPO_CLONED = False

        # Remove existing directory if it exists
        if os.path.exists(REPO_PATH):
            print(f"🗑️  Removing existing directory: {REPO_PATH}")
            shutil.rmtree(REPO_PATH)

        # Clone repository
        result = subprocess.run(
            ["git", "clone", repo_url, REPO_PATH],
            capture_output=True,
            text=True,
            cwd="/content"
        )

        if result.returncode != 0:
            print(f"❌ Git clone failed: {result.stderr.replace(repo_url, safe_url)}")
            print("💡 If this is a private repo, make sure you're using a personal access token:")
            print("   https://<TOKEN>@github.com/username/repo.git")
            return False

        # Add to Python path
        if REPO_PATH not in sys.path:
            sys.path.insert(0, REPO_PATH)

        print(f"✅ Repository cloned successfully to: {REPO_PATH}")
        print(f"📁 Python path updated: {REPO_PATH}")

        # Show repository structure
        print("\n📂 Repository structure:")
        for root, dirs, files in os.walk(REPO_PATH):
            # Depth of `root` below the checkout (0 for the checkout itself)
            level = root[len(REPO_PATH):].count(os.sep)
            if level < 3:
                indent = ' ' * 2 * level
                print(f"{indent}{os.path.basename(root)}/")
                subindent = ' ' * 2 * (level + 1)
                for file in files[:5]:  # Show only first 5 files per directory
                    print(f"{subindent}{file}")
                if len(files) > 5:
                    print(f"{subindent}... and {len(files) - 5} more files")
            if level >= 2:
                # Children would be at depth 3+, which is never printed, so
                # stop os.walk from descending (e.g. into .git/objects).
                dirs[:] = []

        REPO_CLONED = True
        return True

    except Exception as e:
        print(f"❌ Error cloning repository: {str(e).replace(repo_url, safe_url)}")
        return False

# Try to clone from the configured URL; the result is kept in `clone_success`
clone_success = clone_repository(GITHUB_REPO_URL)

if not clone_success:
    # Point the user at the ZIP-upload fallback
    zip_fallback_hint = (
        "\n🔄 Alternative: Upload ZIP file",
        "If cloning failed, you can upload your repo as a ZIP file instead.",
        "Uncomment and run the next cell to use ZIP upload.",
    )
    for hint_line in zip_fallback_hint:
        print(hint_line)

In [None]:
# # ALTERNATIVE: Upload ZIP file (uncomment if git clone failed)
# from google.colab import files
# import zipfile

# print("📦 Upload your repository as a ZIP file:")
# uploaded = files.upload()

# if uploaded:
#     zip_name = list(uploaded.keys())[0]
#     print(f"📥 Extracting {zip_name}...")
    
#     with zipfile.ZipFile(zip_name, 'r') as zip_ref:
#         zip_ref.extractall('/content')
    
#     # Find extracted directory
#     for item in os.listdir('/content'):
#         if os.path.isdir(f'/content/{item}') and item != 'sample_data':
#             REPO_NAME = item
#             REPO_PATH = f'/content/{item}'
#             MODEL_SAVE_REPO_PATH = f'{REPO_PATH}/models/'
#             break
    
#     if REPO_PATH and REPO_PATH not in sys.path:
#         sys.path.insert(0, REPO_PATH)
    
#     print(f"✅ ZIP extracted to: {REPO_PATH}")
#     REPO_CLONED = True

# 📦 Install Dependencies

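Install the packages needed for ML training: the repository's `requirements.txt` (if present), then the core ML stack, then optional extras (XGBoost, CatBoost, SHAP). Each required package is import-checked afterwards.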

In [None]:
def _pip_install(pip_args, timeout=None):
    """Run ``pip install <pip_args> --quiet`` with the running interpreter.

    pip is invoked as ``sys.executable -m pip`` so packages are installed into
    the environment of the kernel that will import them, rather than into
    whichever ``pip`` happens to be first on PATH.

    Args:
        pip_args: List of arguments placed after ``pip install``.
        timeout: Seconds to wait before ``subprocess.TimeoutExpired`` is
            raised; None waits indefinitely.

    Returns:
        subprocess.CompletedProcess with stdout/stderr captured as text.
    """
    return subprocess.run(
        [sys.executable, "-m", "pip", "install", *pip_args, "--quiet"],
        capture_output=True,
        text=True,
        timeout=timeout,
    )


def _can_import(module):
    """Return True when `module` can be imported in the current kernel."""
    try:
        importlib.import_module(module)
    except Exception:
        # Not only ImportError: a package with a broken native library can
        # raise e.g. OSError on import, and that is just as unusable.
        return False
    return True


def install_dependencies():
    """Install and verify the packages used for ML training.

    Steps, in order:
      1. Install ``{REPO_PATH}/requirements.txt`` if the file exists (a
         failure is reported but does not stop the function).
      2. Install each core ML package individually (120 s timeout each).
      3. Install each optional package individually (180 s timeout each).
      4. Import-check the required modules and the optional packages and
         print the results.

    Reads the module globals REPO_CLONED and REPO_PATH.

    Returns:
        bool: False when the repository has not been cloned yet or when any
        required module cannot be imported; True otherwise.
    """
    if not REPO_CLONED:
        print("❌ Repository not cloned yet. Please run the clone cell first.")
        return False

    print("📦 Installing dependencies...")

    # Check if requirements.txt exists
    requirements_path = f"{REPO_PATH}/requirements.txt"
    if os.path.exists(requirements_path):
        print("📋 Found requirements.txt, installing from file...")
        result = _pip_install(["-r", requirements_path])
        if result.returncode == 0:
            print("✅ Requirements installed successfully")
        else:
            print(f"⚠️  Some packages failed to install: {result.stderr}")
            print("Continuing with fallback installation...")

    # Install core ML packages
    core_packages = [
        "lightgbm",
        "scikit-learn",
        "pandas",
        "numpy",
        "matplotlib",
        "seaborn",
        "joblib",
        "optuna"
    ]

    print("🔧 Installing core ML packages...")
    for package in core_packages:
        try:
            result = _pip_install([package], timeout=120)
            if result.returncode == 0:
                print(f"✅ {package}")
            else:
                print(f"⚠️  {package} - installation warning")
        except subprocess.TimeoutExpired:
            print(f"⏰ {package} - installation timeout, continuing...")
        except Exception as e:
            print(f"❌ {package} - error: {e}")

    # Optional packages (with fallback)
    optional_packages = {
        "xgboost": "XGBoost models",
        "catboost": "CatBoost models",
        "shap": "Model explainability"
    }

    print("\n🔧 Installing optional packages...")
    for package, description in optional_packages.items():
        try:
            result = _pip_install([package], timeout=180)
            if result.returncode == 0:
                print(f"✅ {package} - {description}")
            else:
                print(f"⚠️  {package} - failed, will skip {description}")
        except Exception:
            # Best effort: an optional package must never abort the setup.
            print(f"⚠️  {package} - failed ({description}), continuing...")

    print("\n📋 Verifying installations...")

    # Packages installed after the interpreter started may be invisible to
    # the import system's cached directory listings until they are refreshed.
    importlib.invalidate_caches()

    # Verify key packages (import names, not pip names: scikit-learn -> sklearn)
    required_modules = ["pandas", "numpy", "sklearn", "lightgbm", "joblib"]
    verification_results = {
        module: "✅" if _can_import(module) else "❌"
        for module in required_modules
    }

    print("Verification results:")
    for module, status in verification_results.items():
        print(f"  {status} {module}")

    # Check optional packages
    optional_verification = {
        package: "✅" if _can_import(package) else "⚠️  (will skip)"
        for package in optional_packages
    }

    print("\nOptional packages:")
    for package, status in optional_verification.items():
        print(f"  {status} {package}")

    # Report failure honestly so later cells can stop instead of crashing on import.
    missing = [module for module, status in verification_results.items() if status != "✅"]
    if missing:
        print(f"\n❌ Required packages could not be imported: {', '.join(missing)}")
        return False

    print("\n✅ Dependency installation complete!")
    return True

# Run the installer and keep its boolean result in `install_success`
install_success = install_dependencies()