# 💾 Mount Google Drive

Mount Google Drive to save trained models for long-term storage.

In [None]:
def mount_google_drive():
    """Mount Google Drive at ``/content/drive`` and prepare the model folder.

    On a verified mount the directory ``MODEL_SAVE_DRIVE_PATH`` is created
    (if missing) and the module-level flag ``DRIVE_MOUNTED`` is set to True.

    Returns:
        bool: True when the mount was verified, False when the Colab package
        is unavailable, verification failed, or any other error occurred.
    """
    global DRIVE_MOUNTED

    try:
        from google.colab import drive

        print("🔄 Mounting Google Drive...")
        drive.mount('/content/drive')

        # Guard clause: bail out early when the expected folder is missing.
        if not os.path.exists('/content/drive/MyDrive'):
            print("❌ Drive mount verification failed")
            return False

        print("✅ Google Drive mounted successfully")
        print(f"📁 Drive path: {MODEL_SAVE_DRIVE_PATH}")

        # Make sure the models directory exists inside Drive.
        os.makedirs(MODEL_SAVE_DRIVE_PATH, exist_ok=True)
        DRIVE_MOUNTED = True
        return True

    except ImportError:
        # google.colab is only importable inside a Colab runtime.
        print("⚠️  Not running in Google Colab - Drive mount skipped")
        return False
    except Exception as err:
        # Best effort: the notebook keeps going without a Drive backup.
        print(f"⚠️  Drive mount failed: {err}")
        print("Continuing without Drive backup...")
        return False

# Attempt the Drive mount once and report where models will end up
mount_success = mount_google_drive()

print(
    "💡 Models will be saved to both repo and Google Drive"
    if mount_success
    else "💡 Models will be saved to repo only"
)

# 📥 Import Modules

Import the trading bot modules and verify everything is working.

In [None]:
import warnings
warnings.filterwarnings('ignore')

# Core libraries
import pandas as pd
import numpy as np
import json
import joblib
from datetime import datetime, timedelta
from pathlib import Path
import hashlib
import subprocess

# ML libraries
import lightgbm as lgb
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, mean_squared_error, r2_score

# Optional libraries: record availability in a flag instead of failing
try:
    import xgboost as xgb
except ImportError:
    XGB_AVAILABLE = False
    print("⚠️  XGBoost not available - will skip XGBoost models")
else:
    XGB_AVAILABLE = True
    print("✅ XGBoost available")

try:
    import optuna
except ImportError:
    OPTUNA_AVAILABLE = False
    print("⚠️  Optuna not available - will skip hyperparameter optimization")
else:
    OPTUNA_AVAILABLE = True
    print("✅ Optuna available")

# Seed NumPy's global random generator from the notebook configuration
np.random.seed(CFG['seed'])

print("\n🔧 Core libraries imported successfully")
print(f"🎯 Random seed: {CFG['seed']}")

In [None]:
def import_trading_modules():
    """Probe which trading bot modules can be imported.

    Four components are checked (feature engineering, training, model
    registry, inference). Training and inference each try a second module
    when the first import fails. The imported names are not used here; the
    imports only test availability.

    Returns:
        bool: False when ``REPO_CLONED`` is falsy or no component could be
        imported; True when at least one component imported.
    """
    if not REPO_CLONED:
        print("❌ Repository not available for module imports")
        return False

    print("🔄 Importing trading bot modules...")

    # Component name -> whether its import succeeded
    status = {}

    # Feature engineering
    try:
        from arbi.ai.feature_engineering_v2 import compute_features_deterministic, load_feature_schema
    except ImportError as err:
        print(f"⚠️  Feature engineering module not found: {err}")
        status['feature_engineering'] = False
    else:
        status['feature_engineering'] = True
        print("✅ Feature engineering module")

    # Training: try training_v2 first, then train_lgbm
    try:
        from arbi.ai.training_v2 import generate_synthetic_ohlcv_data
    except ImportError:
        try:
            from arbi.ai.train_lgbm import train_and_validate_lgbm
        except ImportError as err:
            print(f"⚠️  Training module not found: {err}")
            status['training'] = False
        else:
            status['training'] = True
            print("✅ LightGBM training module")
    else:
        status['training'] = True
        print("✅ Training module")

    # Model registry
    try:
        from arbi.ai.registry import ModelRegistry
    except ImportError as err:
        print(f"⚠️  Model registry not found: {err}")
        status['registry'] = False
    else:
        status['registry'] = True
        print("✅ Model registry")

    # Inference: try inference_v2 first, then inference
    try:
        from arbi.ai.inference_v2 import ProductionInferenceEngine
    except ImportError:
        try:
            from arbi.ai.inference import InferenceEngine
        except ImportError as err:
            print(f"⚠️  Inference module not found: {err}")
            status['inference'] = False
        else:
            status['inference'] = True
            print("✅ Inference engine (v1)")
    else:
        status['inference'] = True
        print("✅ Inference engine")

    found = sum(status.values())
    expected = len(status)

    print(f"\n📊 Module Import Summary: {found}/{expected} modules imported")

    if found == 0:
        print("⚠️  No trading bot modules found - will use fallback implementations")
        return False

    if found < expected:
        print("⚠️  Some modules missing - will use fallbacks where needed")
    else:
        print("✅ All modules imported successfully")
    return True

# Import trading bot modules; create_training_dataset reads this flag to
# choose between repository code and the fallback implementations
modules_available = import_trading_modules()

# 🏋️ Model Training

Train LightGBM and XGBoost models using your existing modules or fallback implementations.

In [None]:
def create_fallback_features(df):
    """Build a basic technical-indicator feature table from price and volume.

    Args:
        df: DataFrame with ``close`` and ``volume`` columns.

    Returns:
        DataFrame indexed like ``df`` with return, moving-average, volume,
        volatility, RSI and MACD columns; every row containing a NaN (the
        rolling-window warm-up rows) is dropped.
    """
    close = df['close']
    volume = df['volume']
    out = pd.DataFrame(index=df.index)

    # Price features
    simple_ret = close.pct_change()
    out['returns'] = simple_ret
    out['log_returns'] = np.log(close / close.shift(1))
    for window in (5, 20):
        out[f'price_ma{window}'] = close.rolling(window).mean()
    for window in (5, 20):
        out[f'price_ratio_ma{window}'] = close / out[f'price_ma{window}']

    # Volume features
    avg_volume = volume.rolling(5).mean()
    relative_volume = volume / avg_volume
    out['volume_ma5'] = avg_volume
    out['volume_ratio'] = relative_volume
    out['volume_price_trend'] = relative_volume * simple_ret

    # Volatility: 20-period rolling std of returns, and |return| relative to it
    rolling_vol = simple_ret.rolling(20).std()
    out['volatility'] = rolling_vol
    out['volatility_ratio'] = simple_ret.abs() / rolling_vol

    # RSI over 14 periods using simple rolling means of gains and losses
    change = close.diff()
    avg_gain = change.where(change > 0, 0).rolling(window=14).mean()
    avg_loss = (-change.where(change < 0, 0)).rolling(window=14).mean()
    strength = avg_gain / avg_loss
    out['rsi'] = 100 - (100 / (1 + strength))

    # MACD (12/26 EMA difference) with a 9-period signal line
    fast_ema = close.ewm(span=12).mean()
    slow_ema = close.ewm(span=26).mean()
    macd_line = fast_ema - slow_ema
    signal_line = macd_line.ewm(span=9).mean()
    out['macd'] = macd_line
    out['macd_signal'] = signal_line
    out['macd_hist'] = macd_line - signal_line

    return out.dropna()

def generate_fallback_ohlcv_data(n_periods=1000, symbol="BTC-USD"):
    """Generate synthetic hourly OHLCV data from a seeded random walk.

    Close prices follow a geometric random walk starting near 50000 with a
    small positive drift. The volatility is re-drawn at up to five randomly
    chosen regime-change positions. Each candle opens at the previous close.

    Note:
        This reseeds NumPy's global random generator with ``CFG['seed']``,
        so repeated calls return the same data and the global random stream
        is reset as a side effect.

    Args:
        n_periods: Number of hourly candles to generate.
        symbol: Accepted for interface compatibility; not used by this
            generator.

    Returns:
        DataFrame with columns ``timestamp``, ``open``, ``high``, ``low``,
        ``close`` and ``volume``, one row per period, where
        ``low <= open, close <= high`` holds for every row.
    """
    columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume']

    # Lowercase 'h' is the hourly alias; uppercase 'H' is deprecated in
    # recent pandas releases.
    dates = pd.date_range(start='2023-01-01', periods=n_periods, freq='1h')

    # Random walk with drift and regime changes
    np.random.seed(CFG['seed'])

    # Cap the number of regime changes so short series do not raise; a set
    # gives O(1) membership tests inside the loop.
    n_regimes = min(5, n_periods)
    regime_changes = set(
        np.random.choice(n_periods, size=n_regimes, replace=False).tolist()
    )

    returns = np.empty(n_periods)
    current_vol = 0.01

    for i in range(n_periods):
        # Change volatility at regime boundaries
        if i in regime_changes:
            current_vol = np.random.uniform(0.005, 0.02)

        # Generate return with current volatility
        returns[i] = np.random.normal(0.00005, current_vol)

    prices = 50000 * np.exp(np.cumsum(returns))

    data = []
    for i, (date, price) in enumerate(zip(dates, prices)):
        # The draw order (high, low, volume) is kept so the random stream
        # matches the earlier implementation.
        high = price * (1 + abs(np.random.normal(0, 0.005)))
        low = price * (1 - abs(np.random.normal(0, 0.005)))
        open_price = prices[i - 1] if i > 0 else price
        volume = np.random.uniform(100, 1000) * (1 + abs(returns[i]) * 10)

        # The wick is built around the close only; widen it when needed so
        # the candle also contains the open (the previous close).
        high = max(high, open_price)
        low = min(low, open_price)

        data.append({
            'timestamp': date,
            'open': open_price,
            'high': high,
            'low': low,
            'close': price,
            'volume': volume
        })

    # Explicit columns keep the schema stable even when no rows are produced.
    return pd.DataFrame(data, columns=columns)

def create_training_dataset(n_periods, symbol):
    """Create an aligned training dataset of features and labels.

    OHLCV data and features come from the repository modules when
    ``modules_available`` is truthy and they run without error; otherwise
    the fallback generator and fallback feature function are used.

    Labels are derived from the close price ``CFG['horizon']`` rows ahead:
    the regression target is that future return, and the binary label is 1
    when the return exceeds ``CFG['pos_thresh']``.

    Only rows that have both a feature row and a computable future return
    are kept. Rows are matched by index label, so feature rows dropped
    during indicator warm-up never shift the labels out of alignment.
    NOTE(review): this assumes repository features share the OHLCV frame's
    index labels - confirm against ``compute_features_deterministic``.

    Args:
        n_periods: Number of OHLCV periods to generate.
        symbol: Symbol passed to the data and feature generators.

    Returns:
        tuple: ``(feature_df, labels_binary, labels_regression, timestamps)``,
        all of equal length with a fresh 0..n-1 index.
    """
    print(f"🔄 Creating training dataset ({n_periods} periods)...")

    # Generate or load OHLCV data
    df = None
    if modules_available:
        try:
            from arbi.ai.training_v2 import generate_synthetic_ohlcv_data
            df = generate_synthetic_ohlcv_data(n_periods, symbol)
            print("✅ Using repository OHLCV generation")
        except Exception as err:
            # Exception (not a bare except) keeps Ctrl-C working and
            # surfaces the reason for falling back.
            df = None
            print(f"⚠️  Repository OHLCV generation failed: {err}")
    if df is None:
        df = generate_fallback_ohlcv_data(n_periods, symbol)
        print("✅ Using fallback OHLCV generation")

    # Compute features
    feature_df = None
    if modules_available:
        try:
            from arbi.ai.feature_engineering_v2 import compute_features_deterministic
            feature_result = compute_features_deterministic(df, symbol)
            feature_df = feature_result.features
            print("✅ Using repository feature engineering")
        except Exception as err:
            feature_df = None
            print(f"⚠️  Repository feature engineering failed: {err}")
    if feature_df is None:
        feature_df = create_fallback_features(df)
        print("✅ Using fallback feature engineering")

    # Create labels
    future_periods = CFG['horizon']
    threshold = CFG['pos_thresh']

    # Calculate future returns
    future_returns = df['close'].shift(-future_periods) / df['close'] - 1

    # Binary classification: 1 if return > threshold, 0 otherwise
    labels_binary = (future_returns > threshold).astype(int)

    # Keep only rows that have a future return AND a feature row. The
    # feature frame can be shorter than df (warm-up rows dropped), so all
    # four outputs are selected with the same index labels before the
    # index is reset.
    has_label = future_returns.notna()
    has_features = future_returns.index.isin(feature_df.index)
    rows = future_returns.index[has_label.to_numpy() & has_features]

    feature_df = feature_df.loc[rows].reset_index(drop=True)
    labels_binary = labels_binary.loc[rows].reset_index(drop=True)
    # Regression target: actual future return
    labels_regression = future_returns.loc[rows].reset_index(drop=True)
    timestamps = df.loc[rows, 'timestamp'].reset_index(drop=True)

    print(f"✅ Dataset created:")
    print(f"  Samples: {len(feature_df)}")
    print(f"  Features: {feature_df.shape[1]}")
    print(f"  Positive class: {labels_binary.sum()}/{len(labels_binary)} ({100*labels_binary.mean():.1f}%)")
    print(f"  Regression target range: {labels_regression.min():.4f} to {labels_regression.max():.4f}")

    return feature_df, labels_binary, labels_regression, timestamps

# Create training dataset: feature matrix, binary and regression labels,
# and the matching timestamps
X, y_binary, y_regression, timestamps = create_training_dataset(CFG['n_periods'], SYMBOL)