# BITCOIN4Traders - Google Colab Training

**Anleitung:**
1. Gehe zu `Runtime > Change runtime type` und wähle **GPU (T4)**
2. Führe alle Zellen der Reihe nach aus
3. Das Modell wird automatisch auf Google Drive gespeichert
4. Bei Unterbrechung: Zellen 1–11 erneut ausführen (Zelle 11 erstellt den Trainer), dann die Resume-Zelle (Zelle 12)

---

## Zelle 1: GPU prüfen

In [None]:
import torch

# Probe CUDA once and derive the device string from the result.
cuda_ready = torch.cuda.is_available()
DEVICE = 'cuda' if cuda_ready else 'cpu'

if not cuda_ready:
    # No accelerator attached: point the user at the Colab runtime settings.
    print('WARNUNG: Keine GPU gefunden! Gehe zu Runtime > Change runtime type > GPU')
else:
    # Report the name and total memory (in units of 1e9 bytes) of device 0.
    gpu_name = torch.cuda.get_device_name(0)
    gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f'GPU verfügbar: {gpu_name}')
    print(f'GPU Speicher: {gpu_mem:.1f} GB')

print(f'Verwende Device: {DEVICE}')

## Zelle 2: Google Drive mounten (für persistente Speicherung)

In [None]:
from google.colab import drive
import os

# Mount Google Drive so that artifacts can be stored outside the Colab VM.
drive.mount('/content/drive')

# Folder layout of the project on Drive.
DRIVE_PROJECT_DIR = '/content/drive/MyDrive/BITCOIN4Traders'
DRIVE_MODEL_DIR = f'{DRIVE_PROJECT_DIR}/models'
DRIVE_DATA_DIR = f'{DRIVE_PROJECT_DIR}/data'
DRIVE_LOG_DIR = f'{DRIVE_PROJECT_DIR}/logs'

# Create all three folders up front; folders that already exist are left alone.
for drive_folder in (DRIVE_MODEL_DIR, DRIVE_DATA_DIR, DRIVE_LOG_DIR):
    os.makedirs(drive_folder, exist_ok=True)

print('Google Drive gemountet.')
print(f'Modelle werden gespeichert in: {DRIVE_MODEL_DIR}')
print(f'Daten werden gespeichert in: {DRIVE_DATA_DIR}')

## Zelle 3: Repository klonen / Projekt hochladen

In [None]:
import os

# Location of the project checkout inside the Colab VM.
PROJECT_DIR = '/content/BITCOIN4Traders'

# First run: clone the repository into PROJECT_DIR.
# Later runs: the directory already exists, so pull the latest changes instead.
# The lines starting with `!` are IPython shell escapes; `{PROJECT_DIR}` is
# substituted with the Python variable before the command runs.
if not os.path.exists(PROJECT_DIR):
    print('Klone Projekt von GitHub...')
    !git clone https://github.com/juancarlosrial76-code/BITCOIN4Traders.git {PROJECT_DIR}
    print('Fertig!')
else:
    print('Projekt bereits vorhanden. Aktualisiere...')
    !git -C {PROJECT_DIR} pull
    print('Aktualisiert!')

print(f'Arbeitsverzeichnis: {PROJECT_DIR}')


## Zelle 4: Dependencies installieren

In [None]:
%%time
print('Installiere Dependencies...')

!pip install -q ccxt loguru pyarrow pandas numpy scipy gymnasium stable-baselines3 ta yfinance numba hmmlearn scikit-learn pyyaml pydantic python-dotenv tqdm joblib matplotlib plotly omegaconf

# Sicherstellen dass ccxt wirklich installiert ist
import importlib
for pkg in ['ccxt', 'loguru', 'pyarrow', 'gymnasium', 'omegaconf']:
    try:
        importlib.import_module(pkg)
        print(f'  OK: {pkg}')
    except ImportError:
        print(f'  FEHLT: {pkg} - installiere nochmals...')
        import subprocess
        subprocess.run(['pip', 'install', '-q', pkg], check=True)

print('Installation abgeschlossen!')


## Zelle 5: Python-Pfad setzen

In [None]:
import sys
import os

PROJECT_DIR = '/content/BITCOIN4Traders'
SRC_DIR = os.path.join(PROJECT_DIR, 'src')

# Put the project root and its src/ folder at the front of the import path.
# Entries that are already present are not inserted a second time.
for entry in (PROJECT_DIR, SRC_DIR):
    if entry in sys.path:
        continue
    sys.path.insert(0, entry)

# Work from the project root so that the relative paths below resolve there.
os.chdir(PROJECT_DIR)
print(f'Arbeitsverzeichnis: {os.getcwd()}')
print(f'Python-Pfad enthält: {SRC_DIR}')

# Working directories, relative to the project root.
dirs = ['data/cache', 'data/processed', 'data/models/adversarial', 'logs/training']
for rel_dir in dirs:
    os.makedirs(rel_dir, exist_ok=True)

print('Verzeichnisse erstellt.')

## Zelle 6: Daten von Drive laden oder herunterladen

In [None]:
import shutil
import os

DRIVE_DATA_DIR = '/content/drive/MyDrive/BITCOIN4Traders/data'
LOCAL_CACHE_DIR = '/content/BITCOIN4Traders/data/cache'

# Names of the parquet files stored on Drive (empty when the folder is absent).
drive_cache_files = (
    [name for name in os.listdir(DRIVE_DATA_DIR) if name.endswith('.parquet')]
    if os.path.exists(DRIVE_DATA_DIR)
    else []
)

if not drive_cache_files:
    # Nothing cached yet: the data-loading cell will download instead.
    print('Keine gecachten Daten auf Drive gefunden.')
    print('Daten werden beim Training von Binance heruntergeladen.')
    print('(Dies dauert einige Minuten beim ersten Start)')
else:
    # Copy every cached file into the local cache directory.
    print(f'Lade gecachte Daten von Drive: {drive_cache_files}')
    for parquet_name in drive_cache_files:
        shutil.copy2(
            os.path.join(DRIVE_DATA_DIR, parquet_name),
            os.path.join(LOCAL_CACHE_DIR, parquet_name),
        )
        print(f'  Kopiert: {parquet_name}')
    print('Daten erfolgreich von Drive geladen!')

## Zelle 7: Training-Konfiguration

In [None]:
import torch

# ===== TRAINING SETTINGS =====
# Edit the values below to customise the run.

# Compute device: use the GPU whenever CUDA is available.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

# Dataset
EXCHANGE = 'binance'
SYMBOL = 'BTC/USDT'         # trading pair
TIMEFRAME = '1h'            # time frame: 1m, 5m, 15m, 1h, 4h, 1d
START_DATE = '2022-01-01'   # start date of the history to load
END_DATE = None             # None = up to today

# Training
N_ITERATIONS = 500          # number of training iterations
STEPS_PER_ITER = 2048       # steps per iteration
SAVE_FREQUENCY = 25         # save every N iterations

# Checkpoint (for resuming)
RESUME_CHECKPOINT = None    # path to a checkpoint, e.g. 'data/models/adversarial/checkpoint_iter_100.pth'

# Echo the effective settings in one go.
summary_lines = [
    'Konfiguration:',
    f'  Device:     {DEVICE}',
    f'  Symbol:     {SYMBOL}',
    f'  Timeframe:  {TIMEFRAME}',
    f'  Start:      {START_DATE}',
    f'  Iterationen: {N_ITERATIONS}',
    f'  Save alle:  {SAVE_FREQUENCY} Iterationen',
]
print('\n'.join(summary_lines))

## Zelle 8: Daten laden (Cache oder Download)

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
from loguru import logger
import sys, os, importlib.util

# Load the candle data into `price_data`: reuse a cached parquet file when one
# exists, otherwise download through the project's CCXT loader and back the
# downloaded files up to Google Drive.
# Reads EXCHANGE, SYMBOL, TIMEFRAME, START_DATE and END_DATE, which are
# expected to be defined by the configuration cell.

# Paths
PROJECT_DIR = '/content/BITCOIN4Traders'
SRC_DIR = os.path.join(PROJECT_DIR, 'src')

# Fail early when the project checkout is missing (it is cloned in cell 3).
if not os.path.exists(SRC_DIR):
    raise RuntimeError(f"FEHLER: {SRC_DIR} nicht gefunden! Führe zuerst Zelle 3 (git clone) aus.")

# Fail early when the loader module is missing from the checkout.
ccxt_loader_path = os.path.join(SRC_DIR, 'data', 'ccxt_loader.py')
if not os.path.exists(ccxt_loader_path):
    raise RuntimeError(f"FEHLER: {ccxt_loader_path} nicht gefunden!")

# Make sure ccxt is importable; install it through the kernel's own
# interpreter when it is not, so the package lands in the right environment.
try:
    import ccxt
except ImportError:
    print("ccxt fehlt - installiere...")
    import subprocess
    subprocess.run([sys.executable, '-m', 'pip', 'install', '-q', 'ccxt'], check=True)
    importlib.invalidate_caches()
    import ccxt

# Logging: replace loguru's default sink with a compact stdout format.
logger.remove()
logger.add(sys.stdout, format="{time:HH:mm:ss} | {level} | {message}", level="INFO")

# Directories as absolute paths, independent of the current working directory.
cache_dir = Path(os.path.join(PROJECT_DIR, 'data', 'cache'))
processed_dir = Path(os.path.join(PROJECT_DIR, 'data', 'processed'))
cache_dir.mkdir(parents=True, exist_ok=True)
processed_dir.mkdir(parents=True, exist_ok=True)

# Load data. The glob result is sorted because its order is otherwise
# unspecified, which made the choice of "first" cache file arbitrary.
cached_files = sorted(cache_dir.glob('*.parquet'))

if cached_files:
    if len(cached_files) > 1:
        # NOTE(review): the first file is used regardless of SYMBOL/TIMEFRAME;
        # the cache file naming scheme is not visible here - confirm.
        logger.warning(
            f'Mehrere Cache-Dateien gefunden ({len(cached_files)}); verwende {cached_files[0].name}'
        )
    logger.info(f'Lade gecachte Daten: {cached_files[0]}')
    price_data = pd.read_parquet(cached_files[0])
    logger.success(f'Geladen: {len(price_data)} Candles')
else:
    logger.info(f'Lade Daten von {EXCHANGE}...')

    # Import ccxt_loader directly from its file path, bypassing the package
    # import machinery.
    spec = importlib.util.spec_from_file_location("ccxt_loader", ccxt_loader_path)
    ccxt_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(ccxt_module)
    CCXTDataLoader = ccxt_module.CCXTDataLoader
    DataLoaderConfig = ccxt_module.DataLoaderConfig

    config = DataLoaderConfig(
        exchange_id=EXCHANGE,
        exchange_type='spot',
        rate_limit_ms=100,
        cache_dir=cache_dir,
        processed_dir=processed_dir,
        compression='snappy',
    )
    loader = CCXTDataLoader(config)
    price_data = loader.download_and_cache(
        symbol=SYMBOL,
        timeframe=TIMEFRAME,
        start_date=START_DATE,
        end_date=END_DATE,
        force_refresh=False,
    )
    logger.success(f'Heruntergeladen: {len(price_data)} Candles')

    # Back the downloaded parquet files up to Drive (only when Drive is mounted).
    import shutil
    drive_data_dir = '/content/drive/MyDrive/BITCOIN4Traders/data'
    if os.path.exists('/content/drive/MyDrive'):
        os.makedirs(drive_data_dir, exist_ok=True)
        for f in cache_dir.glob('*.parquet'):
            dst = os.path.join(drive_data_dir, f.name)
            shutil.copy2(str(f), dst)
            logger.info(f'Drive gespeichert: {dst}')

print(f'\nDatensatz: {len(price_data)} Zeilen')
print(f'Zeitraum: {price_data.index[0]} bis {price_data.index[-1]}')
# Last expression of the cell: the notebook renders the first rows.
price_data.head()


## Zelle 9: Feature Engineering

In [None]:
from features.feature_engine import FeatureEngine, FeatureConfig

# Build the feature matrix for training.
# Relies on `price_data`, `processed_dir` and `logger` from the data-loading cell.
logger.info('Feature Engineering...')

feature_config = FeatureConfig(
    volatility_window=20,
    ou_window=20,
    rolling_mean_window=20,
    use_log_returns=True,
    scaler_type='standard',
    save_scaler=True,
    scaler_path=processed_dir,
    dropna_strategy='rolling',
    min_valid_rows=1000,
)

engine = FeatureEngine(feature_config)

# Chronological split: 70% train, 15% validation, 15% test
n = len(price_data)
train_idx = int(n * 0.70)
val_idx = int(n * 0.85)

train_data = price_data.iloc[:train_idx]
val_data = price_data.iloc[train_idx:val_idx]
test_data = price_data.iloc[val_idx:]

logger.info(f'Split: Train={len(train_data)}, Val={len(val_data)}, Test={len(test_data)}')

# Fit ONLY on the training data (no data leakage!)
logger.info('Fit FeatureEngine auf Trainingsdaten...')
train_features = engine.fit_transform(train_data)

# Validation and test use the already fitted engine (transform only).
logger.info('Transformiere Val und Test...')
val_features = engine.transform(val_data)
test_features = engine.transform(test_data)

# Align indices: keep only the rows present in both the prices and the features.
# NOTE(review): only the training split is aligned here; val_features and
# test_features are not aligned with val_data / test_data in this cell.
common_train = train_data.index.intersection(train_features.index)
train_price = train_data.loc[common_train]
train_feat = train_features.loc[common_train]

logger.success(f'Features berechnet: {train_feat.shape[1]} Features, {len(train_price)} Trainingssamples')
print(f'Feature-Spalten: {list(train_feat.columns[:5])}...')

## Zelle 10: Environment erstellen

In [None]:
from environment.config_integrated_env import ConfigIntegratedTradingEnv
from environment.config_system import EnvironmentConfig, load_environment_config_from_yaml

# Environment settings come from the YAML file when it exists (the path is
# relative to the current working directory); otherwise defaults are used.
config_path = Path('config/environment/realistic_env.yaml')

if not config_path.exists():
    env_config = EnvironmentConfig()
    logger.warning('Verwende Standard-Config')
else:
    env_config = load_environment_config_from_yaml(str(config_path))
    logger.info('Environment-Config geladen')

# Build the environment from the aligned training prices and features.
env = ConfigIntegratedTradingEnv(train_price, train_feat, env_config)

logger.success('Trading Environment erstellt')
print(f'Observation Space: {env.observation_space.shape}')
print(f'Action Space: {env.action_space.n}')

## Zelle 11: Trainer erstellen

In [None]:
from agents.ppo_agent import PPOConfig
from training.adversarial_trainer import AdversarialTrainer, AdversarialConfig

# Network input/output sizes are taken from the environment.
state_dim = env.observation_space.shape[0]
n_actions = env.action_space.n

# Hyper-parameters that the trader and the adversary have in common.
shared_ppo_kwargs = dict(
    state_dim=state_dim,
    hidden_dim=128,
    n_actions=n_actions,
    gamma=0.99,
    gae_lambda=0.95,
    clip_epsilon=0.2,
    n_epochs=10,
    batch_size=64,
    use_recurrent=True,
    rnn_type='GRU',
)

# Trader (optimised for profit)
trader_config = PPOConfig(
    **shared_ppo_kwargs,
    actor_lr=3e-4,
    critic_lr=1e-3,
    entropy_coef=0.01,
    value_loss_coef=0.5,
    max_grad_norm=0.5,
    target_kl=0.01,
)

# Adversary (creates difficult scenarios)
adversary_config = PPOConfig(
    **shared_ppo_kwargs,
    actor_lr=1e-4,
    critic_lr=5e-4,
    entropy_coef=0.02,
)

# Training configuration; iteration counts and save frequency come from the
# configuration cell.
training_config = AdversarialConfig(
    n_iterations=N_ITERATIONS,
    steps_per_iteration=STEPS_PER_ITER,
    trader_config=trader_config,
    adversary_config=adversary_config,
    adversary_start_iteration=100,
    adversary_strength=0.1,
    save_frequency=SAVE_FREQUENCY,
    log_frequency=10,
    checkpoint_dir='data/models/adversarial',
)

trainer = AdversarialTrainer(env, training_config, device=DEVICE)

logger.success('Trainer erstellt')
print(f'State dim: {state_dim}, Actions: {n_actions}')
print(f'Device: {DEVICE}')
print(f'Iterationen: {N_ITERATIONS}')

## Zelle 12: [Optional] Von Checkpoint weitermachen

In [None]:
from loguru import logger
import os
import re
import shutil

# Resume support: copy the newest checkpoint from Drive into the local model
# folder and load it into `trainer` (created in the trainer cell).

DRIVE_MODEL_DIR = '/content/drive/MyDrive/BITCOIN4Traders/models'
LOCAL_MODEL_DIR = '/content/BITCOIN4Traders/data/models/adversarial'


def _natural_sort_key(filename):
    """Return a sort key under which numbers embedded in *filename* compare numerically.

    ``re.split`` with a capturing group yields alternating text / digit chunks
    (text first, possibly empty), so chunks at odd positions are always digit
    runs and two keys never compare an int with a str.
    """
    chunks = re.split(r'(\d+)', filename)
    return [int(chunk) if pos % 2 else chunk for pos, chunk in enumerate(chunks)]


# Collect the checkpoints on Drive. A plain lexicographic sort would place
# e.g. 'checkpoint_iter_75.pth' after 'checkpoint_iter_100.pth' and resume
# from the wrong file, hence the natural sort key.
drive_checkpoints = []
if os.path.exists(DRIVE_MODEL_DIR):
    drive_checkpoints = sorted(
        (f for f in os.listdir(DRIVE_MODEL_DIR) if f.endswith('.pth')),
        key=_natural_sort_key,
    )

if drive_checkpoints:
    latest = drive_checkpoints[-1]
    src = os.path.join(DRIVE_MODEL_DIR, latest)
    dst = os.path.join(LOCAL_MODEL_DIR, latest)
    # Make sure the local target folder exists before copying into it.
    os.makedirs(LOCAL_MODEL_DIR, exist_ok=True)
    shutil.copy2(src, dst)

    logger.info(f'Lade Checkpoint: {latest}')
    try:
        trainer.load_checkpoint(dst)
        logger.success(f'Checkpoint geladen: {latest}')
    except Exception as e:
        # Best effort: a checkpoint that cannot be loaded is reported and
        # training then starts with the freshly initialised trainer.
        logger.error(f'Fehler beim Laden: {e}')
else:
    logger.info('Kein Checkpoint gefunden - starte Training von Anfang an')

# To load a specific checkpoint manually:
# RESUME_CHECKPOINT = 'data/models/adversarial/checkpoint_iter_200.pth'
# if RESUME_CHECKPOINT and os.path.exists(RESUME_CHECKPOINT):
#     trainer.load_checkpoint(RESUME_CHECKPOINT)

## Zelle 13: Auto-Save Callback einrichten

In [None]:
import os
import shutil
import glob

def sync_models_to_drive(
    local_dir: str = '/content/BITCOIN4Traders/data/models/adversarial',
    drive_dir: str = '/content/drive/MyDrive/BITCOIN4Traders/models',
) -> int:
    """Copy every local ``*.pth`` checkpoint to the model folder on Google Drive.

    Args:
        local_dir: Directory searched (non-recursively) for ``*.pth`` files.
        drive_dir: Destination directory; it must already exist.

    Returns:
        The number of checkpoint files copied (0 when none were found).

    Raises:
        FileNotFoundError: If checkpoints exist but ``drive_dir`` does not.
    """
    checkpoints = sorted(glob.glob(os.path.join(local_dir, '*.pth')))
    if not checkpoints:
        return 0

    # Deliberately not created here: when Drive is not mounted, creating the
    # folder would silently write the "backup" to the VM's own disk.
    if not os.path.isdir(drive_dir):
        raise FileNotFoundError(
            f'Drive-Ordner nicht gefunden: {drive_dir} - ist Google Drive gemountet (Zelle 2)?'
        )

    for cp in checkpoints:
        shutil.copy2(cp, os.path.join(drive_dir, os.path.basename(cp)))

    print(f'Drive sync: {len(checkpoints)} Checkpoint(s) gespeichert')
    return len(checkpoints)

# Test call: run one sync right away with the default paths.
sync_models_to_drive()
print('Auto-Save Funktion bereit.')

## Zelle 14: TRAINING STARTEN

> **Tipp:** Halte die Seite aktiv (z.B. Tab offen lassen) um Session-Timeouts zu vermeiden.

In [None]:
import time

# Run the training loop and, whatever happens, sync the local checkpoints to
# Google Drive afterwards. Uses `trainer`, `logger` and `sync_models_to_drive`
# from earlier cells.
logger.info('=' * 60)
logger.info('TRAINING STARTET')
logger.info('=' * 60)

start_time = time.time()

try:
    # Run the training
    trainer.train()
    
    # Elapsed wall-clock time in hours.
    elapsed = (time.time() - start_time) / 3600
    logger.success(f'Training abgeschlossen! Dauer: {elapsed:.1f} Stunden')

except KeyboardInterrupt:
    # NOTE(review): no checkpoint is written in this branch; only checkpoints
    # already on disk are synced by the `finally` block below.
    logger.warning('Training unterbrochen (KeyboardInterrupt)')
    logger.info('Speichere aktuellen Stand...')

except Exception as e:
    # Any other error is logged with its traceback and not re-raised, so the
    # `finally` block still runs and the cell finishes.
    logger.error(f'Fehler: {e}')
    import traceback
    traceback.print_exc()

finally:
    # Always sync to Drive!
    # NOTE(review): the success message is logged even when no checkpoint
    # file existed to copy.
    logger.info('Synchronisiere mit Google Drive...')
    sync_models_to_drive()
    logger.success('Modell auf Drive gesichert!')

## Zelle 15: Evaluation

In [None]:
# Evaluate the trained model and print the returned metrics.
# Uses `trainer` and `logger` from earlier cells.
logger.info('Evaluiere trainiertes Modell...')

try:
    # presumably returns a mapping of metric name -> value (it is iterated
    # with .items() below) - confirm against the trainer implementation
    metrics = trainer.evaluate(n_episodes=100)
    
    print('\n=== Evaluationsergebnisse ===')
    for key, value in metrics.items():
        # Floats are shown with four decimals, everything else unchanged.
        if isinstance(value, float):
            print(f'  {key}: {value:.4f}')
        else:
            print(f'  {key}: {value}')

except Exception as e:
    # Evaluation is best effort: log the error and traceback, do not re-raise.
    logger.error(f'Evaluation fehlgeschlagen: {e}')
    import traceback
    traceback.print_exc()

## Zelle 16: Checkpoints auf Drive anzeigen

In [None]:
import os

DRIVE_MODEL_DIR = '/content/drive/MyDrive/BITCOIN4Traders/models'

# List every entry of the Drive model folder with its size, then a total.
print('Gespeicherte Modelle auf Google Drive:')
print('=' * 50)

if not os.path.exists(DRIVE_MODEL_DIR):
    print('Kein Modellordner auf Drive gefunden.')
else:
    files = sorted(os.listdir(DRIVE_MODEL_DIR))
    total_mb = 0
    for entry in files:
        # Size in units of 1e6 bytes.
        size_mb = os.path.getsize(os.path.join(DRIVE_MODEL_DIR, entry)) / 1e6
        print(f'  {entry:40s}  {size_mb:.1f} MB')
        total_mb += size_mb
    print(f'\nGesamt: {len(files)} Dateien, {total_mb:.1f} MB')