# 🌊 DRL Coastal Emergency Warning System - Colab Training

Train DRL agents with free GPU acceleration on Google Colab.

**Features:**
- ✅ Automatic setup from GitHub
- ✅ Free Tesla T4 GPU (15-30 min training)
- ✅ Uses your existing configuration files
- ✅ Download trained models
- ✅ Compatible with local dashboard

**Workflow:**
1. Configure experiment locally (dashboard)
2. Push config to GitHub
3. Run training on Colab (this notebook)
4. Download checkpoint
5. Evaluate locally (dashboard)

---

## 📦 Setup & Installation

Run this cell first to clone the repository and install the dependencies.

In [None]:
%%capture
# Clone repository
import os
if os.path.exists('DRL_Synthetic'):
    print("✅ Repository already cloned")
else:
    !git clone https://github.com/marcellosano/DRL_Synthetic.git
    print("✅ Repository cloned")

%cd DRL_Synthetic

# Install dependencies
!pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118 -q
!pip install numpy pandas matplotlib seaborn scikit-learn pyyaml -q

print("\n✅ Setup complete!")

# Check GPU
import torch
if torch.cuda.is_available():
    print(f"\n🚀 GPU Available: {torch.cuda.get_device_name(0)}")
    print(f"   VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
else:
    print("\n⚠️  No GPU available. Training will be slow.")

## ⚙️ Configuration

Select which configuration to train with.

In [None]:
# Short experiment names mapped to their YAML files (paths are relative to
# the repository root).
CONFIGS = dict(
    quick_test='config/experiments/quick_test.yaml',
    reward_tuning='config/experiments/reward_tuning.yaml',
    hyperparameter_sweep='config/experiments/hyperparameter_sweep.yaml',
    base='config/base.yaml',
)

# SELECT YOUR CONFIG HERE
selected_config = 'quick_test'  # Change this to train different configs

config_path = CONFIGS[selected_config]
print(
    f"📋 Selected configuration: {selected_config}",
    f"   Path: {config_path}",
    sep="\n",
)

# Load the chosen YAML file so its key settings can be echoed below.
import yaml
with open(config_path, 'r') as config_file:
    config = yaml.safe_load(config_file)

print("\n🎯 Training Settings:")
training_cfg = config['training']
print(f"   Episodes: {training_cfg['episodes']}")
print(f"   Batch Size: {training_cfg['batch_size']}")
print(f"   Learning Rate: {training_cfg['learning_rate']}")

print("\n💰 Reward Weights:")
reward_cfg = config['reward']
print(f"   Lives Saved: {reward_cfg['lives_saved_weight']}")
print(f"   Early Warning Bonus: {reward_cfg['early_warning_bonus']}")
print(f"   False Alarm Penalty: {reward_cfg['false_alarm_penalty']}")

## 🚀 Training

Run training with progress updates.

In [None]:
from dashboard.utils.trainer import TrainingSession
from IPython.display import clear_output
import time

# Build the training session for the selected configuration, then report the
# run name, output directory and episode count it exposes.
print("🎓 Initializing training session...")
run_label = f"colab_{selected_config}"
session = TrainingSession(config_path, run_name=run_label)

print(
    f"✅ Training session created: {session.run.name}",
    f"   Run directory: {session.run_dir}",
    f"   Total episodes: {session.total_episodes}",
    "\n🚀 Starting training...\n",
    sep="\n",
)

# Add callback for progress updates
def progress_callback(metrics):
    """Print a refreshed progress report for the current episode.

    The report is redrawn for episodes below 10 and for every episode that
    is a multiple of 10; all other episodes are ignored.

    Args:
        metrics: Mapping that must contain 'episode'. The entries 'reward',
            'lives_lost', 'policy_loss' and 'value_loss' are optional and
            are shown as 0 when absent.
    """
    episode = metrics['episode']
    # Guard clause: skip episodes that are neither early nor a multiple of 10.
    if episode >= 10 and episode % 10 != 0:
        return

    clear_output(wait=True)
    total = session.total_episodes  # read from the notebook-level session
    progress = (episode / total) * 100

    # 50-character bar: one filled cell per 2% of progress.
    filled = int(progress/2)
    bar = '█' * filled + '░' * (50 - filled)

    print(f"🎓 Training Progress: {progress:.1f}%")
    print(f"   Episode: {episode}/{total}")
    print(f"   Reward: {metrics.get('reward', 0):.2f}")
    print(f"   Lives Lost: {metrics.get('lives_lost', 0)}")
    print(f"   Policy Loss: {metrics.get('policy_loss', 0):.4f}")
    print(f"   Value Loss: {metrics.get('value_loss', 0):.4f}")
    print(f"\n{bar}")

# Register the progress display, run training, and print a completion summary.
session.add_callback(progress_callback)

# Train (wall-clock timed; `elapsed` is in seconds)
start_time = time.time()
session.train()
elapsed = time.time() - start_time

episode_rewards = session.metrics['episode_rewards']
# Average over the episodes actually available: short runs have fewer than
# 100 episodes, and dividing by a fixed 100 would understate the mean.
recent_rewards = episode_rewards[-100:]

print(f"\n✅ Training complete!")
print(f"   Total time: {elapsed/60:.1f} minutes")
print(f"   Episodes: {session.total_episodes}")
if recent_rewards:
    print(f"   Final reward: {episode_rewards[-1]:.2f}")
    print(
        f"   Average reward (last {len(recent_rewards)}): "
        f"{sum(recent_rewards)/len(recent_rewards):.2f}"
    )
else:
    # Nothing was recorded; avoid indexing into an empty list.
    print("   ⚠️  No episode rewards were recorded.")

## 📊 Training Results

Visualize training metrics.

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Create visualization: a 2x2 grid of training curves read from session.metrics.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('Training Results', fontsize=16, fontweight='bold')

MA_WINDOW = 10  # moving-average window, in episodes

# Episode Rewards
ax1 = axes[0, 0]
rewards = session.metrics['episode_rewards']
episodes = list(range(len(rewards)))
ax1.plot(episodes, rewards, alpha=0.3, label='Reward')
if len(rewards) >= MA_WINDOW:
    ma = np.convolve(rewards, np.ones(MA_WINDOW)/MA_WINDOW, mode='valid')
    # Each 'valid' average covers MA_WINDOW consecutive episodes; draw it at
    # the last episode of its window so it lines up with the raw curve
    # instead of being shifted MA_WINDOW - 1 episodes to the left.
    ax1.plot(range(MA_WINDOW - 1, len(rewards)), ma, linewidth=2, label=f'MA({MA_WINDOW})')
ax1.set_xlabel('Episode')
ax1.set_ylabel('Reward')
ax1.set_title('Episode Rewards')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Lives Lost
# Every series below is plotted against its own index, so a metric whose
# length differs from the reward list cannot raise a shape mismatch.
# NOTE(review): this assumes one entry per episode for each metric — confirm
# against the trainer.
ax2 = axes[0, 1]
lives_lost = session.metrics['lives_lost']
ax2.plot(range(len(lives_lost)), lives_lost, color='red', linewidth=2)
ax2.set_xlabel('Episode')
ax2.set_ylabel('Lives Lost')
ax2.set_title('Lives Lost per Episode')
ax2.grid(True, alpha=0.3)

# Policy Loss (panel stays empty when nothing was recorded)
ax3 = axes[1, 0]
policy_loss = session.metrics['policy_loss']
if policy_loss:
    ax3.plot(range(len(policy_loss)), policy_loss, color='orange', linewidth=2)
    ax3.set_xlabel('Episode')
    ax3.set_ylabel('Loss')
    ax3.set_title('Policy Loss')
    ax3.grid(True, alpha=0.3)

# Cumulative Damage (panel stays empty when nothing was recorded)
ax4 = axes[1, 1]
cumulative_damage = session.metrics['cumulative_damage']
if cumulative_damage:
    ax4.plot(range(len(cumulative_damage)), cumulative_damage, color='purple', linewidth=2)
    ax4.set_xlabel('Episode')
    ax4.set_ylabel('Damage ($)')
    ax4.set_title('Cumulative Damage')
    ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Summary statistics: mean ± standard deviation of rewards and lives lost,
# plus the best and worst reward with the index of their first occurrence.
print("\n📈 Training Summary:")

reward_mean, reward_std = np.mean(rewards), np.std(rewards)
print(f"   Mean Reward: {reward_mean:.2f} ± {reward_std:.2f}")

lives_mean, lives_std = np.mean(lives_lost), np.std(lives_lost)
print(f"   Mean Lives Lost: {lives_mean:.1f} ± {lives_std:.1f}")

best_reward = max(rewards)
print(f"   Best Reward: {best_reward:.2f} (Episode {rewards.index(best_reward)})")

worst_reward = min(rewards)
print(f"   Worst Reward: {worst_reward:.2f} (Episode {rewards.index(worst_reward)})")

## 💾 Download Results

Download the trained model and metrics for use in the local dashboard.

In [None]:
# Package the checkpoint, metrics, config snapshot and a JSON summary into a
# zip archive and hand it to the browser for download.
from google.colab import files
import json
import os
import shutil

# Imported here as well so this cell does not depend on the visualization
# cell having been run first.
import numpy as np

print("📦 Preparing files for download...")

# Create download package
download_dir = f"colab_results_{session.run.name}"
os.makedirs(download_dir, exist_ok=True)

checkpoint_path = session.checkpoint_dir / "final.pt"
config_snapshot = session.run_dir / "config.yaml"

# (source path, file name inside the package, label used in messages)
artifacts = [
    (checkpoint_path, "model.pt", "Checkpoint"),
    (session.metrics_file, "metrics.json", "Metrics"),
    (config_snapshot, "config.yaml", "Config"),
]
for source, target_name, label in artifacts:
    if source.exists():
        shutil.copy(source, f"{download_dir}/{target_name}")
        print(f"   ✅ {label} copied: {target_name}")
    else:
        # Report the gap instead of shipping an incomplete package silently.
        print(f"   ⚠️  {label} not found, skipped: {source}")

# Create summary
episode_rewards = session.metrics['episode_rewards']
lives_lost_history = session.metrics['lives_lost']
summary = {
    'run_name': session.run.name,
    'config': selected_config,
    'episodes': session.total_episodes,
    'training_time_minutes': elapsed / 60,
    # Cast to built-in float like the means below: NumPy/tensor scalars are
    # not JSON serializable. Empty histories are recorded as null.
    'final_reward': float(episode_rewards[-1]) if len(episode_rewards) else None,
    'mean_reward': float(np.mean(episode_rewards)) if len(episode_rewards) else None,
    'mean_lives_lost': float(np.mean(lives_lost_history)) if len(lives_lost_history) else None,
}
with open(f"{download_dir}/summary.json", 'w') as f:
    json.dump(summary, f, indent=2)
print(f"   ✅ Summary created: summary.json")

# Create zip file
print("\n🗜️  Creating zip archive...")
shutil.make_archive(download_dir, 'zip', download_dir)
zip_file = f"{download_dir}.zip"

print(f"\n✅ Package ready: {zip_file}")
print(f"   Size: {os.path.getsize(zip_file) / 1e6:.1f} MB")

# Download
print("\n📥 Downloading...")
files.download(zip_file)

print("\n✅ Download complete!")
print("\n📋 Next Steps:")
print("   1. Extract the zip file locally")
print("   2. Copy model.pt to your runs directory")
print("   3. Open dashboard → Evaluation → Load model.pt")
print("   4. Run evaluation and compare with other models")

## 🔄 Batch Training (Optional)

Train multiple configurations in sequence.

In [None]:
# OPTIONAL: Train multiple configs
# Uncomment and modify as needed
#
# Prerequisite: run the training cell first; it imports TrainingSession and
# defines progress_callback, both of which the loop below uses.
#
# NOTE: the loop rebinds the notebook-level `config_path` and `session` on
# every iteration. progress_callback reads the global `session`, so it reports
# against whichever session is currently training. After the loop, only the
# last session is left for the results and download cells, while
# `selected_config` and `elapsed` still hold values from the single-run cells.

# configs_to_train = [
#     'config/experiments/quick_test.yaml',
#     'config/experiments/reward_tuning.yaml',
# ]

# for config_path in configs_to_train:
#     print(f"\n{'='*60}")
#     print(f"Training: {config_path}")
#     print(f"{'='*60}\n")
#     
#     session = TrainingSession(config_path)
#     session.add_callback(progress_callback)
#     session.train()
#     
#     print(f"✅ Completed: {session.run.name}")
#     print(f"   Final reward: {session.metrics['episode_rewards'][-1]:.2f}")

# Placeholder output so the cell does something while the loop is commented out.
print("Batch training cell (commented out by default)")