## 📋 1. Setup & Environment Check

In [None]:
import sys
import os
from pathlib import Path

# Check GPU
gpu_info = !nvidia-smi
print('\n'.join(gpu_info))

# Check CUDA
import torch
print(f"\n{'='*60}")
print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA Version: {torch.version.cuda}")
    print(f"GPU Device: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
print(f"{'='*60}\n")

## 💾 2. Mount Google Drive

In [None]:
from google.colab import drive
import os

# Attach Google Drive to the Colab VM.
drive.mount('/content/drive')

# Root folder on Drive that holds every artifact of this project.
PROJECT_DIR = Path('/content/drive/MyDrive/RL_Robot_Navigation')
PROJECT_DIR.mkdir(parents=True, exist_ok=True)

# One subfolder per artifact type; existing folders are left untouched.
for _subdir in ('checkpoints', 'results', 'logs', 'videos'):
    (PROJECT_DIR / _subdir).mkdir(exist_ok=True)

print(f"‚úÖ Project directory: {PROJECT_DIR}")
print(f"‚úÖ Subdirectories created")

## 📥 3. Clone Repository & Install Dependencies

In [None]:
# Clone repository (replace with your GitHub URL)
REPO_URL = "https://github.com/YOUR_USERNAME/robot_navigation_rl.git"
REPO_DIR = Path('/content/robot_navigation_rl')

if REPO_DIR.exists():
    print("üìÇ Repository already exists, pulling latest changes...")
    !cd {REPO_DIR} && git pull
else:
    print("üì• Cloning repository...")
    !git clone {REPO_URL} {REPO_DIR}

# Change to project directory
os.chdir(REPO_DIR)
print(f"\n‚úÖ Working directory: {os.getcwd()}")

In [None]:
# Install dependencies
# Every `!pip` line is an IPython shell escape; `-q` keeps pip's output quiet.
print("üì¶ Installing dependencies...")
# Deep-learning stack
!pip install -q torch torchvision torchaudio
# RL environment API, numerics, plotting and tabular data
!pip install -q gymnasium numpy matplotlib pandas
# Experiment tracking and hyperparameter search
!pip install -q tensorboard wandb optuna
# Progress bars and resource monitoring
!pip install -q tqdm psutil GPUtil
# Image and video output
!pip install -q imageio imageio-ffmpeg pillow

# Install project in development mode
# `-e .` installs whatever package lives in the current working directory, so
# this relies on the earlier cell having changed into the repository checkout.
!pip install -q -e .

print("\n‚úÖ All dependencies installed!")

## ⚙️ 4. Configuration

In [None]:
# Training configuration, written one section at a time and merged below.
# The merge order fixes the key order, which is also the print order.
_environment_cfg = {
    'env_size': (10, 10),
    'num_obstacles': 5,
    'num_goals': 3,
}

_agent_cfg = {
    'algorithm': 'DQN',  # or 'RainbowDQN', 'AdaptiveDQN'
    'hidden_dims': [128, 128],
    'learning_rate': 1e-3,
    'gamma': 0.99,
    'batch_size': 64,
    'buffer_size': 50000,
    'target_update': 500,
}

_training_cfg = {
    'num_episodes': 2000,
    'max_steps': 200,
    'epsilon_start': 1.0,
    'epsilon_end': 0.01,
    'epsilon_decay': 0.995,
}

_checkpoint_cfg = {
    'checkpoint_interval': 100,
    'save_best_only': True,
}

# Output locations, all under the project folder on Google Drive.
_path_cfg = {
    'checkpoint_dir': str(PROJECT_DIR / 'checkpoints'),
    'results_dir': str(PROJECT_DIR / 'results'),
    'log_dir': str(PROJECT_DIR / 'logs'),
    'video_dir': str(PROJECT_DIR / 'videos'),
}

_resource_cfg = {
    'use_amp': True,  # Mixed precision
    'num_workers': 2,  # Data loading
    'prefetch_factor': 2,
}

_monitoring_cfg = {
    'wandb_project': 'robot-navigation-colab',
    'wandb_enabled': False,  # Set to True if using WandB
    'tensorboard_enabled': True,
}

config = {
    **_environment_cfg,
    **_agent_cfg,
    **_training_cfg,
    **_checkpoint_cfg,
    **_path_cfg,
    **_resource_cfg,
    **_monitoring_cfg,
}

# Email notification (optional)
NOTIFICATION_EMAIL = "your_email@example.com"  # Change this
SEND_EMAIL_NOTIFICATION = False  # Set to True to enable

# Echo the effective settings, one "key: value" line each.
print("‚öôÔ∏è Configuration:")
for key in config:
    value = config[key]
    print(f"  {key}: {value}")

## 🔄 5. Auto-Resume Helper

In [None]:
import json
import glob
from pathlib import Path

def find_latest_checkpoint(checkpoint_dir):
    """Return the most recently modified ``*.pt`` file in ``checkpoint_dir``.

    Args:
        checkpoint_dir: Directory to search (``str`` or ``Path``). Only its
            direct children are considered.

    Returns:
        The newest checkpoint's path as a ``str``, or ``None`` when the
        directory holds no ``*.pt`` file (including when it does not exist).
    """
    # Escape the directory part so glob metacharacters in the path itself
    # (e.g. "run[1]") match literally; only "*.pt" is treated as a pattern.
    pattern = os.path.join(glob.escape(str(Path(checkpoint_dir))), '*.pt')

    # Newest by modification time; default=None covers the empty case.
    return max(glob.glob(pattern), key=os.path.getmtime, default=None)

def load_training_state(checkpoint_path):
    """Load a checkpoint dictionary written by ``torch.save``.

    Args:
        checkpoint_path: Path to the checkpoint file (``str`` or ``Path``),
            or ``None``.

    Returns:
        The unpickled checkpoint object, or ``None`` when ``checkpoint_path``
        is ``None`` or does not exist.
    """
    if checkpoint_path is None or not Path(checkpoint_path).exists():
        return None

    # A checkpoint written on a GPU runtime must still load on a CPU-only
    # one; with CUDA available the default device mapping is kept.
    map_location = None if torch.cuda.is_available() else 'cpu'

    # This notebook's checkpoints store metric lists next to the weights, and
    # those can contain NumPy scalars (e.g. np.mean results). The restricted
    # unpickler that is the default since PyTorch 2.6 rejects them.
    # NOTE: weights_only=False runs the full pickle machinery, so only load
    # checkpoint files you wrote yourself.
    return torch.load(
        checkpoint_path,
        map_location=map_location,
        weights_only=False,
    )

# Look for a previous checkpoint and let the user decide whether to resume.
# config['resume_from'] ends up as the checkpoint path or None.
latest_checkpoint = find_latest_checkpoint(config['checkpoint_dir'])
if not latest_checkpoint:
    config['resume_from'] = None
    print("üÜï No checkpoint found, starting fresh training")
else:
    print(f"‚úÖ Found checkpoint: {latest_checkpoint}")
    answer = input("Resume from this checkpoint? (y/n): ")
    # Only a literal "y" (any case) counts as yes.
    resume_training = answer.lower() == 'y'
    config['resume_from'] = latest_checkpoint if resume_training else None
    print(
        "üîÑ Will resume training from checkpoint"
        if resume_training
        else "üÜï Starting fresh training"
    )

## 🏋️ 6. Training Setup

In [None]:
import sys
sys.path.insert(0, str(REPO_DIR))

# Import project modules
from src.agents.dqn_agent import DQNAgent
from src.environment.robot_env import RobotNavigationEnv
from src.training.trainer import Trainer
from src.utils.logger import setup_logger

import torch
import numpy as np
from tqdm.notebook import tqdm

# Fix the PyTorch and NumPy RNG seeds (same value for both).
torch.manual_seed(42)
np.random.seed(42)

# Prefer the GPU whenever PyTorch can see one.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"\nüñ•Ô∏è Using device: {device}")

# Environment: grid size and object counts come straight from `config`.
_env_kwargs = {
    'size': config['env_size'],
    'num_obstacles': config['num_obstacles'],
    'num_goals': config['num_goals'],
}
env = RobotNavigationEnv(**_env_kwargs)

# Agent: input/output sizes are read from the environment's spaces, the
# remaining hyperparameters from `config`.
_agent_kwargs = {
    'state_dim': env.observation_space.shape[0],
    'action_dim': env.action_space.n,
    'hidden_dims': config['hidden_dims'],
    'learning_rate': config['learning_rate'],
    'gamma': config['gamma'],
    'buffer_size': config['buffer_size'],
    'batch_size': config['batch_size'],
    'device': device,
}
agent = DQNAgent(**_agent_kwargs)

# Logger writing under the Drive log directory.
logger = setup_logger(
    tensorboard=config['tensorboard_enabled'],
    log_dir=config['log_dir'],
)

print("\n‚úÖ Training setup complete!")

## 🚀 7. Training Loop with Auto-Save

In [None]:
import time
import signal
from datetime import datetime, timedelta

# Colab session budget, in seconds. The clock starts when this cell runs.
COLAB_TIMEOUT = 12 * 3600  # 12 hours
SAVE_BEFORE_TIMEOUT = 10 * 60  # Save 10 minutes before timeout
start_time = time.time()

def check_timeout():
    """Return True once the run is within SAVE_BEFORE_TIMEOUT of COLAB_TIMEOUT.

    Elapsed time is measured from the module-level ``start_time``.
    """
    safe_window = COLAB_TIMEOUT - SAVE_BEFORE_TIMEOUT
    return (time.time() - start_time) > safe_window

def _save_checkpoint(path, next_episode):
    """Serialize the agent and the metric history to ``path`` via ``torch.save``.

    Reads the notebook-level training state (``agent``, ``best_reward`` and
    the four metric lists) at call time.

    Args:
        path: Destination file.
        next_episode: Stored under ``'episode'``; the episode index training
            should continue from when this checkpoint is resumed.
    """
    torch.save({
        'episode': next_episode,
        'agent_state': agent.state_dict(),
        'best_reward': best_reward,
        'metrics': {
            'rewards': episode_rewards,
            'lengths': episode_lengths,
            'success_rates': success_rates,
            'losses': losses
        }
    }, path)


# Training metrics: one entry per finished episode. `losses` only gets an
# entry for episodes in which at least one training step ran.
episode_rewards = []
episode_lengths = []
success_rates = []
losses = []
best_reward = float('-inf')

# Load checkpoint if resuming. `.get` keeps this cell usable when the
# resume-prompt cell was skipped and the key was never set.
start_episode = 0
if config.get('resume_from'):
    checkpoint = load_training_state(config['resume_from'])
    if checkpoint:
        agent.load_state_dict(checkpoint['agent_state'])
        start_episode = checkpoint['episode']
        best_reward = checkpoint.get('best_reward', float('-inf'))
        # Restore the metric history so plots, success rates and the final
        # summary cover the whole run, not just this session. Checkpoints
        # without a 'metrics' entry simply start with empty histories.
        saved_metrics = checkpoint.get('metrics') or {}
        episode_rewards = list(saved_metrics.get('rewards', []))
        episode_lengths = list(saved_metrics.get('lengths', []))
        success_rates = list(saved_metrics.get('success_rates', []))
        losses = list(saved_metrics.get('losses', []))
        print(f"‚úÖ Resumed from episode {start_episode}")

# Progress bar
pbar = tqdm(range(start_episode, config['num_episodes']), desc="Training")

# Defined before the loop so the interrupt handler below works even when
# Ctrl-C arrives before the first iteration has bound `episode`.
episode = start_episode
# Index a resumed run should start from: the current episode while it is
# still running, the following one as soon as its metrics are recorded.
next_episode = start_episode

try:
    for episode in pbar:
        next_episode = episode

        # Check for timeout
        if check_timeout():
            print("\n‚ö†Ô∏è Approaching Colab timeout, saving checkpoint...")
            checkpoint_path = Path(config['checkpoint_dir']) / f'checkpoint_timeout_ep{episode}.pt'
            _save_checkpoint(checkpoint_path, next_episode)
            print(f"üíæ Saved checkpoint: {checkpoint_path}")
            break

        # Exploration rate depends only on the episode index, so compute it
        # once per episode; this also keeps it defined when max_steps is 0.
        epsilon = max(
            config['epsilon_end'],
            config['epsilon_start'] * (config['epsilon_decay'] ** episode)
        )

        # Training episode
        # NOTE(review): assumes env.reset() returns only the observation and
        # env.step() a 4-tuple; confirm against RobotNavigationEnv, since the
        # Gymnasium API returns (obs, info) and a 5-tuple instead.
        state = env.reset()
        episode_reward = 0
        episode_loss = []
        done = False
        steps = 0

        while not done and steps < config['max_steps']:
            # Select action
            action = agent.select_action(state, epsilon)

            # Take step
            next_state, reward, done, info = env.step(action)

            # Store transition
            agent.store_transition(state, action, reward, next_state, done)

            # Train agent
            if agent.can_train():
                loss = agent.train_step()
                episode_loss.append(loss)

            episode_reward += reward
            state = next_state
            steps += 1

        # Update target network
        # NOTE(review): 'target_update' is counted in episodes here; with the
        # default config (500 vs. 2000 episodes) that is four syncs in total.
        # Confirm the intended unit.
        if episode % config['target_update'] == 0:
            agent.update_target_network()

        # Record metrics
        episode_rewards.append(episode_reward)
        episode_lengths.append(steps)
        mean_loss = None
        if episode_loss:
            # Plain float keeps saved metrics free of NumPy scalar objects.
            mean_loss = float(np.mean(episode_loss))
            losses.append(mean_loss)

        # Calculate success rate (last 100 episodes); success = reward > 0
        recent_rewards = episode_rewards[-100:]
        success_rate = sum(r > 0 for r in recent_rewards) / len(recent_rewards)
        success_rates.append(success_rate)

        # This episode is fully recorded; a resume continues with the next.
        next_episode = episode + 1

        # Update progress bar
        pbar.set_postfix({
            'reward': f'{episode_reward:.2f}',
            'success': f'{success_rate:.2%}',
            'epsilon': f'{epsilon:.3f}',
            'steps': steps
        })

        # Log to TensorBoard
        if config['tensorboard_enabled']:
            logger.log_scalar('reward', episode_reward, episode)
            logger.log_scalar('success_rate', success_rate, episode)
            logger.log_scalar('epsilon', epsilon, episode)
            if mean_loss is not None:
                logger.log_scalar('loss', mean_loss, episode)

        # Save checkpoint
        if episode % config['checkpoint_interval'] == 0:
            # best_reward tracks the best reward seen at a checkpoint episode
            # in both modes, so the final summary never reports -inf merely
            # because save_best_only is off.
            improved = episode_reward > best_reward
            if improved:
                best_reward = episode_reward

            if config['save_best_only']:
                if improved:
                    checkpoint_path = Path(config['checkpoint_dir']) / 'best_model.pt'
                    _save_checkpoint(checkpoint_path, next_episode)
            else:
                checkpoint_path = Path(config['checkpoint_dir']) / f'checkpoint_ep{episode}.pt'
                _save_checkpoint(checkpoint_path, next_episode)

except KeyboardInterrupt:
    print("\n‚ö†Ô∏è Training interrupted, saving checkpoint...")
    checkpoint_path = Path(config['checkpoint_dir']) / f'checkpoint_interrupted_ep{episode}.pt'
    _save_checkpoint(checkpoint_path, next_episode)
    print(f"üíæ Saved checkpoint: {checkpoint_path}")

print("\n‚úÖ Training complete!")
print(f"üìä Final metrics:")
print(f"  - Episodes trained: {len(episode_rewards)}")
print(f"  - Best reward: {best_reward:.2f}")
# The last two lines need at least one recorded episode; skip them when the
# loop never ran (e.g. resumed from an already finished run).
if success_rates:
    print(f"  - Final success rate: {success_rates[-1]:.2%}")
if episode_rewards:
    print(f"  - Average reward (last 100): {np.mean(episode_rewards[-100:]):.2f}")

## 📊 8. Quick Visualizations

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def _plot_ma50(ax, series, **line_style):
    """Overlay a 50-point moving average of ``series`` on ``ax``.

    Nothing is drawn unless ``series`` has more than 50 entries.
    """
    if len(series) > 50:
        smoothed = np.convolve(series, np.ones(50)/50, mode='valid')
        # 'valid' mode drops the first 49 points, so the x-axis starts at 49.
        ax.plot(range(49, len(series)), smoothed, linewidth=2, **line_style)


# 2x2 summary figure: rewards, success rate, episode lengths, loss.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('Training Results', fontsize=16, fontweight='bold')
(ax1, ax2), (ax3, ax4) = axes

# Top-left: raw per-episode reward plus its moving average.
ax1.plot(episode_rewards, alpha=0.3, label='Raw')
_plot_ma50(ax1, episode_rewards, label='MA(50)')
ax1.set_xlabel('Episode')
ax1.set_ylabel('Reward')
ax1.set_title('Episode Rewards')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Top-right: rolling success rate against the 70% reference line.
ax2.plot(success_rates, linewidth=2, color='green')
ax2.axhline(y=0.7, color='r', linestyle='--', label='70% Target')
ax2.set_xlabel('Episode')
ax2.set_ylabel('Success Rate')
ax2.set_title('Success Rate (Last 100 Episodes)')
ax2.set_ylim([0, 1])
ax2.legend()
ax2.grid(True, alpha=0.3)

# Bottom-left: steps per episode plus moving average.
ax3.plot(episode_lengths, alpha=0.5)
_plot_ma50(ax3, episode_lengths, color='orange')
ax3.set_xlabel('Episode')
ax3.set_ylabel('Steps')
ax3.set_title('Episode Lengths')
ax3.grid(True, alpha=0.3)

# Bottom-right: mean training loss on a log scale; left empty when no
# training step was ever recorded.
if losses:
    ax4.plot(losses, alpha=0.5)
    _plot_ma50(ax4, losses, color='red')
    ax4.set_xlabel('Episode')
    ax4.set_ylabel('Loss')
    ax4.set_title('Training Loss')
    ax4.set_yscale('log')
    ax4.grid(True, alpha=0.3)

# Save the figure to the Drive results folder, then display it.
summary_path = PROJECT_DIR / 'results' / 'training_summary.png'
plt.tight_layout()
plt.savefig(str(summary_path), dpi=150, bbox_inches='tight')
plt.show()

print("\nüíæ Plot saved to:", summary_path)

## 📧 9. Email Notification (Optional)

In [None]:
# Optional end-of-training notification.
# As written, this cell never sends anything: it only formats a subject and
# body and prints them. The smtplib / email.mime imports are not used yet.
# `datetime` is not imported in this cell; it relies on an earlier cell's
# `from datetime import datetime`.
if SEND_EMAIL_NOTIFICATION:
    try:
        import smtplib
        from email.mime.text import MIMEText
        from email.mime.multipart import MIMEMultipart
        
        # Email content
        # The leading whitespace inside the triple-quoted body is part of the
        # text and is printed as-is.
        subject = "üöÄ RL Training Complete - Google Colab"
        body = f"""
        Training completed successfully!
        
        üìä Summary:
        - Episodes: {len(episode_rewards)}
        - Best Reward: {best_reward:.2f}
        - Final Success Rate: {success_rates[-1]:.2%}
        - Average Reward (last 100): {np.mean(episode_rewards[-100:]):.2f}
        
        üìÅ Results saved to: {PROJECT_DIR}
        
        Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
        """
        
        # Note: You'll need to configure SMTP settings
        # For Gmail, you may need an app password
        # This is a placeholder - configure with your SMTP server
        print("üìß Email notification:")
        print(subject)
        print(body)
        print("\n‚ö†Ô∏è Configure SMTP settings to enable actual email sending")
        
    # Any failure above (e.g. an IndexError from an empty success_rates list)
    # is reported here instead of stopping the notebook.
    except Exception as e:
        print(f"‚ö†Ô∏è Could not send email: {e}")
else:
    print("üìß Email notification disabled")

## 💾 10. Save Final Results

In [None]:
import json
from datetime import datetime

# Collect the run summary as plain Python numbers so it is JSON-serializable.
metrics = {
    'training_date': datetime.now().isoformat(),
    'config': config,
    'episodes_trained': len(episode_rewards),
    'best_reward': float(best_reward),
    'final_success_rate': float(success_rates[-1]) if success_rates else 0.0,
    'episode_rewards': list(map(float, episode_rewards)),
    'episode_lengths': list(map(int, episode_lengths)),
    'success_rates': list(map(float, success_rates)),
    'losses': list(map(float, losses)),
}

# Timestamped file name, so repeated runs do not overwrite each other.
_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
metrics_path = PROJECT_DIR / 'results' / f'metrics_{_stamp}.json'
with metrics_path.open('w') as f:
    json.dump(metrics, f, indent=2)

print(f"‚úÖ Metrics saved to: {metrics_path}")

# Final model: agent weights plus the config that produced them.
final_model_path = PROJECT_DIR / 'checkpoints' / 'final_model.pt'
_final_state = {
    'episode': len(episode_rewards),
    'agent_state': agent.state_dict(),
    'best_reward': best_reward,
    'config': config,
}
torch.save(_final_state, final_model_path)

print(f"‚úÖ Final model saved to: {final_model_path}")

# Closing banner and follow-up hints.
_rule = "=" * 60
print("\n" + _rule)
print("üéâ ALL DONE! Training completed successfully.")
print(_rule)
print(f"\nüìÅ All results saved to Google Drive: {PROJECT_DIR}")
print("\nüí° Next steps:")
for _step in (
    "   1. Download results from Google Drive",
    "   2. Run visualizations locally",
    "   3. Evaluate model on test scenarios",
    "   4. Resume training if needed (checkpoint saved)",
):
    print(_step)

## 🎬 11. Test Trained Agent (Optional)

In [None]:
# Roll out the trained agent greedily for a few episodes and summarize.
print("üß™ Testing trained agent...\n")

test_episodes = 10
test_rewards = []
test_successes = []

for ep in range(test_episodes):
    state = env.reset()
    episode_reward = 0
    steps = 0
    done = False

    # Stop when the environment reports done or the step cap is reached.
    while steps < config['max_steps'] and not done:
        # epsilon=0 means the agent always takes its greedy action
        action = agent.select_action(state, epsilon=0.0)
        state, reward, done, info = env.step(action)
        episode_reward += reward
        steps += 1

    # An episode counts as a success when its total reward is positive.
    succeeded = episode_reward > 0
    test_rewards.append(episode_reward)
    test_successes.append(succeeded)

    print(f"  Episode {ep+1}: Reward={episode_reward:.2f}, Steps={steps}, Success={succeeded}")

success_fraction = sum(test_successes)/len(test_successes)
print(f"\nüìä Test Results:")
print(f"  - Average Reward: {np.mean(test_rewards):.2f} ¬± {np.std(test_rewards):.2f}")
print(f"  - Success Rate: {success_fraction:.2%}")
print(f"  - Best Reward: {max(test_rewards):.2f}")
print(f"  - Worst Reward: {min(test_rewards):.2f}")