## 📋 1. Environment Setup

In [None]:
import os
import sys
from pathlib import Path

# Report the PyTorch build and, when a CUDA device is visible, its basic properties.
import torch
print(f"{'='*60}")
print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA Version: {torch.version.cuda}")
    print(f"GPU Device: {torch.cuda.get_device_name(0)}")
    # total_memory is reported in bytes; divide by 1e9 to display gigabytes.
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
print(f"{'='*60}\n")

# Standard Kaggle locations used by the rest of the notebook.
KAGGLE_WORKING = Path('/kaggle/working')
KAGGLE_INPUT = Path('/kaggle/input')
KAGGLE_TEMP = Path('/kaggle/temp')

# Create the output directories. parents=True keeps this cell from raising
# FileNotFoundError when /kaggle/working itself does not exist yet (e.g. when the
# notebook is run outside Kaggle); exist_ok=True makes re-running the cell safe.
for output_subdir in ('checkpoints', 'results', 'logs'):
    (KAGGLE_WORKING / output_subdir).mkdir(parents=True, exist_ok=True)

print("üìÅ Kaggle directories:")
print(f"  Working: {KAGGLE_WORKING}")
print(f"  Input: {KAGGLE_INPUT}")
print(f"  Temp: {KAGGLE_TEMP}")

## 📦 2. Install Dependencies

In [None]:
%%capture
# Install required packages (silent installation)
!pip install -q gymnasium
!pip install -q tensorboard wandb optuna
!pip install -q psutil GPUtil
!pip install -q imageio imageio-ffmpeg

print("‚úÖ Dependencies installed!")

## 📥 3. Load Dataset (if using Kaggle Dataset)

In [None]:
# If you've uploaded the project as a Kaggle dataset:
# 1. Upload your project to Kaggle Datasets
# 2. Add it as input to this notebook
# 3. Uncomment and modify the path below

# PROJECT_DATASET = KAGGLE_INPUT / 'robot-navigation-rl'
# if PROJECT_DATASET.exists():
#     sys.path.insert(0, str(PROJECT_DATASET))
#     print(f"‚úÖ Loaded project from dataset: {PROJECT_DATASET}")
# else:
#     print("‚ö†Ô∏è Project dataset not found")

# Alternative: Clone from GitHub
REPO_URL = "https://github.com/YOUR_USERNAME/robot_navigation_rl.git"
REPO_DIR = KAGGLE_WORKING / 'robot_navigation_rl'

if not REPO_DIR.exists():
    print("üì• Cloning repository...")
    !git clone {REPO_URL} {REPO_DIR}
    sys.path.insert(0, str(REPO_DIR))
    print(f"‚úÖ Repository cloned to: {REPO_DIR}")
else:
    sys.path.insert(0, str(REPO_DIR))
    print(f"‚úÖ Using existing repository: {REPO_DIR}")

## ⚙️ 4. Configuration

In [None]:
# Single source of truth for every tunable used by the training cells below.
# The dict is assembled section by section; insertion order is the print order.
config = {}

# --- Environment layout ---
config.update(
    env_size=(10, 10),
    num_obstacles=5,
    num_goals=3,
)

# --- Agent hyper-parameters ---
# NOTE(review): the label says 'RainbowDQN', but the setup cell instantiates
# DQNAgent — confirm which algorithm is actually intended.
config.update(
    algorithm='RainbowDQN',
    hidden_dims=[256, 256],
    learning_rate=1e-3,
    gamma=0.99,
    batch_size=128,
    buffer_size=100_000,
    target_update=500,
)

# --- Training schedule (sized with the Kaggle session limit in mind) ---
config.update(
    num_episodes=3000,
    max_steps=200,
    epsilon_start=1.0,
    epsilon_end=0.01,
    epsilon_decay=0.995,
)

# --- Kaggle-specific checkpointing behaviour ---
config.update(
    checkpoint_interval=200,
    save_best_only=True,
    auto_save_before_timeout=True,
    timeout_buffer_minutes=15,  # stop and save this many minutes before the session limit
)

# --- Output locations (stored as plain strings so the config stays JSON-serialisable) ---
config.update(
    checkpoint_dir=str(KAGGLE_WORKING / 'checkpoints'),
    results_dir=str(KAGGLE_WORKING / 'results'),
    log_dir=str(KAGGLE_WORKING / 'logs'),
)

# --- Resource optimisation ---
# NOTE(review): these three keys are not read by any cell visible in this
# notebook — confirm whether the project code consumes them.
config.update(
    use_amp=True,
    gradient_accumulation_steps=1,
    num_workers=2,
)

print("‚öôÔ∏è Configuration loaded")
for name, setting in config.items():
    print(f"  {name}: {setting}")

## 🏋️ 5. Setup Training

In [None]:
# Import project modules (made importable by the repository/dataset cell above).
from src.agents.dqn_agent import DQNAgent
from src.environment.robot_env import RobotNavigationEnv
from src.utils.logger import setup_logger

import random
import numpy as np
from tqdm.notebook import tqdm
import time

# Seed every random number generator the run may draw from. Python's `random`
# is included because project code may use it (e.g. for epsilon-greedy action
# selection); torch.manual_seed covers CPU and CUDA generators.
# NOTE(review): the environment itself is not seeded here — confirm whether
# RobotNavigationEnv exposes a seed argument.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"üñ•Ô∏è Device: {device}\n")

# Create environment
env = RobotNavigationEnv(
    size=config['env_size'],
    num_obstacles=config['num_obstacles'],
    num_goals=config['num_goals']
)
print(f"üåç Environment created")

# Create agent. State/action sizes are read from the environment's spaces.
agent = DQNAgent(
    state_dim=env.observation_space.shape[0],
    action_dim=env.action_space.n,
    hidden_dims=config['hidden_dims'],
    learning_rate=config['learning_rate'],
    gamma=config['gamma'],
    buffer_size=config['buffer_size'],
    batch_size=config['batch_size'],
    device=device
)
# NOTE(review): this prints config['algorithm'] ('RainbowDQN' by default) although
# the object created above is a DQNAgent — confirm the label matches the agent.
print(f"ü§ñ Agent created: {config['algorithm']}")

# Logger
logger = setup_logger(log_dir=config['log_dir'], tensorboard=True)
print(f"üìù Logger initialized\n")

print("‚úÖ Training setup complete!")

## 🚀 6. Training with GPU Quota Management

In [None]:
import json
from datetime import datetime

# Session budget, in seconds. The 9-hour figure is the per-session limit this
# notebook assumes for Kaggle GPU sessions — verify against current Kaggle quotas.
KAGGLE_SESSION_TIMEOUT = 9 * 60 * 60
# Safety margin before the limit at which training stops and saves, in seconds.
SAVE_BEFORE_TIMEOUT = 60 * config['timeout_buffer_minutes']

# Reference point for all elapsed-time calculations; reset each time this cell runs.
start_time = time.time()

def should_save_and_exit():
    """Return True once elapsed time has passed the session limit minus the save buffer."""
    seconds_running = time.time() - start_time
    cutoff = KAGGLE_SESSION_TIMEOUT - SAVE_BEFORE_TIMEOUT
    return seconds_running > cutoff

def get_remaining_time():
    """Return the hours left until the session limit, never less than zero.

    The save buffer is not subtracted: this is time until the hard limit.
    """
    hours_left = (KAGGLE_SESSION_TIMEOUT - (time.time() - start_time)) / 3600
    if hours_left > 0:
        return hours_left
    return 0

# Metrics tracking. Lists hold one entry per completed episode, except 'losses',
# which only gets an entry for episodes in which at least one training step ran.
metrics = {
    'episode_rewards': [],
    'episode_lengths': [],
    'success_rates': [],
    'losses': [],
    'best_reward': float('-inf'),
    'training_time': 0
}

# Resume from the most recent checkpoint when one exists.
checkpoint_path = Path(config['checkpoint_dir']) / 'latest_checkpoint.pt'
start_episode = 0

if checkpoint_path.exists():
    print(f"üîÑ Loading checkpoint: {checkpoint_path}")
    # map_location: lets a checkpoint written on a GPU session be resumed on
    #   whatever device this session has (including CPU-only).
    # weights_only=False: the checkpoint stores plain Python objects (metrics,
    #   config, timestamp) next to the tensors, which the restricted loader that
    #   newer PyTorch versions use by default may refuse. This performs full
    #   unpickling, so only load checkpoint files written by this notebook.
    checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=False)
    agent.load_state_dict(checkpoint['agent_state'])
    # 'episode' holds the index of the next episode to run.
    start_episode = checkpoint['episode']
    metrics.update(checkpoint['metrics'])
    print(f"‚úÖ Resumed from episode {start_episode}\n")

print(f"üèÅ Starting training from episode {start_episode}")
print(f"‚è±Ô∏è Estimated remaining time: {get_remaining_time():.2f} hours\n")

# Training loop
pbar = tqdm(range(start_episode, config['num_episodes']), desc="Training", initial=start_episode, total=config['num_episodes'])

# Index of the next episode that still has to be run. It is advanced only after
# an episode completes, so every checkpoint (periodic, best, final) stores the
# same, correct resume point — even when the loop body never executes.
next_episode = start_episode

# Training time accumulated by earlier sessions (restored from a checkpoint, else 0),
# so resumed runs report the total rather than only the current session.
previous_training_time = metrics.get('training_time', 0)

best_model_path = Path(config['checkpoint_dir']) / 'best_model.pt'


def _checkpoint_payload(resume_episode, **extra):
    """Bundle the state needed to resume; `resume_episode` is the next episode to run."""
    return {
        'episode': resume_episode,
        'agent_state': agent.state_dict(),
        'metrics': metrics,
        'config': config,
        'timestamp': datetime.now().isoformat(),
        **extra,
    }


try:
    for episode in pbar:
        # Stop early enough to leave time for the final save.
        if should_save_and_exit():
            print("\n‚ö†Ô∏è Approaching Kaggle timeout, saving and exiting...")
            break

        # NOTE(review): reset()/step() are unpacked with the classic Gym signatures
        # (reset() -> obs, step() -> 4-tuple). If RobotNavigationEnv follows the
        # Gymnasium API, this unpacking must change — confirm.
        state = env.reset()
        episode_reward = 0
        episode_loss = []
        done = False
        steps = 0

        # Exponential epsilon decay per episode, floored at epsilon_end.
        epsilon = max(
            config['epsilon_end'],
            config['epsilon_start'] * (config['epsilon_decay'] ** episode)
        )

        while not done and steps < config['max_steps']:
            action = agent.select_action(state, epsilon)
            next_state, reward, done, info = env.step(action)

            agent.store_transition(state, action, reward, next_state, done)

            if agent.can_train():
                loss = agent.train_step()
                episode_loss.append(loss)

            episode_reward += reward
            state = next_state
            steps += 1

        # NOTE(review): the target network is synced every config['target_update']
        # *episodes*, not training steps — confirm this interval is intended.
        if episode % config['target_update'] == 0:
            agent.update_target_network()

        # Record metrics as plain Python numbers so checkpoints and JSON exports
        # never contain numpy scalar objects.
        episode_reward = float(episode_reward)
        metrics['episode_rewards'].append(episode_reward)
        metrics['episode_lengths'].append(steps)
        if episode_loss:
            metrics['losses'].append(float(np.mean(episode_loss)))

        # Success rate: share of the last (up to) 100 episodes with positive reward.
        recent_rewards = metrics['episode_rewards'][-100:]
        success_rate = sum(r > 0 for r in recent_rewards) / len(recent_rewards)
        metrics['success_rates'].append(success_rate)

        is_new_best = episode_reward > metrics['best_reward']
        if is_new_best:
            metrics['best_reward'] = episode_reward

        # The episode is complete; from here on a resume should start after it.
        next_episode = episode + 1

        pbar.set_postfix({
            'reward': f"{episode_reward:.2f}",
            'success': f"{success_rate:.2%}",
            'best': f"{metrics['best_reward']:.2f}",
            'time_left': f"{get_remaining_time():.1f}h"
        })

        # Save the best model whenever a new best reward is reached. Previously this
        # happened only if the new best coincided with a checkpoint interval, so
        # best_model.pt was usually stale or missing.
        if is_new_best and config['save_best_only']:
            torch.save(_checkpoint_payload(next_episode), best_model_path)

        # Periodic checkpoint (and always after the last scheduled episode).
        if episode % config['checkpoint_interval'] == 0 or episode == config['num_episodes'] - 1:
            checkpoint_data = _checkpoint_payload(next_episode)

            # Save latest
            torch.save(checkpoint_data, checkpoint_path)

            # Also keep a numbered snapshot every 500 episodes
            if episode % 500 == 0:
                numbered_path = Path(config['checkpoint_dir']) / f'checkpoint_ep{episode}.pt'
                torch.save(checkpoint_data, numbered_path)

except KeyboardInterrupt:
    print("\n‚ö†Ô∏è Training interrupted by user")

finally:
    # Always save final state, whether training finished, timed out, was
    # interrupted or raised; any exception still propagates afterwards.
    pbar.close()
    print("\nüíæ Saving final checkpoint...")
    metrics['training_time'] = previous_training_time + (time.time() - start_time)

    final_checkpoint = _checkpoint_payload(next_episode, final=True)

    torch.save(final_checkpoint, checkpoint_path)
    torch.save(final_checkpoint, Path(config['checkpoint_dir']) / 'final_model.pt')

    print("‚úÖ Checkpoint saved!")
    print(f"\nüìä Training Summary:")
    print(f"  Episodes: {len(metrics['episode_rewards'])}")
    print(f"  Best Reward: {metrics['best_reward']:.2f}")
    # Guard against a run in which no episode completed.
    if metrics['success_rates']:
        print(f"  Final Success Rate: {metrics['success_rates'][-1]:.2%}")
    else:
        print("  Final Success Rate: n/a (no episodes completed)")
    print(f"  Training Time: {metrics['training_time']/3600:.2f} hours")

## 📊 7. Visualize Results

In [None]:
import matplotlib.pyplot as plt

MA_WINDOW = 50  # number of points averaged by the moving-average overlay


def _plot_with_moving_average(ax, values, raw_color, ma_color, raw_alpha, raw_label=None, ma_label=None):
    """Plot `values` faintly and overlay their moving average once enough points exist.

    Args:
        ax: Matplotlib axes to draw on.
        values: Sequence of numbers, one per x position.
        raw_color / ma_color: Line colours for the raw series and the overlay.
        raw_alpha: Opacity of the raw series.
        raw_label / ma_label: Optional legend labels.
    """
    ax.plot(values, alpha=raw_alpha, label=raw_label, color=raw_color)
    if len(values) > MA_WINDOW:
        ma = np.convolve(values, np.ones(MA_WINDOW) / MA_WINDOW, mode='valid')
        # mode='valid' yields len(values) - MA_WINDOW + 1 points; the first one
        # averages indices 0..MA_WINDOW-1, so it is drawn at x = MA_WINDOW - 1.
        ax.plot(range(MA_WINDOW - 1, len(values)), ma, linewidth=2.5, label=ma_label, color=ma_color)


fig, axes = plt.subplots(2, 2, figsize=(16, 10))
fig.suptitle('Kaggle Training Results', fontsize=18, fontweight='bold')

# Rewards
ax = axes[0, 0]
_plot_with_moving_average(
    ax, metrics['episode_rewards'], 'blue', 'darkblue',
    raw_alpha=0.3, raw_label='Raw', ma_label='MA(50)',
)
ax.set_xlabel('Episode', fontsize=12)
ax.set_ylabel('Reward', fontsize=12)
ax.set_title('Episode Rewards', fontsize=14)
ax.legend()
ax.grid(True, alpha=0.3)

# Success Rate
ax = axes[0, 1]
ax.plot(metrics['success_rates'], linewidth=2, color='green')
ax.axhline(y=0.7, color='r', linestyle='--', linewidth=2, label='Target 70%')
ax.fill_between(range(len(metrics['success_rates'])), 0, metrics['success_rates'], alpha=0.3, color='green')
ax.set_xlabel('Episode', fontsize=12)
ax.set_ylabel('Success Rate', fontsize=12)
ax.set_title('Success Rate (Rolling)', fontsize=14)
ax.set_ylim([0, 1])
ax.legend()
ax.grid(True, alpha=0.3)

# Episode Lengths
ax = axes[1, 0]
_plot_with_moving_average(ax, metrics['episode_lengths'], 'orange', 'darkorange', raw_alpha=0.4)
ax.set_xlabel('Episode', fontsize=12)
ax.set_ylabel('Steps', fontsize=12)
ax.set_title('Episode Lengths', fontsize=14)
ax.grid(True, alpha=0.3)

# Loss. The panel is labelled even when no loss was recorded, so the figure
# never contains an anonymous empty subplot.
ax = axes[1, 1]
losses = metrics['losses']
if losses:
    _plot_with_moving_average(ax, losses, 'red', 'darkred', raw_alpha=0.4)
    # A log axis cannot show non-positive values; fall back to linear if any occur.
    if min(losses) > 0:
        ax.set_yscale('log')
else:
    ax.text(0.5, 0.5, 'No loss data recorded', ha='center', va='center', transform=ax.transAxes)
ax.set_xlabel('Episode', fontsize=12)
ax.set_ylabel('Loss', fontsize=12)
ax.set_title('Training Loss', fontsize=14)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(KAGGLE_WORKING / 'results' / 'training_results.png', dpi=150, bbox_inches='tight')
plt.show()

print(f"\nüíæ Plot saved to: {KAGGLE_WORKING / 'results' / 'training_results.png'}")

## 💾 8. Save Results for Download

In [None]:
# Save metrics as JSON
import math

metrics_file = KAGGLE_WORKING / 'results' / 'metrics.json'

# Build the payload BEFORE opening the file, so a conversion error cannot leave
# a truncated metrics.json behind.
best_reward = float(metrics['best_reward'])
json_metrics = {
    'episode_rewards': [float(r) for r in metrics['episode_rewards']],
    'episode_lengths': [int(n) for n in metrics['episode_lengths']],
    'success_rates': [float(sr) for sr in metrics['success_rates']],
    'losses': [float(x) for x in metrics['losses']],
    # best_reward starts at -inf; emit null instead of the non-standard JSON
    # token -Infinity when no episode has completed.
    'best_reward': best_reward if math.isfinite(best_reward) else None,
    'training_time_hours': metrics['training_time'] / 3600,
    'total_episodes': len(metrics['episode_rewards']),
    'final_success_rate': float(metrics['success_rates'][-1]) if metrics['success_rates'] else 0.0,
    'config': config,
    'timestamp': datetime.now().isoformat()
}

with open(metrics_file, 'w') as f:
    json.dump(json_metrics, f, indent=2)

print(f"‚úÖ Metrics saved to: {metrics_file}")

# List the model file that actually exists: prefer the best model, fall back to
# the final checkpoint that the training cell always writes.
best_model_file = KAGGLE_WORKING / 'checkpoints' / 'best_model.pt'
final_model_file = KAGGLE_WORKING / 'checkpoints' / 'final_model.pt'
model_file = best_model_file if best_model_file.exists() else final_model_file

# Create submission-ready package
print("\nüì¶ Files ready for download:")
print(f"  1. Model checkpoint: {model_file}")
print(f"  2. Training metrics: {metrics_file}")
print(f"  3. Visualization: {KAGGLE_WORKING / 'results' / 'training_results.png'}")
print("\nüí° Use 'Save Version' to commit this kernel and download outputs!")

## 🏆 9. Leaderboard Submission (Optional)

In [None]:
# If this is for a Kaggle competition, prepare submission file
# Modify according to competition requirements
import math

# Guard the empty-run case the same way the metrics export does: with no
# completed episode there is no last success rate and best_reward is still -inf.
final_success_rate = float(metrics['success_rates'][-1]) if metrics['success_rates'] else 0.0
best_reward = float(metrics['best_reward'])

submission = {
    'model_name': config['algorithm'],
    # null instead of the non-standard JSON token -Infinity
    'best_reward': best_reward if math.isfinite(best_reward) else None,
    'success_rate': final_success_rate,
    'episodes_trained': len(metrics['episode_rewards']),
    'configuration': config
}

submission_file = KAGGLE_WORKING / 'submission.json'
with open(submission_file, 'w') as f:
    json.dump(submission, f, indent=2)

print(f"üèÜ Submission file created: {submission_file}")
print("\nüìã Submission Summary:")
# The full configuration is written to the file but omitted from the summary.
for key, value in submission.items():
    if key != 'configuration':
        print(f"  {key}: {value}")