# Azul Zero Training - Phase 5.0 GPU-Optimized (Google Colab)

This notebook is **optimized for Google Colab T4 GPU**:
- **Reduced games** to minimize CPU-bound self-play
- **Increased epochs** to maximize GPU utilization during training
- **Larger batch size** (128/256) to saturate GPU memory
- **Buffer Size:** 150,000 examples (reduced for faster training cycles)

## GPU-Optimized Curriculum
- **Warmup (Cycles 1-5):** 100 sims, 100 games, 20 epochs, batch=128
- **Scaling (Cycles 6-20):** 200 sims, 250 games, 20 epochs, batch=128
- **High Quality (Cycles 21+):** 400 sims, 500 games, 20 epochs, batch=256

## Setup Instructions
1. Upload your `azul_zero` project folder to Google Drive
2. **Enable GPU**: Runtime → Change runtime type → T4 GPU
3. Run all cells in order

In [None]:
# Mount Google Drive
# The mount point '/content/drive' is the prefix of the PROJECT_PATH used by
# the next cell, so this cell must run first.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Navigate to project directory (adjust path as needed)
# Later cells use relative paths ('src', 'data/checkpoints_v5_gpu',
# 'logs_v5_gpu'), so they resolve against the directory selected here.
import os
PROJECT_PATH = '/content/drive/MyDrive/azul_zero'  # CHANGE THIS to your project path
os.chdir(PROJECT_PATH)  # raises OSError if the path does not exist (e.g. Drive not mounted)
print(f"Working directory: {os.getcwd()}")

In [None]:
# Install dependencies
!pip install -q torch numpy gym

In [None]:
# Check GPU availability
# Prints whether CUDA is usable and, if so, the name and total memory of device 0.
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is in bytes; divide by 1e9 to display decimal gigabytes.
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
else:
    # The warning sign was previously stored as mis-decoded bytes; restored here.
    print("⚠️ WARNING: GPU not detected. Training will be VERY slow.")

## Training Configuration

In [None]:
# Training parameters (GPU-optimized)
RESUME = True  # Set to False to start from scratch
CHECKPOINT_DIR = "data/checkpoints_v5_gpu"  # relative path; checkpoints are written here
TOTAL_CYCLES = 50  # number of the last cycle to run
MAX_DATASET_SIZE = 150_000  # Reduced for faster cycles

## Training Loop

In [None]:
import sys
import os
import torch
import time
from datetime import datetime

# Add project src folder to PYTHONPATH
sys.path.insert(0, os.path.abspath('src'))

from azul.env import AzulEnv
from net.azul_net import AzulNet
from train.self_play import generate_self_play_games
from train.dataset import AzulDataset
from train.trainer import Trainer
from players.heuristic_player import HeuristicPlayer
from players.random_player import RandomPlayer
from players.random_plus_player import RandomPlusPlayer
from mcts.mcts import MCTS
import copy

class TrainingLogger:
    """Timestamped logger that prints messages and appends them to a file.

    Each logged message is printed, stored in ``self.buffer`` (in call order)
    and appended to ``<log_dir>/training.log``.
    """

    def __init__(self, log_dir):
        """Create ``log_dir`` if needed and append a session banner to the log.

        Args:
            log_dir: Directory that holds ``training.log``.
        """
        self.log_dir = log_dir
        os.makedirs(log_dir, exist_ok=True)
        self.log_file = os.path.join(log_dir, "training.log")
        self.buffer = []

        # Explicit UTF-8: logged messages may contain non-ASCII characters and
        # the platform default encoding is not guaranteed to handle them.
        with open(self.log_file, "a", encoding="utf-8") as f:
            f.write(f"\n{'='*20}\n[{datetime.now()}] Colab GPU Training Started\n{'='*20}\n")

    def log(self, msg):
        """Print ``msg`` with a timestamp prefix and append it to the log file.

        Args:
            msg: Text to record; it is formatted as ``[YYYY-MM-DD HH:MM:SS] msg``.
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        formatted_msg = f"[{timestamp}] {msg}"
        print(formatted_msg)
        self.buffer.append(formatted_msg)

        # The file is reopened per call so every line reaches disk even if the
        # notebook session is interrupted later.
        with open(self.log_file, "a", encoding="utf-8") as f:
            f.write(formatted_msg + "\n")

def get_curriculum_params(cycle):
    """Return the self-play and training hyperparameters for ``cycle``.

    The stage is chosen by cycle number: warmup for ``cycle <= 5``, scaling
    for ``cycle <= 20`` and high quality for anything later. Epoch count,
    temperature threshold and the two noise settings are the same in every
    stage; games, simulations, batch size, learning rate and cpuct vary.
    """
    if cycle <= 5:
        # Warmup: quick cycles while the network is still untrained.
        games, sims, batch, learning_rate, exploration = 100, 100, 128, 1e-3, 1.0
    elif cycle <= 20:
        # Scaling: balance between self-play volume and training time.
        games, sims, batch, learning_rate, exploration = 250, 200, 128, 5e-4, 1.2
    else:
        # High quality: deepest search, largest batch, smallest learning rate.
        games, sims, batch, learning_rate, exploration = 500, 400, 256, 1e-4, 1.5

    return {
        'n_games': games,
        'simulations': sims,
        'epochs': 20,
        'batch_size': batch,
        'lr': learning_rate,
        'cpuct': exploration,
        'temp_threshold': 0,
        'noise_alpha': 0.3,
        'noise_eps': 0.25,
    }

# Confirms that this cell ran to the end (the check mark was previously
# stored as mis-decoded bytes and is restored here).
print("✅ Training functions loaded")

In [None]:
# Initialize training
import re  # used only in this cell, to parse checkpoint file names on resume

os.makedirs(CHECKPOINT_DIR, exist_ok=True)
logger = TrainingLogger("logs_v5_gpu")

# Setup device: prefer CUDA, then MPS, then CPU.
# torch.backends.mps is looked up defensively because the attribute is not
# present on every PyTorch build.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif getattr(torch.backends, 'mps', None) is not None and torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
logger.log(f"Using device: {device}")

# Initialize Environment
env = AzulEnv(num_players=2)
obs_flat = env.encode_observation(env.reset())
total_obs_size = obs_flat.shape[0]
# The flat observation is split into three parts for the network: a spatial
# part of in_channels * 5 * 5 values, a factories part of (env.N + 1) * 5
# values, and a global part made of whatever remains.
in_channels = env.num_players * 2
spatial_size = in_channels * 5 * 5
factories_size = (env.N + 1) * 5
global_size = total_obs_size - spatial_size - factories_size
action_size = env.action_size

logger.log(f"Obs shape: Total={total_obs_size}, Global={global_size}")

# Initialize Model
model = AzulNet(
    in_channels=in_channels,
    global_size=global_size,
    action_size=action_size,
    factories_count=env.N
).to(device)
# The learning rate set here is only the initial value; the training loop
# overwrites it every cycle from the curriculum.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Log model size
total_params = sum(p.numel() for p in model.parameters())
logger.log(f"Model parameters: {total_params:,}")

# NOTE: checkpoints store only the buffer size, not its contents, so a resumed
# run starts again with an empty replay buffer.
replay_buffer = []
start_cycle = 1

# Resume from checkpoint if requested
if RESUME:
    # Map cycle number -> file name. Only names of the exact form
    # model_cycle_<digits>.pt are accepted, so stray files such as
    # model_cycle_5_backup.pt are ignored instead of aborting the resume
    # with a ValueError from int().
    ckpt_by_cycle = {}
    for fname in os.listdir(CHECKPOINT_DIR):
        match = re.fullmatch(r'model_cycle_([0-9]+)\.pt', fname)
        if match:
            ckpt_by_cycle[int(match.group(1))] = fname
    if ckpt_by_cycle:
        last_cycle = max(ckpt_by_cycle)
        # Use the file name actually found on disk rather than rebuilding it.
        ckpt_path = os.path.join(CHECKPOINT_DIR, ckpt_by_cycle[last_cycle])
        logger.log(f"Loading checkpoint: {ckpt_path}")
        checkpoint = torch.load(ckpt_path, map_location=device)
        model.load_state_dict(checkpoint['model_state'])
        optimizer.load_state_dict(checkpoint['optimizer_state'])
        start_cycle = last_cycle + 1
        logger.log(f"Resumed from cycle {last_cycle}")

logger.log(f"Starting from cycle {start_cycle}")

In [None]:
# Main Training Loop (GPU-Optimized)
# Each cycle: generate self-play games, merge them into the replay buffer,
# train on the buffer, then write a checkpoint.
for cycle in range(start_cycle, TOTAL_CYCLES + 1):
    cycle_start = time.time()
    params = get_curriculum_params(cycle)
    logger.log(f"\n{'='*60}")
    logger.log(f"CYCLE {cycle}/{TOTAL_CYCLES}")
    logger.log(f"Games: {params['n_games']}, Sims: {params['simulations']}, "
               f"Epochs: {params['epochs']}, Batch: {params['batch_size']}")
    logger.log(f"{'='*60}")

    # 1. Self-Play (CPU-bound, sequential on GPU)
    selfplay_start = time.time()
    logger.log(f"[Self-Play] Generating {params['n_games']} games...")
    model.eval()
    new_examples, mcts_stats = generate_self_play_games(
        verbose=False,
        n_games=params['n_games'],
        env=env,
        model=model,
        simulations=params['simulations'],
        cpuct=params['cpuct'],
        temperature_threshold=params['temp_threshold'],
        noise_alpha=params['noise_alpha'],
        noise_epsilon=params['noise_eps']
    )
    selfplay_time = time.time() - selfplay_start

    logger.log(f"[Self-Play] Time: {selfplay_time/60:.1f} min")
    logger.log(f"[Self-Play] MCTS stats: visits={mcts_stats['avg_visits']:.1f}, "
               f"entropy={mcts_stats['avg_entropy']:.2f}, "
               f"reuse={mcts_stats['avg_reuse_rate']:.1%}")

    # 2. Update Buffer (keep only the newest MAX_DATASET_SIZE examples)
    if new_examples:
        replay_buffer.extend(new_examples)
        if len(replay_buffer) > MAX_DATASET_SIZE:
            replay_buffer = replay_buffer[-MAX_DATASET_SIZE:]
        logger.log(f"[Buffer] Size: {len(replay_buffer):,} examples")
    else:
        logger.log("[Buffer] WARNING: No new examples generated!")

    if not replay_buffer:
        # Nothing to train on. Training over an empty buffer is at best a
        # no-op, and depending on the PyTorch version a shuffling DataLoader
        # rejects an empty dataset outright, so skip the rest of this cycle.
        logger.log("[Training] Skipped: replay buffer is empty.")
        continue

    # 3. Training (GPU-bound, should be fast)
    train_start = time.time()
    logger.log(f"[Training] Starting {params['epochs']} epochs with batch_size={params['batch_size']}...")
    dataset = AzulDataset(replay_buffer, augment_factories=True)
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=params['batch_size'],  # GPU-optimized batch size
        shuffle=True,
        num_workers=2,  # Parallel data loading
        pin_memory=(device.type == 'cuda')  # pinned host memory is only requested for CUDA
    )

    # Apply this cycle's learning rate to the existing optimizer so its
    # state is kept across cycles.
    for param_group in optimizer.param_groups:
        param_group['lr'] = params['lr']

    trainer = Trainer(model, optimizer, device, log_dir=f'logs_v5_gpu/cycle_{cycle}')
    history = trainer.fit(dataloader, epochs=params['epochs'])
    train_time = time.time() - train_start

    # Mean of each recorded loss series; 0 is reported when a series is empty.
    avg_loss = sum(history['train_loss']) / len(history['train_loss']) if history['train_loss'] else 0
    avg_policy = sum(history['train_loss_policy']) / len(history['train_loss_policy']) if history['train_loss_policy'] else 0
    avg_value = sum(history['train_loss_value']) / len(history['train_loss_value']) if history['train_loss_value'] else 0
    logger.log(f"[Training] Time: {train_time/60:.1f} min")
    logger.log(f"[Training] Loss: {avg_loss:.4f} (Policy: {avg_policy:.4f}, Value: {avg_value:.4f})")

    # 4. Save Checkpoint (model + optimizer state; only the buffer size is
    # stored, not the buffer contents)
    ckpt_path = os.path.join(CHECKPOINT_DIR, f"model_cycle_{cycle}.pt")
    torch.save({
        'cycle': cycle,
        'model_state': model.state_dict(),
        'optimizer_state': optimizer.state_dict(),
        'params': params,
        'buffer_size': len(replay_buffer)
    }, ckpt_path)
    logger.log(f"[Checkpoint] Saved: {ckpt_path}")

    # Also write the current weights under the fixed name best.pt. No
    # evaluation happens in this loop, so "best" here means "most recent".
    torch.save({'model_state': model.state_dict()},
               os.path.join(CHECKPOINT_DIR, "best.pt"))

    # Cycle summary
    cycle_time = time.time() - cycle_start
    logger.log(f"\n[Summary] Cycle {cycle} completed in {cycle_time/60:.1f} min")
    logger.log(f"[Summary] Self-Play: {selfplay_time/cycle_time*100:.1f}%, Training: {train_time/cycle_time*100:.1f}%")

    # Clear GPU cache
    if device.type == 'cuda':
        torch.cuda.empty_cache()

# The celebration emoji was previously stored as mis-decoded bytes; restored here.
logger.log("\n🎉 Training Complete!")

## Performance Analysis

Run this cell after training to see how much GPU memory was used.

In [None]:
# Check GPU memory usage
# Reports PyTorch's CUDA memory counters in decimal gigabytes (bytes / 1e9).
# Nothing is printed when CUDA is unavailable.
if torch.cuda.is_available():
    # Memory currently occupied by tensors.
    print(f"GPU Memory Allocated: {torch.cuda.memory_allocated()/1e9:.2f} GB")
    # Memory currently held by PyTorch's caching allocator.
    print(f"GPU Memory Reserved: {torch.cuda.memory_reserved()/1e9:.2f} GB")
    # Peak tensor memory recorded so far.
    print(f"GPU Max Memory: {torch.cuda.max_memory_allocated()/1e9:.2f} GB")

## Download Checkpoints (Optional)

Run this cell to download the final checkpoint to your local machine.

In [None]:
from google.colab import files

# Download best model
# NOTE: the training loop rewrites best.pt after every cycle, so this file
# holds the weights of the most recently completed cycle.
files.download(os.path.join(CHECKPOINT_DIR, 'best.pt'))

# Or download specific cycle (change cycle number)
# files.download(os.path.join(CHECKPOINT_DIR, 'model_cycle_50.pt'))