# YOLO Training
- Support for YOLOv8, YOLOv9, YOLOv10, YOLO11, YOLO12


## 1. Set Directories

In [None]:
# Base directories
# Detect environment: Colab or local

import os
from pathlib import Path


# Weights & Biases switch: set to False to disable W&B logging.
USE_WANDB = True

# The session counts as Colab when COLAB_GPU is set or a /content directory exists.
IS_COLAB = os.path.exists('/content') or 'COLAB_GPU' in os.environ

if not IS_COLAB:
    # Local run: project and dataset both live in the parent of the working directory.
    BASE_DIR = Path.cwd().parent
    if USE_WANDB:
        print('‚úì Running locally - W&B will use existing login or prompt')
    DATASET_BASE_DIR = Path.cwd().parent
else:
    # Colab run: the project lives on Google Drive.
    # force_remount=True re-mounts Drive even when it is already mounted.
    from google.colab import drive
    drive.mount('/content/Drive', force_remount=True)
    BASE_DIR = Path('/content/Drive/MyDrive/ksu_yolo_2025/computer_vision_yolo')

    if USE_WANDB:
        # Export the W&B API key stored in the Colab secret named 'wandb_api_key'.
        from google.colab import userdata
        wandb_api_key = userdata.get('wandb_api_key')
        os.environ['WANDB_API_KEY'] = wandb_api_key
        print('‚úì W&B API key loaded from Colab secrets')

    # The dataset is read from a directory at the filesystem root, not from Drive.
    DATASET_BASE_DIR = Path('/computer_vision_yolo')


In [None]:
#  ! cd /content/Drive/MyDrive/ksu_yolo_2025 && git clone https://github.com/m3mahdy/computer_vision_yolo

In [None]:
# ! cd {BASE_DIR} && pip install -r requirements.txt --quiet

In [None]:
# download limited dataset
# !mkdir {DATASET_BASE_DIR}
# !cd {BASE_DIR}/dataset && cp 8_download_extract_other_datasets.py {DATASET_BASE_DIR} && cd {DATASET_BASE_DIR} && python 8_download_extract_other_datasets.py


## 2. Import Required Libraries

In [None]:
# Install required libraries (uncomment if running in Colab)
# !pip install -q ultralytics wandb pyyaml

import os
import sys
import gc
import yaml
import json
import torch
import shutil
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from datetime import datetime
from tqdm import tqdm
import pickle
import platform
import psutil

import wandb

# YOLO imports
from ultralytics import YOLO

# ReportLab imports for PDF generation
from reportlab.lib.pagesizes import A4
from reportlab.lib import colors as rl_colors
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, Image, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_CENTER, TA_LEFT
from PIL import Image as PILImage

warnings.filterwarnings('ignore')

# Configure matplotlib for notebook display
%matplotlib inline
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (15, 10)

# Check GPU availability
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'‚úì Libraries imported successfully')
print(f'‚úì Device: {device}')
if device == 'cuda':
    print(f'  GPU: {torch.cuda.get_device_name(0)}')
    print(f'  CUDA Version: {torch.version.cuda}')
    print(f'  Available Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB')


## 3. Configuration

In [None]:
# ============================================================================
# CONFIGURATION
# ============================================================================

# Base directories
# Detect environment: Colab or local

# Colab is assumed when either marker is present: the COLAB_GPU environment
# variable or a /content directory.
IS_COLAB = any((
    'COLAB_GPU' in os.environ,
    os.path.exists('/content'),
))

# Set to False to disable W&B logging.
USE_WANDB = True

if IS_COLAB:
    # Colab: (re-)mount Google Drive; force_remount=True mounts again even if
    # Drive is already mounted. The project directory lives on Drive.
    from google.colab import drive
    drive.mount('/content/Drive', force_remount=True)
    BASE_DIR = Path('/content/Drive/MyDrive/ksu_yolo_2025/computer_vision_yolo')

    if USE_WANDB:
        # Read the API key from the Colab secret 'wandb_api_key' and export it
        # through the WANDB_API_KEY environment variable.
        from google.colab import userdata
        wandb_api_key = userdata.get('wandb_api_key')
        os.environ['WANDB_API_KEY'] = wandb_api_key
        print('‚úì W&B API key loaded from Colab secrets')
else:
    # Local: the project root is the parent of the current working directory.
    BASE_DIR = Path.cwd().parent
    if USE_WANDB:
        print('‚úì Running locally - W&B will use existing login or prompt')
class DatasetSplit:
    """Names of the dataset splits (used as sub-directory names under images/ and labels/)."""

    TRAIN: str = 'train'
    VAL: str = 'val'
    TEST: str = 'test'

class ModelConfig:
    """Fixed training defaults that are passed to every training run."""

    # --- Image processing ---
    DEFAULT_IMAGE_SIZE: int = 640       # standard YOLO input size

    # --- Data loading ---
    DEFAULT_WORKERS: int = 8            # number of data loading workers
    DEFAULT_CACHE: bool = False         # cache images for faster training (use True for small datasets)

    # --- Early stopping and checkpointing ---
    DEFAULT_PATIENCE: int = 10          # epochs to wait before early stopping
    DEFAULT_SAVE_PERIOD: int = 3        # save a checkpoint every N epochs

    # --- Augmentation timing ---
    CLOSE_MOSAIC_EPOCHS: int = 10       # disable mosaic augmentation in the last N epochs

    # --- Validation ---
    DEFAULT_VAL: bool = True            # run validation during training

    # --- Warmup configuration (currently disabled) ---
    # MIN_WARMUP_EPOCHS = 0
    # MAX_WARMUP_EPOCHS = 3
    # MIN_WARMUP_MOMENTUM = 0.5
    # MAX_WARMUP_MOMENTUM = 0.95
    # MIN_WARMUP_BIAS_LR = 0.0
    # MAX_WARMUP_BIAS_LR = 0.1
    
    
    
    
# ----------------------------------------------------------------------------
# Model selection
# ----------------------------------------------------------------------------
# MODEL_NAME names the weights file (models/<MODEL_NAME>/<MODEL_NAME>.pt) and the
# per-model output folders.
#
# Shortlisted base models, chosen for their performance/size trade-off:
#   YOLOv8:  'yolov8s', 'yolov8m'
#   YOLOv10: 'yolov10s', 'yolov10m'
#   YOLO12:  'yolo12s'
# 'yolov10n' is for testing purposes only. Mahdy works on yolov8m.
MODEL_NAME = "yolov8m_finetuned_1"

# Directory structure
MODELS_DIR = BASE_DIR / 'models' / MODEL_NAME
TMP_DIR = BASE_DIR / 'tmp' / MODEL_NAME

# Dataset selection
# Option 1: full dataset (~100k images), for final optimization: "bdd100k_yolo"
# Option 2: limited dataset (representative samples), for quick tuning: "bdd100k_yolo_limited"
dataset_name = 'bdd100k_yolo_limited'

YOLO_DATASET_ROOT = DATASET_BASE_DIR / dataset_name

# Original data.yaml shipped with the dataset
DATA_YAML_PATH = YOLO_DATASET_ROOT / 'data.yaml'

# Fail early, with a hint, when the dataset has not been prepared yet
if not DATA_YAML_PATH.exists():
    raise FileNotFoundError(
        f"Dataset not found: {DATA_YAML_PATH}\n"
        f"Please prepare the dataset first using process_bdd100k_to_yolo_dataset.py"
    )

# Explicit UTF-8 so the result does not depend on the platform's default encoding
with open(DATA_YAML_PATH, 'r', encoding='utf-8') as yaml_file:
    data_config = yaml.safe_load(yaml_file)

# An empty file (safe_load returns None) or a non-mapping document would
# otherwise fail in the key check below with an opaque TypeError.
if not isinstance(data_config, dict):
    raise ValueError(
        f"Invalid data.yaml (expected a mapping at the top level): {DATA_YAML_PATH}"
    )

# Validate required keys in data.yaml
required_yaml_keys = ['nc', 'names', 'path']
missing_keys = [key for key in required_yaml_keys if key not in data_config]
if missing_keys:
    raise ValueError(f"Missing required keys in data.yaml: {missing_keys}")

# Point the 'path' field at the dataset root of the current environment
# (the dataset location differs between Colab and local runs).
data_config['path'] = str(YOLO_DATASET_ROOT)

# Write the corrected configuration to a per-model temporary data.yaml so the
# dataset's own file is left untouched.
TMP_DIR.mkdir(parents=True, exist_ok=True)
temp_data_yaml = TMP_DIR / 'data.yaml'
with open(temp_data_yaml, 'w', encoding='utf-8') as yaml_output_file:
    yaml.dump(data_config, yaml_output_file, default_flow_style=False, sort_keys=False)

# From here on, training uses the temporary data.yaml
DATA_YAML_PATH = temp_data_yaml

# Training Configuration
EPOCHS_FINAL_TRAINING = 150  # Number of epochs for the final training run
BATCH_SIZE = 96  # Batch size for training
# Observed batch sizes and epoch times:
#   T4 GPU:
#     64 for yolov10n, 1 epoch ~30 min
#     32 for yolov8m,  1 epoch ~45 min
#   A100 GPU:
#     64 for yolov10m, 1 epoch ~11 min (5 epochs completed in 0.797 hours)
#     96 for yolov8m,  1 epoch ~10 min (5 epochs completed in 0.866 hours)

# NOTE: Image size (imgsz) comes from the best hyperparameters when USE_DEFAULT_CONFIG=False.
# With USE_DEFAULT_CONFIG=True no imgsz is passed and the YOLO default (640) applies.

# Weights & Biases (optional)
# USE_WANDB is set once, next to the environment detection at the top of this cell,
# because the Colab API-key setup there depends on it. It is deliberately NOT
# re-assigned here: doing so would silently re-enable logging after it had been
# disabled above, without the API key having been configured.
WANDB_PROJECT_TRAINING = f"yolo-{YOLO_DATASET_ROOT.name}-training"

# ============================================================================
# CONFIGURATION MODE: DEFAULT vs TUNED HYPERPARAMETERS
# ============================================================================
# Set to True to use the default YOLO configuration (no hyperparameter tuning)
# Set to False to load hyperparameters from a tuning run
# ============================================================================

USE_DEFAULT_CONFIG = False  # Set to True to skip tuning and use default YOLO config

# ============================================================================
# TUNING RUN CONFIGURATION - SPECIFY WHICH TUNING RUN TO USE
# ============================================================================
# Name of the tuning run to load the best hyperparameters from.
# It must match a directory name in tune_train/tune/.
#
# Example: TUNING_RUN_NAME = "yolov10n_tune_20251125_143022"
# Leave as None to use the latest tuning run found for this model.
# Note: only used if USE_DEFAULT_CONFIG = False
# ============================================================================

TUNING_RUN_NAME = "yolov8m_finetuned_1_tune_20251127_230340"  # Specific tuning run name, or None to auto-detect the latest

# ============================================================================
# TRAINING RUN CONFIGURATION - RESUME OR CREATE NEW
# ============================================================================
# To RESUME an existing training run: set RESUME_TRAINING_RUN_NAME to the run directory name
# To START a NEW training run: leave RESUME_TRAINING_RUN_NAME as None or an empty string
#
# Example to resume: RESUME_TRAINING_RUN_NAME = "yolov10n_train_20251125_150000"
# ============================================================================

RESUME_TRAINING_RUN_NAME = None  # Run name to resume, or None to create a new run

# Find or verify the tuning run (only when not using the default config).
# Tuning runs live in <BASE_DIR>/tune_train/tune/<run name>/.
TUNE_TRAIN_BASE = BASE_DIR / 'tune_train'
TUNE_BASE_DIR = TUNE_TRAIN_BASE / 'tune'

if USE_DEFAULT_CONFIG:
    # Using default configuration - skip tuning run search
    print('\n‚öôÔ∏è  CONFIGURATION MODE: Using Default YOLO Configuration')
    print('   No hyperparameter tuning will be applied')
    TUNE_DIR = None
    TUNING_RUN_NAME = None
    best_hyperparams_path = None
else:
    # Using tuned hyperparameters - find or verify tuning run
    print('\n‚öôÔ∏è  CONFIGURATION MODE: Using Tuned Hyperparameters')

    if TUNING_RUN_NAME:
        # Use the explicitly requested tuning run
        TUNE_DIR = TUNE_BASE_DIR / TUNING_RUN_NAME
        if not TUNE_DIR.exists():
            # List the runs that do exist for this model to make the fix obvious
            raise FileNotFoundError(
                f"Specified tuning run not found: {TUNE_DIR}\n"
                f"Available runs in {TUNE_BASE_DIR}:\n" +
                '\n'.join(f"  - {d.name}" for d in TUNE_BASE_DIR.glob(f'{MODEL_NAME}_tune_*') if d.is_dir())
            )
        print(f'   üìÇ Using specified tuning run: {TUNING_RUN_NAME}')
    else:
        # Auto-detect the latest tuning run for this model. Only directories are
        # considered (same filter as the listing above), so a stray file matching
        # the pattern cannot be mistaken for a run. Sorting by name in reverse
        # puts the newest first because run names end with a timestamp
        # (e.g. "yolov10n_tune_20251125_143022").
        tuning_runs = sorted(
            (p for p in TUNE_BASE_DIR.glob(f'{MODEL_NAME}_tune_*') if p.is_dir()),
            key=lambda p: p.name,
            reverse=True,
        )
        if not tuning_runs:
            raise FileNotFoundError(
                f"No tuning runs found for model {MODEL_NAME} in {TUNE_BASE_DIR}\n"
                f"Please run the tuning notebook first, specify TUNING_RUN_NAME, or set USE_DEFAULT_CONFIG=True"
            )
        TUNE_DIR = tuning_runs[0]
        TUNING_RUN_NAME = TUNE_DIR.name
        print(f'   üîç Auto-detected latest tuning run: {TUNING_RUN_NAME}')

    # Verify best hyperparameters exist
    best_hyperparams_path = TUNE_DIR / 'best_hyperparameters.json'
    if not best_hyperparams_path.exists():
        raise FileNotFoundError(
            f"Best hyperparameters not found in tuning run: {best_hyperparams_path}\n"
            f"Please ensure the tuning run completed successfully"
        )

    print(f'   ‚úì Found best hyperparameters: {best_hyperparams_path}')

# Decide the training run name: a fresh timestamped name, unless an existing
# run was requested through RESUME_TRAINING_RUN_NAME.
if not RESUME_TRAINING_RUN_NAME:
    RUN_TIMESTAMP = f'{datetime.now():%Y%m%d_%H%M%S}'
    RUN_NAME_TRAINING = f'{MODEL_NAME}_train_{RUN_TIMESTAMP}'
    print(f'\nüÜï NEW TRAINING MODE: Creating new run "{RUN_NAME_TRAINING}"')
else:
    RUN_NAME_TRAINING = RESUME_TRAINING_RUN_NAME
    print(f'\nüîÑ RESUME MODE: Will attempt to resume training run "{RESUME_TRAINING_RUN_NAME}"')

# Make sure the run's output directory and the model directory both exist.
TRAIN_DIR = TUNE_TRAIN_BASE / 'training' / RUN_NAME_TRAINING
for _required_dir in (TRAIN_DIR, MODELS_DIR):
    _required_dir.mkdir(parents=True, exist_ok=True)

# Read dataset configuration
NUM_CLASSES = data_config['nc']

# data.yaml may give 'names' either as a list (['car', 'bus', ...]) or as an
# id -> name mapping ({0: 'car', 1: 'bus', ...}). Enumerating a mapping would
# yield its keys instead of the class names, so both layouts are handled.
_raw_class_names = data_config['names']
if isinstance(_raw_class_names, dict):
    CLASS_NAMES = {
        int(class_id): class_name
        for class_id, class_name in sorted(_raw_class_names.items(), key=lambda item: int(item[0]))
    }
else:
    CLASS_NAMES = dict(enumerate(_raw_class_names))

# Reverse lookup: class name -> class id
CLASS_NAME_TO_ID = {class_name: class_id for class_id, class_name in CLASS_NAMES.items()}

# Assemble the configuration summary line by line, then print it in one go.
_summary_rule = '=' * 80
_environment_label = (
    'Google Colab'
    if 'COLAB_GPU' in os.environ or os.path.exists('/content')
    else 'Local'
)
_config_mode_label = 'Default (No Tuning)' if USE_DEFAULT_CONFIG else 'Tuned Hyperparameters'

_summary_lines = [
    _summary_rule,
    'CONFIGURATION SUMMARY',
    _summary_rule,
    f'Environment: {_environment_label}',
    f'Base Directory: {BASE_DIR}',
    f'Model: {MODEL_NAME}',
    f'Dataset: {YOLO_DATASET_ROOT.name}',
    f'Data YAML: {DATA_YAML_PATH}',
    f'  Dataset path in YAML: {data_config["path"]}',
    f'Classes: {NUM_CLASSES}',
    f'Class Names: {CLASS_NAMES}',
    f'Device: {device}',
    f'Epochs Final Training: {EPOCHS_FINAL_TRAINING}',
    f'Batch Size: {BATCH_SIZE}',
    f'Configuration Mode: {_config_mode_label}',
]

# The tuning run is only relevant when tuned hyperparameters are in use.
if not USE_DEFAULT_CONFIG:
    _summary_lines.append(f'Tuning Run: {TUNING_RUN_NAME}')

_summary_lines.append(f'Training Directory: {TRAIN_DIR}')

if USE_WANDB:
    _summary_lines.append('W&B Logging: Enabled')
    _summary_lines.append(f'  Training Project: {WANDB_PROJECT_TRAINING}')
else:
    _summary_lines.append('W&B Logging: Disabled')

_summary_lines.append(_summary_rule)
print('\n'.join(_summary_lines))

## 4. Load Base YOLO Model

In [None]:
# Load the base YOLO model, downloading it on first use.
# The weights are expected at MODELS_DIR/<MODEL_NAME>.pt. When that file is
# missing, the model is requested from Ultralytics by name and a copy is stored
# at that location for later runs.
model_path = MODELS_DIR / f'{MODEL_NAME}.pt'

if model_path.exists():
    model = YOLO(str(model_path))
    print(f'‚úì Model loaded from {model_path}')
else:
    print(f'Model not found at {model_path}')
    print(f'Downloading {MODEL_NAME} ...')

    try:
        # Ultralytics expects model names with a .pt extension for download
        model_name_for_download = MODEL_NAME if MODEL_NAME.endswith('.pt') else MODEL_NAME + '.pt'

        print(f'  Requesting model: {model_name_for_download}')
        model = YOLO(model_name_for_download)

        # Create models directory
        MODELS_DIR.mkdir(parents=True, exist_ok=True)

        # Storing a copy is best-effort: the model is already loaded, so a failure
        # here is reported but does not abort the notebook.
        try:
            if hasattr(model, 'save'):
                model.save(str(model_path))
                print(f'‚úì Model downloaded and saved to {model_path}')
                print(f'  Size: {model_path.stat().st_size / (1024*1024):.1f} MB')
            else:
                # Fallback: copy the weights out of the Ultralytics cache folders.
                # Path.rglob is used instead of glob.glob because the `glob` module
                # is not among this notebook's imports.
                cache_roots = [
                    Path.home() / '.cache' / 'ultralytics',
                    Path.home() / '.config' / 'Ultralytics',
                ]
                cached_weights = next(
                    (
                        candidate
                        for cache_root in cache_roots
                        if cache_root.is_dir()
                        for candidate in cache_root.rglob(f'{MODEL_NAME}.pt')
                    ),
                    None,
                )

                if cached_weights is not None:
                    shutil.copy(cached_weights, model_path)
                    print(f'‚úì Model downloaded and saved to {model_path}')
                    print(f'  Size: {model_path.stat().st_size / (1024*1024):.1f} MB')
                else:
                    print(f'‚úì Model loaded from ultralytics cache')
                    print(f'  Note: Model is in cache, not copied to {model_path}')
                    print(f'  This is normal and the model will work correctly')
        except Exception as save_error:
            print(f'‚ö†Ô∏è  Could not save model to custom location: {save_error}')
            print(f'‚úì Model loaded successfully from ultralytics cache')

    except Exception as download_error:
        # Without a model nothing below can run, so report and re-raise.
        print(f'\n‚ùå Error downloading model: {download_error}')
        raise

# Get model information
# Per the Ultralytics API, model.info() returns the tuple
# (layers, parameters, gradients, GFLOPs). It does not report a size in MB, so
# the third value must not be labelled as a size; the on-disk size is taken
# from the weights file instead.
model_info_keys = ["layers", "params", "gradients", "FLOPs(G)"]
model_info_result = model.info() or ()  # guard against info() returning None
model_info_dict = dict(zip(model_info_keys, model_info_result))

# Size of the weights file in MB (0.0 when the weights were not stored at model_path)
model_size_mb = model_path.stat().st_size / (1024 * 1024) if model_path.exists() else 0.0
model_info_dict["size(MB)"] = model_size_mb

model_params = model_info_dict.get("params", 0)
flops_gflops = model_info_dict.get("FLOPs(G)", 0)


print('\nüìä Model Information:')
print(f'  Model: {MODEL_NAME}')
print(f'  Classes in model: {len(model.names)}')
print(f'  Task: {model.task}')
print(f'  Parameters: {model_params / 1e6:.1f}M')
print(f'  Model Size: {model_size_mb:.1f} MB')
print(f'  FLOPs (640x640): {flops_gflops:.2f} GFLOPs')

## 6. Verify Dataset Structure

In [None]:
# ============================================================================
# VERIFY DATASET STRUCTURE
# ============================================================================

print('Verifying YOLO dataset structure...')
print(f'\nüìÅ Dataset Root: {YOLO_DATASET_ROOT}')

# Image files are recognised by suffix, case-insensitively, so '.JPG' and
# '.jpeg' files are counted as well.
_image_suffixes = {'.jpg', '.jpeg', '.png'}

# Count images and label files per split; the layout is
# <root>/images/<split> and <root>/labels/<split>.
dataset_stats = {}
for split in [DatasetSplit.TRAIN, DatasetSplit.VAL, DatasetSplit.TEST]:
    images_dir = YOLO_DATASET_ROOT / 'images' / split
    labels_dir = YOLO_DATASET_ROOT / 'labels' / split

    if images_dir.exists() and labels_dir.exists():
        # Single pass over each directory, without building intermediate lists
        num_images = sum(1 for entry in images_dir.iterdir() if entry.suffix.lower() in _image_suffixes)
        num_labels = sum(1 for _ in labels_dir.glob('*.txt'))
        dataset_stats[split] = {'images': num_images, 'labels': num_labels}
        print(f'  ‚úì {split:5s}: {num_images:6d} images, {num_labels:6d} labels')
    else:
        print(f'  ‚ö†Ô∏è  {split:5s}: Directory not found')
        dataset_stats[split] = {'images': 0, 'labels': 0}

print(f'\nüìÑ Configuration: {DATA_YAML_PATH}')
print(f'  Classes: {NUM_CLASSES}')
print(f'  Names: {CLASS_NAMES}')

total_images = sum(stats['images'] for stats in dataset_stats.values())
if total_images > 0:
    print(f'\n‚úì Dataset verified: {total_images:,} total images')
    print('‚úì Ready for training')
else:
    # Do not report success when nothing was found in any split
    print(f'\n‚ö†Ô∏è  No images found under {YOLO_DATASET_ROOT / "images"} - dataset is not ready for training')

## 5. Load Best Hyperparameters from Tuning

In [None]:
# ============================================================================
# LOAD HYPERPARAMETERS (TUNED OR DEFAULT)
# ============================================================================

print('\n' + '=' * 80)
if not USE_DEFAULT_CONFIG:
    print('LOADING BEST HYPERPARAMETERS FROM TUNING')
    print('=' * 80)
    print(f'Tuning Run: {TUNING_RUN_NAME}')
    print(f'Hyperparameters Path: {best_hyperparams_path}')

    # Read the JSON file written by the tuning run.
    with open(best_hyperparams_path, 'r', encoding='utf-8') as params_file:
        best_params_file = json.load(params_file)

    # Two layouts are accepted:
    #   - old: hyperparameters sit at the root, mixed with metadata fields
    #   - new: metadata at the root, hyperparameters nested under 'hyperparameters'
    if 'hyperparameters' not in best_params_file:
        # Old layout: drop the root-level fields that are not YOLO parameters.
        metadata_keys = {'model', 'dataset_root', 'data_yaml_path', 'notes',
                        'optimization_results', 'timestamp'}
        best_params = {
            param_name: param_value
            for param_name, param_value in best_params_file.items()
            if param_name not in metadata_keys
        }
        print('\n‚úì Loaded hyperparameters from flat structure (filtered metadata)')
    else:
        best_params = best_params_file['hyperparameters']
        print('\n‚úì Loaded hyperparameters from nested structure')

    # Floats are shown with six decimals; every other value is printed as-is.
    print('\n‚úì Best Hyperparameters Loaded:')
    for param_name, param_value in sorted(best_params.items()):
        shown_value = f'{param_value:.6f}' if isinstance(param_value, float) else f'{param_value}'
        print(f'  {param_name:20s}: {shown_value}')

    # The tuning metadata file is optional; summarise it when present.
    tuning_metadata_path = TUNE_DIR / 'optimization_metadata.json'
    if tuning_metadata_path.exists():
        with open(tuning_metadata_path, 'r', encoding='utf-8') as metadata_file:
            tuning_metadata = json.load(metadata_file)

        best_trial = tuning_metadata.get('best_trial', 'N/A')
        best_map50 = tuning_metadata.get('best_map50', 0)
        total_trials = tuning_metadata.get('total_trials', 'N/A')
        completed_trials = tuning_metadata.get('completed_trials', 'N/A')

        print('\nüìä Tuning Run Summary:')
        print(f"  Best Trial: {best_trial}")
        print(f"  Best mAP@0.5: {best_map50:.4f}")
        print(f"  Total Trials: {total_trials}")
        print(f"  Completed Trials: {completed_trials}")

        if 'optimization_duration' in tuning_metadata:
            print(f"  Duration: {tuning_metadata['optimization_duration']}")
else:
    print('USING DEFAULT YOLO CONFIGURATION')
    print('=' * 80)
    print('No hyperparameter tuning applied - using YOLO defaults')

    # An empty dict means no overrides: YOLO falls back to its own defaults.
    best_params = {}

    print('\n‚úì Training will use default YOLO hyperparameters')
    print('   Default values will be applied by the YOLO model')

print('=' * 80)

## 7. Train The Model 

In [None]:
# ============================================================================
# TRAIN FINAL MODEL WITH OPTIMIZED HYPERPARAMETERS
# ============================================================================

print('\n' + '=' * 80)
if USE_DEFAULT_CONFIG:
    print('TRAINING FINAL MODEL WITH DEFAULT CONFIGURATION')
else:
    print('TRAINING FINAL MODEL WITH OPTIMIZED HYPERPARAMETERS')
print('=' * 80)

# Training resumes whenever the run directory already holds a last.pt checkpoint
checkpoint_path = TRAIN_DIR / 'weights' / 'last.pt'
training_log_path = TRAIN_DIR / 'training_log.json'
is_resuming = checkpoint_path.exists()

if is_resuming:
    # Resume training
    print('\n' + '=' * 80)
    print('üîÑ RESUMING PREVIOUS TRAINING')
    print('=' * 80)
    print(f'Checkpoint: {checkpoint_path}')

    # Start from an empty log so that `training_log` is always defined, even when
    # a checkpoint exists but training_log.json was never written (for example
    # after a crashed or disconnected session).
    training_log = {}
    if training_log_path.exists():
        with open(training_log_path, 'r', encoding='utf-8') as f:
            training_log = json.load(f)

        print(f'\nüìä Previous Training Summary:')
        print(f"  Started: {training_log.get('start_time', 'N/A')}")
        if 'last_epoch' in training_log:
            print(f"  Last Epoch: {training_log['last_epoch']}")
        if 'best_map50' in training_log:
            print(f"  Best mAP@0.5: {training_log['best_map50']:.4f}")
        if 'last_checkpoint' in training_log:
            print(f"  Last Checkpoint: {training_log['last_checkpoint']}")
    else:
        print('\n  (no training_log.json found - starting a fresh log)')

    print(f'\n‚û°Ô∏è  Resuming training from checkpoint')
    print('=' * 80)

    # Load model from checkpoint
    print(f'\nüì¶ Loading model from checkpoint: {checkpoint_path}')
    final_model = YOLO(str(checkpoint_path))
    model_to_train = str(checkpoint_path)
    resume_training = True

else:
    # Start new training
    print(f'\nüì¶ Loading base model: {MODEL_NAME}')
    # The model-loading cell may leave the weights only in the Ultralytics cache.
    # In that case, fall back to the model name so Ultralytics resolves the
    # weights itself instead of failing on a missing file.
    if model_path.exists():
        model_to_train = str(model_path)
    else:
        model_to_train = MODEL_NAME if MODEL_NAME.endswith('.pt') else MODEL_NAME + '.pt'
    final_model = YOLO(model_to_train)
    resume_training = False

    # Initialize training log
    training_log = {
        'start_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'model': MODEL_NAME,
        'dataset': YOLO_DATASET_ROOT.name,
        'config_mode': 'default' if USE_DEFAULT_CONFIG else 'tuned',
        'tuning_run': TUNING_RUN_NAME if not USE_DEFAULT_CONFIG else None,
        'best_hyperparameters': best_params,
        'epochs': EPOCHS_FINAL_TRAINING,
        'batch_size': BATCH_SIZE,
        # Same default as the rest of the notebook (ModelConfig.DEFAULT_IMAGE_SIZE == 640)
        'image_size': best_params.get('imgsz', ModelConfig.DEFAULT_IMAGE_SIZE)
    }

# Build the keyword arguments for the training call in two layers:
#   1. fixed settings that are never part of the hyperparameter optimization
#   2. the tuned values in best_params (an empty dict when using defaults)
# Entries in best_params are merged last, so they take precedence on a name clash.
# 'imgsz' is intentionally not a fixed setting: it comes from best_params when
# tuned, and is left to YOLO otherwise.
_fixed_training_params = dict(
    data=str(DATA_YAML_PATH),                       # dataset configuration file
    epochs=EPOCHS_FINAL_TRAINING,                   # number of training epochs
    batch=BATCH_SIZE,                               # batch size
    device=device,                                  # training device (cuda/cpu)
    project=str(TRAIN_DIR.parent),                  # project directory
    name=TRAIN_DIR.name,                            # run name
    exist_ok=True,                                  # reuse the run directory if it exists
    patience=ModelConfig.DEFAULT_PATIENCE,          # early stopping patience
    save_period=ModelConfig.DEFAULT_SAVE_PERIOD,    # checkpoint frequency
    workers=ModelConfig.DEFAULT_WORKERS,            # data loading workers
    verbose=True,                                   # verbose output
    seed=42,                                        # random seed for reproducibility
    close_mosaic=ModelConfig.CLOSE_MOSAIC_EPOCHS,   # disable mosaic in the final epochs
    resume=resume_training,                         # resume from checkpoint if one exists
    cache=ModelConfig.DEFAULT_CACHE,                # cache images for faster training
    val=ModelConfig.DEFAULT_VAL,                    # run validation during training
)

# Tuned parameters such as lr0, lrf, momentum, weight_decay, warmup_epochs, ...
final_training_params = {**_fixed_training_params, **best_params}

_action_word = 'Resuming' if resume_training else 'Starting'
_config_label = 'Default YOLO' if USE_DEFAULT_CONFIG else 'Tuned Hyperparameters'

print(f'\nüöÄ {_action_word} training...')
print(f'  Configuration: {_config_label}')
print(f'  Epochs: {final_training_params["epochs"]}')
print(f'  Batch Size: {final_training_params["batch"]}')
print(f'  Dataset: {DATA_YAML_PATH}')
print(f'  Device: {device}')
print(f'  Resume: {resume_training}')

if not best_params:
    print('\nüìä Using YOLO default hyperparameters (no custom values)')
else:
    # Floats are shown with six decimals; everything else is printed as-is.
    print('\nüìä Applied Hyperparameters:')
    for _param_name, _param_value in sorted(best_params.items()):
        _shown = f'{_param_value:.6f}' if isinstance(_param_value, float) else f'{_param_value}'
        print(f'  {_param_name:20s}: {_shown}')

print('\nThis may take a while. Training progress will be displayed below.')
print('=' * 80)

# Initialize W&B for final training.
# W&B is best-effort: any failure here is reported and training continues
# without logging (wandb_training_run stays None).
if USE_WANDB:
    try:
        wandb_config = {
            'model': MODEL_NAME,
            'dataset': YOLO_DATASET_ROOT.name,
            'phase': 'final_training',
            'config_mode': 'default' if USE_DEFAULT_CONFIG else 'tuned',
            'tuning_run': TUNING_RUN_NAME if not USE_DEFAULT_CONFIG else None,
            'epochs': final_training_params['epochs'],
            'batch_size': final_training_params['batch'],
            'resume': resume_training,
            **best_params
        }

        # When resuming, pass the run id stored in the training log so the
        # existing W&B run is continued instead of a new one being started.
        previous_wandb_run_id = training_log.get('wandb_run_id') if is_resuming else None

        wandb_training_run = wandb.init(
            project=WANDB_PROJECT_TRAINING,
            name=RUN_NAME_TRAINING,
            id=previous_wandb_run_id,
            resume='allow' if is_resuming else None,
            config=wandb_config,
            group='final-training',
            tags=['final', 'optimized' if not USE_DEFAULT_CONFIG else 'default', MODEL_NAME, YOLO_DATASET_ROOT.name]
        )

        # Persist the run id whenever the log does not hold it yet. This covers
        # new runs and also resumed runs whose log had no id; without it, every
        # further resume would start yet another W&B run.
        if training_log.get('wandb_run_id') != wandb_training_run.id:
            training_log['wandb_run_id'] = wandb_training_run.id
            with open(training_log_path, 'w', encoding='utf-8') as f:
                json.dump(training_log, f, indent=2)

        print(f'‚úì W&B initialized: {WANDB_PROJECT_TRAINING}/{RUN_NAME_TRAINING}')
    except Exception as wandb_error:
        print(f'‚ö†Ô∏è  Could not initialize W&B: {wandb_error}')
        wandb_training_run = None
else:
    wandb_training_run = None

def _save_training_log():
    """Write the current `training_log` dict to `training_log_path` as indented JSON."""
    with open(training_log_path, 'w', encoding='utf-8') as log_file:
        json.dump(training_log, log_file, indent=2)


def _timestamp_now():
    """Return the current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')


# Train model.
# Whatever the outcome (completed, interrupted, failed), the training log is
# updated and written to disk, and the W&B run is closed in `finally`.
start_time = datetime.now()
try:
    final_results = final_model.train(**final_training_params)

    # Update training log with completion
    training_log['end_time'] = _timestamp_now()
    training_log['status'] = 'completed'
    training_log['duration'] = str(datetime.now() - start_time)

    # Save final metrics
    if hasattr(final_results, 'results_dict'):
        training_log['final_metrics'] = final_results.results_dict

    _save_training_log()

    print('\n‚úì Training completed successfully!')

except KeyboardInterrupt:
    print('\n‚ö†Ô∏è  Training interrupted by user')
    print(f'üíæ Progress saved to: {TRAIN_DIR}')
    print(f'   - Last checkpoint: {checkpoint_path}')
    print(f'   - Training log: {training_log_path}')
    # A plain re-run would create a new timestamped run; resuming requires
    # pointing RESUME_TRAINING_RUN_NAME at this run.
    print(f'\nüîÑ To resume: set RESUME_TRAINING_RUN_NAME = "{RUN_NAME_TRAINING}" and re-run this notebook')

    # Update training log
    training_log['last_interrupt'] = _timestamp_now()
    training_log['status'] = 'interrupted'
    training_log['end_time'] = _timestamp_now()
    _save_training_log()
    raise

except Exception as e:
    print(f'\n‚ùå Training failed with error: {e}')
    training_log['status'] = 'failed'
    training_log['error'] = str(e)
    training_log['end_time'] = _timestamp_now()
    _save_training_log()
    raise

finally:
    # Always close the W&B run, including after an interrupt or a failure
    if USE_WANDB and wandb_training_run is not None:
        wandb_training_run.finish()
        print('‚úì W&B run finished')

end_time = datetime.now()
duration = end_time - start_time

print('\n' + '=' * 80)
print('TRAINING SUMMARY')
print('=' * 80)

# Get final validation metrics; results are written to <TRAIN_DIR>/final_val
print('\nüìä Running final validation...')
final_val_results = final_model.val(
    data=str(DATA_YAML_PATH),
    project=str(TRAIN_DIR),
    name='final_val',
)

# Convert to plain floats so the metrics can be written to JSON
final_metrics = {
    'map50': float(final_val_results.box.map50),
    'map50_95': float(final_val_results.box.map),
    'precision': float(final_val_results.box.mp),
    'recall': float(final_val_results.box.mr),
}

print('\nüìä Final Model Performance:')
print(f"  mAP@0.5: {final_metrics['map50']:.4f}")
print(f"  mAP@0.5:0.95: {final_metrics['map50_95']:.4f}")
print(f"  Precision: {final_metrics['precision']:.4f}")
print(f"  Recall: {final_metrics['recall']:.4f}")

# Update training log with final metrics
training_log['final_metrics'] = final_metrics
training_log['best_model_path'] = str(TRAIN_DIR / 'weights' / 'best.pt')
training_log['last_model_path'] = str(TRAIN_DIR / 'weights' / 'last.pt')

# Save final training log
with open(training_log_path, 'w', encoding='utf-8') as f:
    json.dump(training_log, f, indent=2)

print(f'\nüíæ Training log saved: {training_log_path}')

# Compare with tuning results if available
if not USE_DEFAULT_CONFIG and tuning_metadata_path.exists():
    # A missing or null best_map50 is treated as 0
    tuning_best_map = tuning_metadata.get('best_map50') or 0
    improvement = final_metrics['map50'] - tuning_best_map
    print('\nüìà Improvement vs Best Tuning Trial:')
    print(f"  Best Tuning mAP@0.5: {tuning_best_map:.4f}")
    print(f"  Final Model mAP@0.5: {final_metrics['map50']:.4f}")
    if tuning_best_map:
        print(f"  Improvement: {improvement:+.4f} ({improvement/tuning_best_map*100:+.2f}%)")
    else:
        # No usable baseline: a relative change would divide by zero
        print(f"  Improvement: {improvement:+.4f}")

# Closing overview of the run, framed by separator lines
print('=' * 80)
print(f'Training Directory: {TRAIN_DIR}')
print(f'Best Weights: {TRAIN_DIR / "weights" / "best.pt"}')
print(f'Last Weights: {TRAIN_DIR / "weights" / "last.pt"}')
print(f'Configuration: {"Default YOLO" if USE_DEFAULT_CONFIG else f"Tuned ({TUNING_RUN_NAME})"}')
print(f'Start Time: {start_time.strftime("%Y-%m-%d %H:%M:%S")}')
print(f'End Time: {end_time.strftime("%Y-%m-%d %H:%M:%S")}')
print(f'Duration: {duration}')
print('=' * 80)

## 8. Save Final Model and Metadata

In [None]:
# ============================================================================
# SAVE FINAL OPTIMIZED MODEL
# ============================================================================
# Copies the best checkpoint of the training run into
# BASE_DIR/models/<MODEL_NAME>_finetuned_<YYYYMMDD>/ under a dated name.

_save_rule = '=' * 80
print('\n' + _save_rule)
print('SAVING FINAL OPTIMIZED MODEL')
print(_save_rule)

date_stamp = datetime.now().strftime('%Y%m%d')
finetuned_model_name = f'{MODEL_NAME}_finetuned_{date_stamp}'

# Destination folder (created on demand) and the two files that live in it.
model_save_dir = BASE_DIR / 'models' / finetuned_model_name
model_save_dir.mkdir(parents=True, exist_ok=True)
final_model_path = model_save_dir / f'{finetuned_model_name}.pt'
metadata_path = model_save_dir / f'{finetuned_model_name}_metadata.json'

# Source checkpoint. Note: TRAIN_DIR already includes RUN_NAME_TRAINING.
weights_path = TRAIN_DIR / 'weights' / 'best.pt'

if not weights_path.exists():
    # No best.pt on disk: fall back to serializing the in-memory model.
    print(f'\n‚ö†Ô∏è  Best weights not found at: {weights_path}')
    print('  Attempting to save current model state...')
    try:
        final_model.save(str(final_model_path))
        print(f'‚úì Model saved to: {final_model_path}')
    except Exception as _save_failure:
        # Best effort only: report the problem and let the cell continue.
        print(f'‚ö†Ô∏è  Error saving model: {_save_failure}')
else:
    shutil.copy(weights_path, final_model_path)
    print(f'\n‚úì Final model saved to: {final_model_path}')
    _model_size_mb = final_model_path.stat().st_size / (1024 * 1024)
    print(f'  Size: {_model_size_mb:.1f} MB')

# Describe where the hyperparameters came from. The tuning run name and path
# are always recorded; trial details are added only when tuning metadata exists.
optimization_meta = {
    'tuning_run': TUNING_RUN_NAME,
    'tuning_run_path': str(TUNE_DIR),
}

# Absolute mAP@0.5 gain over the best tuning trial; stays 0 when there is no
# usable (positive) tuning baseline.
improvement_value = 0

_tuning_metadata_available = (not USE_DEFAULT_CONFIG) and tuning_metadata_path.exists()
if _tuning_metadata_available:
    # (key written to optimization_meta, key read from tuning_metadata, fallback)
    for _meta_key, _tuning_key, _fallback in (
        ('n_trials', 'total_trials', 'N/A'),
        ('completed_trials', 'completed_trials', 'N/A'),
        ('best_trial', 'best_trial', 'N/A'),
        ('best_trial_map50', 'best_map50', 0),
        ('optimization_duration', 'optimization_duration', 'N/A'),
    ):
        optimization_meta[_meta_key] = tuning_metadata.get(_tuning_key, _fallback)

    tuning_best_map = tuning_metadata.get('best_map50', 0)
    if tuning_best_map > 0:
        improvement_value = float(final_metrics['map50'] - tuning_best_map)

# Assemble the metadata record stored next to the exported weights.
_training_params = {
    'epochs': EPOCHS_FINAL_TRAINING,
    'batch_size': BATCH_SIZE,
    'image_size': best_params.get('imgsz', 640),  # 640 when 'imgsz' is absent
    'patience': ModelConfig.DEFAULT_PATIENCE,
    'save_period': ModelConfig.DEFAULT_SAVE_PERIOD,
}

metadata = {
    'model_name': MODEL_NAME,
    'finetuned_name': finetuned_model_name,
    'model_path': str(final_model_path),
    'dataset': str(YOLO_DATASET_ROOT),
    'training_date': datetime.now().isoformat(),
    'training_run': RUN_NAME_TRAINING,
    'training_run_path': str(TRAIN_DIR),
    'optimization': optimization_meta,
    'best_hyperparameters': best_params,
    'training_params': _training_params,
    'final_metrics': final_metrics,
    'improvement_vs_tuning': improvement_value,
}

# Write the record as indented JSON.
with open(metadata_path, 'w', encoding='utf-8') as _metadata_file:
    json.dump(metadata, _metadata_file, indent=2)

print(f'‚úì Model metadata saved to: {metadata_path}')
print('\nüì¶ Final Model Package:')
for _package_line in (
    f'  Model: {final_model_path}',
    f'  Metadata: {metadata_path}',
    f'  Training Log: {training_log_path}',
):
    print(_package_line)
print('=' * 80)

## 9. Test Final Model

In [None]:
# RUN FINAL VALIDATION ON TEST SET (ENHANCED)
# ============================================================================
# Runs the project's detailed test pipeline (run_validation_pipeline) on the
# "test" split for the exported fine-tuned model and collects one summary row
# in `results_summary` (status "ok" or "error").
print('\n' + '=' * 80)
print('RUNNING FINAL VALIDATION ON TEST SET')
print('=' * 80)

results_summary = []
# Single IoU threshold passed to the pipeline. NOTE: the PDF report formats
# this value with ':.2f', so switching to a list (e.g. [0.5, 0.55, 0.6]) would
# also require changing the report code.
IOU_THRESHOLDS = 0.5

# Forget any `result` left behind by an earlier execution of this cell, so a
# failed or skipped validation cannot leak outdated test metrics into later
# cells that check `'result' in globals()`.
globals().pop('result', None)

# Verify that the final model exists before validation
if not final_model_path.exists():
    print(f"‚ö†Ô∏è  Warning: Final model not found at {final_model_path}")
    print(f"   Skipping test validation. Please complete training first.")
else:
    # Make the project's test scripts importable (path is added only once).
    scrpt_dir = BASE_DIR / "yolo_test"
    if str(scrpt_dir) not in sys.path:
        sys.path.append(str(scrpt_dir))

    try:
        from run_yolo_detailed_testing_report import run_validation_pipeline

        # Per the original notes for this cell, the validation script resolves
        # both inputs from a single base_dir:
        #   - Dataset path: base_dir / dataset_name / data.yaml
        #   - Model path:   base_dir / models / model_name / model_name.pt
        # Models always live under BASE_DIR/models/, so BASE_DIR is the base
        # directory in every environment; in Colab the dataset may live
        # elsewhere and is linked into BASE_DIR below.
        validation_base_dir = BASE_DIR
        expected_dataset_yaml = validation_base_dir / dataset_name / 'data.yaml'
        expected_model_path = (
            validation_base_dir / 'models' / finetuned_model_name / f'{finetuned_model_name}.pt'
        )

        if IS_COLAB:
            # Link the dataset into BASE_DIR when it is only available at
            # YOLO_DATASET_ROOT.
            base_dir_dataset = BASE_DIR / dataset_name
            if not (base_dir_dataset / 'data.yaml').exists() and YOLO_DATASET_ROOT.exists():
                print(f"\nüìÇ Dataset location mismatch detected")
                print(f"   Dataset is in: {YOLO_DATASET_ROOT}")
                print(f"   Validation expects: {base_dir_dataset}")
                print(f"   Creating symbolic link...")
                try:
                    if not base_dir_dataset.exists():
                        os.symlink(str(YOLO_DATASET_ROOT), str(base_dir_dataset))
                        print(f"   ‚úì Symbolic link created")
                except OSError as symlink_error:
                    # Best effort: validation is still attempted afterwards.
                    print(f"   ‚ö†Ô∏è  Could not create symlink: {symlink_error}")
                    print(f"   Validation may fail if dataset path is incorrect")

        print(f"\nüîç Validation Configuration:")
        print(f"   Base Dir: {validation_base_dir}")
        print(f"   Dataset: {dataset_name}")
        print(f"   Model: {finetuned_model_name}")
        print(f"   Expected dataset path: {expected_dataset_yaml}")
        print(f"   Expected model path: {expected_model_path}")
        print(f"   Actual model path: {final_model_path}")

        # Make sure the model sits where the pipeline will look for it.
        if final_model_path != expected_model_path and not expected_model_path.exists():
            print(f"\nüì¶ Copying model to expected location...")
            expected_model_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(final_model_path, expected_model_path)
            print(f"   ‚úì Model copied to {expected_model_path}")

        result = run_validation_pipeline(
            model_name=finetuned_model_name,
            dataset_name=dataset_name,
            split="test",
            iou_threshold=IOU_THRESHOLDS,
            base_dir=validation_base_dir,
            # Honor the notebook-wide W&B switch instead of always logging.
            use_wandb=USE_WANDB,
            save_reports=True,
            batch_size=BATCH_SIZE,
            include_training_exposure_analysis=True
        )

        overall = result["metrics"]["overall"]
        yolo_overall = result["metrics"]["yolo_metrics"]

        results_summary.append({
            "model_name": finetuned_model_name,
            "dataset": dataset_name,
            "split": "test",
            "iou": IOU_THRESHOLDS,
            "precision_confusion": overall["precision"],
            "recall_confusion": overall["recall"],
            "f1_confusion": overall["f1"],
            "precision_yolo": yolo_overall["precision"],
            "recall_yolo": yolo_overall["recall"],
            "map50": yolo_overall["map50"],
            "map50_95": yolo_overall["map50_95"],
            "params_m": result["model_info"]["params"] / 1e6,
            "size_mb": result["model_info"]["size(MB)"],
            "fps": result["metrics"]["fps"],
            "status": "ok",
            "run_dir": str(result["run_dir"]),
            "hyperparameters": final_training_params,  # traceable
        })

    except Exception as e:
        # Cell-level boundary: report the failure with a traceback and record
        # an "error" row instead of aborting the notebook.
        print(f"‚ö†Ô∏è Model {finetuned_model_name} failed during validation: {e}")
        import traceback
        traceback.print_exc()
        results_summary.append({
            "model_name": finetuned_model_name,
            "dataset": dataset_name,
            "split": "test",
            "iou": IOU_THRESHOLDS,
            "status": "error",
            "error_message": str(e)
        })

# Show the collected validation rows as a table; nothing is collected when the
# validation step was skipped.
if not results_summary:
    print('\n‚ö†Ô∏è  No validation results - model training not completed yet')
else:
    results_df = pd.DataFrame(results_summary)
    print('\nüìä Final Validation Results:')
    display(results_df)


## 10. Generate Training Report (PDF)

In [None]:
# GENERATE COMPREHENSIVE TRAINING PDF REPORT
# ============================================================================
from reportlab.lib.pagesizes import A4
from reportlab.lib import colors as rl_colors
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, Image, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_CENTER, TA_LEFT
import platform
import psutil

_report_rule = '=' * 80
print('\n' + _report_rule)
print('GENERATING COMPREHENSIVE TRAINING PDF REPORT')
print(_report_rule)

# The PDF is written into the training run directory.
pdf_training_report_path = TRAIN_DIR / f'{MODEL_NAME}_training_report.pdf'
_page_margin = 30  # same margin on all four sides
doc = SimpleDocTemplate(
    str(pdf_training_report_path),
    pagesize=A4,
    rightMargin=_page_margin,
    leftMargin=_page_margin,
    topMargin=_page_margin,
    bottomMargin=_page_margin,
)
styles = getSampleStyleSheet()

# Custom styles derived from the sample stylesheet.
title_style = ParagraphStyle(
    'Title',
    parent=styles['Heading1'],
    fontSize=24,
    textColor=rl_colors.HexColor('#2c3e50'),
    alignment=TA_CENTER,
    spaceAfter=20,
)
heading_style = ParagraphStyle(
    'Heading',
    parent=styles['Heading2'],
    fontSize=16,
    textColor=rl_colors.HexColor('#34495e'),
    spaceAfter=12,
    spaceBefore=20,
)
normal_style = ParagraphStyle('Normal', parent=styles['Normal'], fontSize=10)

# `story` collects the report's flowables in order; it opens with the title.
story = [
    Paragraph(f'{MODEL_NAME} Final Training Report', title_style),
    Spacer(1, 12),
]

# --- System Info ---
# Two-column table describing the machine the report was generated on.
story.append(Paragraph('System Information', heading_style))
sys_info_data = [
    # Header row: the table style below formats row 0 as a header, so without
    # it the 'OS' entry would be rendered as the header (every other table in
    # this report starts with a header row as well).
    ['Property', 'Value'],
    ['OS', platform.system() + ' ' + platform.release()],
    ['Python Version', platform.python_version()],
    ['PyTorch Version', torch.__version__],
    ['CUDA Available', str(torch.cuda.is_available())],
    # str() keeps the cell a plain string whatever type `device` has.
    ['Device', str(device)],
    ['RAM (GB)', f"{psutil.virtual_memory().total/1e9:.2f}"],
]
sys_table = Table(sys_info_data, colWidths=[2.5*inch, 3.5*inch])
sys_table.setStyle(TableStyle([
    ('BACKGROUND', (0,0), (-1,0), rl_colors.HexColor('#95a5a6')),
    ('TEXTCOLOR', (0,0), (-1,0), rl_colors.whitesmoke),
    ('GRID', (0,0), (-1,-1), 1, rl_colors.black),
    ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
]))
story.append(sys_table)
story.append(Spacer(1, 12))

# --- Dataset Info ---
story.append(Paragraph('Dataset Information', heading_style))

# The class list goes into its own Paragraph so long lists wrap instead of
# overflowing a table cell.
class_names_text = ', '.join(map(str, CLASS_NAMES.values()))
class_names_wrapped = Paragraph(class_names_text, normal_style)

dataset_info_data = [
    ['Property', 'Value'],
    ['Dataset', YOLO_DATASET_ROOT.name],
    ['Number of Classes', str(NUM_CLASSES)],
]
# One row per split; 'N/A' when the split or its image count is missing.
for _split_title, _split_key in (('Train', 'train'), ('Val', 'val'), ('Test', 'test')):
    _image_count = dataset_stats.get(_split_key, {}).get('images', 'N/A')
    dataset_info_data.append([f'{_split_title} Images', str(_image_count)])
dataset_info_data.append(['Data YAML', str(DATA_YAML_PATH.name)])

dataset_table = Table(dataset_info_data, colWidths=[2*inch, 4*inch])
dataset_table.setStyle(TableStyle([
    ('BACKGROUND', (0,0), (-1,0), rl_colors.HexColor('#16a085')),
    ('TEXTCOLOR', (0,0), (-1,0), rl_colors.whitesmoke),
    ('GRID', (0,0), (-1,-1), 1, rl_colors.black),
]))

# Table first, then the wrapped class list underneath it.
story.extend([
    dataset_table,
    Spacer(1, 6),
    Paragraph('<b>Classes:</b>', normal_style),
    class_names_wrapped,
    Spacer(1, 12),
])

# --- Optimization Summary ---
story.append(Paragraph('Optimization Summary', heading_style))

# Trial statistics are shown only for tuned runs whose metadata file exists;
# otherwise every trial-related cell reads 'N/A'.
if not USE_DEFAULT_CONFIG and tuning_metadata_path.exists():
    _total_trials_cell = str(tuning_metadata.get('total_trials', 'N/A'))
    _completed_trials_cell = str(tuning_metadata.get('completed_trials', 'N/A'))
    _best_trial_cell = str(tuning_metadata.get('best_trial', 'N/A'))
    _best_trial_map_cell = f"{tuning_metadata.get('best_map50', 0):.4f}"
else:
    _total_trials_cell = 'N/A'
    _completed_trials_cell = 'N/A'
    _best_trial_cell = 'N/A'
    _best_trial_map_cell = 'N/A'

opt_summary_data = [
    ['Metric', 'Value'],
    ['Tuning Run', TUNING_RUN_NAME],
    ['Total Trials', _total_trials_cell],
    ['Completed Trials', _completed_trials_cell],
    ['Best Trial Number', _best_trial_cell],
    ['Best Trial mAP@0.5', _best_trial_map_cell],
    ['Final Training Epochs', str(EPOCHS_FINAL_TRAINING)],
]
opt_table = Table(opt_summary_data, colWidths=[3*inch, 3*inch])
opt_table.setStyle(TableStyle([
    ('BACKGROUND', (0,0), (-1,0), rl_colors.HexColor('#f39c12')),
    ('TEXTCOLOR', (0,0), (-1,0), rl_colors.whitesmoke),
    ('GRID', (0,0), (-1,-1), 1, rl_colors.black),
]))
story.extend([opt_table, Spacer(1, 12)])

# --- Optimized Hyperparameters ---
# Starts a new page and lists every entry of `best_params`; floats are shown
# with six decimals, everything else via str().
story.append(PageBreak())
story.append(Paragraph('Optimized Hyperparameters Used', heading_style))

hyperparam_data = [['Parameter', 'Value']]
hyperparam_data.extend(
    [_param_name, f"{_param_value:.6f}" if isinstance(_param_value, float) else str(_param_value)]
    for _param_name, _param_value in best_params.items()
)

hyperparam_table = Table(hyperparam_data, colWidths=[3*inch, 3*inch])
hyperparam_table.setStyle(TableStyle([
    ('BACKGROUND', (0,0), (-1,0), rl_colors.HexColor('#3498db')),
    ('TEXTCOLOR', (0,0), (-1,0), rl_colors.whitesmoke),
    ('GRID', (0,0), (-1,-1), 1, rl_colors.black),
]))
story.extend([hyperparam_table, Spacer(1, 12)])

# --- Training Process Details ---
# Adds the epoch-by-epoch analysis (curves image, sampled epoch table, summary
# statistics) built from the run's results.csv, or a short notice when that
# file is missing or cannot be processed.
story.append(PageBreak())
story.append(Paragraph('Training Process Analysis', heading_style))


def _plot_available_columns(ax, frame, series_specs, **shared_line_kwargs):
    """Plot each requested column of ``frame`` against its ``epoch`` column.

    Args:
        ax: Matplotlib axes to draw on.
        frame: DataFrame with an ``epoch`` column plus metric columns.
        series_specs: Iterable of ``(column, label, color, extra_kwargs)``;
            specs whose column is absent from ``frame`` are skipped.
        **shared_line_kwargs: Line properties applied to every plotted series.
    """
    for column, label, color, extra_kwargs in series_specs:
        if column in frame.columns:
            ax.plot(frame['epoch'], frame[column], label=label, color=color,
                    **shared_line_kwargs, **extra_kwargs)


def _decorate_epoch_axis(ax, ylabel, title, ylim=None):
    """Apply the shared labels, legend and grid; optionally fix the y-range."""
    ax.set_xlabel('Epoch', fontsize=10)
    ax.set_ylabel(ylabel, fontsize=10)
    ax.set_title(title, fontsize=12, fontweight='bold')
    ax.legend(loc='best', fontsize=9)
    ax.grid(True, alpha=0.3)
    if ylim is not None:
        ax.set_ylim(*ylim)


def _metric_cell(row, column):
    """Return ``row[column]`` with four decimals, or 'N/A' if it is absent."""
    return f"{row[column]:.4f}" if column in row else 'N/A'


def _total_loss_cell(row, loss_columns):
    """Return the summed loss components with four decimals, or 'N/A' if none."""
    if not loss_columns:
        return 'N/A'
    return f"{sum(row[column] for column in loss_columns):.4f}"


# Try to load training results CSV for detailed epoch-by-epoch analysis
# YOLO saves results.csv directly in the training run directory
results_csv = TRAIN_DIR / 'results.csv'
if results_csv.exists():
    try:
        # pandas (pd) and pyplot (plt) are imported at the top of the notebook.
        # NOTE(review): matplotlib.use changes the backend for the whole
        # session - confirm no later cell relies on inline figures.
        import matplotlib
        matplotlib.use('Agg')

        # Load results; header names are stripped of surrounding whitespace.
        training_results = pd.read_csv(results_csv)
        training_results.columns = training_results.columns.str.strip()

        story.append(Paragraph('Epoch-by-Epoch Training Metrics', styles['Heading3']))
        story.append(Spacer(1, 6))

        # (column suffix, legend label, color) shared by train and val panels.
        loss_components = (
            ('box_loss', 'Box Loss', '#e74c3c'),
            ('cls_loss', 'Class Loss', '#3498db'),
            ('dfl_loss', 'DFL Loss', '#f39c12'),
        )

        # Create comprehensive training curves (3 x 2 grid of panels).
        training_curves_img = TRAIN_DIR / 'report_training_curves.png'
        fig, axes = plt.subplots(3, 2, figsize=(12, 14))
        try:
            fig.suptitle('Training Progress Over Epochs', fontsize=16, fontweight='bold')

            # 1. Training loss components
            ax = axes[0, 0]
            _plot_available_columns(
                ax, training_results,
                [(f'train/{suffix}', label, color, {}) for suffix, label, color in loss_components],
                linewidth=2,
            )
            _decorate_epoch_axis(ax, 'Loss', 'Training Loss Components')

            # 2. Validation loss components (dashed)
            ax = axes[0, 1]
            _plot_available_columns(
                ax, training_results,
                [(f'val/{suffix}', label, color, {}) for suffix, label, color in loss_components],
                linewidth=2, linestyle='--',
            )
            _decorate_epoch_axis(ax, 'Loss', 'Validation Loss Components')

            # 3. mAP metrics over epochs
            ax = axes[1, 0]
            _plot_available_columns(
                ax, training_results,
                [
                    ('metrics/mAP50(B)', 'mAP@0.5', '#27ae60', {'marker': 'o'}),
                    ('metrics/mAP50-95(B)', 'mAP@0.5:0.95', '#16a085', {'marker': 's'}),
                ],
                linewidth=2.5, markersize=4,
            )
            _decorate_epoch_axis(ax, 'mAP', 'mAP Progression', ylim=(0, 1))

            # 4. Precision and recall
            ax = axes[1, 1]
            _plot_available_columns(
                ax, training_results,
                [
                    ('metrics/precision(B)', 'Precision', '#9b59b6', {'marker': '^'}),
                    ('metrics/recall(B)', 'Recall', '#e67e22', {'marker': 'v'}),
                ],
                linewidth=2.5, markersize=4,
            )
            _decorate_epoch_axis(ax, 'Score', 'Precision & Recall Progression', ylim=(0, 1))

            # 5. Learning rate schedule (one line per logged lr/pg* column)
            ax = axes[2, 0]
            _plot_available_columns(
                ax, training_results,
                [
                    ('lr/pg0', 'LR Group 0', '#34495e', {}),
                    ('lr/pg1', 'LR Group 1', '#7f8c8d', {}),
                    ('lr/pg2', 'LR Group 2', '#95a5a6', {}),
                ],
                linewidth=2,
            )
            _decorate_epoch_axis(ax, 'Learning Rate', 'Learning Rate Schedule')

            # 6. Combined loss (train vs val): sum of all loss columns per split
            ax = axes[2, 1]
            train_loss_cols = [col for col in training_results.columns if 'train/' in col and 'loss' in col]
            val_loss_cols = [col for col in training_results.columns if 'val/' in col and 'loss' in col]
            if train_loss_cols:
                train_total = training_results[train_loss_cols].sum(axis=1)
                ax.plot(training_results['epoch'], train_total,
                        label='Total Train Loss', color='#c0392b', linewidth=2.5)
            if val_loss_cols:
                val_total = training_results[val_loss_cols].sum(axis=1)
                ax.plot(training_results['epoch'], val_total,
                        label='Total Val Loss', color='#2980b9', linewidth=2.5, linestyle='--')
            _decorate_epoch_axis(ax, 'Total Loss', 'Total Loss: Train vs Validation')

            fig.tight_layout()

            # Save training curves
            fig.savefig(training_curves_img, dpi=150, bbox_inches='tight')
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)

        # Add to PDF
        story.append(Image(str(training_curves_img), width=6.5*inch, height=7.5*inch))
        story.append(Spacer(1, 12))

        # Epoch-by-Epoch Summary Table (First 10, Middle 5, Last 10)
        story.append(PageBreak())
        story.append(Paragraph('Detailed Epoch Metrics', styles['Heading3']))
        story.append(Spacer(1, 6))

        # Short runs are listed in full; longer ones are sampled.
        total_epochs = len(training_results)
        if total_epochs <= 25:
            selected_epochs = training_results
        else:
            middle_start = total_epochs // 2 - 2
            selected_epochs = pd.concat([
                training_results.head(10),
                training_results.iloc[middle_start:middle_start + 5],
                training_results.tail(10),
            ])

        # Build table with key metrics
        epoch_table_data = [['Epoch', 'Train Loss', 'Val Loss', 'mAP@0.5', 'mAP@0.5:0.95', 'Precision', 'Recall']]
        for _, row in selected_epochs.iterrows():
            epoch_table_data.append([
                str(int(row['epoch'])) if 'epoch' in row else '?',
                _total_loss_cell(row, train_loss_cols),
                _total_loss_cell(row, val_loss_cols),
                _metric_cell(row, 'metrics/mAP50(B)'),
                _metric_cell(row, 'metrics/mAP50-95(B)'),
                _metric_cell(row, 'metrics/precision(B)'),
                _metric_cell(row, 'metrics/recall(B)'),
            ])

        epoch_table = Table(epoch_table_data, colWidths=[0.6*inch, 1*inch, 1*inch, 0.9*inch, 1.1*inch, 0.9*inch, 0.9*inch])
        epoch_table.setStyle(TableStyle([
            ('BACKGROUND', (0,0), (-1,0), rl_colors.HexColor('#8e44ad')),
            ('TEXTCOLOR', (0,0), (-1,0), rl_colors.whitesmoke),
            ('FONTSIZE', (0,0), (-1,-1), 8),
            ('GRID', (0,0), (-1,-1), 0.5, rl_colors.black),
            ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
            ('ALIGN', (0,0), (-1,-1), 'CENTER'),
        ]))
        story.append(epoch_table)
        story.append(Spacer(1, 12))

        # Training Summary Statistics
        story.append(Paragraph('Training Statistics Summary', styles['Heading3']))
        story.append(Spacer(1, 6))

        stats_data = [['Metric', 'Initial', 'Final', 'Best', 'Change']]
        for stat_label, stat_column in (
            ('mAP@0.5', 'metrics/mAP50(B)'),
            ('mAP@0.5:0.95', 'metrics/mAP50-95(B)'),
            ('Precision', 'metrics/precision(B)'),
            ('Recall', 'metrics/recall(B)'),
        ):
            if stat_column not in training_results.columns:
                continue
            stat_series = training_results[stat_column]
            initial_value = stat_series.iloc[0]
            final_value = stat_series.iloc[-1]
            stats_data.append([
                stat_label,
                f"{initial_value:.4f}",
                f"{final_value:.4f}",
                f"{stat_series.max():.4f}",
                # '+' in the format spec yields the correct sign for both
                # gains and losses (a literal '+' prefix produced '+-0.0123').
                f"{final_value - initial_value:+.4f}",
            ])

        stats_table = Table(stats_data, colWidths=[1.5*inch, 1*inch, 1*inch, 1*inch, 1*inch])
        stats_table.setStyle(TableStyle([
            ('BACKGROUND', (0,0), (-1,0), rl_colors.HexColor('#2ecc71')),
            ('TEXTCOLOR', (0,0), (-1,0), rl_colors.whitesmoke),
            ('GRID', (0,0), (-1,-1), 1, rl_colors.black),
            ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
            ('ALIGN', (1,1), (-1,-1), 'CENTER'),
        ]))
        story.append(stats_table)
        story.append(Spacer(1, 12))

    except Exception as e:
        # Best effort: the report is still produced, with a note in place of
        # whatever part of this section could not be built.
        story.append(Paragraph(f'Could not load detailed training results: {str(e)}', normal_style))
        story.append(Spacer(1, 12))
else:
    story.append(Paragraph('Training results file (results.csv) not found. Train the model to generate detailed metrics.', normal_style))
    story.append(Spacer(1, 12))

# --- Final Model Performance ---
# Included only when `final_metrics` exists in the notebook namespace.
if 'final_metrics' in globals():
    story.append(PageBreak())
    story.append(Paragraph('Final Model Performance', heading_style))

    perf_data = [['Metric', 'Value']]
    for _perf_label, _perf_key in (
        ('mAP@0.5', 'map50'),
        ('mAP@0.5:0.95', 'map50_95'),
        ('Precision', 'precision'),
        ('Recall', 'recall'),
    ):
        perf_data.append([_perf_label, f"{final_metrics[_perf_key]:.4f}"])

    perf_table = Table(perf_data, colWidths=[3*inch, 3*inch])
    perf_table.setStyle(TableStyle([
        ('BACKGROUND', (0,0), (-1,0), rl_colors.HexColor('#27ae60')),
        ('TEXTCOLOR', (0,0), (-1,0), rl_colors.whitesmoke),
        ('GRID', (0,0), (-1,-1), 1, rl_colors.black),
    ]))
    story.extend([perf_table, Spacer(1, 12)])

# --- Test Set Validation Results ---
if 'result' in globals() and 'metrics' in result:
    story.append(PageBreak())
    story.append(Paragraph('Test Set Validation Results', heading_style))
    story.append(Spacer(1, 6))
    
    # Unpack the parts of the validation result used by the tables below.
    test_metrics = result['metrics']
    test_overall, test_yolo = test_metrics['overall'], test_metrics['yolo_metrics']
    test_model_info = result['model_info']

    # Model Architecture and Performance Summary
    story.append(Paragraph('Model Architecture & Performance', styles['Heading3']))

    # Numeric model facts fall back to 0 (and layers to 'N/A') when missing.
    _params_millions = test_model_info.get('params', 0) / 1e6
    _size_megabytes = test_model_info.get('size(MB)', 0)
    _gflops = test_model_info.get('FLOPs(G)', 0)
    _layer_count = test_model_info.get('layers', 'N/A')

    model_arch_data = [
        ['Metric', 'Value'],
        ['Model Name', finetuned_model_name],
        ['Parameters (M)', f"{_params_millions:.2f}"],
        ['Model Size (MB)', f"{_size_megabytes:.2f}"],
        ['FLOPs (G)', f"{_gflops:.2f}"],
        ['Layers', str(_layer_count)],
        ['Inference Speed (FPS)', f"{test_metrics['fps']:.2f}"],
        ['IoU Threshold', f"{IOU_THRESHOLDS:.2f}"],
    ]
    model_arch_table = Table(model_arch_data, colWidths=[2.5*inch, 3.5*inch])
    model_arch_table.setStyle(TableStyle([
        ('BACKGROUND', (0,0), (-1,0), rl_colors.HexColor('#34495e')),
        ('TEXTCOLOR', (0,0), (-1,0), rl_colors.whitesmoke),
        ('GRID', (0,0), (-1,-1), 1, rl_colors.black),
        ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
        ('ALIGN', (1,1), (-1,-1), 'CENTER'),
    ]))
    story.extend([model_arch_table, Spacer(1, 12)])
    
    # Overall Performance Metrics
    # Two value columns: one from `test_overall` (confusion-matrix figures)
    # and one from `test_yolo` (YOLO validation figures); 'N/A' where a
    # metric is not listed for that source.
    story.append(Paragraph('Overall Performance Metrics on Test Set', styles['Heading3']))

    _four_decimals = '{:.4f}'.format
    test_perf_data = [['Metric', 'Confusion Matrix', 'YOLO Validation']]
    test_perf_data.append(
        ['Precision', _four_decimals(test_overall['precision']), _four_decimals(test_yolo['precision'])])
    test_perf_data.append(
        ['Recall', _four_decimals(test_overall['recall']), _four_decimals(test_yolo['recall'])])
    test_perf_data.append(['F1-Score', _four_decimals(test_overall['f1']), 'N/A'])
    test_perf_data.append(['mAP@0.5 (Overall)', 'N/A', _four_decimals(test_yolo['map50'])])
    test_perf_data.append(['mAP@0.5:0.95 (Overall)', 'N/A', _four_decimals(test_yolo['map50_95'])])

    test_perf_table = Table(test_perf_data, colWidths=[2*inch, 2*inch, 2*inch])
    test_perf_table.setStyle(TableStyle([
        ('BACKGROUND', (0,0), (-1,0), rl_colors.HexColor('#e74c3c')),
        ('TEXTCOLOR', (0,0), (-1,0), rl_colors.whitesmoke),
        ('GRID', (0,0), (-1,-1), 1, rl_colors.black),
        ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
        ('ALIGN', (1,0), (-1,-1), 'CENTER'),
    ]))
    story.extend([test_perf_table, Spacer(1, 12)])
    
    # Per-Class mAP@0.5 and Performance
    # Rendered only when the result carries a non-empty 'df_metrics' frame.
    if 'df_metrics' in result and not result['df_metrics'].empty:
        story.append(PageBreak())
        story.append(Paragraph('Per-Class Performance Metrics', styles['Heading3']))
        story.append(Spacer(1, 6))

        df_metrics = result['df_metrics']

        # One row per class: four scores with four decimals, then the three
        # counts shown as integers.
        per_class_data = [['Class', 'Precision', 'Recall', 'F1-Score', 'mAP@0.5', 'TP', 'FP', 'FN']]
        for _, _class_row in df_metrics.iterrows():
            _class_cells = [str(_class_row['Class'])]
            for _score_column in ('Precision', 'Recall', 'F1-Score', 'mAP@0.5'):
                _class_cells.append(f"{_class_row[_score_column]:.4f}")
            for _count_column in ('TP', 'FP', 'FN'):
                _class_cells.append(str(int(_class_row[_count_column])))
            per_class_data.append(_class_cells)

        per_class_table = Table(
            per_class_data,
            colWidths=[1.2*inch, 0.8*inch, 0.7*inch, 0.8*inch, 0.8*inch, 0.5*inch, 0.5*inch, 0.5*inch],
        )
        per_class_table.setStyle(TableStyle([
            ('BACKGROUND', (0,0), (-1,0), rl_colors.HexColor('#9b59b6')),
            ('TEXTCOLOR', (0,0), (-1,0), rl_colors.whitesmoke),
            ('FONTSIZE', (0,0), (-1,-1), 8),
            ('GRID', (0,0), (-1,-1), 0.5, rl_colors.black),
            ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
            ('ALIGN', (1,0), (-1,-1), 'CENTER'),
        ]))
        story.append(per_class_table)
        story.append(Spacer(1, 12))

        # mAP@0.5 by Class visualization: embedded when the figure file exists.
        map50_by_class_img = result['figures'].get('map50_by_class')
        if map50_by_class_img and Path(map50_by_class_img).exists():
            try:
                story.append(Paragraph('mAP@0.5 Distribution by Class', styles['Heading4']))
                story.append(Spacer(1, 4))
                story.append(Image(str(map50_by_class_img), width=6.5*inch, height=4.5*inch))
                story.append(Spacer(1, 12))
            except Exception as _chart_error:
                # Keep building the report; leave a note where the chart would be.
                story.append(Paragraph(f'Could not load mAP by class chart: {str(_chart_error)}', normal_style))
    
    # IoU Information
    # One explanatory page stating the IoU threshold and how to read the
    # per-class numbers in the report.
    story.append(PageBreak())
    story.append(Paragraph('Intersection over Union (IoU) Analysis', styles['Heading3']))
    story.append(Spacer(1, 6))

    # Format the threshold once; it appears four times in the text below.
    # NOTE(review): the :.2f format assumes IOU_THRESHOLDS is a single number
    # despite its plural name - confirm against where it is defined.
    iou_value = f"{IOU_THRESHOLDS:.2f}"

    # The comparison sign is written as a real U+2265 character; the previous
    # text held its UTF-8 bytes decoded with the wrong codec.
    iou_info_text = f"""
    <b>IoU Threshold Used:</b> {iou_value}<br/>
    <br/>
    IoU (Intersection over Union) measures the overlap between predicted and ground truth bounding boxes.
    A prediction is considered correct (True Positive) when IoU ≥ {iou_value}.<br/>
    <br/>
    <b>Per-Class IoU Performance:</b><br/>
    The confusion matrix and per-class metrics above show detection accuracy at IoU={iou_value} threshold.
    Each class's True Positives (TP) represent detections with IoU ≥ {iou_value}.
    """
    story.append(Paragraph(iou_info_text, normal_style))
    story.append(Spacer(1, 12))
    
    # Confusion matrix page: the image is scaled to fit a 6 x 6 inch box while
    # keeping its aspect ratio; a short note is shown when it is unavailable.
    story.append(PageBreak())
    story.append(Paragraph('Confusion Matrix (Test Set)', styles['Heading3']))
    story.append(Spacer(1, 6))

    confusion_matrix_img = result['figures'].get('confusion_matrix')
    if not (confusion_matrix_img and Path(confusion_matrix_img).exists()):
        story.append(Paragraph('Confusion matrix image not available.', normal_style))
    else:
        try:
            # PIL is only needed for the pixel dimensions.
            with PILImage.open(confusion_matrix_img) as img:
                img_width, img_height = img.size

            aspect_ratio = img_height / img_width
            box_side = 6*inch

            # Start at full width; if that is too tall, clamp the height and
            # derive the width from it instead.
            pdf_width = box_side
            pdf_height = pdf_width * aspect_ratio
            if pdf_height > box_side:
                pdf_height = box_side
                pdf_width = pdf_height / aspect_ratio

            story.append(Image(str(confusion_matrix_img), width=pdf_width, height=pdf_height))
            story.append(Spacer(1, 12))
        except Exception as load_error:
            story.append(Paragraph(f'Could not load confusion matrix: {str(load_error)}', normal_style))
    story.append(Spacer(1, 12))
    
    # Test-set performance curves. The section (page break + heading) is only
    # emitted when at least one of the curve images exists on disk.
    pr_curve_img = result['figures'].get('pr_curve')
    f1_curve_img = result['figures'].get('f1_curve')
    overall_metrics_img = result['figures'].get('overall_metrics')

    # (image path, sub-heading, label used in the failure note, width, height)
    curve_specs = (
        (pr_curve_img, 'Precision-Recall Curve', 'PR curve', 6*inch, 4*inch),
        (f1_curve_img, 'F1-Score Curve', 'F1 curve', 6*inch, 4*inch),
        (overall_metrics_img, 'Overall Metrics Visualization', 'overall metrics', 6.5*inch, 5*inch),
    )

    has_curves = any(
        curve_spec[0] and Path(curve_spec[0]).exists()
        for curve_spec in curve_specs
    )

    if has_curves:
        story.append(PageBreak())
        story.append(Paragraph('Test Set Performance Curves', styles['Heading3']))
        story.append(Spacer(1, 6))

        for curve_path, curve_title, curve_label, curve_width, curve_height in curve_specs:
            # Each curve is optional on its own.
            if not (curve_path and Path(curve_path).exists()):
                continue
            try:
                story.append(Paragraph(curve_title, styles['Heading4']))
                story.append(Image(str(curve_path), width=curve_width, height=curve_height))
                story.append(Spacer(1, 12))
            except Exception as curve_error:
                story.append(Paragraph(f'Could not load {curve_label}: {str(curve_error)}', normal_style))
    
    # Sample Comparison Images
    # Shows up to six ground-truth vs prediction comparison images, two per
    # page. Entries whose image file is missing are skipped silently; entries
    # that fail while loading are replaced by a short note.
    if 'comparison_data' in result and result['comparison_data']:
        story.append(PageBreak())
        story.append(Paragraph('Sample Predictions: Ground Truth vs Model Output', heading_style))
        story.append(Spacer(1, 6))

        # Loop invariants, built once instead of once per sample.
        count_style = ParagraphStyle('Small', parent=normal_style, fontSize=8, textColor=rl_colors.grey)
        max_img_width = 6.5*inch
        max_img_height = 4*inch

        # Number of samples actually added to the story. The page break is
        # keyed to this rather than to the enumeration index, so skipped or
        # failed samples do not throw off the two-per-page layout.
        samples_placed = 0

        for idx, comp in enumerate(result['comparison_data'][:6], 1):
            comp_img_path = comp.get('comparison_image_path')
            if not (comp_img_path and Path(comp_img_path).exists()):
                continue

            try:
                # Caption describing the recording conditions of the sample.
                attributes = comp.get('attributes') or {}
                attr_text = f"Sample {idx} - Weather: {attributes.get('weather', 'unknown')}, Scene: {attributes.get('scene', 'unknown')}, Time: {attributes.get('timeofday', 'unknown')}"

                # Scale to fit max_img_width x max_img_height, keeping the
                # aspect ratio; PIL is only needed for the pixel dimensions.
                with PILImage.open(comp_img_path) as img:
                    img_width, img_height = img.size
                aspect_ratio = img_height / img_width
                pdf_width = max_img_width
                pdf_height = pdf_width * aspect_ratio
                if pdf_height > max_img_height:
                    pdf_height = max_img_height
                    pdf_width = pdf_height / aspect_ratio

                gt_count = comp.get('gt_count', 0)
                pred_count = comp.get('pred_count', 0)
                count_text = f"Ground Truth: {gt_count} objects | Predictions: {pred_count} objects"

                # Collected locally and added in one go, so a failure part-way
                # through cannot leave a caption in the story without its image.
                sample_flowables = [
                    Paragraph(attr_text, normal_style),
                    Spacer(1, 4),
                    Image(str(comp_img_path), width=pdf_width, height=pdf_height),
                    Paragraph(count_text, count_style),
                    Spacer(1, 15),
                ]
            except Exception as img_error:
                story.append(Paragraph(f'Could not load comparison {idx}: {str(img_error)}', normal_style))
                story.append(Spacer(1, 12))
                continue

            # Start a new page before the 3rd and 5th placed sample. Breaking
            # before (not after) a sample also avoids a trailing blank page.
            if samples_placed and samples_placed % 2 == 0:
                story.append(PageBreak())
            story.extend(sample_flowables)
            samples_placed += 1

elif 'results_summary' in globals() and len(results_summary) > 0 and results_summary[0].get('status') == 'ok':
    # Fallback: without detailed test results, report the headline numbers of
    # the first entry in results_summary as a two-column table.
    story.append(PageBreak())
    story.append(Paragraph('Test Set Validation Results', heading_style))

    res = results_summary[0]

    # (row label, key looked up in res, format spec); missing keys show as 0.
    fallback_metric_specs = (
        ('Precision (YOLO)', 'precision_yolo', '.4f'),
        ('Recall (YOLO)', 'recall_yolo', '.4f'),
        ('mAP@0.5', 'map50', '.4f'),
        ('mAP@0.5:0.95', 'map50_95', '.4f'),
        ('Parameters (M)', 'params_m', '.2f'),
        ('Size (MB)', 'size_mb', '.2f'),
        ('FPS', 'fps', '.2f'),
    )

    fallback_data = [
        ['Metric', 'Value'],
        ['Model', res.get('model_name', 'N/A')],
    ]
    for metric_label, metric_key, metric_format in fallback_metric_specs:
        fallback_data.append([metric_label, format(res.get(metric_key, 0), metric_format)])

    column_width = 3*inch
    fallback_table = Table(fallback_data, colWidths=[column_width, column_width])
    fallback_style = TableStyle([
        ('BACKGROUND', (0,0), (-1,0), rl_colors.HexColor('#95a5a6')),
        ('TEXTCOLOR', (0,0), (-1,0), rl_colors.whitesmoke),
        ('GRID', (0,0), (-1,-1), 1, rl_colors.black),
    ])
    fallback_table.setStyle(fallback_style)
    story.append(fallback_table)
    story.append(Spacer(1, 12))
    
    # Try to load images from run_dir if available.
    # This stays best-effort: an image that cannot be loaded adds a short note
    # to the report instead of aborting it. Only Exception is caught, so
    # KeyboardInterrupt / SystemExit still propagate.
    if res.get('run_dir'):
        run_dir = Path(res['run_dir'])

        # Confusion matrix saved in the run directory
        confusion_img = run_dir / 'confusion_matrix.png'
        if confusion_img.exists():
            try:
                # Create the image first so a failure cannot leave an orphaned
                # page break and heading behind.
                confusion_flowable = Image(str(confusion_img), width=6*inch, height=5*inch)
            except Exception as img_error:
                story.append(Paragraph(f'Could not load confusion matrix: {str(img_error)}', styles['Normal']))
            else:
                story.append(PageBreak())
                story.append(Paragraph('Confusion Matrix', styles['Heading3']))
                story.append(confusion_flowable)
                story.append(Spacer(1, 12))

        # Up to four sample comparison images, in sorted filename order
        comparisons_dir = run_dir / 'sample_comparisons'
        if comparisons_dir.exists():
            comparison_imgs = sorted(comparisons_dir.glob('comparison_*.png'))[:4]
            if comparison_imgs:
                story.append(PageBreak())
                story.append(Paragraph('Sample Predictions', styles['Heading3']))
                for comp_img in comparison_imgs:
                    try:
                        comparison_flowable = Image(str(comp_img), width=6.5*inch, height=3.5*inch)
                    except Exception as img_error:
                        story.append(Paragraph(f'Could not load {comp_img.name}: {str(img_error)}', styles['Normal']))
                        continue
                    story.append(comparison_flowable)
                    story.append(Spacer(1, 10))

# --- Footer ---
# Two centred grey lines at the end of the report, each with its own named style.
story.append(Spacer(1, 20))
footer_lines = (
    ('Footer', 'Generated by YOLO Training Notebook'),
    ('Footer2', 'BDD100K Dataset - Computer Vision Project'),
)
for footer_style_name, footer_text in footer_lines:
    footer_style = ParagraphStyle(
        footer_style_name,
        parent=styles['Normal'],
        alignment=TA_CENTER,
        textColor=rl_colors.grey,
    )
    story.append(Paragraph(footer_text, footer_style))

# Build PDF
# A failure is reported with a full traceback but does not stop the notebook.
# The status markers are written as real Unicode characters; the previous
# literals held their UTF-8 bytes decoded with the wrong codec.
try:
    doc.build(story)
    print(f'\n✓ Comprehensive Training PDF generated: {pdf_training_report_path}')
except Exception as e:
    print(f'\n❌ Error generating PDF: {e}')
    import traceback
    traceback.print_exc()


## 11. Final Summary

In [None]:
# FINAL SUMMARY
# ============================================================================
# Banner plus project/date line. The emoji are written as real Unicode
# characters; the previous literals held their UTF-8 bytes decoded with the
# wrong codec.

banner_rule = '=' * 80

print('\n\n')
print(banner_rule)
print('FINAL TRAINING COMPLETE!')
print(banner_rule)

print(f'\n📊 Project: {MODEL_NAME} on {YOLO_DATASET_ROOT.name}')
print(f'📅 Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')

# Tuning Summary
# Names the tuning run and, when tuned hyperparameters were used and the
# metadata file exists, the key figures recorded in tuning_metadata.
# (Emoji written as a real Unicode character instead of mis-decoded bytes.)
print('\n🔬 Tuning Run Used:')
print(f'  Run Name: {TUNING_RUN_NAME}')
print(f'  Run Path: {TUNE_DIR}')

if not USE_DEFAULT_CONFIG and tuning_metadata_path.exists():
    print(f'  Total Trials: {tuning_metadata.get("total_trials", "N/A")}')
    print(f'  Completed Trials: {tuning_metadata.get("completed_trials", "N/A")}')
    print(f'  Best Trial: {tuning_metadata.get("best_trial", "N/A")}')
    print(f'  Best Trial mAP@0.5: {tuning_metadata.get("best_map50", 0):.4f}')
    # The duration is optional in the metadata.
    if 'optimization_duration' in tuning_metadata:
        print(f'  Tuning Duration: {tuning_metadata["optimization_duration"]}')

# Training Summary
# Run identification, then (if final_metrics exists in this session) the final
# metrics and their change relative to the best tuning trial.
# (Emoji written as real Unicode characters instead of mis-decoded bytes.)
print('\n🎯 Training Run:')
print(f'  Run Name: {RUN_NAME_TRAINING}')
print(f'  Run Path: {TRAIN_DIR}')
print(f'  Epochs: {EPOCHS_FINAL_TRAINING}')
print(f'  Batch Size: {BATCH_SIZE}')

if 'final_metrics' in globals():
    print('\n🎯 Final Model Performance:')
    print(f'  mAP@0.5: {final_metrics["map50"]:.4f}')
    print(f'  mAP@0.5:0.95: {final_metrics["map50_95"]:.4f}')
    print(f'  Precision: {final_metrics["precision"]:.4f}')
    print(f'  Recall: {final_metrics["recall"]:.4f}')

    # Show improvement if available
    if not USE_DEFAULT_CONFIG and tuning_metadata_path.exists():
        tuning_best_map = tuning_metadata.get('best_map50', 0)
        # A zero or missing baseline would make the percentage meaningless
        # (and divide by zero), so the comparison is skipped in that case.
        if tuning_best_map > 0:
            improvement = final_metrics['map50'] - tuning_best_map
            print('\n📈 Improvement vs Tuning:')
            print(f'  Tuning Best: {tuning_best_map:.4f}')
            print(f'  Training Final: {final_metrics["map50"]:.4f}')
            print(f'  Improvement: {improvement:+.4f} ({improvement/tuning_best_map*100:+.2f}%)')

# Generated files and where they were saved.
# (Emoji written as real Unicode characters instead of mis-decoded bytes.)
print('\n📁 Generated Files:')
if not USE_DEFAULT_CONFIG:
    print(f'\n  📊 Tuning Results (in {TUNE_DIR.name}):')
    for tuning_file in (
        'best_hyperparameters.json',
        'best_hparams.yaml',
        'checkpoint_log.json',
        'optuna_study.pkl',
    ):
        print(f'    - {tuning_file}')

print(f'\n  🎯 Training Results (in {TRAIN_DIR.name}):')
for training_file in (
    'training_log.json',
    'weights/best.pt',
    'weights/last.pt',
    'results.csv',
):
    print(f'    - {training_file}')
print('  📄 Training PDF Report:')
print(f'    - {MODEL_NAME}_training_report.pdf')

if 'final_model_path' in globals():
    print('\n  🎯 Final Model Package:')
    print(f'    - {final_model_path.name}')
    # metadata_path and model_save_dir are checked separately: the presence of
    # final_model_path alone does not prove they are defined in this session.
    if 'metadata_path' in globals():
        print(f'    - {metadata_path.name}')
    if 'model_save_dir' in globals():
        print(f'    Location: {model_save_dir}')

print('\n📂 All results saved to:')
print(f'  Tuning: {TUNE_DIR}')
print(f'  Training: {TRAIN_DIR}')
if 'model_save_dir' in globals():
    print(f'  Final Model: {model_save_dir}')

# Next steps, resume hint and closing banner.
# The steps are collected first and numbered when printed, so the list stays
# consecutive whether or not a final model exists (hard-coded numbers used to
# jump from 3 to 5 in the no-model case).
# (Emoji and check mark written as real Unicode characters instead of
# mis-decoded bytes.)
training_report_pdf = TRAIN_DIR / f'{MODEL_NAME}_training_report.pdf'

next_steps = [
    f'Review training PDF report: {training_report_pdf}',
    f'Review training plots and metrics in: {TRAIN_DIR}',
]
if 'final_model_path' in globals():
    next_steps.append(f'Use final model for inference: {final_model_path}')
    next_steps.append('Evaluate on test set using yolo_test scripts')
else:
    next_steps.append('Complete training to generate final model')
next_steps.append('Consider fine-tuning with different datasets or model sizes')

print('\n🚀 Next Steps:')
for step_number, step_text in enumerate(next_steps, 1):
    print(f'  {step_number}. {step_text}')

print('\n📝 To Resume Training:')
print(f'  Set RESUME_TRAINING_RUN_NAME = "{RUN_NAME_TRAINING}"')
print('  Then re-run this notebook')

print('\n' + '=' * 80)
print('SUCCESS! ✓')
print('=' * 80)