# YOLOv11s Training - Simple Version

## 📝 Overview

Simplified training notebook that:
- ✅ Loads preprocessed data directly from `data/balanced_preprocessed/`
- ✅ Trains YOLOv11s model with WandB tracking
- ✅ Validates on test set
- ✅ Exports model to backend

**Prerequisites:**
- Data already preprocessed and balanced (run `finetune_yolo_balanced.ipynb` sections 1-5 first)
- WandB account and API key

**Time to complete:** ~2-3 hours (training only)

---

## Section 1: Setup and Imports

In [None]:
# Set working directory to repository root
# Later cells build relative paths (e.g. 'data/balanced_preprocessed' and
# '../backend/models'), so they are resolved against the directory set here.
# NOTE(review): this absolute path is machine-specific - adjust it when running
# the notebook on another host.
%cd /home/minhquana/workspace/project_DeepLearning/computer_vision/Abnormal-Prediction-In-Chest-X-Ray

In [None]:
# Import required libraries
import os
import shutil
from pathlib import Path
import wandb
from ultralytics import YOLO, settings
import torch

# Report the library version and the accelerator that training will run on.
print("‚úì Imports successful")
print(f"  PyTorch version: {torch.__version__}")

cuda_ready = torch.cuda.is_available()
print(f"  CUDA available: {cuda_ready}")

if cuda_ready:
    # Device index 0 is the GPU described here.
    gpu_name = torch.cuda.get_device_name(0)
    print(f"  GPU: {gpu_name}")
    gpu_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"  GPU Memory: {gpu_bytes / 1e9:.1f} GB")

## Section 2: Verify Preprocessed Data

Check that preprocessed data exists and includes augmented images.

**IMPORTANT:** This notebook expects augmented images to be merged into training set.
- Run `finetune_yolo_balanced.ipynb` sections 1-5 first
- Augmented images should be in `balanced_preprocessed/train/images/`

In [None]:
# Verify preprocessed data directory
preprocessed_dir = Path('data/balanced_preprocessed')
data_yaml = preprocessed_dir / 'data.yaml'

# File suffixes counted as images. Compared case-insensitively so that files
# such as 'scan.JPG' or 'scan.jpeg' are not silently left out of the statistics.
IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg'}


def _count_images(images_dir):
    """Return the number of image files directly inside *images_dir*.

    Args:
        images_dir: ``Path`` of a split's ``images`` folder.

    Returns:
        Count of regular files whose suffix (lower-cased) is in
        ``IMAGE_EXTENSIONS``; ``0`` when the folder does not exist.
    """
    if not images_dir.is_dir():
        return 0
    return sum(
        1
        for entry in images_dir.iterdir()
        if entry.is_file() and entry.suffix.lower() in IMAGE_EXTENSIONS
    )


print("üîç Verifying Preprocessed Data")
print("=" * 80)

# Fail early, with instructions, when the preprocessing notebook was not run.
if not preprocessed_dir.exists():
    print("‚ùå ERROR: Preprocessed data not found!")
    print(f"   Expected location: {preprocessed_dir.absolute()}")
    print("\n‚ö†Ô∏è Please run finetune_yolo_balanced.ipynb sections 1-5 first to:")
    print("   1. Download dataset")
    print("   2. Create balanced dataset")
    print("   3. Preprocess images")
    raise FileNotFoundError(f"Preprocessed data not found at {preprocessed_dir}")

if not data_yaml.exists():
    print(f"‚ùå ERROR: data.yaml not found at {data_yaml}")
    # Include the path in the exception so the traceback alone is actionable.
    raise FileNotFoundError(f"data.yaml not found at {data_yaml}")

print(f"‚úì Preprocessed data directory found: {preprocessed_dir}")
print(f"‚úì Data YAML found: {data_yaml}")

# Count images in each split
splits = ['train', 'valid', 'test']
split_counts = {
    split: _count_images(preprocessed_dir / split / 'images')
    for split in splits
}

print(f"\nüìä Dataset Statistics:")
print(f"  Train:      {split_counts['train']:,} images")
print(f"  Validation: {split_counts['valid']:,} images")
print(f"  Test:       {split_counts['test']:,} images")
print(f"  Total:      {sum(split_counts.values()):,} images")

# Training cannot start without images, so this is a hard error.
if split_counts['train'] == 0:
    print("\n‚ùå ERROR: No training images found!")
    raise ValueError("No training images found in preprocessed data")

# The validation and test splits are only needed later (during training and in
# Section 5), so an empty one is reported now instead of after hours of training.
for split in ('valid', 'test'):
    if split_counts[split] == 0:
        print(f"\nWARNING: no images found in the '{split}' split "
              f"({preprocessed_dir / split / 'images'})")

print("\n‚úì Data verification complete - ready for training!")
print("=" * 80)

## Section 3: WandB Setup

Initialize Weights & Biases for experiment tracking.

In [None]:
# Login to WandB
# The API key is read from the WANDB_API_KEY environment variable; when it is
# unset, None is passed and wandb falls back to its own credential lookup.
# wandb.login() returns a bool, so the success message is only printed when the
# call actually reports that credentials are configured.
if wandb.login(key=os.getenv('WANDB_API_KEY')):
    print("‚úì Logged into Weights & Biases successfully")
else:
    print("WARNING: wandb.login() did not confirm a login - "
          "check WANDB_API_KEY or run `wandb login` before training")

In [None]:
# Initialize WandB project
wandb_project = "chest-xray-detection-balanced"
wandb_run_name = "yolov11s-no-augment"

# Metadata stored with the run. These values are only logged here; the trainer
# itself is configured through `training_config` in Section 4, so keep the two
# in sync when changing hyperparameters.
run_metadata = {
    "model": "YOLOv11s",
    "dataset": "VinBigData Chest X-ray v3 (Balanced + Preprocessed + Augmented)",
    "epochs": 100,
    "batch_size": 16,
    "image_size": 1024,
    "patience": 10,
    "optimizer": "AdamW",
    "learning_rate": 0.001,
    "preprocessing": "histogram_eq + gaussian_blur + normalization",
    "augmentation": "pre-augmented only (YOLO augmentation disabled)",
    "training_strategy": "use pre-augmented data - no real-time augmentation",
}

wandb.init(project=wandb_project, name=wandb_run_name, config=run_metadata)

# Echo the identifiers of the run that was just created.
active_run = wandb.run
print("‚úì WandB initialized successfully")
print(f"  Project: chest-xray-detection-balanced")
print(f"  Run name: {active_run.name}")
print(f"  Run URL: {active_run.url}")
print(f"\n‚ö†Ô∏è  Note: Training with pre-augmented data only")
print(f"  ‚Üí All YOLO augmentations disabled")
print(f"  ‚Üí Using fixed augmented variations from preprocessing")

In [None]:
# Enable WandB integration in Ultralytics
ultralytics_overrides = {'wandb': True}
settings.update(ultralytics_overrides)

print("‚úì WandB integration enabled for Ultralytics YOLO")
print("\nüìä Training metrics will be automatically logged to WandB:")

# One bullet per kind of artifact the integration is expected to log.
logged_items = (
    "Loss curves (box_loss, cls_loss, dfl_loss)",
    "mAP scores (mAP50, mAP50-95)",
    "Learning rate schedules",
    "Training/validation images with predictions",
)
for logged_item in logged_items:
    print(f"   - {logged_item}")

## Section 4: Model Training

Train YOLOv11s on preprocessed balanced dataset.

In [None]:
# Training configuration - NO AUGMENTATION (use pre-augmented data only)
# The settings are grouped by purpose and then merged, in this order, into the
# single `training_config` dict that is unpacked into model.train().

# Dataset, schedule and runtime settings.
run_settings = {
    'data': str(data_yaml),
    'epochs': 100,
    'batch': 16,
    'imgsz': 1024,
    'patience': 10,
    'save': True,
    'plots': True,
    'verbose': True,
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    'workers': 8,
    'cache': False,
}

# Optimization parameters for better convergence.
optimizer_settings = {
    'optimizer': 'AdamW',      # explicit choice instead of the 'auto' default
    'lr0': 0.001,              # initial learning rate
    'lrf': 0.0001,             # final LR as a fraction of lr0 (final = lr0 * lrf)
    'momentum': 0.937,
    'weight_decay': 0.0005,
    'warmup_epochs': 3.0,
    'warmup_momentum': 0.8,
    'warmup_bias_lr': 0.1,
    'cos_lr': True,            # cosine learning rate scheduler
}

# Every train-time augmentation is switched off: the augmented variants were
# already generated by finetune_yolo_balanced.ipynb.
no_augmentation_settings = {
    'hsv_h': 0.0,          # no hue shift
    'hsv_s': 0.0,          # no saturation change
    'hsv_v': 0.0,          # no value/brightness change
    'degrees': 0.0,        # no rotation
    'translate': 0.0,      # no translation
    'scale': 0.0,          # no scaling
    'shear': 0.0,          # no shearing
    'perspective': 0.0,    # no perspective transform
    'fliplr': 0.0,         # no horizontal flip
    'flipud': 0.0,         # no vertical flip
    'mosaic': 0.0,         # no mosaic augmentation
    'mixup': 0.0,          # no mixup
    'copy_paste': 0.0,     # no copy-paste
    'auto_augment': None,  # no auto augmentation policy
    'erasing': 0.0,        # no random erasing
}

training_config = {
    **run_settings,
    **optimizer_settings,
    **no_augmentation_settings,
}

banner = "=" * 80

print("üöÄ Starting YOLOv11s Training (NO AUGMENTATION MODE)")
print(banner)
print("\n‚öôÔ∏è  Training Configuration:")
for option_name, option_value in training_config.items():
    print(f"  {option_name:20s}: {option_value}")
print(banner)
print("\n‚ö†Ô∏è  IMPORTANT: All YOLO augmentations are DISABLED")
print("   ‚Üí Using ONLY pre-augmented data from balanced_preprocessed/")
print("   ‚Üí Ensure augmented images are merged into training set")
print(banner)

In [None]:
# Load YOLOv11s model
print("\nüì¶ Loading YOLOv11s model...")
pretrained_weights = 'yolo11s.pt'
model = YOLO(pretrained_weights)

print("‚úì Model loaded successfully")
print(f"  Model architecture: YOLOv11s")

# Total element count over every parameter tensor, reported in millions.
param_total = sum(tensor.numel() for tensor in model.model.parameters())
print(f"  Parameters: ~{param_total / 1e6:.1f}M")

print("\nüèãÔ∏è  Starting training...")
print("üìä Progress will be tracked in WandB dashboard")
print("-" * 80)

In [None]:
# Train the model
# Only the train() call sits inside `try`, so a problem while *reporting* the
# results is no longer announced as "Training failed".
try:
    results = model.train(
        **training_config,
        project='chest-xray-detection-balanced',
        name='yolov11s-no-augment'
    )
except Exception as e:
    # Surface the error in the cell output, then re-raise so the notebook stops
    # and the full traceback is preserved.
    print(f"\n‚ùå Training failed: {e}")
    raise
else:
    print("\n" + "=" * 80)
    print("‚úì Training completed successfully!")
    print("=" * 80)

    # Display results
    print("\nüìà Training Results:")
    if hasattr(results, 'results_dict'):
        print(f"  Best mAP50: {results.results_dict.get('metrics/mAP50(B)', 'N/A')}")
        print(f"  Best mAP50-95: {results.results_dict.get('metrics/mAP50-95(B)', 'N/A')}")

    # Save best model path; the validation and export cells look this name up.
    best_model_path = Path(results.save_dir) / 'weights' / 'best.pt'
    print(f"\nüíæ Best model saved to: {best_model_path}")

    print(f"\nüí° Training Summary:")
    print(f"  ‚úì Used pre-augmented data only")
    print(f"  ‚úì No real-time augmentation applied")
    print(f"  ‚úì Preserved preprocessed image quality")

## Section 5: Model Validation

Validate trained model on test set.

In [None]:
# Validate on test set
section_rule = "=" * 80

print("üß™ Model Validation on Test Set")
print(section_rule)

# Load best model
# `best_model_path` only exists when the training cell finished in this
# session; otherwise the `model` object already in memory is evaluated.
best_available = 'best_model_path' in locals() and best_model_path.exists()
if not best_available:
    print("‚ö†Ô∏è Using last trained model")
else:
    print(f"üì¶ Loading best model: {best_model_path}")
    model = YOLO(str(best_model_path))

print("\nüîç Running validation...")
metrics = model.val(data=str(data_yaml), split='test')

print("\nüìä Validation Results:")
print(section_rule)
results_dict = metrics.results_dict

# (label, metric key) pairs in display order; a missing key is shown as 0.
metric_rows = (
    ("mAP50:", 'metrics/mAP50(B)'),
    ("mAP50-95:", 'metrics/mAP50-95(B)'),
    ("Precision:", 'metrics/precision(B)'),
    ("Recall:", 'metrics/recall(B)'),
)
for row_label, metric_key in metric_rows:
    print(f"  {row_label:<13}{results_dict.get(metric_key, 0):.4f}")
print(section_rule)

## Section 6: Model Export

Export trained model to backend for production.

In [None]:
# Export to backend
# NOTE(review): this relative path is resolved against the current working
# directory (changed by the %cd cell in Section 1) - confirm that
# '../backend/models' points at the intended backend folder.
backend_models_dir = Path('../backend/models')
backend_models_dir.mkdir(parents=True, exist_ok=True)
target_model_path = backend_models_dir / 'yolov11s_finetuned.pt'

export_rule = "=" * 80
print("üì¶ Exporting Model to Backend")
print(export_rule)

# The export needs the weights recorded by the training cell of this session.
weights_ready = 'best_model_path' in locals() and best_model_path.exists()

if not weights_ready:
    print("‚ùå Best model not found - cannot export")
else:
    print(f"üìÇ Source: {best_model_path}")
    print(f"üìÇ Target: {target_model_path}")

    shutil.copy(best_model_path, target_model_path)

    # Confirm that the copy is really on disk before declaring success.
    if not target_model_path.exists():
        print("‚ùå Export failed")
    else:
        size_mb = target_model_path.stat().st_size / (1024*1024)
        print(f"\n‚úì Model exported successfully!")
        print(f"  File size: {size_mb:.2f} MB")
        print(f"  Location: {target_model_path}")
        print(f"\nüéØ Model ready for production use!")
        print(f"  ‚úì Trained on pre-augmented data")
        print(f"  ‚úì No real-time augmentation used")
        print(f"  ‚úì Preserved preprocessed features")

print(export_rule)

In [None]:
# Close WandB run
# Ends the run opened by wandb.init() in Section 3; run this cell last, after
# validation and export.
wandb.finish()
print("‚úì WandB run finished")