In [1]:
import os
import numpy as np
import pandas as pd
import shutil
import yaml
from PIL import Image
from sklearn.model_selection import train_test_split
from ultralytics import YOLO
from tqdm import tqdm
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, precision_recall_curve, average_precision_score
import ultralytics
print(f"Ultralytics version: {ultralytics.__version__}")

Ultralytics version: 8.3.130


In [2]:
print("CUDA_VISIBLE_DEVICES:", os.environ.get("CUDA_VISIBLE_DEVICES", "Not set"))
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device count: {torch.cuda.device_count()}")

# Check GPU access
if torch.cuda.is_available() and torch.cuda.device_count() > 0:
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

CUDA_VISIBLE_DEVICES: GPU-65d5f9c5-4882-dda2-e605-d1f9d08ac724
PyTorch version: 2.7.0+cu128
CUDA available: True
CUDA device count: 1
GPU 0: NVIDIA RTX 4000 Ada Generation
Using device: cuda


In [3]:
root_path = "/data/mhedas/common/challenge_dataset/"

In [4]:
# Step 2: Load and preprocess the data
train_df = pd.read_csv(os.path.join(root_path, "train.csv"))
img_dims_df = pd.read_csv(os.path.join(root_path, "img_size.csv"))
train_df = train_df.merge(img_dims_df, on='image_id', how='left')

print(f"Loaded {len(train_df)} annotations")
print(f"Number of unique images: {train_df['image_id'].nunique()}")

# Print class distribution
class_counts = train_df['class_id'].value_counts().sort_index()
print("\nClass distribution:")
for class_id, count in class_counts.items():
    print(f"Class {class_id}: {count} annotations")

Loaded 45925 annotations
Number of unique images: 8573

Class distribution:
Class 0: 5481 annotations
Class 1: 255 annotations
Class 2: 851 annotations
Class 3: 4046 annotations
Class 4: 519 annotations
Class 5: 904 annotations
Class 6: 1097 annotations
Class 7: 2188 annotations
Class 8: 2324 annotations
Class 9: 1945 annotations
Class 10: 2190 annotations
Class 11: 4308 annotations
Class 12: 195 annotations
Class 13: 4097 annotations
Class 14: 15525 annotations


In [5]:
# Step 3: Create a mapping of original image dimensions from img_size.csv
def load_image_dimensions(csv_path):
    """Load image dimensions from CSV file"""
    img_dimensions = {}
    df = pd.read_csv(csv_path)
    
    for _, row in df.iterrows():
        # Store dimensions as (height, width)
        img_dimensions[row['image_id']] = (int(row['dim0']), int(row['dim1']))
    
    return img_dimensions

original_dimensions = load_image_dimensions(os.path.join(root_path, "img_size.csv"))
print(f"Loaded dimensions for {len(original_dimensions)} images")

Loaded dimensions for 15000 images


In [6]:
# Define the PNG dimensions - all PNGs are 1024x1024
PNG_HEIGHT, PNG_WIDTH = 1024, 1024

In [7]:
# Step 4: Function to scale bounding box coordinates
def scale_bbox(bbox, original_dims, new_dims):
    """
    Scale bounding box coordinates from original dimensions to new dimensions
    
    Args:
        bbox (list/array): Bounding box coordinates [x_min, y_min, x_max, y_max]
        original_dims (tuple): Original image dimensions (height, width)
        new_dims (tuple): New image dimensions (height, width)
        
    Returns:
        list: Scaled bounding box coordinates [x_min, y_min, x_max, y_max]
    """
    orig_height, orig_width = original_dims
    new_height, new_width = new_dims
    
    # Scale factors
    width_scale = new_width / orig_width
    height_scale = new_height / orig_height
    
    # Scale coordinates
    x_min = bbox[0] * width_scale
    y_min = bbox[1] * height_scale
    x_max = bbox[2] * width_scale
    y_max = bbox[3] * height_scale
    
    return [x_min, y_min, x_max, y_max]

In [8]:
# Step 5: Create directories for YOLOv8 dataset format
os.makedirs('yolov8_dataset/images/train', exist_ok=True)
os.makedirs('yolov8_dataset/images/val', exist_ok=True)
os.makedirs('yolov8_dataset/labels/train', exist_ok=True)
os.makedirs('yolov8_dataset/labels/val', exist_ok=True)


In [9]:
# Step 6: Split data into training and validation sets with stratification
print("\nPerforming stratified split to maintain class distribution...")

# Get unique image IDs for each class
class_image_ids = {}
for class_id in range(15):  # 0-14 classes
    class_image_ids[class_id] = set(train_df[train_df['class_id'] == class_id]['image_id'].unique())

# Count how many images have each class
for class_id, img_ids in class_image_ids.items():
    print(f"Class {class_id}: {len(img_ids)} unique images")

# Create a stratified split based on the presence of rarer classes
all_image_ids = train_df['image_id'].unique()
rare_classes = [0, 2, 5, 6, 12, 13]  # Classes with fewer instances based on your counts
images_with_rare_classes = set()
for class_id in rare_classes:
    images_with_rare_classes.update(class_image_ids[class_id])

# Split both rare and common images with the same ratio
rare_images = list(images_with_rare_classes)
common_images = list(set(all_image_ids) - images_with_rare_classes)

# Split rare and common images separately with the same ratio
rare_train, rare_val = train_test_split(rare_images, test_size=0.2, random_state=42)
common_train, common_val = train_test_split(common_images, test_size=0.2, random_state=42)

# Combine the splits
train_images = rare_train + common_train
val_images = rare_val + common_val

print(f"Training images: {len(train_images)}")
print(f"Validation images: {len(val_images)}")

# Check class distribution in splits
train_df_subset = train_df[train_df['image_id'].isin(train_images)]
val_df_subset = train_df[train_df['image_id'].isin(val_images)]

train_class_counts = train_df_subset['class_id'].value_counts().sort_index()
val_class_counts = val_df_subset['class_id'].value_counts().sort_index()

print("\nTraining set class distribution:")
for class_id, count in train_class_counts.items():
    print(f"Class {class_id}: {count} annotations ({count/sum(train_class_counts)*100:.1f}%)")

print("\nValidation set class distribution:")
for class_id, count in val_class_counts.items():
    print(f"Class {class_id}: {count} annotations ({count/sum(val_class_counts)*100:.1f}%)")


Performing stratified split to maintain class distribution...
Class 0: 2365 unique images
Class 1: 167 unique images
Class 2: 385 unique images
Class 3: 1746 unique images
Class 4: 325 unique images
Class 5: 341 unique images
Class 6: 532 unique images
Class 7: 1132 unique images
Class 8: 705 unique images
Class 9: 965 unique images
Class 10: 909 unique images
Class 11: 1689 unique images
Class 12: 79 unique images
Class 13: 1388 unique images
Class 14: 5175 unique images
Training images: 6857
Validation images: 1716

Training set class distribution:
Class 0: 4361 annotations (11.8%)
Class 1: 215 annotations (0.6%)
Class 2: 655 annotations (1.8%)
Class 3: 3251 annotations (8.8%)
Class 4: 417 annotations (1.1%)
Class 5: 743 annotations (2.0%)
Class 6: 884 annotations (2.4%)
Class 7: 1772 annotations (4.8%)
Class 8: 1954 annotations (5.3%)
Class 9: 1518 annotations (4.1%)
Class 10: 1764 annotations (4.8%)
Class 11: 3460 annotations (9.4%)
Class 12: 155 annotations (0.4%)
Class 13: 3295 

In [10]:

# Step 7: Convert annotations to YOLOv8 format with proper scaling
def convert_to_yolo_format(df, img_ids, output_dir, original_dims, png_dims):
    """Convert bounding box annotations to YOLOv8 format with proper scaling"""
    no_finding_count = 0
    other_class_count = 0
    classes_count = {i: 0 for i in range(15)}  # Count for each class
    
    for img_id in tqdm(img_ids, desc="Converting annotations"):
        img_annotations = df[df['image_id'] == img_id]
        
        # Skip if no annotations found
        if len(img_annotations) == 0:
            continue
            
        # Get original dimensions for this image
        orig_height, orig_width = original_dims.get(img_id, (3000, 3000))  # Default if not found
        
        # Check if this is a "No finding" image (all annotations are class 14)
        is_no_finding = all(img_annotations['class_id'] == 14)
        
        # Create label file
        with open(os.path.join(output_dir, f"{img_id}.txt"), 'w') as f:
            # For "No finding" images, we'll create an empty label file
            # This is the proper way to handle background/negative samples in YOLO
            if is_no_finding:
                no_finding_count += 1
                classes_count[14] += 1
                # Empty file indicates no objects (YOLO understands this)
                pass
            else:
                for _, row in img_annotations.iterrows():
                    # Skip the "No finding" class entries in mixed images
                    if row['class_id'] == 14:
                        continue
                        
                    other_class_count += 1
                    classes_count[row['class_id']] += 1
                    
                    # Get bounding box coordinates from the dataframe
                    x_min, y_min, x_max, y_max = row['x_min'], row['y_min'], row['x_max'], row['y_max']
                    
                    # Skip rows with NaN values
                    if pd.isna(x_min) or pd.isna(y_min) or pd.isna(x_max) or pd.isna(y_max):
                        continue
                    
                    # Scale bbox coordinates from original dimensions to PNG dimensions
                    scaled_bbox = scale_bbox([x_min, y_min, x_max, y_max], 
                                            (orig_height, orig_width), 
                                            png_dims)
                    
                    # Convert to YOLO format (normalized coordinates)
                    x_center = ((scaled_bbox[0] + scaled_bbox[2]) / 2) / png_dims[1]
                    y_center = ((scaled_bbox[1] + scaled_bbox[3]) / 2) / png_dims[0]
                    bbox_width = (scaled_bbox[2] - scaled_bbox[0]) / png_dims[1]
                    bbox_height = (scaled_bbox[3] - scaled_bbox[1]) / png_dims[0]
                    
                    # Make sure values are within [0,1] range
                    x_center = max(0, min(x_center, 1))
                    y_center = max(0, min(y_center, 1))
                    bbox_width = max(0, min(bbox_width, 1))
                    bbox_height = max(0, min(bbox_height, 1))
                    
                    # Write to file (class_id, x_center, y_center, width, height)
                    f.write(f"{row['class_id']} {x_center} {y_center} {bbox_width} {bbox_height}\n")
    
    print(f"Processed {no_finding_count} 'No finding' images and {other_class_count} annotations for other classes")
    for class_id, count in classes_count.items():
        if count > 0:
            print(f"Class {class_id}: {count} annotations")
    
    return no_finding_count, other_class_count, classes_count

# Convert annotations with proper scaling
print("Converting annotations to YOLOv8 format with coordinate scaling...")
train_no_finding, train_other, train_class_counts = convert_to_yolo_format(
    train_df, 
    train_images, 
    'yolov8_dataset/labels/train',
    original_dimensions,
    (PNG_HEIGHT, PNG_WIDTH)
)

val_no_finding, val_other, val_class_counts = convert_to_yolo_format(
    train_df, 
    val_images, 
    'yolov8_dataset/labels/val',
    original_dimensions,
    (PNG_HEIGHT, PNG_WIDTH)
)

print(f"Training set: {train_no_finding} 'No finding' images, {train_other} annotations for other classes")
print(f"Validation set: {val_no_finding} 'No finding' images, {val_other} annotations for other classes")


Converting annotations to YOLOv8 format with coordinate scaling...


Converting annotations: 100%|██████████| 6857/6857 [01:59<00:00, 57.22it/s] 


Processed 4137 'No finding' images and 24444 annotations for other classes
Class 0: 4361 annotations
Class 1: 215 annotations
Class 2: 655 annotations
Class 3: 3251 annotations
Class 4: 417 annotations
Class 5: 743 annotations
Class 6: 884 annotations
Class 7: 1772 annotations
Class 8: 1954 annotations
Class 9: 1518 annotations
Class 10: 1764 annotations
Class 11: 3460 annotations
Class 12: 155 annotations
Class 13: 3295 annotations
Class 14: 4137 annotations


Converting annotations: 100%|██████████| 1716/1716 [00:29<00:00, 57.35it/s] 

Processed 1038 'No finding' images and 5956 annotations for other classes
Class 0: 1120 annotations
Class 1: 40 annotations
Class 2: 196 annotations
Class 3: 795 annotations
Class 4: 102 annotations
Class 5: 161 annotations
Class 6: 213 annotations
Class 7: 416 annotations
Class 8: 370 annotations
Class 9: 427 annotations
Class 10: 426 annotations
Class 11: 848 annotations
Class 12: 40 annotations
Class 13: 802 annotations
Class 14: 1038 annotations
Training set: 4137 'No finding' images, 24444 annotations for other classes
Validation set: 1038 'No finding' images, 5956 annotations for other classes





In [41]:
len(val_images)

1716

In [11]:
# Step 8: Copy images to YOLOv8 directories
print("Copying images to YOLOv8 directories...")
for img_id in tqdm(train_images, desc="Copying training images"):
    src_path = os.path.join(root_path, "train", "train", f"{img_id}.png")
    dst_path = os.path.join('yolov8_dataset/images/train', f"{img_id}.png")
    if os.path.exists(src_path):
        shutil.copy(src_path, dst_path)
    else:
        print(f"Image not found: {src_path}")

for img_id in tqdm(val_images, desc="Copying validation images"):
    src_path = os.path.join(root_path, "train", "train", f"{img_id}.png")
    dst_path = os.path.join('yolov8_dataset/images/val', f"{img_id}.png")
    if os.path.exists(src_path):
        shutil.copy(src_path, dst_path)
    else:
        print(f"Image not found: {src_path}")

Copying images to YOLOv8 directories...


Copying training images: 100%|██████████| 6857/6857 [08:23<00:00, 13.61it/s]
Copying validation images: 100%|██████████| 1716/1716 [02:12<00:00, 12.94it/s]


In [19]:
# Step 9: Create YOLOv8 dataset YAML file
data_yaml = {
    'path': os.path.abspath('yolov8_dataset'),  # dataset root dir
    'train': 'images/train',
    'val': 'images/val',
    'nc': 14,  # number of classes (we only detect 14 classes, "No finding" is handled differently)
    'names': [
        'Aortic enlargement', 'Atelectasis', 'Calcification', 'Cardiomegaly',
        'Consolidation', 'ILD', 'Infiltration', 'Lung Opacity', 'Nodule/Mass',
        'Other lesion', 'Pleural effusion', 'Pleural thickening', 'Pneumothorax',
        'Pulmonary fibrosis'
    ]
}

with open('dataset.yaml', 'w') as f:
    yaml.dump(data_yaml, f, default_flow_style=False)
print("Created dataset.yaml configuration file")

Created dataset.yaml configuration file


In [20]:
# Calculate class weights for balanced training
class_weights = []
total_annotations = sum(train_class_counts.values())
for class_id in range(14):  # Only include 0-13 classes (exclude "No finding" class)
    count = train_class_counts.get(class_id, 0)
    if count > 0:
        # Inverse frequency weighting
        weight = total_annotations / (14 * count)
    else:
        weight = 1.0
    class_weights.append(weight)

# Normalize weights to have a mean of 1
class_weights = np.array(class_weights)
class_weights = class_weights / np.mean(class_weights)

print("\nClass weights for balanced training:")
for class_id, weight in enumerate(class_weights):
    print(f"Class {class_id}: {weight:.4f}")

# Save class weights to file
with open('dataset.yaml', 'a') as f:
    yaml.dump({"weights": class_weights.tolist()}, f)
print("Class weights saved to dataset.yaml")


Class weights for balanced training:
Class 0: 0.1533
Class 1: 3.1103
Class 2: 1.0209
Class 3: 0.2057
Class 4: 1.6036
Class 5: 0.9000
Class 6: 0.7565
Class 7: 0.3774
Class 8: 0.3422
Class 9: 0.4405
Class 10: 0.3791
Class 11: 0.1933
Class 12: 4.3143
Class 13: 0.2029
Class weights saved to dataset.yaml


In [21]:
# Step 10: Create hyperparameters file for medical imaging
hyp = {
    # Loss coefficients
    "box": 7.5,  # Box loss weight
    "cls": 0.5,  # Classification loss weight
    "dfl": 1.5,  # Distribution focal loss
    
    # Optimizer settings
    "lr0": 0.001,  # Initial learning rate
    "lrf": 0.01,   # Final learning rate factor
    "momentum": 0.937,  # SGD momentum/Adam beta1
    "weight_decay": 0.0005,  # Optimizer weight decay
    "warmup_epochs": 3.0,  # Warmup epochs
    "warmup_momentum": 0.8,  # Warmup momentum
    "warmup_bias_lr": 0.1,  # Warmup bias learning rate
    
    # Augmentation settings (optimized for medical imaging)
    "hsv_h": 0.01,  # Hue augmentation
    "hsv_s": 0.1,   # Saturation augmentation
    "hsv_v": 0.1,   # Value augmentation
    "degrees": 0.0,  # Rotation (disabled for medical orientation)
    "translate": 0.1,  # Translation
    "scale": 0.1,   # Scale
    "shear": 0.0,   # Shear
    "perspective": 0.0,  # Perspective
    "flipud": 0.0,  # Vertical flip
    "fliplr": 0.5,  # Horizontal flip
    "mosaic": 0.3,  # Mosaic (reduced)
    "mixup": 0.0,   # Mixup (disabled)
    "copy_paste": 0.0,  # Copy-paste
}

# Save hyperparameters to a file
with open('hyp_medical.yaml', 'w') as f:
    yaml.dump(hyp, f, default_flow_style=False)

In [22]:
# Clear GPU cache before training
print("Clearing GPU cache...")
torch.cuda.empty_cache()

Clearing GPU cache...


In [16]:
# Function to handle evaluation errors
def safe_val(model, data, split='val', **kwargs):
    """Run validation with error handling for missing keys"""
    try:
        results = model.val(data=data, split=split, **kwargs)
        return results
    except KeyError as e:
        print(f"KeyError during validation: {e}")
        # If error occurs, try with default parameters
        print("Retrying with default parameters...")
        results = model.val(data=data, split=split)
        return results
    except Exception as e:
        print(f"Error during validation: {e}")
        return None

In [23]:
# Step 11: First stage of training - frozen backbone
print("\n--- STAGE 1: TRAINING WITH FROZEN BACKBONE ---")
model_stage1 = YOLO('yolov8m.pt')  # Start with pretrained model

# Get number of layers in model
num_layers = len(model_stage1.model.model)
backbone_layers = list(range(10))  # Freeze first 10 layers (backbone)

# First stage training with frozen backbone
results_stage1 = model_stage1.train(
    data='dataset.yaml',
    epochs=25,              # Shorter first stage
    patience=10,            # Early stopping
    batch=4,                # Batch size
    imgsz=1024,             # Image size
    device=0,               # GPU device
    val=True,               # Validate during training
    amp=True,               # Mixed precision training
    pretrained=True,        # Use pretrained weights
    cfg='hyp_medical.yaml', # Custom hyperparameters
    optimizer='AdamW',      # Optimizer
    project='chest_xray',   # Project name
    name='yolov8m_stage1',  # Run name
    exist_ok=True,          # Overwrite existing run
    cos_lr=True,            # Cosine LR scheduler
    close_mosaic=10,        # Disable mosaic in last 10 epochs
    freeze=backbone_layers, # Freeze backbone layers
    rect=True,              # Rectangular training
    verbose=True,           # Verbose output
)


--- STAGE 1: TRAINING WITH FROZEN BACKBONE ---
New https://pypi.org/project/ultralytics/8.3.131 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.130 🚀 Python-3.11.12 torch-2.7.0+cu128 CUDA:0 (NVIDIA RTX 4000 Ada Generation, 20028MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=4, bgr=0.0, box=7.5, cache=False, cfg=hyp_medical.yaml, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=dataset.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=25, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], half=False, hsv_h=0.01, hsv_s=0.1, hsv_v=0.1, imgsz=1024, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8m.pt, mom

[34m[1mtrain: [0mScanning /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset/labels/train... 8573 images, 5175 backgrounds, 0 corrupt: 100%|██████████| 8573/8573 [00:03<00:00, 2255.16it/s]


[34m[1mtrain: [0mNew cache created: /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset/labels/train.cache
[34m[1mval: [0mFast image access ✅ (ping: 0.2±0.1 ms, read: 340.2±49.2 MB/s, size: 436.8 KB)


[34m[1mval: [0mScanning /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset/labels/val... 6336 images, 3831 backgrounds, 0 corrupt: 100%|██████████| 6336/6336 [00:02<00:00, 2235.88it/s]


[34m[1mval: [0mNew cache created: /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset/labels/val.cache
Plotting labels to chest_xray/yolov8m_stage1/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.001, momentum=0.937) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0005), 83 bias(decay=0.0)
Image sizes 1024 train, 1024 val
Using 8 dataloader workers
Logging results to [1mchest_xray/yolov8m_stage1[0m
Starting training for 25 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/25      1.88G      1.798      3.974       1.96          0       1024: 100%|██████████| 2144/2144 [03:03<00:00, 11.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.57it/s]


                   all       6336      22518      0.314      0.069     0.0762     0.0361

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/25      2.44G      1.712      3.123       1.91          0       1024: 100%|██████████| 2144/2144 [02:57<00:00, 12.07it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.53it/s]


                   all       6336      22518      0.387      0.101      0.112     0.0526

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/25      2.45G      1.654      2.862      1.847          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.56it/s]


                   all       6336      22518      0.519      0.117      0.125       0.06

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/25      2.59G      1.607      2.621      1.794          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.54it/s]


                   all       6336      22518      0.294      0.173      0.137     0.0658

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/25      2.59G      1.567      2.451      1.751          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.54it/s]


                   all       6336      22518      0.284      0.178       0.15      0.075

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/25      2.59G      1.544      2.281       1.72          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.15it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.54it/s]


                   all       6336      22518      0.331      0.178      0.157     0.0783

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/25      2.59G      1.506      2.238      1.683          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.52it/s]


                   all       6336      22518      0.397      0.197       0.18     0.0866

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/25       2.6G      1.486      2.109      1.659          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.53it/s]


                   all       6336      22518      0.424      0.186      0.205      0.103

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/25      2.64G      1.463       1.97      1.634          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.54it/s]


                   all       6336      22518      0.336      0.226      0.204      0.102

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/25      2.64G      1.438      1.905      1.605          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.52it/s]


                   all       6336      22518      0.328      0.243      0.212      0.104

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/25      2.64G      1.416      1.843      1.582          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.52it/s]


                   all       6336      22518      0.335      0.274      0.236      0.116

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/25      2.64G      1.397      1.807      1.562          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.51it/s]


                   all       6336      22518      0.337      0.302       0.25      0.123

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/25      2.64G      1.371      1.726      1.541          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.51it/s]


                   all       6336      22518      0.338       0.31      0.266      0.132

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/25      2.64G      1.354      1.643      1.524          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.15it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.52it/s]


                   all       6336      22518      0.371       0.33      0.289      0.146

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/25      2.64G      1.329      1.573      1.501          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.51it/s]


                   all       6336      22518      0.373      0.341      0.297      0.149
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/25      2.64G      1.308      1.525       1.48          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.55it/s]


                   all       6336      22518      0.388      0.347      0.304      0.156

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/25      2.64G      1.286      1.487      1.462          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.15it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.53it/s]


                   all       6336      22518      0.416      0.347      0.324      0.169

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/25      2.64G      1.264      1.442      1.445          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.17it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.55it/s]


                   all       6336      22518      0.417      0.357       0.34      0.178

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/25      2.64G      1.243      1.398      1.424          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.54it/s]


                   all       6336      22518      0.436      0.362      0.352      0.187

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/25      2.64G      1.224      1.351      1.412          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.54it/s]


                   all       6336      22518      0.431      0.376      0.359      0.193

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/25      2.64G      1.204      1.317      1.394          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.15it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.54it/s]


                   all       6336      22518      0.455      0.373      0.368      0.198

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/25      2.64G      1.192      1.294      1.385          0       1024: 100%|██████████| 2144/2144 [02:57<00:00, 12.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.54it/s]


                   all       6336      22518       0.45      0.375      0.371      0.201

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/25      2.64G      1.181      1.267      1.376          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.54it/s]


                   all       6336      22518      0.446      0.385      0.374      0.205

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/25      2.64G      1.172      1.257      1.371          0       1024: 100%|██████████| 2144/2144 [02:56<00:00, 12.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.58it/s]


                   all       6336      22518       0.46      0.388      0.379      0.209

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/25      2.64G      1.165      1.243      1.364          0       1024: 100%|██████████| 2144/2144 [02:57<00:00, 12.07it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.55it/s]


                   all       6336      22518       0.46      0.389      0.379       0.21

25 epochs completed in 1.816 hours.
Optimizer stripped from chest_xray/yolov8m_stage1/weights/last.pt, 52.1MB
Optimizer stripped from chest_xray/yolov8m_stage1/weights/best.pt, 52.1MB

Validating chest_xray/yolov8m_stage1/weights/best.pt...
Ultralytics 8.3.130 🚀 Python-3.11.12 torch-2.7.0+cu128 CUDA:0 (NVIDIA RTX 4000 Ada Generation, 20028MiB)
Model summary (fused): 92 layers, 25,847,866 parameters, 0 gradients, 78.7 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:13<00:00, 10.78it/s]


                   all       6336      22518       0.46      0.388      0.379       0.21
    Aortic enlargement       1749       4053       0.68      0.585      0.649      0.437
           Atelectasis        128        198      0.433      0.157      0.211      0.108
         Calcification        281        625      0.364      0.312      0.286      0.144
          Cardiomegaly       1287       2970       0.72      0.531      0.632      0.453
         Consolidation        229        380      0.412      0.358      0.342      0.184
                   ILD        265        711      0.589      0.254      0.363      0.202
          Infiltration        395        815      0.414      0.323       0.32      0.159
          Lung Opacity        836       1606      0.403      0.428      0.344      0.168
           Nodule/Mass        534       1740      0.525      0.345      0.371      0.205
          Other lesion        728       1510      0.338      0.299      0.264      0.127
      Pleural effusio

In [24]:
# Run validation on first stage model
print("\nValidating first stage model...")
first_stage_model = YOLO('chest_xray/yolov8m_stage1/weights/best.pt')
val_results_stage1 = safe_val(
    first_stage_model,
    data='dataset.yaml',
    split='val',
    imgsz=1024,
    batch=16,
    device=0,
    verbose=True,
    conf=0.25,
    iou=0.4,   # PASCAL VOC metric uses IoU > 0.4
    plots=True,
)

# Print first stage metrics
if val_results_stage1 is not None and hasattr(val_results_stage1, 'box'):
    print(f"\nStage 1 - mAP@0.4: {val_results_stage1.box.map50:.4f}")
    print(f"Stage 1 - mAP@0.4:0.95: {val_results_stage1.box.map:.4f}")
    print(f"Stage 1 - Precision: {val_results_stage1.box.p:.4f}")
    print(f"Stage 1 - Recall: {val_results_stage1.box.r:.4f}")
    print(f"Stage 1 - F1-Score: {val_results_stage1.box.f1:.4f}")


Validating first stage model...
Ultralytics 8.3.130 🚀 Python-3.11.12 torch-2.7.0+cu128 CUDA:0 (NVIDIA RTX 4000 Ada Generation, 20028MiB)
Model summary (fused): 92 layers, 25,847,866 parameters, 0 gradients, 78.7 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.1±0.0 ms, read: 1660.9±353.4 MB/s, size: 435.9 KB)


[34m[1mval: [0mScanning /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset/labels/val.cache... 6336 images, 3831 backgrounds, 0 corrupt: 100%|██████████| 6336/6336 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 396/396 [02:14<00:00,  2.94it/s]


                   all       6336      22518       0.74      0.194       0.47      0.302
    Aortic enlargement       1749       4053      0.806      0.389      0.616      0.457
           Atelectasis        128        198        0.5     0.0455       0.27      0.172
         Calcification        281        625      0.735      0.115       0.43      0.272
          Cardiomegaly       1287       2970      0.832      0.408      0.642      0.507
         Consolidation        229        380      0.775     0.0816      0.427      0.284
                   ILD        265        711      0.852      0.105      0.478      0.333
          Infiltration        395        815      0.701      0.144      0.413       0.25
          Lung Opacity        836       1606      0.646      0.189      0.405       0.23
           Nodule/Mass        534       1740      0.755      0.209      0.486      0.331
          Other lesion        728       1510      0.727      0.141      0.437      0.254
      Pleural effusio

TypeError: unsupported format string passed to numpy.ndarray.__format__

In [25]:
# Step 12: Second stage of training - unfreeze all layers
print("\n--- STAGE 2: TRAINING WITH ALL LAYERS UNFROZEN ---")
model_stage2 = YOLO('chest_xray/yolov8m_stage1/weights/best.pt')

# Modified hyperparameters for second stage
hyp_stage2 = hyp.copy()
hyp_stage2["lr0"] = 0.0003  # Lower learning rate for fine-tuning
hyp_stage2["mosaic"] = 0.0  # Disable mosaic for fine-tuning

# Save stage 2 hyperparameters
with open('hyp_medical_stage2.yaml', 'w') as f:
    yaml.dump(hyp_stage2, f, default_flow_style=False)

# Second stage training with all layers unfrozen
results_stage2 = model_stage2.train(
    data='dataset.yaml',
    epochs=25,                  # Continue training
    patience=15,                # Early stopping
    batch=4,                    # Batch size
    imgsz=1024,                 # Image size
    device=0,                   # GPU device
    val=True,                   # Validate during training
    amp=True,                   # Mixed precision training
    pretrained=False,           # Don't use pretrained (use our first stage)
    cfg='hyp_medical_stage2.yaml', # Stage 2 hyperparameters
    optimizer='AdamW',          # Optimizer
    project='chest_xray',       # Project name
    name='yolov8m_stage2',      # Run name
    exist_ok=True,              # Overwrite existing run
    cos_lr=True,                # Cosine LR scheduler
    close_mosaic=0,             # Disable mosaic
    freeze=[],                  # Unfreeze all layers
    rect=True,                  # Rectangular training
    verbose=True,               # Verbose output
)


--- STAGE 2: TRAINING WITH ALL LAYERS UNFROZEN ---
New https://pypi.org/project/ultralytics/8.3.131 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.130 🚀 Python-3.11.12 torch-2.7.0+cu128 CUDA:0 (NVIDIA RTX 4000 Ada Generation, 20028MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=4, bgr=0.0, box=7.5, cache=False, cfg=hyp_medical_stage2.yaml, classes=None, close_mosaic=0, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=dataset.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=25, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=[], half=False, hsv_h=0.01, hsv_s=0.1, hsv_v=0.1, imgsz=1024, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.0003, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=chest_xray/yolov8m_stage1/weight

[34m[1mtrain: [0mScanning /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset/labels/train.cache... 8573 images, 5175 backgrounds, 0 corrupt: 100%|██████████| 8573/8573 [00:00<?, ?it/s]






[34m[1mval: [0mFast image access ✅ (ping: 0.1±0.0 ms, read: 1064.2±530.8 MB/s, size: 436.8 KB)


[34m[1mval: [0mScanning /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset/labels/val.cache... 6336 images, 3831 backgrounds, 0 corrupt: 100%|██████████| 6336/6336 [00:00<?, ?it/s]


Plotting labels to chest_xray/yolov8m_stage2/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.0003, momentum=0.937) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0005), 83 bias(decay=0.0)
Image sizes 1024 train, 1024 val
Using 8 dataloader workers
Logging results to [1mchest_xray/yolov8m_stage2[0m
Starting training for 25 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/25       4.5G      1.529      2.908      1.681          0       1024: 100%|██████████| 2144/2144 [05:05<00:00,  7.01it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.55it/s]


                   all       6336      22518      0.409      0.171      0.173     0.0838

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/25      4.61G      1.463      2.359      1.601          0       1024: 100%|██████████| 2144/2144 [04:59<00:00,  7.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.55it/s]


                   all       6336      22518      0.365      0.175      0.188     0.0913

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/25       4.7G      1.429      2.244      1.569          0       1024: 100%|██████████| 2144/2144 [04:57<00:00,  7.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.53it/s]


                   all       6336      22518      0.355      0.236      0.219      0.108

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/25      4.85G      1.423      2.034      1.554          0       1024: 100%|██████████| 2144/2144 [04:57<00:00,  7.21it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.52it/s]


                   all       6336      22518      0.374      0.217      0.218      0.108

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/25      4.85G      1.401      1.995      1.542          0       1024: 100%|██████████| 2144/2144 [04:56<00:00,  7.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.55it/s]


                   all       6336      22518      0.373      0.247      0.231      0.114

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/25      4.85G      1.386      1.908      1.526          0       1024: 100%|██████████| 2144/2144 [04:56<00:00,  7.23it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.53it/s]


                   all       6336      22518       0.37      0.261      0.244      0.121

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/25      4.85G      1.369      1.836      1.516          0       1024: 100%|██████████| 2144/2144 [04:56<00:00,  7.23it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.54it/s]


                   all       6336      22518      0.365       0.28      0.255      0.126

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/25      4.85G      1.352      1.748      1.497          0       1024: 100%|██████████| 2144/2144 [04:56<00:00,  7.23it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.53it/s]


                   all       6336      22518      0.356      0.295      0.266      0.132

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/25      4.85G      1.329      1.701      1.482          0       1024: 100%|██████████| 2144/2144 [04:57<00:00,  7.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.55it/s]


                   all       6336      22518      0.374      0.306      0.275       0.14

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/25      4.85G      1.318      1.644      1.468          0       1024: 100%|██████████| 2144/2144 [04:57<00:00,  7.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.54it/s]


                   all       6336      22518       0.39       0.32       0.29      0.148

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/25      4.85G      1.303      1.583      1.457          0       1024: 100%|██████████| 2144/2144 [04:56<00:00,  7.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.52it/s]


                   all       6336      22518      0.396      0.328      0.299      0.153

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/25      4.85G      1.285      1.528      1.443          0       1024: 100%|██████████| 2144/2144 [04:57<00:00,  7.21it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.55it/s]


                   all       6336      22518      0.405      0.335       0.31      0.157

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/25      4.85G      1.265       1.49      1.428          0       1024: 100%|██████████| 2144/2144 [04:57<00:00,  7.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:23<00:00,  9.54it/s]


                   all       6336      22518       0.42      0.337      0.319      0.166

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/25      4.85G      1.247      1.438      1.415          0       1024: 100%|██████████| 2144/2144 [04:57<00:00,  7.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.58it/s]


                   all       6336      22518      0.437      0.336      0.328      0.173

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/25      4.85G       1.23       1.41      1.399          0       1024: 100%|██████████| 2144/2144 [04:56<00:00,  7.23it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.58it/s]


                   all       6336      22518      0.445      0.343       0.34      0.181

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/25      4.85G      1.214      1.367      1.388          0       1024: 100%|██████████| 2144/2144 [04:57<00:00,  7.21it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.55it/s]


                   all       6336      22518       0.45      0.359      0.351      0.188

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/25      4.85G        1.2      1.326      1.375          0       1024: 100%|██████████| 2144/2144 [04:56<00:00,  7.23it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.56it/s]


                   all       6336      22518      0.429      0.361      0.341      0.183

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/25      4.85G       1.18      1.291      1.361          0       1024: 100%|██████████| 2144/2144 [04:56<00:00,  7.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.55it/s]


                   all       6336      22518      0.431       0.35      0.342      0.184

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/25      4.85G      1.164      1.258       1.35          0       1024: 100%|██████████| 2144/2144 [04:57<00:00,  7.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.60it/s]


                   all       6336      22518      0.442       0.36      0.351      0.191

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/25      4.85G      1.148      1.226      1.336          0       1024: 100%|██████████| 2144/2144 [04:57<00:00,  7.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.62it/s]


                   all       6336      22518       0.46       0.36      0.368      0.202

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/25      4.85G      1.132      1.199      1.327          0       1024: 100%|██████████| 2144/2144 [04:57<00:00,  7.21it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.62it/s]


                   all       6336      22518      0.444       0.37      0.365      0.202

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/25      4.85G       1.12      1.175      1.317          0       1024: 100%|██████████| 2144/2144 [04:57<00:00,  7.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.64it/s]


                   all       6336      22518      0.458      0.367      0.372      0.206

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/25      4.85G      1.108      1.158       1.31          0       1024: 100%|██████████| 2144/2144 [04:56<00:00,  7.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.64it/s]


                   all       6336      22518      0.465      0.372      0.378       0.21

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/25      4.85G      1.102      1.146      1.304          0       1024: 100%|██████████| 2144/2144 [04:57<00:00,  7.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.62it/s]


                   all       6336      22518       0.46      0.373       0.38      0.212

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/25      4.85G      1.096      1.137        1.3          0       1024: 100%|██████████| 2144/2144 [04:57<00:00,  7.21it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:22<00:00,  9.64it/s]


                   all       6336      22518       0.47      0.379      0.385      0.216

25 epochs completed in 2.656 hours.
Optimizer stripped from chest_xray/yolov8m_stage2/weights/last.pt, 52.1MB
Optimizer stripped from chest_xray/yolov8m_stage2/weights/best.pt, 52.1MB

Validating chest_xray/yolov8m_stage2/weights/best.pt...
Ultralytics 8.3.130 🚀 Python-3.11.12 torch-2.7.0+cu128 CUDA:0 (NVIDIA RTX 4000 Ada Generation, 20028MiB)
Model summary (fused): 92 layers, 25,847,866 parameters, 0 gradients, 78.7 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 792/792 [01:13<00:00, 10.85it/s]


                   all       6336      22518      0.469      0.378      0.384      0.215
    Aortic enlargement       1749       4053      0.507      0.558      0.587      0.402
           Atelectasis        128        198      0.441      0.259      0.277      0.136
         Calcification        281        625      0.359      0.323      0.289      0.152
          Cardiomegaly       1287       2970      0.748      0.485      0.602      0.429
         Consolidation        229        380      0.477      0.329      0.375       0.21
                   ILD        265        711      0.572      0.111      0.259      0.142
          Infiltration        395        815       0.47      0.337      0.366      0.194
          Lung Opacity        836       1606       0.43      0.409      0.377      0.202
           Nodule/Mass        534       1740      0.593      0.289      0.374      0.218
          Other lesion        728       1510      0.334      0.291      0.252      0.127
      Pleural effusio

In [26]:
# Step 13: Comprehensive evaluation with proper metrics
print("\n--- COMPREHENSIVE MODEL EVALUATION ---")

# Load the best model from stage 2
best_model = YOLO('chest_xray/yolov8m_stage2/weights/best.pt')

# Run validation with PASCAL VOC metrics (IoU > 0.4)
val_results = safe_val(
    best_model,
    data='dataset.yaml',
    split='val',
    imgsz=1024,
    batch=16,
    device=0,
    verbose=True,
    conf=0.25,
    iou=0.4,   # PASCAL VOC metric uses IoU > 0.4
    save_json=True,
    save_hybrid=True,
    plots=True,
)

# Print metrics
if val_results is not None and hasattr(val_results, 'box'):
    print("\nDetection Metrics:")
    print(f"mAP@0.4: {val_results.box.map50:.4f}")
    print(f"mAP@0.4:0.95: {val_results.box.map:.4f}")
    print(f"Precision: {val_results.box.p:.4f}")
    print(f"Recall: {val_results.box.r:.4f}")
    print(f"F1-Score: {val_results.box.f1:.4f}")
else:
    print("Could not retrieve validation metrics")


--- COMPREHENSIVE MODEL EVALUATION ---
Ultralytics 8.3.130 🚀 Python-3.11.12 torch-2.7.0+cu128 CUDA:0 (NVIDIA RTX 4000 Ada Generation, 20028MiB)
Model summary (fused): 92 layers, 25,847,866 parameters, 0 gradients, 78.7 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.1±0.0 ms, read: 1652.3±199.2 MB/s, size: 435.9 KB)


[34m[1mval: [0mScanning /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset/labels/val.cache... 6336 images, 3831 backgrounds, 0 corrupt: 100%|██████████| 6336/6336 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 396/396 [02:14<00:00,  2.94it/s]


                   all       6336      22518      0.758      0.173      0.469      0.312
    Aortic enlargement       1749       4053      0.801       0.36      0.604      0.453
           Atelectasis        128        198      0.762     0.0808      0.427      0.254
         Calcification        281        625      0.678      0.128      0.401      0.261
          Cardiomegaly       1287       2970      0.895       0.34      0.628      0.493
         Consolidation        229        380      0.735     0.0947      0.414      0.285
                   ILD        265        711      0.762     0.0225      0.393      0.307
          Infiltration        395        815      0.744      0.146      0.445      0.274
          Lung Opacity        836       1606      0.707      0.183      0.438      0.282
           Nodule/Mass        534       1740      0.801      0.157      0.486      0.349
          Other lesion        728       1510      0.677       0.13      0.403      0.243
      Pleural effusio

TypeError: unsupported format string passed to numpy.ndarray.__format__

In [27]:
# Step 14: Per-class metrics
try:
    class_names = data_yaml['names']
    # Extract per-class AP
    per_class_ap = {}
    per_class_precision = {}
    per_class_recall = {}
    
    if hasattr(val_results, 'box') and hasattr(val_results.box, 'ap_class_index'):
        # Extract per-class metrics
        for i, idx in enumerate(val_results.box.ap_class_index):
            per_class_ap[int(idx)] = float(val_results.box.ap_class[i])
    else:
        # Alternative way to extract metrics if standard method fails
        print("Using alternative method to extract per-class metrics...")
        # Try to access metrics directly from validation results
        for i in range(len(class_names)):
            if hasattr(val_results.box, f'ap{i}'):
                per_class_ap[i] = getattr(val_results.box, f'ap{i}')
            else:
                per_class_ap[i] = 0
    
    # Create DataFrame for easier visualization
    metrics_df = pd.DataFrame({
        'Class': class_names,
        'AP@0.4': [per_class_ap.get(i, 0) for i in range(len(class_names))],
    })
    
    print("\nPer-Class Metrics:")
    print(metrics_df.to_string(index=False))
    
    # Save to CSV
    metrics_df.to_csv('class_metrics.csv', index=False)
    print("Per-class metrics saved to class_metrics.csv")
    
    # Create a bar plot of AP per class
    plt.figure(figsize=(14, 8))
    bars = plt.bar(metrics_df['Class'], metrics_df['AP@0.4'], color='skyblue')
    plt.xlabel('Class')
    plt.ylabel('AP@0.4')
    plt.title('Average Precision by Class (IoU > 0.4)')
    plt.xticks(rotation=45, ha='right')
    
    # Add value labels on top of bars
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2., height + 0.01,
                 f'{height:.3f}', ha='center', va='bottom', fontsize=8)
    
    plt.tight_layout()
    plt.savefig('ap_by_class.png')
    print("AP by class visualization saved to ap_by_class.png")
    
except Exception as e:
    print(f"Could not generate per-class metrics visualization: {e}")

Could not generate per-class metrics visualization: 'Metric' object has no attribute 'ap_class'. See valid attributes below.

    Class for computing evaluation metrics for YOLOv8 model.

    Attributes:
        p (list): Precision for each class. Shape: (nc,).
        r (list): Recall for each class. Shape: (nc,).
        f1 (list): F1 score for each class. Shape: (nc,).
        all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10).
        ap_class_index (list): Index of class for each AP score. Shape: (nc,).
        nc (int): Number of classes.

    Methods:
        ap50(): AP at IoU threshold of 0.5 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
        ap(): AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
        mp(): Mean precision of all classes. Returns: Float.
        mr(): Mean recall of all classes. Returns: Float.
        map50(): Mean AP at IoU threshold of 0.5 for all clas

In [None]:
threshold_df

In [33]:
# Step 15: Find optimal confidence threshold with Pascal VOC metric (IoU > 0.4)
print("\n--- FINDING OPTIMAL CONFIDENCE THRESHOLD ---")
conf_thresholds = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5]
threshold_results = []

for conf in conf_thresholds:
    print(f"Evaluating with confidence threshold: {conf}")
    
    # Run validation with this threshold using PASCAL VOC IoU threshold (0.4)
    results = safe_val(
        best_model,
        data='dataset.yaml',
        split='val',
        imgsz=1024,
        batch=16,
        device=0,
        verbose=False,
        conf=conf,
        iou=0.4,  # PASCAL VOC metric uses IoU > 0.4
    )
    
    # Extract metrics
    if results is not None and hasattr(results, 'box'):
        threshold_results.append({
            'Conf': conf,
            'mAP40': results.box.map50,  # mAP at IoU=0.4
            'mAP40-95': results.box.map,
            'Precision': np.mean(results.box.p),
            'Recall': np.mean(results.box.r),
            'F1-Score': np.mean(results.box.f1)
        })
    else:
        print(f"Error evaluating confidence threshold {conf}")

# Convert results to DataFrame
threshold_df = pd.DataFrame(threshold_results)
print(threshold_df)

# Save results to CSV
threshold_df.to_csv('confidence_threshold_results.csv', index=False)

# Find optimal threshold based on F1 score
if len(threshold_df) > 0:
    optimal_f1_idx = threshold_df['F1-Score'].idxmax()
    optimal_threshold = threshold_df.loc[optimal_f1_idx, 'Conf']
    print(f"Optimal confidence threshold based on F1-Score: {optimal_threshold}")
    
    # Plot results
    plt.figure(figsize=(12, 8))
    plt.plot(threshold_df['Conf'], threshold_df['Precision'], 'b-', label='Precision')
    plt.plot(threshold_df['Conf'], threshold_df['Recall'], 'r-', label='Recall')
    plt.plot(threshold_df['Conf'], threshold_df['F1-Score'], 'g-', label='F1-Score')
    plt.plot(threshold_df['Conf'], threshold_df['mAP40'], 'y-', label='mAP@0.4')
    plt.axvline(x=optimal_threshold, color='k', linestyle='--', label=f'Optimal threshold = {optimal_threshold}')
    plt.xlabel('Confidence Threshold')
    plt.ylabel('Metric Value')
    plt.title('Metrics vs Confidence Threshold (IoU > 0.4)')
    plt.legend()
    plt.grid(True)
    plt.savefig('confidence_threshold_metrics.png')
    print("Confidence threshold analysis saved to confidence_threshold_metrics.png")
else:
    # Default threshold if evaluation fails
    optimal_threshold = 0.25
    print(f"Using default confidence threshold: {optimal_threshold}")

# Step 16: Confusion Matrix Analysis
print("\n--- CONFUSION MATRIX ANALYSIS ---")
try:
    # Collect predictions and ground truth for confusion matrix
    all_preds = []
    all_targets = []
    
    val_loader = best_model.val_loader
    
    for batch_i, (im, targets, paths, shapes) in enumerate(tqdm(val_loader, desc="Collecting predictions")):
        # Run inference on batch
        results = best_model.predictor(im, conf=optimal_threshold, iou=0.4)
        
        for i, result in enumerate(results):
            # Extract predictions
            if len(result.boxes) > 0:
                pred_classes = result.boxes.cls.cpu().numpy().astype(int)
                for pred_class in pred_classes:
                    all_preds.append(pred_class)
            else:
                # No detection counts as "No finding" (class 14)
                all_preds.append(14)
            
            # Extract targets for this image
            img_targets = targets[targets[:, 0] == i]
            if len(img_targets) > 0:
                target_classes = img_targets[:, 1].cpu().numpy().astype(int)
                for target_class in target_classes:
                    all_targets.append(target_class)
            else:
                # No target counts as "No finding" (class 14)
                all_targets.append(14)
    
    # Create confusion matrix
    cm = confusion_matrix(all_targets, all_preds, labels=list(range(15)))
    
    # Normalize confusion matrix
    cm_norm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    cm_norm = np.nan_to_num(cm_norm)
    
    # Plot confusion matrix
    plt.figure(figsize=(15, 15))
    class_names_with_nofinding = data_yaml['names'] + ['No finding']
    sns.heatmap(cm_norm, annot=True, fmt='.2f', cmap='Blues', 
                xticklabels=class_names_with_nofinding, 
                yticklabels=class_names_with_nofinding)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Normalized Confusion Matrix')
    plt.tight_layout()
    plt.savefig('confusion_matrix.png')
    print("Confusion matrix saved to confusion_matrix.png")
    
except Exception as e:
    print(f"Error generating confusion matrix: {e}")

# Step 17: Function for test inference with proper scaling
def scale_bbox_back(bbox, png_dims, original_dims):
    """Scale bounding box coordinates from PNG dimensions back to original dimensions"""
    png_height, png_width = png_dims
    orig_height, orig_width = original_dims
    
    # Scale factors
    width_scale = orig_width / png_width
    height_scale = orig_height / png_height
    
    # Scale coordinates back to original dimensions
    x_min = int(bbox[0] * width_scale)
    y_min = int(bbox[1] * height_scale)
    x_max = int(bbox[2] * width_scale)
    y_max = int(bbox[3] * height_scale)
    
    return [x_min, y_min, x_max, y_max]


--- FINDING OPTIMAL CONFIDENCE THRESHOLD ---
Evaluating with confidence threshold: 0.05
Ultralytics 8.3.130 🚀 Python-3.11.12 torch-2.7.0+cu128 CUDA:0 (NVIDIA RTX 4000 Ada Generation, 20028MiB)
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1147.1±323.8 MB/s, size: 447.6 KB)


[34m[1mval: [0mScanning /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset/labels/val.cache... 6336 images, 3831 backgrounds, 0 corrupt: 100%|██████████| 6336/6336 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 396/396 [02:16<00:00,  2.91it/s]


                   all       6336      22518      0.503      0.354      0.431      0.262
Speed: 0.4ms preprocess, 20.1ms inference, 0.0ms loss, 0.2ms postprocess per image
Results saved to [1mruns/detect/val14[0m
Evaluating with confidence threshold: 0.1
Ultralytics 8.3.130 🚀 Python-3.11.12 torch-2.7.0+cu128 CUDA:0 (NVIDIA RTX 4000 Ada Generation, 20028MiB)
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1391.7±465.4 MB/s, size: 419.1 KB)


[34m[1mval: [0mScanning /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset/labels/val.cache... 6336 images, 3831 backgrounds, 0 corrupt: 100%|██████████| 6336/6336 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 396/396 [02:15<00:00,  2.93it/s]


                   all       6336      22518      0.607      0.282      0.448      0.282
Speed: 0.3ms preprocess, 20.1ms inference, 0.0ms loss, 0.2ms postprocess per image
Results saved to [1mruns/detect/val15[0m
Evaluating with confidence threshold: 0.15
Ultralytics 8.3.130 🚀 Python-3.11.12 torch-2.7.0+cu128 CUDA:0 (NVIDIA RTX 4000 Ada Generation, 20028MiB)
[34m[1mval: [0mFast image access ✅ (ping: 0.1±0.1 ms, read: 2291.6±1197.7 MB/s, size: 410.7 KB)


[34m[1mval: [0mScanning /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset/labels/val.cache... 6336 images, 3831 backgrounds, 0 corrupt: 100%|██████████| 6336/6336 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 396/396 [02:15<00:00,  2.93it/s]


                   all       6336      22518      0.684      0.235      0.462      0.298
Speed: 0.3ms preprocess, 20.2ms inference, 0.0ms loss, 0.2ms postprocess per image
Results saved to [1mruns/detect/val16[0m
Evaluating with confidence threshold: 0.2
Ultralytics 8.3.130 🚀 Python-3.11.12 torch-2.7.0+cu128 CUDA:0 (NVIDIA RTX 4000 Ada Generation, 20028MiB)
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.1 ms, read: 3729.1±1915.2 MB/s, size: 408.0 KB)


[34m[1mval: [0mScanning /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset/labels/val.cache... 6336 images, 3831 backgrounds, 0 corrupt: 100%|██████████| 6336/6336 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 396/396 [02:15<00:00,  2.93it/s]


                   all       6336      22518      0.732      0.201      0.469      0.307
Speed: 0.3ms preprocess, 20.2ms inference, 0.0ms loss, 0.2ms postprocess per image
Results saved to [1mruns/detect/val17[0m
Evaluating with confidence threshold: 0.25
Ultralytics 8.3.130 🚀 Python-3.11.12 torch-2.7.0+cu128 CUDA:0 (NVIDIA RTX 4000 Ada Generation, 20028MiB)
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 3497.4±1950.4 MB/s, size: 473.5 KB)


[34m[1mval: [0mScanning /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset/labels/val.cache... 6336 images, 3831 backgrounds, 0 corrupt: 100%|██████████| 6336/6336 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 396/396 [02:14<00:00,  2.93it/s]


                   all       6336      22518      0.758      0.173      0.469      0.312
Speed: 0.3ms preprocess, 20.2ms inference, 0.0ms loss, 0.2ms postprocess per image
Results saved to [1mruns/detect/val18[0m
Evaluating with confidence threshold: 0.3
Ultralytics 8.3.130 🚀 Python-3.11.12 torch-2.7.0+cu128 CUDA:0 (NVIDIA RTX 4000 Ada Generation, 20028MiB)
[34m[1mval: [0mFast image access ✅ (ping: 0.1±0.1 ms, read: 1927.3±690.2 MB/s, size: 429.4 KB)


[34m[1mval: [0mScanning /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset/labels/val.cache... 6336 images, 3831 backgrounds, 0 corrupt: 100%|██████████| 6336/6336 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 396/396 [02:14<00:00,  2.94it/s]


                   all       6336      22518      0.802       0.15      0.479      0.323
Speed: 0.3ms preprocess, 20.2ms inference, 0.0ms loss, 0.2ms postprocess per image
Results saved to [1mruns/detect/val19[0m
Evaluating with confidence threshold: 0.35
Ultralytics 8.3.130 🚀 Python-3.11.12 torch-2.7.0+cu128 CUDA:0 (NVIDIA RTX 4000 Ada Generation, 20028MiB)
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.1 ms, read: 1838.9±447.7 MB/s, size: 426.2 KB)


[34m[1mval: [0mScanning /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset/labels/val.cache... 6336 images, 3831 backgrounds, 0 corrupt: 100%|██████████| 6336/6336 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 396/396 [02:14<00:00,  2.94it/s]


                   all       6336      22518       0.84      0.136       0.49      0.336
Speed: 0.3ms preprocess, 20.2ms inference, 0.0ms loss, 0.2ms postprocess per image
Results saved to [1mruns/detect/val20[0m
Evaluating with confidence threshold: 0.4
Ultralytics 8.3.130 🚀 Python-3.11.12 torch-2.7.0+cu128 CUDA:0 (NVIDIA RTX 4000 Ada Generation, 20028MiB)
[34m[1mval: [0mFast image access ✅ (ping: 0.1±0.1 ms, read: 2917.2±2187.9 MB/s, size: 466.5 KB)


[34m[1mval: [0mScanning /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset/labels/val.cache... 6336 images, 3831 backgrounds, 0 corrupt: 100%|██████████| 6336/6336 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 396/396 [02:14<00:00,  2.94it/s]


                   all       6336      22518      0.869      0.118      0.496      0.344
Speed: 0.3ms preprocess, 20.2ms inference, 0.0ms loss, 0.2ms postprocess per image
Results saved to [1mruns/detect/val21[0m
Evaluating with confidence threshold: 0.5
Ultralytics 8.3.130 🚀 Python-3.11.12 torch-2.7.0+cu128 CUDA:0 (NVIDIA RTX 4000 Ada Generation, 20028MiB)
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2793.7±2063.3 MB/s, size: 387.3 KB)


[34m[1mval: [0mScanning /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset/labels/val.cache... 6336 images, 3831 backgrounds, 0 corrupt: 100%|██████████| 6336/6336 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 396/396 [02:14<00:00,  2.94it/s]


                   all       6336      22518      0.905     0.0916        0.5      0.363
Speed: 0.3ms preprocess, 20.2ms inference, 0.0ms loss, 0.2ms postprocess per image
Results saved to [1mruns/detect/val22[0m
   Conf     mAP40  mAP40-95  Precision    Recall  F1-Score
0  0.05  0.431469  0.261933   0.503102  0.353717  0.406788
1  0.10  0.447932  0.281894   0.607260  0.282254  0.378278
2  0.15  0.462065  0.297952   0.683954  0.235004  0.340884
3  0.20  0.469212  0.307451   0.732028  0.200779  0.306060
4  0.25  0.468681  0.312418   0.757715  0.172968  0.272467
5  0.30  0.479167  0.322879   0.801628  0.150268  0.242334
6  0.35  0.489902  0.336354   0.839533  0.135904  0.222472
7  0.40  0.495506  0.344406   0.868596  0.118199  0.195565
8  0.50  0.499616  0.362703   0.904936  0.091617  0.153463
Optimal confidence threshold based on F1-Score: 0.05
Confidence threshold analysis saved to confidence_threshold_metrics.png

--- CONFUSION MATRIX ANALYSIS ---
Error generating confusion matrix: 

In [None]:
# Step 18: Test Set Inference and Submission
print("\n--- TEST SET INFERENCE ---")
print("Starting inference on test set...")

# Load test data
test_path = os.path.join(root_path, "test", "test")
sample_submission = pd.read_csv(os.path.join(root_path, "sample_submission.csv"))
test_dimensions = load_image_dimensions(os.path.join(root_path, "img_size.csv"))

print(f"Loaded {len(sample_submission)} test images")

# Make predictions on test set using optimal confidence threshold
predictions = []
test_count = 0
no_detection_count = 0

for img_id in tqdm(sample_submission['image_id'], desc="Running inference on test set"):
    img_path = os.path.join(test_path, f"{img_id}.png")
    
    if os.path.exists(img_path):
        test_count += 1
        # Get original dimensions for this image
        orig_dims = test_dimensions[img_id]  
        
        # Run inference with optimal confidence threshold and PASCAL VOC IoU (0.4)
        results = best_model(img_path, conf=optimal_threshold, iou=0.4)
        
        # Get detections
        boxes = results[0].boxes.xyxy.cpu().numpy()  # in 1024x1024 scale
        scores = results[0].boxes.conf.cpu().numpy()
        classes = results[0].boxes.cls.cpu().numpy().astype(int)
        
        if len(boxes) == 0:
            # No findings detected - use class 14 ("No finding")
            no_detection_count += 1
            predictions.append(f"{img_id},14 1.0 0 0 1 1")
        else:
            # Scale boxes back to original dimensions
            img_preds = []
            for box, score, cls in zip(boxes, scores, classes):
                # Scale box back to original dimensions
                scaled_box = scale_bbox_back(box, (1024, 1024), orig_dims)
                img_preds.append(f"{cls} {score:.6f} {scaled_box[0]} {scaled_box[1]} {scaled_box[2]} {scaled_box[3]}")
            
            predictions.append(f"{img_id},{' '.join(img_preds)}")
    else:
        print(f"Test image not found: {img_path}")

print(f"Processed {test_count} test images")
print(f"Number of images with no detections (classified as 'No finding'): {no_detection_count}")
print(f"Percentage of 'No finding' classifications: {(no_detection_count/test_count)*100:.2f}%")

# Create submission file
submission_path = 'submission.csv'
with open(submission_path, 'w') as f:
    f.write("image_id,PredictionString\n")
    for pred in predictions:
        img_id, target = pred.split(',', 1)
        f.write(f"{img_id},{target}\n")

print(f"Submission file created at {submission_path}")

# Step 19: Visualize some test predictions
print("\n--- CREATING TEST VISUALIZATIONS ---")

try:
    # Sample a few test images
    sample_size = min(5, test_count)
    sample_ids = np.random.choice(sample_submission['image_id'].values, size=sample_size, replace=False)
    
    print(f"Generating visualizations for {sample_size} random test images...")
    
    for img_id in sample_ids:
        img_path = os.path.join(test_path, f"{img_id}.png")
        if not os.path.exists(img_path):
            continue
            
        # Load image
        img = Image.open(img_path)
        img_array = np.array(img)
        
        # Run inference with optimal threshold
        results = best_model(img_path, conf=optimal_threshold, iou=0.4)
        
        # Create figure
        plt.figure(figsize=(12, 12))
        plt.imshow(img_array, cmap='gray')
        plt.title(f"Test Image: {img_id}")
        
        # Plot predictions
        boxes = results[0].boxes.xyxy.cpu().numpy()
        scores = results[0].boxes.conf.cpu().numpy()
        classes = results[0].boxes.cls.cpu().numpy().astype(int)
        
        if len(boxes) == 0:
            plt.text(10, 30, "Prediction: No finding", fontsize=14, 
                     color='white', bbox=dict(facecolor='red', alpha=0.5))
        else:
            for box, score, cls in zip(boxes, scores, classes):
                x_min, y_min, x_max, y_max = box
                width = x_max - x_min
                height = y_max - y_min
                
                rect = plt.Rectangle((x_min, y_min), width, height, 
                                    fill=False, edgecolor='red', linewidth=2)
                plt.gca().add_patch(rect)
                
                class_name = data_yaml['names'][cls] if cls < len(data_yaml['names']) else f"Class {cls}"
                plt.text(x_min, y_min - 5, f"{class_name}: {score:.2f}", 
                         color='white', fontsize=10, 
                         bbox=dict(facecolor='red', alpha=0.5))
        
        plt.axis('off')
        plt.tight_layout()
        plt.savefig(f"test_prediction_{img_id}.png")
        plt.close()
    
    print(f"Test visualizations saved as test_prediction_*.png")
    
except Exception as e:
    print(f"Could not generate test visualizations: {e}")

print("\n--- TRAINING AND EVALUATION COMPLETE ---")
print(f"Best model saved at: chest_xray/yolov8m_stage2/weights/best.pt")
print(f"Optimal confidence threshold: {optimal_threshold}")
print(f"Submission file: {submission_path}")

In [39]:
submission_path = 'submission.csv'
with open(submission_path, 'w') as f:
    f.write("image_id,PredictionString\n")
    for pred in predictions:
        img_id, target = pred.split(',', 1)
        f.write(f"{img_id},{target}\n")

print(f"Submission file created at {submission_path}")


Submission file created at submission.csv
