In [4]:
import os
import numpy as np
import pandas as pd
import shutil
import yaml
from PIL import Image
from sklearn.model_selection import train_test_split
from ultralytics import YOLO
from tqdm import tqdm
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
import ultralytics
print(f"Ultralytics version: {ultralytics.__version__}")

Ultralytics version: 8.3.130


In [5]:
print("CUDA_VISIBLE_DEVICES:", os.environ.get("CUDA_VISIBLE_DEVICES", "Not set"))
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device count: {torch.cuda.device_count()}")

# Check GPU access
if torch.cuda.is_available() and torch.cuda.device_count() > 0:
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

CUDA_VISIBLE_DEVICES: GPU-73280671-c9c4-70a5-0e54-7f98b2a44f29
PyTorch version: 2.7.0+cu128
CUDA available: True
CUDA device count: 1
GPU 0: NVIDIA RTX 4000 Ada Generation
Using device: cuda


In [6]:
root_path = "/data/mhedas/common/challenge_dataset/"

In [7]:
# Step 2: Load and preprocess the data
train_df = pd.read_csv(os.path.join(root_path, "train.csv"))
img_dims_df = pd.read_csv(os.path.join(root_path, "img_size.csv"))
train_df = train_df.merge(img_dims_df, on='image_id', how='left')

print(f"Loaded {len(train_df)} annotations")
print(f"Number of unique images: {train_df['image_id'].nunique()}")

# Print class distribution
class_counts = train_df['class_id'].value_counts().sort_index()
print("\nClass distribution:")
for class_id, count in class_counts.items():
    print(f"Class {class_id}: {count} annotations")

Loaded 45925 annotations
Number of unique images: 8573

Class distribution:
Class 0: 5481 annotations
Class 1: 255 annotations
Class 2: 851 annotations
Class 3: 4046 annotations
Class 4: 519 annotations
Class 5: 904 annotations
Class 6: 1097 annotations
Class 7: 2188 annotations
Class 8: 2324 annotations
Class 9: 1945 annotations
Class 10: 2190 annotations
Class 11: 4308 annotations
Class 12: 195 annotations
Class 13: 4097 annotations
Class 14: 15525 annotations


In [8]:
# Step 3: Create a mapping of original image dimensions from img_size.csv
def load_image_dimensions(csv_path):
    """Load image dimensions from CSV file"""
    img_dimensions = {}
    df = pd.read_csv(csv_path)
    
    for _, row in df.iterrows():
        # Store dimensions as (height, width)
        img_dimensions[row['image_id']] = (int(row['dim0']), int(row['dim1']))
    
    return img_dimensions

original_dimensions = load_image_dimensions(os.path.join(root_path, "img_size.csv"))
print(f"Loaded dimensions for {len(original_dimensions)} images")

Loaded dimensions for 15000 images


In [9]:
PNG_HEIGHT, PNG_WIDTH = 1024, 1024

In [10]:
# Step 4: Function to scale bounding box coordinates
def scale_bbox(bbox, original_dims, new_dims):
    """
    Scale bounding box coordinates from original dimensions to new dimensions
    
    Args:
        bbox (list/array): Bounding box coordinates [x_min, y_min, x_max, y_max]
        original_dims (tuple): Original image dimensions (height, width)
        new_dims (tuple): New image dimensions (height, width)
        
    Returns:
        list: Scaled bounding box coordinates [x_min, y_min, x_max, y_max]
    """
    orig_height, orig_width = original_dims
    new_height, new_width = new_dims
    
    # Scale factors
    width_scale = new_width / orig_width
    height_scale = new_height / orig_height
    
    # Scale coordinates
    x_min = bbox[0] * width_scale
    y_min = bbox[1] * height_scale
    x_max = bbox[2] * width_scale
    y_max = bbox[3] * height_scale
    
    return [x_min, y_min, x_max, y_max]

In [11]:
# Step 5: Create directories for YOLOv8 dataset format (initial train/val split)
# These directories will be used for HP tuning and Gauge Training
initial_dataset_dir_name = 'yolov8_dataset_initial_split'
os.makedirs(f'{initial_dataset_dir_name}/images/train', exist_ok=True)
os.makedirs(f'{initial_dataset_dir_name}/images/val', exist_ok=True)
os.makedirs(f'{initial_dataset_dir_name}/labels/train', exist_ok=True)
os.makedirs(f'{initial_dataset_dir_name}/labels/val', exist_ok=True)

In [12]:
# Step 6: Split data into training and validation sets with stratification
print("\nPerforming stratified split to maintain class distribution...")

# Get unique image IDs for each class
class_image_ids = {}
for class_id in range(15):  # 0-14 classes
    class_image_ids[class_id] = set(train_df[train_df['class_id'] == class_id]['image_id'].unique())

# Count how many images have each class
for class_id, img_ids in class_image_ids.items():
    print(f"Class {class_id}: {len(img_ids)} unique images")


Performing stratified split to maintain class distribution...
Class 0: 2365 unique images
Class 1: 167 unique images
Class 2: 385 unique images
Class 3: 1746 unique images
Class 4: 325 unique images
Class 5: 341 unique images
Class 6: 532 unique images
Class 7: 1132 unique images
Class 8: 705 unique images
Class 9: 965 unique images
Class 10: 909 unique images
Class 11: 1689 unique images
Class 12: 79 unique images
Class 13: 1388 unique images
Class 14: 5175 unique images


In [13]:
all_image_ids = train_df['image_id'].unique()
rare_classes = [0, 2, 5, 6, 12, 13]
images_with_rare_classes = set()
for class_id in rare_classes:
    images_with_rare_classes.update(class_image_ids.get(class_id, set()))

rare_images = list(images_with_rare_classes)
common_images = list(set(all_image_ids) - images_with_rare_classes)

# Split rare and common images separately with the same ratio
rare_train, rare_val = train_test_split(rare_images, test_size=0.2, random_state=111)
common_train, common_val = train_test_split(common_images, test_size=0.2, random_state=111)

# Combine the splits
train_images = rare_train + common_train
val_images = rare_val + common_val

print(f"Initial training images: {len(train_images)}")
print(f"Initial validation images: {len(val_images)}")

Initial training images: 6857
Initial validation images: 1716


In [14]:
train_df_subset = train_df[train_df['image_id'].isin(train_images)]
val_df_subset = train_df[train_df['image_id'].isin(val_images)]

train_class_counts_annotations = train_df_subset['class_id'].value_counts().sort_index()
val_class_counts_annotations = val_df_subset['class_id'].value_counts().sort_index()

print("\nInitial Training set class distribution (annotations):")
for class_id, count in train_class_counts_annotations.items():
    print(f"Class {class_id}: {count} annotations ({count/sum(train_class_counts_annotations)*100:.1f}%)")

print("\nInitial Validation set class distribution (annotations):")
for class_id, count in val_class_counts_annotations.items():
    print(f"Class {class_id}: {count} annotations ({count/sum(val_class_counts_annotations)*100:.1f}%)")



Initial Training set class distribution (annotations):
Class 0: 4342 annotations (11.8%)
Class 1: 210 annotations (0.6%)
Class 2: 672 annotations (1.8%)
Class 3: 3258 annotations (8.9%)
Class 4: 421 annotations (1.1%)
Class 5: 742 annotations (2.0%)
Class 6: 870 annotations (2.4%)
Class 7: 1748 annotations (4.8%)
Class 8: 1800 annotations (4.9%)
Class 9: 1566 annotations (4.3%)
Class 10: 1766 annotations (4.8%)
Class 11: 3420 annotations (9.3%)
Class 12: 172 annotations (0.5%)
Class 13: 3310 annotations (9.0%)
Class 14: 12396 annotations (33.8%)

Initial Validation set class distribution (annotations):
Class 0: 1139 annotations (12.3%)
Class 1: 45 annotations (0.5%)
Class 2: 179 annotations (1.9%)
Class 3: 788 annotations (8.5%)
Class 4: 98 annotations (1.1%)
Class 5: 162 annotations (1.8%)
Class 6: 227 annotations (2.5%)
Class 7: 440 annotations (4.8%)
Class 8: 524 annotations (5.7%)
Class 9: 379 annotations (4.1%)
Class 10: 424 annotations (4.6%)
Class 11: 888 annotations (9.6%)
Cla

In [15]:
# Step 7: Convert annotations to YOLOv8 format with proper scaling
def convert_to_yolo_format(df, img_ids, output_dir_labels, original_dims_map, target_png_dims):
    """Convert bounding box annotations to YOLOv8 format with proper scaling"""
    images_with_no_finding_label_file = 0
    annotations_for_other_classes = 0
    processed_class_counts = {i: 0 for i in range(15)} 
    
    for img_id in tqdm(img_ids, desc="Converting annotations"):
        img_annotations_df = df[df['image_id'] == img_id]
        
        if len(img_annotations_df) == 0:
            with open(os.path.join(output_dir_labels, f"{img_id}.txt"), 'w') as f:
                pass 
            images_with_no_finding_label_file += 1
            processed_class_counts[14] += 1 
            continue
            
        orig_height, orig_width = original_dims_map.get(img_id, target_png_dims) 
        has_actual_ailment = any(img_annotations_df['class_id'] != 14)
        
        with open(os.path.join(output_dir_labels, f"{img_id}.txt"), 'w') as f:
            if not has_actual_ailment:
                images_with_no_finding_label_file += 1
                processed_class_counts[14] += 1 
                pass
            else:
                for _, row in img_annotations_df.iterrows():
                    if row['class_id'] == 14:
                        continue
                    
                    if pd.isna(row['x_min']) or pd.isna(row['y_min']) or pd.isna(row['x_max']) or pd.isna(row['y_max']) or pd.isna(row['class_id']):
                        print(f"Warning: NaN bbox/class_id found for image_id {img_id}, annotation skipped.")
                        continue

                    actual_class_id = int(row['class_id'])
                    annotations_for_other_classes += 1
                    processed_class_counts[actual_class_id] += 1
                    
                    x_min, y_min, x_max, y_max = row['x_min'], row['y_min'], row['x_max'], row['y_max']
                    scaled_bbox = scale_bbox([x_min, y_min, x_max, y_max], 
                                             (orig_height, orig_width), 
                                             target_png_dims)
                    
                    png_h, png_w = target_png_dims
                    x_center_norm = ((scaled_bbox[0] + scaled_bbox[2]) / 2) / png_w
                    y_center_norm = ((scaled_bbox[1] + scaled_bbox[3]) / 2) / png_h
                    bbox_width_norm = (scaled_bbox[2] - scaled_bbox[0]) / png_w
                    bbox_height_norm = (scaled_bbox[3] - scaled_bbox[1]) / png_h
                    
                    x_center_norm = max(0.0, min(x_center_norm, 1.0))
                    y_center_norm = max(0.0, min(y_center_norm, 1.0))
                    bbox_width_norm = max(0.001, min(bbox_width_norm, 1.0)) 
                    bbox_height_norm = max(0.001, min(bbox_height_norm, 1.0))
                    
                    f.write(f"{actual_class_id} {x_center_norm} {y_center_norm} {bbox_width_norm} {bbox_height_norm}\n")
    
    print(f"Created {images_with_no_finding_label_file} empty label files for 'No finding' images.")
    print(f"Processed {annotations_for_other_classes} annotations for actual ailment classes (0-13).")
    print("Counts of each class_id written to label files (class 14 indicates empty files):")
    for class_id, count in processed_class_counts.items():
        if count > 0 :
            print(f"Class {class_id}: {count} entries")
            
    return images_with_no_finding_label_file, annotations_for_other_classes, processed_class_counts

# Convert annotations for the initial train/val split
print("\nConverting initial training annotations to YOLOv8 format...")
_, _, train_label_class_counts_initial = convert_to_yolo_format(
    train_df, 
    train_images, 
    f'{initial_dataset_dir_name}/labels/train',
    original_dimensions,
    (PNG_HEIGHT, PNG_WIDTH)
)

print("\nConverting initial validation annotations to YOLOv8 format...")
_, _, val_label_class_counts_initial = convert_to_yolo_format(
    train_df, 
    val_images, 
    f'{initial_dataset_dir_name}/labels/val',
    original_dimensions,
    (PNG_HEIGHT, PNG_WIDTH)
)


Converting initial training annotations to YOLOv8 format...


Converting annotations: 100%|██████████| 6857/6857 [03:24<00:00, 33.53it/s]


Created 4132 empty label files for 'No finding' images.
Processed 24297 annotations for actual ailment classes (0-13).
Counts of each class_id written to label files (class 14 indicates empty files):
Class 0: 4342 entries
Class 1: 210 entries
Class 2: 672 entries
Class 3: 3258 entries
Class 4: 421 entries
Class 5: 742 entries
Class 6: 870 entries
Class 7: 1748 entries
Class 8: 1800 entries
Class 9: 1566 entries
Class 10: 1766 entries
Class 11: 3420 entries
Class 12: 172 entries
Class 13: 3310 entries
Class 14: 4132 entries

Converting initial validation annotations to YOLOv8 format...


Converting annotations: 100%|██████████| 1716/1716 [00:35<00:00, 47.76it/s]

Created 1043 empty label files for 'No finding' images.
Processed 6103 annotations for actual ailment classes (0-13).
Counts of each class_id written to label files (class 14 indicates empty files):
Class 0: 1139 entries
Class 1: 45 entries
Class 2: 179 entries
Class 3: 788 entries
Class 4: 98 entries
Class 5: 162 entries
Class 6: 227 entries
Class 7: 440 entries
Class 8: 524 entries
Class 9: 379 entries
Class 10: 424 entries
Class 11: 888 entries
Class 12: 23 entries
Class 13: 787 entries
Class 14: 1043 entries





In [16]:
print("\nCopying images for initial train/val split...")
image_source_dir = os.path.join(root_path, "train", "train") # Adjusted path

for img_id in tqdm(train_images, desc="Copying initial training images"):
    src_path = os.path.join(image_source_dir, f"{img_id}.png")
    dst_path = os.path.join(f'{initial_dataset_dir_name}/images/train', f"{img_id}.png")
    if os.path.exists(src_path):
        shutil.copy(src_path, dst_path)
    else:
        print(f"Image not found: {src_path}")

for img_id in tqdm(val_images, desc="Copying initial validation images"):
    src_path = os.path.join(image_source_dir, f"{img_id}.png")
    dst_path = os.path.join(f'{initial_dataset_dir_name}/images/val', f"{img_id}.png")
    if os.path.exists(src_path):
        shutil.copy(src_path, dst_path)
    else:
        print(f"Image not found: {src_path}")




Copying images for initial train/val split...


Copying initial training images: 100%|██████████| 6857/6857 [09:02<00:00, 12.63it/s]
Copying initial validation images: 100%|██████████| 1716/1716 [04:08<00:00,  6.90it/s]


In [17]:
# Step 9: Create YOLOv8 dataset YAML file for initial train/val split (for tuning and gauge training)
# This will be 'dataset_initial.yaml'
data_yaml_initial_content = {
    'path': os.path.abspath(initial_dataset_dir_name),
    'train': 'images/train',
    'val': 'images/val',
    'nc': 14, 
    'names': [
        'Aortic enlargement', 'Atelectasis', 'Calcification', 'Cardiomegaly',
        'Consolidation', 'ILD', 'Infiltration', 'Lung Opacity', 'Nodule/Mass',
        'Other lesion', 'Pleural effusion', 'Pleural thickening', 'Pneumothorax',
        'Pulmonary fibrosis'
    ]
}

# Save this as 'dataset_initial.yaml'
path_to_initial_yaml = 'dataset_initial.yaml'
with open(path_to_initial_yaml, 'w') as f:
    yaml.dump(data_yaml_initial_content, f, sort_keys=False)
print(f"\nCreated {path_to_initial_yaml} for initial train/val split.")


Created dataset_initial.yaml for initial train/val split.


In [18]:
# Step 13: Calculate class weights for balanced training (optional, for initial train set)
# These weights could be used for the gauge training if desired, or let tuned HPs handle balancing.
# We use train_label_class_counts_initial which reflects the initial training split.
num_actual_classes_for_weights = data_yaml_initial_content['nc'] # Should be 14
class_weights_initial = []

total_ailment_annotations_in_initial_train = sum(
    count for class_id, count in train_label_class_counts_initial.items() 
    if class_id < num_actual_classes_for_weights
)

print(f"\nCalculating class weights based on {total_ailment_annotations_in_initial_train} ailment annotations in initial training label files.")

if total_ailment_annotations_in_initial_train > 0:
    for i in range(num_actual_classes_for_weights):  # Iterate 0-13
        count = train_label_class_counts_initial.get(i, 0)
        if count > 0:
            weight = 1.0 / count 
        else:
            weight = 1.0 
        class_weights_initial.append(weight)

    class_weights_initial = np.array(class_weights_initial)
    class_weights_initial = (class_weights_initial / np.sum(class_weights_initial)) * num_actual_classes_for_weights
    
    print("\nCalculated initial class weights (normalized):")
    for i, weight in enumerate(class_weights_initial):
        print(f"Class {data_yaml_initial_content['names'][i]} (ID {i}): {weight:.4f} (Count: {train_label_class_counts_initial.get(i, 0)})")
    
    with open(path_to_initial_yaml, 'a') as f:
        yaml.dump({"weights": class_weights_initial.tolist()}, f)
        print(f"Initial class weights appended to {path_to_initial_yaml}")
else:
    print("No ailment annotations found in initial train set to calculate class weights. Using default weights (1.0 for all).")
    class_weights_initial = np.ones(num_actual_classes_for_weights).tolist()


Calculating class weights based on 24297 ailment annotations in initial training label files.

Calculated initial class weights (normalized):
Class Aortic enlargement (ID 0): 0.1581 (Count: 4342)
Class Atelectasis (ID 1): 3.2679 (Count: 210)
Class Calcification (ID 2): 1.0212 (Count: 672)
Class Cardiomegaly (ID 3): 0.2106 (Count: 3258)
Class Consolidation (ID 4): 1.6301 (Count: 421)
Class ILD (ID 5): 0.9249 (Count: 742)
Class Infiltration (ID 6): 0.7888 (Count: 870)
Class Lung Opacity (ID 7): 0.3926 (Count: 1748)
Class Nodule/Mass (ID 8): 0.3813 (Count: 1800)
Class Other lesion (ID 9): 0.4382 (Count: 1566)
Class Pleural effusion (ID 10): 0.3886 (Count: 1766)
Class Pleural thickening (ID 11): 0.2007 (Count: 3420)
Class Pneumothorax (ID 12): 3.9899 (Count: 172)
Class Pulmonary fibrosis (ID 13): 0.2073 (Count: 3310)
Initial class weights appended to dataset_initial.yaml


In [19]:
# Step 14: Create a base hyperparameters file for medical imaging ('hyp_medical_base.yaml')
# This can serve as a starting point or fallback if HP tuning fails or is skipped.
hyp_medical_base = {
    # Loss coefficients
    "box": 7.5,
    "cls": 0.5, 
    "dfl": 1.5,
    
    # Optimizer settings
    "lr0": 0.001,
    "lrf": 0.01,
    "momentum": 0.937,
    "weight_decay": 0.0005,
    "warmup_epochs": 3.0,
    "warmup_momentum": 0.8,
    "warmup_bias_lr": 0.1,
    
    # Augmentation settings
    "hsv_h": 0.01,
    "hsv_s": 0.2,
    "hsv_v": 0.2,
    "degrees": 5.0,
    "translate": 0.1,
    "scale": 0.3,
    "shear": 0.0,
    "perspective": 0.0,
    "flipud": 0.0,
    "fliplr": 0.5,
    "mosaic": 0.3, # Will be adjusted by close_mosaic during training
    "mixup": 0.0,
    "copy_paste": 0.0,
}

path_to_base_hyp_yaml = 'hyp_medical_base.yaml'
with open(path_to_base_hyp_yaml, 'w') as f:
    yaml.dump(hyp_medical_base, f, sort_keys=False)
print(f"\nCreated base hyperparameter file: {path_to_base_hyp_yaml}")


Created base hyperparameter file: hyp_medical_base.yaml


In [20]:
# Clear GPU cache before training
if torch.cuda.is_available():
    print("\nClearing GPU cache...")
    torch.cuda.empty_cache()


Clearing GPU cache...


In [21]:
# Function to handle evaluation errors
def safe_val(model_to_eval, data_path, split_name='val', imgsz_val=1024, batch_val=16, conf_val=0.25, iou_val=0.4, **kwargs):
    """Run validation with error handling"""
    try:
        print(f"Running validation: data={data_path}, split={split_name}, imgsz={imgsz_val}, batch={batch_val}, conf={conf_val}, iou={iou_val}")
        results = model_to_eval.val(
            data=data_path, 
            split=split_name, 
            imgsz=imgsz_val, 
            batch=batch_val, 
            conf=conf_val, 
            iou=iou_val, 
            plots=True, # Enable plots for confusion matrix etc.
            save_json=True, # Save JSON for pycocotools if needed
            save_hybrid=True, # Save hybrid format (labels + predictions)
            verbose=True, 
            **kwargs
        )
        return results
    except KeyError as e:
        print(f"KeyError during validation: {e}. This can happen with inconsistent class indexing or metric calculation issues.")
        print("Attempting to proceed, but results might be incomplete.")
        return None
    except Exception as e:
        print(f"An unexpected error occurred during validation: {e}")
        return None

In [32]:
from ultralytics import YOLO
Test = YOLO("yolo11m.pt")

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt to 'yolo11m.pt'...


100%|██████████| 38.8M/38.8M [00:00<00:00, 64.8MB/s]


In [34]:
# --- STEP 1: HYPERPARAMETER TUNING ---
print("\n--- STEP 1: HYPERPARAMETER TUNING ---")
path_to_best_hyperparameters_yaml = path_to_base_hyp_yaml # Default fallback
freeze_N_layers = 10

try:
    tuning_epochs_per_trial = 10  # Your desired epochs per trial
    tuning_iterations = 15

    print(f"Starting hyperparameter tuning: {tuning_iterations} iterations, {tuning_epochs_per_trial} epochs each.")
    print(f"Using initial dataset for tuning: {path_to_initial_yaml}")
    
    tuner_instance = YOLO("yolo11m.pt") # Create a base instance to call .tune()

    tune_results = tuner_instance.tune(
        data=path_to_initial_yaml,      
        epochs=tuning_epochs_per_trial, 
        iterations=tuning_iterations,
        optimizer='AdamW',
        project='chest_xray_runs',      
        name='hp_tuning',               
        exist_ok=True,
        val=True,                       
        plots=False,                    
        save=False,                     
        device=device,
        # verbose=False,                
        freeze=freeze_N_layers
    )
    
    # ... (rest of your code for finding path_to_best_hyperparameters_yaml) ...
    # (This part needs to correctly interpret tune_results based on your Ultralytics version)
    if hasattr(tune_results, 'save_dir') and os.path.exists(os.path.join(tune_results.save_dir, 'best_hyperparameters.yaml')):
        path_to_best_hyperparameters_yaml = os.path.join(tune_results.save_dir, 'best_hyperparameters.yaml')
    elif hasattr(tune_results, 'best_hyp_yaml') and os.path.exists(tune_results.best_hyp_yaml): # another possible attribute
         path_to_best_hyperparameters_yaml = tune_results.best_hyp_yaml
    else:
        presumed_path = os.path.join('chest_xray_runs', 'hp_tuning', 'best_hyperparameters.yaml')
        if os.path.exists(presumed_path):
            path_to_best_hyperparameters_yaml = presumed_path
        else:
            print(f"Warning: 'best_hyperparameters.yaml' not found automatically.")
            path_to_best_hyperparameters_yaml = path_to_base_hyp_yaml


    print(f"Tuning completed. Using hyperparameters from: {path_to_best_hyperparameters_yaml}")

except Exception as e:
    print(f"An error occurred during hyperparameter tuning: {e}")
    print(f"Falling back to using base HPs: {path_to_base_hyp_yaml}")
    path_to_best_hyperparameters_yaml = path_to_base_hyp_yaml


--- STEP 1: HYPERPARAMETER TUNING ---
Starting hyperparameter tuning: 50 iterations, 15 epochs each.
Using initial dataset for tuning: dataset_initial.yaml
[34m[1mTuner: [0mInitialized Tuner instance with 'tune_dir=chest_xray_runs/hp_tuning7'
[34m[1mTuner: [0m💡 Learn about tuning at https://docs.ultralytics.com/guides/hyperparameter-tuning
[34m[1mTuner: [0mStarting iteration 1/50 with hyperparameters: {'lr0': 0.01, 'lrf': 0.01, 'momentum': 0.937, 'weight_decay': 0.0005, 'warmup_epochs': 3.0, 'warmup_momentum': 0.8, 'box': 7.5, 'cls': 0.5, 'dfl': 1.5, 'hsv_h': 0.015, 'hsv_s': 0.7, 'hsv_v': 0.4, 'degrees': 0.0, 'translate': 0.1, 'scale': 0.5, 'shear': 0.0, 'perspective': 0.0, 'flipud': 0.0, 'fliplr': 0.5, 'bgr': 0.0, 'mosaic': 1.0, 'mixup': 0.0, 'cutmix': 0.0, 'copy_paste': 0.0}
New https://pypi.org/project/ultralytics/8.3.142 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.130 🚀 Python-3.11.12 torch-2.7.0+cu128 CUDA:0 (NVIDIA RTX 4000 Ada Generation, 20028

[34m[1mtrain: [0mScanning /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset_initial_split/labels/train.cache... 6857 images, 4132 backgrounds, 0 corrupt: 100%|██████████| 6857/6857 [00:00<?, ?it/s]


[34m[1mval: [0mFast image access ✅ (ping: 0.1±0.0 ms, read: 1089.7±477.9 MB/s, size: 453.1 KB)


[34m[1mval: [0mScanning /data/mhedas/common/amunozbr/amia-2025-challenge/yolov8_dataset_initial_split/labels/val.cache... 1716 images, 1043 backgrounds, 0 corrupt: 100%|██████████| 1716/1716 [00:00<?, ?it/s]


[34m[1moptimizer:[0m AdamW(lr=0.01, momentum=0.937) with parameter groups 106 weight(decay=0.0), 113 weight(decay=0.0005), 112 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mchest_xray_runs/train2[0m
Starting training for 15 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/15      7.91G      2.557      4.586      2.437        109        640:  19%|█▉        | 81/429 [00:25<01:50,  3.14it/s]
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/data/mhedas/scratch_ssd/amunozbr/.micromamba/envs/amia-3.11/lib/python3.11/site-packages/ultralytics/cfg/__init__.py", line 1023, in <module>
    entrypoint(debug="")
  File "/data/mhedas/scratch_ssd/amunozbr/.micromamba/envs/amia-3.11/lib/python3.11/site-packages/ultralytics/cfg/__init__.py", line 981, in entrypoint
    getattr(model, mode)(**overrides)  # default args from model
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/mhedas/scratch_ssd/amunozbr/.micromamba/envs/amia-3.11/lib/python3.11/site-packages/ultralytics/engine/model.py", line 793, in train
    self.trainer.train()
  File "/data/mhedas/scratch_ssd/amunozbr/.micromamba/envs/amia-3.11/lib/python3.11/site-packages/ultralytics/engine/trainer.py",

KeyboardInterrupt: 

In [None]:
# --- STEP 2: "GAUGE" TRAINING & THRESHOLD FINDING (using tuned HPs on initial train/val split) ---
print("\n--- STEP 2: GAUGE TRAINING & THRESHOLD FINDING ---")

# Define parameters for the gauge training run
gauge_training_epochs = 150 # Set a high number of epochs; early stopping will determine the actual count
gauge_patience = 20       # Patience for early stopping

model_gauge = YOLO('yolov8m.pt') # Start with a fresh pre-trained model
print(f"Starting gauge training with HPs from '{path_to_best_hyperparameters_yaml}' for up to {gauge_training_epochs} epochs.")
print(f"Using dataset for gauge training: {path_to_initial_yaml}")

results_gauge = model_gauge.train(
    data=path_to_initial_yaml,    # Uses 'dataset_initial.yaml' (initial train/val split)
    epochs=gauge_training_epochs,
    patience=gauge_patience,
    batch=8,                      # Adjust batch size based on your VRAM
    imgsz=1024,
    device=device,                # Use the auto-detected device
    val=True,                     # Crucial for early stopping & getting validation metrics
    amp=True,                     # Automatic Mixed Precision training
    cfg=path_to_best_hyperparameters_yaml, # Use HPs from tuning (or base HPs if tuning failed)
    optimizer='AdamW',            # This might be overridden by 'cfg' if specified there
    project='chest_xray_runs',    # Main project for all runs
    name='gauge_train_run',       # Specific name for this gauge training run
    exist_ok=True,                # Overwrite if previous run with same name exists
    cos_lr=True,                  # Use cosine learning rate scheduler (often in tuned HPs)
    close_mosaic=10,              # Disable mosaic in last N epochs (often in tuned HPs)
    freeze=freeze_N_layers,       # Apply freezing strategy
    multi_scale=True,             # Enable multi-scale training if part of your strategy
    verbose=True,
    # cls_pw=class_weights_initial.tolist() # Optionally pass initial class weights if not handled by HPs/autobalance
)

In [None]:
# Determine the optimal number of epochs for the final run based on gauge training
epochs_for_final_run = results_gauge.epoch + 1 # Default to total epochs completed if no early stopping or best_epoch not found
if hasattr(results_gauge, 'best_epoch') and results_gauge.best_epoch is not None and results_gauge.best_epoch > 0 :
    # best_epoch is 0-indexed epoch number of the best model found during validation
    epochs_for_final_run = results_gauge.best_epoch + 1
    print(f"Gauge training stopped early or best epoch identified. Optimal epoch for best.pt: {results_gauge.best_epoch}.")
else:
    print(f"Gauge training completed {results_gauge.epoch + 1} epochs.")
print(f"Derived optimal number of epochs for final training run: {epochs_for_final_run}")

# Path to the best model weights from the gauge training
path_to_gauge_model_weights = os.path.join('chest_xray_runs', 'gauge_train_run', 'weights', 'best.pt')

In [None]:
# --- Find optimal confidence threshold using the gauge model ---
optimal_threshold_glob = 0.25 # Default value
threshold_df_gauge = pd.DataFrame()

if os.path.exists(path_to_gauge_model_weights):
    print(f"\nLoading gauge model from {path_to_gauge_model_weights} for threshold finding...")
    model_for_thresholding = YOLO(path_to_gauge_model_weights)
    
    print("\n--- FINDING OPTIMAL CONFIDENCE THRESHOLD (using Gauge Model on initial val split) ---")
    # Using IoU @ 0.4 as per competition guidelines for Pascal VOC mAP
    conf_thresholds_search_gauge = np.linspace(0.01, 0.50, 20).tolist() # Search range for confidence
    threshold_results_list_gauge = []

    for conf_val_search in tqdm(conf_thresholds_search_gauge, desc="Evaluating confidence thresholds (Gauge Model)"):
        results_conf_search_gauge = safe_val(
            model_for_thresholding,
            data_path=path_to_initial_yaml, # Validate on the initial validation set
            split_name='val',
            imgsz_val=1024,
            batch_val=max(4, 8 // 2), # Adjust batch size for validation if needed
            device=device,
            conf_val=conf_val_search,
            iou_val=0.4,   # PASCAL VOC metric often uses IoU > 0.4 for mAP@0.4 (equivalent to map50 if iou_thres for map50 is 0.4)
                               # Note: Ultralytics map50 is typically IoU@0.5. For IoU@0.4, direct metrics might be needed or check val_results.
                               # We will use map50 (mAP@0.5) and F1 for optimization here as it's standard output.
                               # If competition uses IoU@0.4, ensure val uses correct IoU for reported mAP (e.g. val_results.box.map(iou_thres=0.4))
        )
        
        if results_conf_search_gauge and hasattr(results_conf_search_gauge, 'box'):
            # results_conf_search_gauge.box.map50 is mAP@0.5
            # results_conf_search_gauge.box.map is mAP@0.5:0.95
            mean_precision_gauge = results_conf_search_gauge.box.mp
            mean_recall_gauge = results_conf_search_gauge.box.mr
            f1_val_gauge = 0
            if mean_precision_gauge > 0 and mean_recall_gauge > 0:
                 f1_val_gauge = 2 * (mean_precision_gauge * mean_recall_gauge) / (mean_precision_gauge + mean_recall_gauge)
            elif hasattr(results_conf_search_gauge.box, 'f1') and len(results_conf_search_gauge.box.f1) > 0: # Fallback
                 f1_val_gauge = np.mean(results_conf_search_gauge.box.f1)

            threshold_results_list_gauge.append({
                'Conf': conf_val_search,
                'mAP50': results_conf_search_gauge.box.map50, # mAP at IoU=0.5
                'mAP50_95': results_conf_search_gauge.box.map, 
                'MeanPrecision': mean_precision_gauge,
                'MeanRecall': mean_recall_gauge,
                'MeanF1': f1_val_gauge
            })
        else:
            print(f"Warning: Error or no results evaluating gauge model with confidence threshold {conf_val_search:.3f}")

    if threshold_results_list_gauge:
        threshold_df_gauge = pd.DataFrame(threshold_results_list_gauge)
        print("\nGauge Model Confidence Threshold Analysis (Metrics @ IoU for mAP50 i.e. 0.5):")
        print(threshold_df_gauge.to_string(index=False))

        threshold_df_gauge.to_csv('confidence_threshold_analysis_gauge_model.csv', index=False)
        print("Gauge model confidence threshold analysis saved to confidence_threshold_analysis_gauge_model.csv")

        # Optimize for MeanF1, fallback to mAP50 if F1 is zero everywhere
        metric_to_optimize_for_threshold_gauge = 'MeanF1' 
        if not (threshold_df_gauge[metric_to_optimize_for_threshold_gauge] > 0).any():
            metric_to_optimize_for_threshold_gauge = 'mAP50' # mAP@0.5

        if metric_to_optimize_for_threshold_gauge in threshold_df_gauge.columns and \
           (threshold_df_gauge[metric_to_optimize_for_threshold_gauge] > 0).any() :
            optimal_idx_gauge = threshold_df_gauge[metric_to_optimize_for_threshold_gauge].idxmax()
            optimal_threshold_glob = threshold_df_gauge.loc[optimal_idx_gauge, 'Conf']
            optimal_metric_val_gauge = threshold_df_gauge.loc[optimal_idx_gauge, metric_to_optimize_for_threshold_gauge]
            print(f"\nOptimal confidence threshold from Gauge Model (based on {metric_to_optimize_for_threshold_gauge}): {optimal_threshold_glob:.4f} (Value: {optimal_metric_val_gauge:.4f})")
        else:
            print(f"Could not determine optimal threshold for gauge model based on {metric_to_optimize_for_threshold_gauge}, using default: {optimal_threshold_glob}")
        
        plt.figure(figsize=(12, 8))
        plt.plot(threshold_df_gauge['Conf'], threshold_df_gauge['MeanPrecision'], marker='o', linestyle='-', label='Mean Precision')
        plt.plot(threshold_df_gauge['Conf'], threshold_df_gauge['MeanRecall'], marker='s', linestyle='-', label='Mean Recall')
        plt.plot(threshold_df_gauge['Conf'], threshold_df_gauge['MeanF1'], marker='^', linestyle='-', label='Mean F1-Score')
        plt.plot(threshold_df_gauge['Conf'], threshold_df_gauge['mAP50'], marker='x', linestyle='-', label='mAP@0.5')
        plt.axvline(x=optimal_threshold_glob, color='grey', linestyle='--', label=f'Optimal Conf ({optimal_threshold_glob:.3f})')
        plt.xlabel('Confidence Threshold', fontsize=12)
        plt.ylabel('Metric Value', fontsize=12)
        plt.title('Gauge Model: Metrics vs. Confidence Threshold', fontsize=14)
        plt.legend(fontsize=10)
        plt.grid(True, linestyle=':', alpha=0.6)
        plt.tight_layout()
        plt.savefig('confidence_threshold_metrics_gauge_model.png', dpi=150)
        print("Gauge model confidence threshold analysis plot saved to confidence_threshold_metrics_gauge_model.png")
        plt.close()
    else:
        print("Gauge model confidence threshold evaluation failed to produce results. Using default threshold.")
else:
    print(f"Gauge model weights not found at {path_to_gauge_model_weights}. Skipping optimal threshold search, using default: {optimal_threshold_glob}")

In [None]:
# --- PREPARE COMBINED DATASET (TRAIN+VAL) FOR FINAL TRAINING ---
print("\n--- PREPARING COMBINED DATASET (TRAIN+VAL) FOR FINAL TRAINING ---")

# train_images and val_images are from In[9]
all_image_ids_final_combined_data = train_images + val_images
print(f"Total images for final combined training: {len(all_image_ids_final_combined_data)}")

final_combined_dataset_root_name = 'yolov8_dataset_final_combined' # Directory name for the combined dataset

# Create directories for the combined dataset
os.makedirs(f'{final_combined_dataset_root_name}/images/train_all', exist_ok=True)
os.makedirs(f'{final_combined_dataset_root_name}/labels/train_all', exist_ok=True)

print("\nConverting combined (train+val) annotations to YOLOv8 format for final training...")
# Re-use the convert_to_yolo_format function from In[10]
# train_df is the full training dataframe from In[4]
# original_dimensions is from In[5]
# PNG_HEIGHT, PNG_WIDTH are from In[6]
_, _, final_combined_label_class_counts = convert_to_yolo_format(
    train_df, 
    all_image_ids_final_combined_data,
    f'{final_combined_dataset_root_name}/labels/train_all', # Output to the new combined labels directory
    original_dimensions,
    (PNG_HEIGHT, PNG_WIDTH)
)

In [None]:
print("\nCopying all images (initial train + initial val) for final training...")
for img_id in tqdm(all_image_ids_final_combined_data, desc="Copying combined images for final training"):
    src_path = os.path.join(image_source_dir, f"{img_id}.png")
    dst_path = os.path.join(f'{final_combined_dataset_root_name}/images/train_all', f"{img_id}.png")
    if os.path.exists(src_path):
        shutil.copy(src_path, dst_path)
    else:
        print(f"Image not found during combined dataset copy: {src_path}")

# Create the YAML file for the final combined dataset
path_to_final_combined_yaml = 'dataset_final_combined.yaml'
data_yaml_final_combined_content = {
    'path': os.path.abspath(final_combined_dataset_root_name), # Absolute path to the combined dataset root
    'train': 'images/train_all',  # Path to training images (all of them)
    'val': 'images/train_all',    # For final training, val can point to train or be an empty dir if val=False in train()
    'nc': data_yaml_initial_content['nc'],
    'names': data_yaml_initial_content['names']
}
with open(path_to_final_combined_yaml, 'w') as f:
    yaml.dump(data_yaml_final_combined_content, f, sort_keys=False)
print(f"Created '{path_to_final_combined_yaml}' for the final combined training dataset.")

In [None]:
# --- STEP 3: FINAL TRAINING ON ALL DATA (TRAIN+VAL) ---
print("\n--- STEP 3: FINAL TRAINING ON ALL COMBINED (TRAIN+VAL) DATA ---")

model_final_submission = YOLO('yolov8m.pt')
print(f"Starting final training on all combined data with HPs from '{path_to_best_hyperparameters_yaml}'.")
print(f"Training for {epochs_for_final_run} epochs.")
print(f"Using combined dataset: {path_to_final_combined_yaml}")

results_final_submission = model_final_submission.train(
    data=path_to_final_combined_yaml,    # YAML file for the combined (train+val) dataset
    epochs=epochs_for_final_run,         # Number of epochs determined from gauge training
    patience=0,                          # No early stopping needed here, train for the full determined duration
    batch=8,                             # Consistent batch size (adjust if needed)
    imgsz=1024,
    device=device,                       # Use the auto-detected device
    val=False,                           # Set to False as we are training on all data and not validating against a separate set
                                         # If val=True, and YAML points val to train_all, metrics will be on training data.
    amp=True,                            # Automatic Mixed Precision training
    cfg=path_to_best_hyperparameters_yaml, # Use HPs from tuning (or base HPs)
    optimizer='AdamW',                   # This might be overridden by 'cfg'
    project='chest_xray_runs',           # Main project for all runs
    name='final_submission_model_run',   # Specific name for this final submission model training run
    exist_ok=True,                       # Overwrite if previous run with same name exists
    cos_lr=True,                         # Use cosine learning rate scheduler (often in tuned HPs)
    close_mosaic=10 if epochs_for_final_run > 10 else 0, # Disable mosaic in last N epochs (consistent with HPs)
    freeze=freeze_N_layers,              # Apply consistent freezing strategy
    multi_scale=True,                    # Enable multi-scale training (consistent with strategy)
    verbose=True,
)

# Path to the weights of the final model trained on all data
# If val=False during training, 'last.pt' is usually the one to use.
# If val=True (and val pointed to train data), 'best.pt' might also be considered, but 'last.pt' represents training for full epochs.
path_to_submission_model_weights = os.path.join('chest_xray_runs', 'final_submission_model_run', 'weights', 'last.pt')
print(f"Final model for submission (trained on all data) should be at: {path_to_submission_model_weights}")

# Verify if the path exists, if not, maybe 'best.pt' was saved due to some internal logic even with val=False
if not os.path.exists(path_to_submission_model_weights):
    alt_path = os.path.join('chest_xray_runs', 'final_submission_model_run', 'weights', 'best.pt')
    if os.path.exists(alt_path):
        path_to_submission_model_weights = alt_path
        print(f"'last.pt' not found, using 'best.pt' instead from final run: {path_to_submission_model_weights}")
    else:
        print(f"CRITICAL WARNING: Final model weights ('last.pt' or 'best.pt') not found at expected location.")

In [None]:
# Function to scale bounding boxes back to original DICOM dimensions
def scale_bbox_back(bbox_xyxy, png_dims_tuple, original_dims_tuple):
    """Scale bounding box coordinates from PNG dimensions back to original dimensions"""
    png_height, png_width = png_dims_tuple
    orig_height, orig_width = original_dims_tuple
    
    if png_width == 0 or png_height == 0: # Avoid division by zero if PNG dims are invalid
        print(f"Warning: Invalid PNG dimensions ({png_height}x{png_width}) for scaling back. Returning unscaled box.")
        return [int(round(c)) for c in bbox_xyxy]

    # Scale factors
    width_scale_factor = orig_width / png_width
    height_scale_factor = orig_height / png_height
    
    # Scale coordinates back to original dimensions
    x_min_orig = int(round(bbox_xyxy[0] * width_scale_factor))
    y_min_orig = int(round(bbox_xyxy[1] * height_scale_factor))
    x_max_orig = int(round(bbox_xyxy[2] * width_scale_factor))
    y_max_orig = int(round(bbox_xyxy[3] * height_scale_factor))
    
    # Ensure x_max > x_min and y_max > y_min after scaling and rounding
    # Also ensure coordinates are within original image bounds
    x_min_orig = max(0, min(x_min_orig, orig_width -1))
    y_min_orig = max(0, min(y_min_orig, orig_height -1))
    x_max_orig = max(0, min(x_max_orig, orig_width -1))
    y_max_orig = max(0, min(y_max_orig, orig_height -1))

    if x_max_orig <= x_min_orig: x_max_orig = x_min_orig + 1
    if y_max_orig <= y_min_orig: y_max_orig = y_min_orig + 1
        
    # Final check to ensure max is not beyond image boundary if min was already at boundary-1
    x_max_orig = min(x_max_orig, orig_width)
    y_max_orig = min(y_max_orig, orig_height)

    return [x_min_orig, y_min_orig, x_max_orig, y_max_orig]

In [None]:
# --- STEP 4: TEST SET INFERENCE AND SUBMISSION FILE GENERATION ---
print("\n--- TEST SET INFERENCE ---")

submission_predictions_list = [] 
test_images_processed_count = 0
test_no_detections_count = 0
submission_file_path = 'submission.csv' 

# path_to_submission_model_weights is from In[20] (final model trained on all data)
# optimal_threshold_glob is from In[18] (determined using gauge model)
# original_dimensions is from In[5] (loaded from img_size.csv)
# data_yaml_initial_content contains class names (from In[12])
# PNG_HEIGHT, PNG_WIDTH are from In[6]

model_for_submission = None
if os.path.exists(path_to_submission_model_weights):
    print(f"Loading final submission model from: {path_to_submission_model_weights}")
    model_for_submission = YOLO(path_to_submission_model_weights)
else:
    print(f"ERROR: Final submission model weights not found at {path_to_submission_model_weights}. Cannot perform inference.")

if model_for_submission is not None:
    print(f"Starting inference on test set using optimal confidence: {optimal_threshold_glob:.4f}")
    
    # Path to test images and sample submission file
    # root_path is from In[3]
    test_data_path = os.path.join(root_path, "test", "test") # Ensure this is the correct path to test PNGs
    sample_submission_df = pd.read_csv(os.path.join(root_path, "sample_submission.csv"))
    
    print(f"Found {len(sample_submission_df)} images in sample_submission.csv for test inference.")

    for img_id_test in tqdm(sample_submission_df['image_id'], desc="Running inference on test set"):
        current_img_path = os.path.join(test_data_path, f"{img_id_test}.png")
        
        if os.path.exists(current_img_path):
            test_images_processed_count += 1
            
            # Get original dimensions for this test image
            orig_dims_test = original_dimensions.get(img_id_test)
            if orig_dims_test is None:
                print(f"Warning: Original dimensions not found for test image {img_id_test}. Using PNG dimensions ({PNG_HEIGHT}x{PNG_WIDTH}) as fallback for scaling back.")
                orig_dims_test = (PNG_HEIGHT, PNG_WIDTH) # Fallback, though this should ideally not happen
            
            # Run inference
            # augment=True enables Test-Time Augmentation (TTA) which can improve results
            # iou for NMS, can be tuned. 0.45 is a reasonable default.
            inference_results = model_for_submission(
                current_img_path, 
                conf=optimal_threshold_glob, 
                iou=0.45,  # NMS IoU threshold
                imgsz=1024, 
                augment=True, # Enable TTA
                verbose=False # Reduce log spam during loop
            )
            
            # Results are usually a list, even for a single image
            results_for_image = inference_results[0] if isinstance(inference_results, list) else inference_results

            pred_boxes_xyxy_png = results_for_image.boxes.xyxy.cpu().numpy()  # Coords on 1024x1024 scale
            pred_scores = results_for_image.boxes.conf.cpu().numpy()
            pred_classes_indices = results_for_image.boxes.cls.cpu().numpy().astype(int)
            
            if len(pred_boxes_xyxy_png) == 0:
                test_no_detections_count += 1
                # Format for "No finding": class_id 14, score 1.0, and dummy box (0 0 1 1)
                submission_predictions_list.append(f"{img_id_test},14 1.0 0 0 1 1")
            else:
                img_prediction_strings_parts = []
                for box_coords_png, score_val, class_idx in zip(pred_boxes_xyxy_png, pred_scores, pred_classes_indices):
                    # Ensure predicted class index is valid
                    if class_idx >= data_yaml_initial_content['nc']: 
                        print(f"Warning: Predicted class index {class_idx} is out of bounds for image {img_id_test}. Skipping this box.")
                        continue

                    # Scale box back to original DICOM dimensions
                    scaled_box_orig_dims = scale_bbox_back(box_coords_png, (PNG_HEIGHT, PNG_WIDTH), orig_dims_test)
                    
                    # Format: "class_id score x_min y_min x_max y_max"
                    img_prediction_strings_parts.append(
                        f"{class_idx} {score_val:.6f} {scaled_box_orig_dims[0]} {scaled_box_orig_dims[1]} {scaled_box_orig_dims[2]} {scaled_box_orig_dims[3]}"
                    )
                
                if img_prediction_strings_parts: # If there were any valid predictions for this image
                    submission_predictions_list.append(f"{img_id_test},{' '.join(img_prediction_strings_parts)}")
                else: # If all predictions were filtered out (e.g., due to invalid class index)
                    test_no_detections_count += 1
                    submission_predictions_list.append(f"{img_id_test},14 1.0 0 0 1 1")

        else: # If test image file does not exist
            print(f"Test image not found: {current_img_path}. Adding as 'No finding' to submission.")
            # Per competition rules, if an image is missing, it might need a specific format.
            submission_predictions_list.append(f"{img_id_test},14 1.0 0 0 1 1") # Default for missing images

    print(f"\nProcessed {test_images_processed_count} test images.")
    print(f"Number of images with no detections (classified as 'No finding'): {test_no_detections_count}")
    if test_images_processed_count > 0:
        print(f"Percentage of 'No finding' classifications in processed images: {(test_no_detections_count/test_images_processed_count)*100:.2f}%")

    # Create the submission CSV file
    with open(submission_file_path, 'w') as f:
        f.write("image_id,PredictionString\n") # Header
        for pred_line in submission_predictions_list:
            f.write(f"{pred_line}\n")

    print(f"\nSubmission file created at: {submission_file_path}")
else:
    print("\nSkipping Test Set Inference: Final submission model not loaded.")

In [None]:
# --- VISUALIZE SOME TEST PREDICTIONS (Optional) ---
print("\n--- CREATING TEST PREDICTION VISUALIZATIONS (OPTIONAL) ---")

# model_for_submission is from In[22]
# optimal_threshold_glob from In[18]
# data_yaml_initial_content for class names from In[12]
# test_data_path for loading images from In[22]

if model_for_submission is not None and test_images_processed_count > 0:
    try:
        num_images_to_visualize = min(5, test_images_processed_count) # Visualize up to 5 images
        
        # Ensure we sample from image IDs that were actually processed and exist in sample_submission_df
        processed_img_ids_for_viz = [
            entry.split(',')[0] for entry in submission_predictions_list 
            if entry.split(',')[0] in sample_submission_df['image_id'].values
        ]
        if not processed_img_ids_for_viz : # Fallback if list is empty for some reason
             processed_img_ids_for_viz = sample_submission_df['image_id'].sample(n=num_images_to_visualize, replace=False).tolist() \
                                         if len(sample_submission_df) >= num_images_to_visualize else sample_submission_df['image_id'].tolist()


        sample_ids_for_viz = np.random.choice(
            processed_img_ids_for_viz, 
            size=min(num_images_to_visualize, len(processed_img_ids_for_viz)), # Ensure sample size isn't too large
            replace=False
        )
        
        print(f"Generating visualizations for {len(sample_ids_for_viz)} random test images...")
        
        for img_id_viz in sample_ids_for_viz:
            img_path_viz = os.path.join(test_data_path, f"{img_id_viz}.png")
            if not os.path.exists(img_path_viz):
                print(f"Skipping visualization for missing image: {img_path_viz}")
                continue
            
            # Load image using PIL
            pil_img = Image.open(img_path_viz).convert("RGB") # Ensure 3 channels for matplotlib if it's grayscale
            
            # Run inference again for this image (TTA can be off for single viz clarity)
            viz_results_list = model_for_submission(
                img_path_viz, 
                conf=optimal_threshold_glob, 
                iou=0.45, 
                imgsz=1024, 
                augment=False # TTA off for single visualization clarity
            )
            viz_results_img = viz_results_list[0] if isinstance(viz_results_list, list) else viz_results_list

            plt.figure(figsize=(12, 12))
            plt.imshow(np.array(pil_img)) # Display using matplotlib
            plt.title(f"Test Image: {img_id_viz} (Conf: {optimal_threshold_glob:.3f})", fontsize=14)
            ax = plt.gca() # Get current axes

            viz_boxes_png = viz_results_img.boxes.xyxy.cpu().numpy() # BBoxes on 1024x1024 scale
            viz_scores = viz_results_img.boxes.conf.cpu().numpy()
            viz_classes_indices = viz_results_img.boxes.cls.cpu().numpy().astype(int)
            
            if len(viz_boxes_png) == 0:
                ax.text(pil_img.width * 0.05, pil_img.height * 0.05, 
                        "Prediction: No finding", fontsize=14, color='red', 
                        bbox=dict(facecolor='white', alpha=0.7, edgecolor='red'))
            else:
                for box_png, score_viz, cls_idx_viz in zip(viz_boxes_png, viz_scores, viz_classes_indices):
                    x_min_png, y_min_png, x_max_png, y_max_png = box_png
                    
                    class_name_viz = data_yaml_initial_content['names'][cls_idx_viz] \
                                     if cls_idx_viz < len(data_yaml_initial_content['names']) \
                                     else f"Unknown Class {cls_idx_viz}"
                    
                    # Create a rectangle patch (coordinates are for the displayed image, which is 1024x1024)
                    rect = plt.Rectangle((x_min_png, y_min_png), x_max_png - x_min_png, y_max_png - y_min_png,
                                         fill=False, edgecolor=plt.cm.get_cmap('tab20', data_yaml_initial_content['nc'])(cls_idx_viz % 20), 
                                         linewidth=2.5)
                    ax.add_patch(rect)
                    
                    # Add label text
                    label_text = f"{class_name_viz}: {score_viz:.2f}"
                    ax.text(x_min_png, y_min_png - 10, label_text, 
                            color='white', fontsize=10, 
                            bbox=dict(facecolor=plt.cm.get_cmap('tab20', data_yaml_initial_content['nc'])(cls_idx_viz % 20), 
                                      alpha=0.8, edgecolor='none', pad=1))
            
            plt.axis('off')
            plt.tight_layout()
            viz_save_path = f"test_prediction_visualization_{img_id_viz}.png"
            plt.savefig(viz_save_path, dpi=150, bbox_inches='tight')
            print(f"Saved visualization: {viz_save_path}")
            plt.close() # Close plot to free memory
            
    except Exception as e_viz:
        print(f"Could not generate test visualizations: {e_viz}")
else:
    print("\nSkipping Test Visualizations: Final submission model not loaded or no test images processed.")

In [None]:
# In[24]:
# --- PER-CLASS METRICS AND CONFUSION MATRIX (from Gauge Model evaluation) ---
# This refers to the validation performed on the Gauge Model (Step 2), as it used a proper validation set.
# The 'val_results' needed here would be from the `safe_val` call during threshold finding for the gauge model,
# or from the `results_gauge.val()` if you ran it explicitly after gauge training.
# For simplicity, let's assume we want to re-run validation on the Gauge Model with the optimal threshold to get these plots.

print("\n--- ANALYSIS OF GAUGE MODEL ON INITIAL VALIDATION SET ---")
# path_to_gauge_model_weights from In[18]
# optimal_threshold_glob from In[18]
# path_to_initial_yaml from In[12] ('dataset_initial.yaml')

if os.path.exists(path_to_gauge_model_weights):
    print(f"Loading gauge model from {path_to_gauge_model_weights} for final analysis on validation set.")
    gauge_model_for_analysis = YOLO(path_to_gauge_model_weights)
    
    print(f"Running final validation on gauge model using Conf: {optimal_threshold_glob:.4f} and IoU: 0.4")
    # Note: plots=True in safe_val will generate confusion_matrix.png, P_curve.png, R_curve.png, PR_curve.png etc.
    # in the run directory (e.g., runs/detect/valX/)
    gauge_val_results_final_analysis = safe_val(
        gauge_model_for_analysis,
        data_path=path_to_initial_yaml,
        split_name='val',
        imgsz_val=1024,
        batch_val=max(4, 8 // 2),
        conf_val=optimal_threshold_glob, # Use the determined optimal threshold
        iou_val=0.4, # Competition relevant IoU for mAP
        project='chest_xray_runs',
        name='gauge_model_final_val_analysis' # Specific directory for these validation results
    )

    if gauge_val_results_final_analysis and hasattr(gauge_val_results_final_analysis, 'box'):
        print("\nGauge Model Final Validation Metrics (IoU @ 0.4):")
        # map50 here is actually mAP@0.4 because iou_val=0.4 was passed to safe_val,
        # and safe_val passes iou_val to model.val(iou=iou_val) which then influences how map50 is calculated if iou is primary.
        # However, ultralytics .val() uses a range for .map and typically .map50 is specifically IoU=0.5.
        # For specific IoU mAP: results.box.maps[iou_threshold_index] where iou_threshold_index corresponds to 0.4 in the iou_vector.
        # For simplicity, we'll report map50 (often IoU@0.5) and the mean P, R.
        print(f"mAP@0.5 (map50): {gauge_val_results_final_analysis.box.map50:.4f}")
        print(f"mAP@0.5:0.95 (map): {gauge_val_results_final_analysis.box.map:.4f}")
        mean_p_gauge_final = gauge_val_results_final_analysis.box.mp
        mean_r_gauge_final = gauge_val_results_final_analysis.box.mr
        print(f"Mean Precision: {mean_p_gauge_final:.4f}")
        print(f"Mean Recall: {mean_r_gauge_final:.4f}")
        if mean_p_gauge_final > 0 and mean_r_gauge_final > 0:
            mean_f1_gauge_final = 2 * (mean_p_gauge_final * mean_r_gauge_final) / (mean_p_gauge_final + mean_r_gauge_final)
            print(f"Mean F1-Score (calculated): {mean_f1_gauge_final:.4f}")
        
        if hasattr(gauge_val_results_final_analysis, 'save_dir'):
            print(f"Validation plots (like confusion matrix) for Gauge Model saved in: {gauge_val_results_final_analysis.save_dir}")
            # Example: cm_path = os.path.join(gauge_val_results_final_analysis.save_dir, 'confusion_matrix.png')
        
        # Per-class metrics
        class_names_viz = data_yaml_initial_content['names']
        metrics_data_gauge = []
        print("\nGauge Model Per-Class Metrics (from its validation run, typically IoU@0.5 for AP50):")
        for i in range(len(class_names_viz)):
            try:
                # class_result(i) returns (p[i], r[i], ap50[i], ap[i])
                p_cls, r_cls, ap50_cls, _ = gauge_val_results_final_analysis.box.class_result(i)
                metrics_data_gauge.append({
                    'Class ID': i, 'Class Name': class_names_viz[i],
                    'AP@0.5': float(ap50_cls), # AP50 is mAP for the class at IoU 0.5
                    'Precision': float(p_cls), 'Recall': float(r_cls)
                })
            except IndexError: # Should not happen if nc is correct
                 metrics_data_gauge.append({'Class ID': i, 'Class Name': class_names_viz[i], 'AP@0.5': 0.0, 'Precision': 0.0, 'Recall': 0.0})

        metrics_df_gauge = pd.DataFrame(metrics_data_gauge)
        print(metrics_df_gauge.to_string(index=False))
        metrics_df_gauge.to_csv('per_class_metrics_gauge_model.csv', index=False)
        print("Per-class metrics for gauge model saved to per_class_metrics_gauge_model.csv")

    else:
        print("Could not retrieve final validation metrics for the Gauge Model.")
else:
    print("Gauge model weights not found. Skipping final analysis of gauge model.")

In [None]:
print("\n--- FULL PIPELINE EXECUTION COMPLETE ---")
if os.path.exists(path_to_submission_model_weights):
    print(f"Final model for submission is located at: {path_to_submission_model_weights}")
    print(f"Optimal confidence threshold used for test inference: {optimal_threshold_glob:.4f}")
    print(f"Submission file should be at: {submission_file_path}")
else:
    print("Pipeline completed, but the final submission model might not have been generated successfully.")