# Phase 3: Final Submission

This notebook implements the final inference and submission pipeline. Based on the analysis in `03_planning_and_cv.ipynb`, the YOLOX models trained for only 2 epochs are unusable. Therefore, this notebook will use a hybrid strategy:

1.  **Detector:** Use the single, stronger **YOLOv5s model** trained on fold 0 from Phase 1 (`yolov5_runs/train/baseline_fold0/weights/best.pt`).
2.  **Classifier:** Use the **5-fold ensembled EfficientNet-B5 models**, which were trained successfully and represent our best classifier asset.

## Plan

1.  **Setup & Configuration:**
    *   Import libraries.
    *   Define paths to the 5 classifier models and the single YOLOv5 detector model.

2.  **Load Metadata:**
    *   Load `sample_submission.csv` and prepare test dataframes.
    *   Get image dimensions for scaling bounding boxes.

3.  **Classifier Inference:**
    *   Run inference on the test set with all 5 `EfficientNet-B5` models.
    *   Average the predictions to get the ensembled study-level probabilities.

4.  **Detector (YOLOv5) Inference:**
    *   Load the trained YOLOv5s model using `torch.hub`.
    *   Run batched inference on the test images.
    *   Collect all bounding box predictions.

5.  **Submission Formatting:**
    *   Combine the study-level and image-level predictions into the required format.
    *   Save the final predictions to `submission.csv`.

In [1]:
# --- 1. Setup & Configuration ---

import os
import sys
import glob
import json
import pandas as pd
import numpy as np
import cv2
import torch
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast
from tqdm.auto import tqdm

# --- Configuration ---
DATA_DIR = './'
TEST_DIR = 'test/'
TEST_PNG_3CH_DIR = 'test_png_3ch/' # For classifier
TEST_PNG_1CH_DIR = 'test_png/'     # For YOLOv5 detector

N_SPLITS = 5
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Classifier Config
IMG_SIZE_CLS = 512
MODEL_NAME_CLS = 'tf_efficientnet_b5_ns'
NUM_CLASSES_CLS = 4
CLASSIFIER_MODEL_PATHS = [f'classifier_fold{i}_best.pth' for i in range(N_SPLITS)]
BATCH_SIZE_CLS = 32

# Detector Config (YOLOv5)
IMG_SIZE_DET = 640
DETECTOR_MODEL_PATH = 'yolov5_runs/train/baseline_fold0/weights/best.pt'
BATCH_SIZE_DET = 8 # Reduced from 16 to prevent OOM
CONF_TH = 0.001 # YOLOv5 confidence threshold

# Add yolov5 to path
sys.path.append('yolov5')

print(f"Setup complete. Using device: {DEVICE}")

Setup complete. Using device: cuda


In [2]:
# --- 2. Load Metadata ---

df_sub = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

# Create image-level and study-level test dataframes
df_sub['image_id'] = df_sub['id'].apply(lambda x: x.replace('_image', '') if '_image' in x else '')
df_sub['StudyInstanceUID'] = df_sub['id'].apply(lambda x: x.replace('_study', '') if '_study' in x else '')

df_test_img = df_sub[df_sub['image_id'] != ''].copy()
df_test_study = df_sub[df_sub['StudyInstanceUID'] != ''].copy()

# Get image paths and merge study info
test_png_paths = glob.glob(f'{TEST_PNG_3CH_DIR}/*.png')
image_id_to_path = {os.path.basename(p).replace('.png', ''): p for p in test_png_paths}
df_test_img['filepath'] = df_test_img['image_id'].map(image_id_to_path)

# Get StudyInstanceUID for each image_id from the test folder structure
test_dcm_files = glob.glob(f'{TEST_DIR}/*/*/*.dcm')
image_id_to_study_uid = {
    os.path.basename(p).replace('.dcm', ''): p.split('/')[-3] for p in test_dcm_files
}
df_test_img['StudyInstanceUID'] = df_test_img['image_id'].map(image_id_to_study_uid)

print(f"Found {len(df_test_img)} test images.")
print(f"Found {len(df_test_study)} test studies.")
display(df_test_img.head())

Found 638 test images.
Found 606 test studies.


Unnamed: 0,id,PredictionString,image_id,StudyInstanceUID,filepath
606,004cbd797cd1_image,none 1 0 0 1 1,004cbd797cd1,30e45593ba08,test_png_3ch/004cbd797cd1.png
607,008ca392cff3_image,none 1 0 0 1 1,008ca392cff3,39a80a14bfda,test_png_3ch/008ca392cff3.png
608,00b8180bd3a8_image,none 1 0 0 1 1,00b8180bd3a8,dadc2e3842e5,test_png_3ch/00b8180bd3a8.png
609,00e3a7e91a34_image,none 1 0 0 1 1,00e3a7e91a34,74ba8f2badcb,test_png_3ch/00e3a7e91a34.png
610,0124f624dacb_image,none 1 0 0 1 1,0124f624dacb,0acf45b01bdf,test_png_3ch/0124f624dacb.png


In [3]:
# --- 3. Classifier Inference ---

# --- Dataset and Transforms (adapted from training) ---
class SIIMCVClassifierDataset(Dataset):
    def __init__(self, df, img_dir, transform=None):
        self.df = df
        self.img_dir = img_dir
        self.image_ids = df['image_id'].values
        self.filepaths = df['filepath'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        image_path = self.filepaths[idx]
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']

        return image, self.image_ids[idx]

def get_cls_test_transforms(img_size):
    return A.Compose([
        A.Resize(img_size, img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2()
    ])

# --- Inference Function ---
def run_classifier_inference(df_test, model_paths):
    test_dataset = SIIMCVClassifierDataset(df_test, TEST_PNG_3CH_DIR, transform=get_cls_test_transforms(IMG_SIZE_CLS))
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE_CLS, shuffle=False, num_workers=4, pin_memory=True)
    
    all_fold_logits = []
    image_ids_ordered = []

    for i, model_path in enumerate(model_paths):
        print(f"--- Running Inference with Fold {i} Model --- ({model_path})")
        model = timm.create_model(MODEL_NAME_CLS, pretrained=False, num_classes=NUM_CLASSES_CLS).to(DEVICE)
        model.load_state_dict(torch.load(model_path, map_location=DEVICE))
        model.eval()

        fold_logits = []
        current_image_ids = []
        with torch.no_grad():
            for images, img_ids in tqdm(test_loader, desc=f'Fold {i} Inference'):
                images = images.to(DEVICE, non_blocking=True)
                with autocast():
                    outputs = model(images)
                fold_logits.append(outputs.cpu().numpy())
                if i == 0: # Only need to get the order once
                    current_image_ids.extend(img_ids)
        
        all_fold_logits.append(np.concatenate(fold_logits))
        if i == 0:
            image_ids_ordered = current_image_ids
            
    # Average logits across folds
    avg_logits = np.mean(all_fold_logits, axis=0)
    
    # Create a dataframe with results
    df_preds = pd.DataFrame(avg_logits, columns=['Negative for Pneumonia', 'Typical Appearance', 'Indeterminate Appearance', 'Atypical Appearance'])
    df_preds['image_id'] = image_ids_ordered
    
    return df_preds

print("Classifier inference functions defined.")

Classifier inference functions defined.


In [4]:
# --- 4. Detector (YOLOv5) Inference ---

def run_detector_inference(df_test, model_path):
    """Runs inference using a trained YOLOv5 model with manual batching to prevent OOM."""
    # Create a list of image paths for the 1-channel PNGs, which YOLOv5 was trained on.
    image_paths = [os.path.join(TEST_PNG_1CH_DIR, f'{img_id}.png') for img_id in df_test['image_id'].values]
    
    print("Loading YOLOv5 model...")
    model = torch.hub.load(
        'yolov5', 
        'custom', 
        path=model_path, 
        source='local',
        force_reload=True
    )
    model.to(DEVICE)
    model.conf = CONF_TH
    model.iou = 0.5
    model.agnostic = True
    
    print(f"Running YOLOv5 inference on {len(image_paths)} images...")
    all_preds = {}
    
    # Manually batch the inference to avoid OOM error
    # The torch.hub model loads all images into memory at once if given the full list.
    for i in tqdm(range(0, len(image_paths), BATCH_SIZE_DET), desc="YOLOv5 Batched Inference"):
        batch_paths = image_paths[i : i + BATCH_SIZE_DET]
        
        results = model(batch_paths, size=IMG_SIZE_DET)
        
        preds_dfs = results.pandas().xyxy
        
        # The results.files attribute holds the original file paths for the batch.
        for j, pred_df in enumerate(preds_dfs):
            image_id = os.path.basename(results.files[j]).replace('.png', '')
            all_preds[image_id] = pred_df[['xmin', 'ymin', 'xmax', 'ymax', 'confidence']].values
            
    return all_preds

print("Detector (YOLOv5) inference functions defined.")

Detector (YOLOv5) inference functions defined.


In [5]:
# --- 5. Submission Formatting ---

# Map model output class names to the required submission format class names
CLASS_NAME_MAP = {
    'Negative for Pneumonia': 'negative',
    'Typical Appearance': 'typical',
    'Indeterminate Appearance': 'indeterminate',
    'Atypical Appearance': 'atypical'
}

def format_study_prediction(df_study_preds):
    """Formats the study-level predictions into the required submission string."""
    # Find the class with the highest probability
    pred_class_model = df_study_preds.idxmax()
    # Map to the correct submission class name
    pred_class_submission = CLASS_NAME_MAP[pred_class_model]
    # Format: 'class_name 1 0 0 1 1'
    return f"{pred_class_submission} 1 0 0 1 1"

def format_image_prediction(preds):
    """Formats the image-level predictions from YOLOv5 into the required submission string."""
    # preds is a numpy array of [xmin, ymin, xmax, ymax, confidence]
    if preds is None or len(preds) == 0:
        return "none 1 0 0 1 1"
        
    pred_strings = []
    for p in preds:
        x_min, y_min, x_max, y_max, score = p
        # Ensure coordinates are positive
        x_min, y_min, x_max, y_max = max(0, x_min), max(0, y_min), max(0, x_max), max(0, y_max)
        pred_strings.append(f"opacity {score:.4f} {x_min:.2f} {y_min:.2f} {x_max:.2f} {y_max:.2f}")
    
    return ' '.join(pred_strings)

print("Submission formatting functions defined.")

Submission formatting functions defined.


In [7]:
# --- 6. Main Execution Block ---

# --- Run Classifier Inference ---
print("Starting classifier inference...")
df_cls_preds = run_classifier_inference(df_test_img, CLASSIFIER_MODEL_PATHS)
print("Classifier inference complete.")

# --- Aggregate to Study Level ---
print("Aggregating classifier predictions to study level...")
df_cls_preds = df_cls_preds.merge(df_test_img[['image_id', 'StudyInstanceUID']], on='image_id', how='left')
# Using max for Typical/Atypical, mean for Indeterminate, min for Negative
df_study_preds = df_cls_preds.groupby('StudyInstanceUID').agg({
    'Negative for Pneumonia': 'min',
    'Typical Appearance': 'max',
    'Indeterminate Appearance': 'mean',
    'Atypical Appearance': 'max'
}).reset_index()
print("Study-level aggregation complete.")

# --- Run Detector Inference ---
print("Starting detector inference...")
all_detector_preds = run_detector_inference(df_test_img, DETECTOR_MODEL_PATH)
print("Detector inference complete.")

# --- Generate Submission ---
print("Generating final submission file...")
submission_rows = []

# Image-level predictions
for image_id in tqdm(df_test_img['image_id'].values, desc="Processing Images"):
    preds = all_detector_preds.get(image_id)
    pred_str = format_image_prediction(preds)
    submission_rows.append({'id': f'{image_id}_image', 'PredictionString': pred_str})

# Study-level predictions
for _, row in tqdm(df_study_preds.iterrows(), total=len(df_study_preds), desc="Processing Studies"):
    study_id = row['StudyInstanceUID']
    pred_str = format_study_prediction(row[['Negative for Pneumonia', 'Typical Appearance', 'Indeterminate Appearance', 'Atypical Appearance']])
    submission_rows.append({'id': f'{study_id}_study', 'PredictionString': pred_str})

df_submission = pd.DataFrame(submission_rows)

# --- Check for duplicate IDs before reindexing ---
if df_submission['id'].duplicated().any():
    print("!!! WARNING: Found duplicate IDs in generated submission rows before reindexing!")
    display(df_submission[df_submission['id'].duplicated(keep=False)])

# Ensure the submission is in the same order as sample_submission.csv
df_submission = df_submission.set_index('id')
df_submission = df_submission.reindex(df_sub['id']).reset_index()

# --- Final Check for NaNs and Fallback ---
print("Checking for any null values in the final submission dataframe...")
if df_submission['PredictionString'].isnull().any():
    print("!!! WARNING: Found null values in PredictionString column! This will cause a submission error.")
    print("Rows with null values:")
    display(df_submission[df_submission['PredictionString'].isnull()])
    print("Filling NaNs with 'none 1 0 0 1 1' as a fallback.")
    df_submission['PredictionString'].fillna('none 1 0 0 1 1', inplace=True)
else:
    print("No null values found. The submission dataframe appears to be complete.")

df_submission.to_csv('submission.csv', index=False)

print("Submission file 'submission.csv' created successfully!")
display(df_submission.head())

In [6]:
# --- 7. Final Submission Sanity Check ---
import pandas as pd

print("--- Reading and Verifying Generated submission.csv ---")
df_check = pd.read_csv('submission.csv')
df_sample = pd.read_csv('sample_submission.csv')

print("\nMy Submission Info:")
df_check.info()

print("\nSample Submission Info:")
df_sample.info()

print(f"\nMy submission has {len(df_check)} rows.")
print(f"Sample submission has {len(df_sample)} rows.")

ids_match = df_check['id'].equals(df_sample['id'])
print(f"\nAre the 'id' columns identical in content and order? {ids_match}")

print("\nMy Submission Head:")
display(df_check.head())

print("\nMy Submission Tail:")
display(df_check.tail())

print("\nSample Submission Head:")
display(df_sample.head())

--- Reading and Verifying Generated submission.csv ---

My Submission Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1244 entries, 0 to 1243
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   id                1244 non-null   object
 1   PredictionString  1244 non-null   object
dtypes: object(2)
memory usage: 19.6+ KB

Sample Submission Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1244 entries, 0 to 1243
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   id                1244 non-null   object
 1   PredictionString  1244 non-null   object
dtypes: object(2)
memory usage: 19.6+ KB

My submission has 1244 rows.
Sample submission has 1244 rows.

Are the 'id' columns identical in content and order? True

My Submission Head:


Unnamed: 0,id,PredictionString
0,000c9c05fd14_study,typical 1 0 0 1 1
1,00c74279c5b7_study,typical 1 0 0 1 1
2,00ccd633fb0e_study,negative 1 0 0 1 1
3,00e936c58da6_study,typical 1 0 0 1 1
4,01206a422293_study,typical 1 0 0 1 1



My Submission Tail:


Unnamed: 0,id,PredictionString
1239,ff03d1d41968_image,none 1 0 0 1 1
1240,ff0743bee789_image,none 1 0 0 1 1
1241,ffab0f8f27f0_image,none 1 0 0 1 1
1242,ffbeafe30b77_image,none 1 0 0 1 1
1243,ffe942c8655f_image,none 1 0 0 1 1



Sample Submission Head:


Unnamed: 0,id,PredictionString
0,000c9c05fd14_study,negative 1 0 0 1 1
1,00c74279c5b7_study,negative 1 0 0 1 1
2,00ccd633fb0e_study,negative 1 0 0 1 1
3,00e936c58da6_study,negative 1 0 0 1 1
4,01206a422293_study,negative 1 0 0 1 1
