# Phase 1: Stable Baseline

## Goal
The primary goal of this notebook is to execute a complete, end-to-end training and inference pipeline without encountering the catastrophic CUDA OOM errors from the previous session. This serves as a sanity check for the environment's stability.

## Strategy
To ensure stability, we are using smaller, more memory-efficient models:
1.  **Classifier:** `EfficientNet-B2` (a step down from the unstable `EfficientNet-B5`).
2.  **Detector:** `YOLOv5s` (which was generally stable).

The entire process will be run on a **single fold (fold 0)** for a limited number of epochs to establish a baseline and confirm the workflow is viable.

## Workflow
1.  **Setup:** Configure paths and parameters.
2.  **Classifier Training:** Train `EfficientNet-B2` for 5 epochs, using fold 0 as the held-out validation fold.
3.  **Detector Training:** Train `YOLOv5s` on fold 0 for 10 epochs.
4.  **Inference:** Run both trained models on the test set.
5.  **Submission:** Generate a `submission.csv` file.

**Success Criterion:** The notebook completes all cells without a CUDA OOM error.

In [1]:
# --- Part 1: Setup & Classifier Training ---

import os
import pandas as pd
import numpy as np
import torch
import timm
import gc
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import average_precision_score
import cv2
from tqdm import tqdm

# --- Configuration ---
# All values below are module-level globals read by the dataset, the loaders
# and the training loop further down in this cell.

# Fold held out for validation; every other fold is used for training.
FOLD = 0
# Directory the dataset reads `<image_id>.png` files from.
TRAIN_IMAGE_DIR = 'train_png_3ch/'
# Architecture name handed to `timm.create_model`.
MODEL_NAME = 'efficientnet_b2'
# Images are resized to IMG_SIZE x IMG_SIZE pixels before being fed to the model.
IMG_SIZE = 384
# Batch size used by both the training and the validation DataLoader.
BATCH_SIZE = 16
# Number of full passes over the training split.
EPOCHS = 5
# Fall back to CPU when no CUDA device is visible to PyTorch.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Label column names in the folds CSV; their order defines the model's output order.
CLASSES = ['Negative for Pneumonia', 'Typical Appearance', 'Indeterminate Appearance', 'Atypical Appearance']
NUM_CLASSES = len(CLASSES)
# File the best-scoring classifier weights (state_dict) are written to.
CLASSIFIER_MODEL_PATH = f'stable_classifier_fold{FOLD}.pth'

print(f'Using device: {DEVICE}')
print(f'Training classifier: {MODEL_NAME} on fold {FOLD} for {EPOCHS} epochs')

# --- Data Loading ---
# The CSV is expected to provide a `fold` column plus the `image_id` and
# CLASSES columns the dataset class reads.
df = pd.read_csv('df_train_folds.csv')
# Rows whose `fold` differs from FOLD form the training split ...
df_train = df[df['fold'] != FOLD].reset_index(drop=True)
# ... and rows whose `fold` equals FOLD form the validation split.
df_valid = df[df['fold'] == FOLD].reset_index(drop=True)

# --- Dataset ---
class CovidStudyDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs for the classifier.

    Each row of ``df`` names one PNG through its ``image_id`` column and
    carries one label column per entry of the module-level ``CLASSES`` list.

    Args:
        df: Frame with an ``image_id`` column and the ``CLASSES`` columns.
        image_dir: Directory containing the ``<image_id>.png`` files.
    """

    def __init__(self, df: pd.DataFrame, image_dir: str) -> None:
        self.df = df
        self.image_dir = image_dir
        # Extract the label matrix once so __getitem__ only indexes an array.
        self.labels = self.df[CLASSES].values

    def __len__(self) -> int:
        """Return the number of rows (samples) in the frame."""
        return len(self.df)

    def __getitem__(self, idx: int):
        """Load one image, resize and rescale it, and return it with its label.

        Returns:
            ``(image, label)`` where ``image`` is a float32 tensor in
            channels-first layout, resized to ``IMG_SIZE`` x ``IMG_SIZE`` with
            pixel values divided by 255, and ``label`` is a float32 tensor
            holding the row's ``CLASSES`` values.

        Raises:
            FileNotFoundError: If no file exists at the expected image path.
            ValueError: If the file exists but OpenCV cannot decode it.
        """
        row = self.df.iloc[idx]
        image_path = os.path.join(self.image_dir, row['image_id'] + '.png')

        # cv2.imread signals failure by returning None instead of raising.
        # Without these checks the failure surfaces later as an opaque
        # cv2.resize assertion (often inside a DataLoader worker), with no
        # hint about which file was at fault.
        if not os.path.isfile(image_path):
            raise FileNotFoundError(f'Image file not found: {image_path}')
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f'Could not decode image file: {image_path}')

        # NOTE(review): cv2.imread yields channels in BGR order; this is kept
        # as-is to preserve the original behavior -- confirm whether the
        # 3-channel PNGs need an RGB conversion for the pretrained backbone.
        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        image = image.astype(np.float32) / 255.0
        # HWC -> CHW, the layout the model expects.
        image = image.transpose(2, 0, 1)
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        return torch.from_numpy(image), label

# Build the datasets and loaders for the training and validation splits.
train_dataset = CovidStudyDataset(df_train, TRAIN_IMAGE_DIR)
valid_dataset = CovidStudyDataset(df_valid, TRAIN_IMAGE_DIR)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

# --- Training Loop ---
model = timm.create_model(MODEL_NAME, pretrained=True, num_classes=NUM_CLASSES)
model.to(DEVICE)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
# Multi-label objective: one independent sigmoid/BCE term per class column.
criterion = torch.nn.BCEWithLogitsLoss()
# Mixed precision is only meaningful on CUDA. Gating it explicitly keeps the
# CPU fallback of DEVICE free of "CUDA is not available" AMP warnings; with
# enabled=False the scaler and autocast calls below become pass-throughs.
use_amp = DEVICE == 'cuda'
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

best_score = 0.0
for epoch in range(EPOCHS):
    # ---- train for one epoch ----
    model.train()
    train_loss = 0
    for images, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/{EPOCHS} [Train]'):
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        with torch.cuda.amp.autocast(enabled=use_amp):
            outputs = model(images)
            loss = criterion(outputs, labels)
        # Scale the loss before backward so fp16 gradients do not underflow;
        # scaler.step() unscales them and skips the step on inf/NaN gradients.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        train_loss += loss.item()

    # ---- validate on the held-out fold ----
    model.eval()
    val_preds = []
    val_labels = []
    with torch.no_grad():
        for images, labels in tqdm(valid_loader, desc=f'Epoch {epoch+1}/{EPOCHS} [Valid]'):
            images = images.to(DEVICE)
            with torch.cuda.amp.autocast(enabled=use_amp):
                outputs = model(images)
            # Move predictions to host memory right away so validation does
            # not accumulate tensors on the GPU.
            val_preds.append(torch.sigmoid(outputs).cpu().numpy())
            val_labels.append(labels.cpu().numpy())

    val_preds = np.concatenate(val_preds)
    val_labels = np.concatenate(val_labels)
    # Macro-averaged average precision across the label columns.
    score = average_precision_score(val_labels, val_preds, average='macro')

    print(f'Epoch {epoch+1}, Train Loss: {train_loss/len(train_loader):.4f}, Val mAP: {score:.4f}')

    # Keep only the checkpoint with the best validation score so far.
    if score > best_score:
        best_score = score
        torch.save(model.state_dict(), CLASSIFIER_MODEL_PATH)
        print(f'New best score: {best_score:.4f}. Model saved to {CLASSIFIER_MODEL_PATH}')

print('\nClassifier training complete.')

# --- Release GPU memory before the next stage ---
# empty_cache() can only return memory that no live Python object references.
# Besides the model, the optimizer (AdamW keeps per-parameter state on the
# model's device), the scaler, and the tensors of the last processed batch all
# pin GPU memory, so they have to be dropped too.
# The loop variables are rebound instead of deleted so this stays safe even if
# a loop body never executed.
images = labels = outputs = loss = None
del model, optimizer, scaler, train_loader, valid_loader, train_dataset, valid_dataset
gc.collect()
torch.cuda.empty_cache()

  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda
Training classifier: efficientnet_b2 on fold 0 for 5 epochs


AcceleratorError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
