In [1]:
# Install required packages
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q ultralytics
!pip install -q pandas numpy matplotlib seaborn Pillow opencv-python scikit-learn tqdm albumentations
!pip install -q pycocotools

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m
[?25h

# 1. Import Libraries

In [2]:
import os
import pandas as pd
import numpy as np
from PIL import Image
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt
from tqdm import tqdm

# 2. Data Exploration

In [3]:
BASE_PATH = "/kaggle/input/m-2-big-data-dlia-project-2025-clean-dataset"

TRAIN_CSV = f"{BASE_PATH}/train.csv"
ID_MAP   = f"{BASE_PATH}/ID_to_Image_Mapping.csv"
TRAIN_IMG_DIR = f"{BASE_PATH}/train"
TEST_IMG_DIR  = f"{BASE_PATH}/test"

In [4]:
train_df = pd.read_csv(TRAIN_CSV)
id_map = pd.read_csv(ID_MAP)

In [5]:
import pandas as pd
labels = sorted(train_df['label'].unique())
label2id = {label: i for i, label in enumerate(labels)}
id2label = {i: label for label, i in label2id.items()}

num_classes = len(label2id) + 1
label2id, id2label, num_classes

({'Atelectasis': 0,
  'Cardiomegaly': 1,
  'Effusion': 2,
  'Infiltrate': 3,
  'Mass': 4,
  'Nodule': 5,
  'Pneumonia': 6,
  'Pneumothorax': 7},
 {0: 'Atelectasis',
  1: 'Cardiomegaly',
  2: 'Effusion',
  3: 'Infiltrate',
  4: 'Mass',
  5: 'Nodule',
  6: 'Pneumonia',
  7: 'Pneumothorax'},
 9)

In [6]:
class ChestXRayDataset(Dataset):
  def __init__(self, df, image_dir, transforms=None):
    self.df = df
    self.image_dir = image_dir
    self.transforms = transforms
    self.images = df['image_id'].unique()

  def __len__(self):
    return len(self.images)

  def __getitem__(self, idx):
    img_name = self.images[idx]
    records = self.df[self.df['image_id'] == img_name]
    img_path = os.path.join(self.image_dir, img_name)
    img = Image.open(img_path).convert('RGB')

    boxes = []
    labels = []
    for _, row in records.iterrows():
      x_min = row['x_min']
      y_min = row['y_min']
      x_max = row['x_max']
      y_max = row['y_max']
      boxes.append([x_min, y_min, x_max, y_max])
      labels.append(label2id[row['label']])

    target = {
        'boxes': torch.as_tensor(boxes, dtype=torch.float32),
        'labels': torch.as_tensor(labels, dtype=torch.int64),
    }

    image = torchvision.transforms.ToTensor()(img) # Corrected typo and variable
    return image, target

In [7]:
image_dir = TRAIN_IMG_DIR # This should be the directory containing image files, not the CSV file

transform = torchvision.transforms.Compose([
torchvision.transforms.Resize((1024, 1024)),
torchvision.transforms.ToTensor()
])

dataset = ChestXRayDataset(train_df, image_dir, transform)

def collate_fn(batch):
    return tuple(zip(*batch))

train_loader = DataLoader(
    dataset,
    batch_size=4,             # detection models prefer small batches
    shuffle=True,
    num_workers=8,            # try 4–8
    pin_memory=True,
    persistent_workers=True,
    collate_fn=collate_fn     # required for detection
)



In [8]:
model = fasterrcnn_resnet50_fpn(pretrained=True)

in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

model = model.cuda() if torch.cuda.is_available() else model.cpu()



Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


100%|██████████| 160M/160M [00:00<00:00, 219MB/s]


In [9]:
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
num_epochs = 2

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

scaler = torch.cuda.amp.GradScaler()  # AMP

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0

    for images, targets in tqdm(train_loader, leave=False):
        images = [img.to(device, non_blocking=True) for img in images]
        targets = [
            {k: v.to(device, non_blocking=True) for k, v in t.items()}
            for t in targets
        ]

        optimizer.zero_grad(set_to_none=True)

        with torch.cuda.amp.autocast():
            loss_dict = model(images, targets)
            loss = sum(loss for loss in loss_dict.values())

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        epoch_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs} | Loss: {epoch_loss:.4f}")

  scaler = torch.cuda.amp.GradScaler()  # AMP
  with torch.cuda.amp.autocast():
                                                 

Epoch 1/2 | Loss: 48.4482


                                                 

Epoch 2/2 | Loss: 43.0143


