In [None]:
# Mount Google Drive at /content/drive; later cells read the Kaggle API token
# and the model checkpoints from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Copy the Kaggle API token from Drive into ~/.kaggle for the `kaggle` CLI used below.
!mkdir -p ~/.kaggle
!cp /content/drive/MyDrive/colab/kaggle.json ~/.kaggle/
# Make the token readable/writable by the owner only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the RSNA Pneumonia Detection Challenge archive with the Kaggle CLI.
# The next cell expects `rsna-pneumonia-detection-challenge.zip` in the working directory.
!kaggle competitions download -c rsna-pneumonia-detection-challenge

In [None]:
!unzip rsna-pneumonia-detection-challenge.zip -d /content/rsna_pneumonia

In [None]:
!pip install pydicom

In [None]:
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
import pydicom
from tqdm.notebook import tqdm

In [None]:
# Dataset locations, relative to the notebook's working directory.
# DATA_PATH stays a plain string with a trailing slash because other cells
# build file names from it by string concatenation.
DATA_PATH = 'rsna_pneumonia/'
_data_root = Path(DATA_PATH)
SAVE_PATH = _data_root / 'processed'             # preprocessed .npy output
ROOT_PATH = _data_root / 'stage_2_train_images'  # source DICOM files

In [None]:
# Load the training labels and keep only the first row for each patientId,
# so every patient appears exactly once.
labels = (
    pd.read_csv(DATA_PATH + 'stage_2_train_labels.csv')
    .drop_duplicates(subset='patientId')
)

In [None]:
# Convert every DICOM listed in `labels` into a 224x224 float16 .npy file stored under
# SAVE_PATH/<train|val>/<label>/, and accumulate per-image mean pixel value and mean
# squared pixel value over the training split in `sums` / `sums_squared`.
sums, sums_squared = 0, 0
normalizer = 224*224  # number of pixels per resized image; loop-invariant, so set once
for c, (patient_id, label) in enumerate(tqdm(zip(labels.patientId, labels.Target), total=len(labels))):
  dcm_path = (ROOT_PATH/patient_id).with_suffix(".dcm")
  # Scale intensities by 1/255 — presumably the pixel data is 8-bit; TODO confirm.
  dcm = pydicom.dcmread(dcm_path).pixel_array / 255

  # Stored as float16 to keep the saved files small.
  dcm_array = cv2.resize(dcm, (224, 224)).astype(np.float16)

  # The first 24000 rows go to the training split, the remainder to validation.
  train_or_val = "train" if c < 24000 else "val"

  current_save_path = SAVE_PATH/train_or_val/str(label)
  current_save_path.mkdir(parents=True, exist_ok=True)
  np.save(current_save_path/patient_id, dcm_array)

  if train_or_val == "train":
    # Accumulate in float64: summing/squaring ~50k values directly in float16
    # loses precision because float16 carries only about three significant digits.
    pixels = dcm_array.astype(np.float64)
    sums += pixels.sum() / normalizer
    sums_squared += np.square(pixels).sum() / normalizer

In [None]:
!pip install lightning

In [None]:
import torch
import torchvision
from pathlib import Path
import numpy as np
import os
from torchvision import transforms
import glob
import torchmetrics

In [None]:
def load_file(path):
    """Read a NumPy array file from `path` and return its contents cast to Python float (float64)."""
    stored = np.load(path)
    return stored.astype(float)

In [None]:
# Validation preprocessing: convert the loaded array to a tensor, then standardize
# it with mean 0.49 and std 0.248.
# NOTE(review): these constants are presumably the training-set statistics — confirm.
_to_tensor = transforms.ToTensor()
_standardize = transforms.Normalize(mean=0.49, std=0.248)
val_transforms = transforms.Compose([_to_tensor, _standardize])

In [None]:
# Validation dataset: each sub-directory of SAVE_PATH/val is a class, every .npy
# file inside is one sample, read with `load_file` and passed through `val_transforms`.
_val_root = os.path.join(SAVE_PATH, 'val')
val_dataset = torchvision.datasets.DatasetFolder(
    _val_root,
    load_file,
    extensions=('npy',),
    transform=val_transforms,
)

In [None]:
# Fetch the model definition module that is imported further down as `pneumonia_resnet152`.
# NOTE(review): the URL points at the `main` branch, so the downloaded code can change
# between runs — consider pinning to a specific commit.
!wget https://raw.githubusercontent.com/stiltskincode/pneumonia-classification/refs/heads/main/models/pneumonia_resnet152.py

In [None]:
# Folder on the mounted Google Drive that holds this project's files (logs, checkpoints).
DRIVE_PATH = "/content/drive/MyDrive/colab"

In [None]:
# Directory holding the checkpoints of Lightning run `version_1`.
_run_subdir = "logs/lightning_logs/version_1/checkpoints/"
checkpoints_dir = os.path.join(DRIVE_PATH, _run_subdir)

In [None]:
from pneumonia_resnet152 import PneumoniaResNet152

In [None]:
def get_latest_checkpoint(checkpoints_dir):
    """Return the most recently modified ``*.ckpt`` file in ``checkpoints_dir``.

    Prints which checkpoint was selected. When the directory contains no
    ``.ckpt`` files, prints a notice and returns ``None``.
    """
    pattern = os.path.join(checkpoints_dir, '*.ckpt')
    candidates = glob.glob(pattern)

    if not candidates:
        print("No checkpoint found. Starting training from scratch.")
        return None

    # Newest by filesystem modification time.
    newest = max(candidates, key=os.path.getmtime)
    print(f"Resuming from checkpoint: {newest}")
    return newest

In [None]:
# Path of the newest .ckpt file in checkpoints_dir, or None when the directory has none.
latest_checkpoint = get_latest_checkpoint(checkpoints_dir)

In [None]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
_device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

In [None]:
# Restore the trained model from the newest checkpoint, switch it to evaluation
# mode and move it to the selected device.
# get_latest_checkpoint returns None when no .ckpt file exists; fail early with a
# clear message instead of an obscure error from inside load_from_checkpoint.
if latest_checkpoint is None:
    raise FileNotFoundError(
        f"No .ckpt file found in {checkpoints_dir!r}; cannot evaluate without trained weights."
    )
model = PneumoniaResNet152.load_from_checkpoint(checkpoint_path=latest_checkpoint)
model.eval()
model.to(device)

In [None]:
# Run the model over the validation set one sample at a time, threshold the sigmoid
# output at 0.5 and report accuracy, precision, recall and the confusion matrix.
preds = []
# Named `targets` rather than `labels` so the labels DataFrame read from
# stage_2_train_labels.csv is not overwritten and the preprocessing cell stays re-runnable.
targets = []

with torch.no_grad():
    for data, label in tqdm(val_dataset):
        data = data.to(device).float().unsqueeze(0)  # add a leading batch dimension
        pred = torch.sigmoid(model(data)[0].cpu())
        pred_binary = (pred > 0.5).int()  # Threshold at 0.5 to get binary output
        preds.append(pred_binary)
        targets.append(label)
preds = torch.tensor(preds)
targets = torch.tensor(targets).int()

acc = torchmetrics.Accuracy(task="binary")(preds, targets)
precision = torchmetrics.Precision(task="binary")(preds, targets)
recall = torchmetrics.Recall(task="binary")(preds, targets)
cm = torchmetrics.ConfusionMatrix(task="binary")(preds, targets)

print(f"Val Acc {acc}")
print(f"Val Precision {precision}")
print(f"Val Recall {recall}")
print(f"Confusion Matrix {cm}")