# PyTorchCV starter notebook [Infer]

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
!pip install /kaggle/input/pydicom-and-torchmetrics/pydicom-2.4.3-py3-none-any.whl
!pip install /kaggle/input/pydicom-and-torchmetrics/torchmetrics-1.2.0-py3-none-any.whl

Processing /kaggle/input/pydicom-and-torchmetrics/pydicom-2.4.3-py3-none-any.whl
pydicom is already installed with the same version as the provided wheel. Use --force-reinstall to force an installation of the wheel.
Processing /kaggle/input/pydicom-and-torchmetrics/torchmetrics-1.2.0-py3-none-any.whl
Installing collected packages: torchmetrics
  Attempting uninstall: torchmetrics
    Found existing installation: torchmetrics 1.1.1
    Uninstalling torchmetrics-1.1.1:
      Successfully uninstalled torchmetrics-1.1.1
Successfully installed torchmetrics-1.2.0


## Imports and Setup

In [3]:
import cv2
import gc
from glob import glob
from joblib import Parallel, delayed
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from PIL import Image
import pydicom
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchmetrics import MeanMetric
from torchmetrics.classification import MulticlassAccuracy
import torchvision.models as models
import torchvision.transforms as transforms
from tqdm.notebook import tqdm



## Configuration

In [4]:
class Config:
    SEED = 42
    IMAGE_SIZE = [256, 256]
    BATCH_SIZE = 20
    EPOCHS = 5
    TARGET_COLS = [
        "bowel_injury", "extravasation_injury",
        "kidney_healthy", "kidney_low", "kidney_high",
        "liver_healthy", "liver_low", "liver_high",
        "spleen_healthy", "spleen_low", "spleen_high",
    ]

config = Config()

## Reproducibility

In [5]:
random_seed = config.SEED
np.random.seed(random_seed)

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [7]:
BASE_PATH = '/kaggle/input/rsna-2023-abdominal-trauma-detection'
IMAGE_DIR = '/tmp/dataset/rsna-atd'
INPUT_MODEL_PATH = '/kaggle/input/pytorchcv-starter-notebook-train/model_5.pth'
MODEL_PATH = '/kaggle/working/model_5.pth'
STRIDE = 10

In [8]:
class Config:
    SEED = 42
    IMAGE_SIZE = [256, 256]
    RESIZE_DIM = 256
    BATCH_SIZE = 20
    EPOCHS = 5
    TARGET_COLS = [
        "bowel_injury", "extravasation_injury",
        "kidney_healthy", "kidney_low", "kidney_high",
        "liver_healthy", "liver_low", "liver_high",
        "spleen_healthy", "spleen_low", "spleen_high",
    ]

config = Config()

## Initialize the Trained Model

In [9]:
class build_model(nn.Module):
    def __init__(
        self, num_classes_bowel,
        num_classes_extra, num_classes_liver,
        num_classes_kidney, num_classes_spleen
    ):
        super(build_model, self).__init__()
        
        # define backbone
        self.backbone = models.efficientnet_b5(pretrained=False)
        self.backbone._fc = nn.Identity()

        # delete 'necks' for each head
        self.neck_bowel = nn.Linear(1000, 32) # self.backbone._swish._num_features
        self.neck_extra = nn.Linear(1000, 32)
        self.neck_liver = nn.Linear(1000, 32)
        self.neck_kidney = nn.Linear(1000, 32)
        self.neck_spleen = nn.Linear(1000, 32)

        # define heads
        self.head_bowel = nn.Linear(32, num_classes_bowel)
        self.head_extra = nn.Linear(32, num_classes_extra)
        self.head_liver = nn.Linear(32, num_classes_liver)
        self.head_kidney = nn.Linear(32, num_classes_kidney)
        self.head_spleen = nn.Linear(32, num_classes_spleen)
    
    def forward(self, x):
        # forward pass through the backbone
        # print(x.shape)
        x = self.backbone(x)
        # print(x.shape)

        # forward pass through 'necks' and heads
        x_bowel = self.head_bowel(self.neck_bowel(x))
        x_extra = self.head_extra(self.neck_extra(x))
        x_liver = self.head_liver(self.neck_liver(x))
        x_kidney = self.head_kidney(self.neck_kidney(x))
        x_spleen = self.head_spleen(self.neck_spleen(x))

        return x_bowel, x_extra, x_liver, x_kidney, x_spleen

In [10]:
! cp {INPUT_MODEL_PATH} ./

model_variable = torch.load(MODEL_PATH, map_location = 'cpu')
model = build_model(
    num_classes_bowel=1,
    num_classes_extra=1,
    num_classes_liver=3,
    num_classes_kidney=3,
    num_classes_spleen=3,
)
model.load_state_dict(model_variable, strict = False)
model.to(device)



build_model(
  (backbone): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(48, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): MBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
              (1): BatchNorm2d(48, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
            (1): SqueezeExcitation(
              (avgpool): AdaptiveAvgPool2d(output_size=1)
              (fc1): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
              (fc2): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
              (activation): SiLU(inplace=True)
              (sc

In [11]:
meta_df = pd.read_csv(f"{BASE_PATH}/test_series_meta.csv")

# Checking if patients are repeated by finding the number of unique patient IDs
num_rows = meta_df.shape[0]
unique_patients = meta_df["patient_id"].nunique()

print(f"{num_rows=}")
print(f"{unique_patients=}")

num_rows=6
unique_patients=3


In [12]:
meta_df["dicom_folder"] = BASE_PATH + "/" + "test_images"\
                                    + "/" + meta_df.patient_id.astype(str)\
                                    + "/" + meta_df.series_id.astype(str)

test_folders = meta_df.dicom_folder.tolist()
test_paths = []
for folder in test_folders:
    test_paths += sorted(glob(os.path.join(folder, "*dcm")))[::STRIDE]

In [13]:
test_df = pd.DataFrame(test_paths, columns=["dicom_path"])
test_df["patient_id"] = test_df.dicom_path.map(lambda x: x.split("/")[-3]).astype(int)
test_df["series_id"] = test_df.dicom_path.map(lambda x: x.split("/")[-2]).astype(int)
test_df["instance_number"] = test_df.dicom_path.map(lambda x: x.split("/")[-1].replace(".dcm","")).astype(int)

test_df["image_path"] = f"{IMAGE_DIR}/test_images"\
                    + "/" + test_df.patient_id.astype(str)\
                    + "/" + test_df.series_id.astype(str)\
                    + "/" + test_df.instance_number.astype(str) +".png"

test_df.head(2)

Unnamed: 0,dicom_path,patient_id,series_id,instance_number,image_path
0,/kaggle/input/rsna-2023-abdominal-trauma-detec...,48843,62825,30,/tmp/dataset/rsna-atd/test_images/48843/62825/...
1,/kaggle/input/rsna-2023-abdominal-trauma-detec...,50046,24574,30,/tmp/dataset/rsna-atd/test_images/50046/24574/...


In [14]:
# Checking if patients are repeated by finding the number of unique patient IDs
num_rows = test_df.shape[0]
unique_patients = test_df["patient_id"].nunique()

print(f"{num_rows=}")
print(f"{unique_patients=}")

num_rows=3
unique_patients=3


In [15]:
!rm -r {IMAGE_DIR}
os.makedirs(f"{IMAGE_DIR}/train_images", exist_ok=True)
os.makedirs(f"{IMAGE_DIR}/test_images", exist_ok=True)

rm: cannot remove '/tmp/dataset/rsna-atd': No such file or directory


In [16]:
def standardize_pixel_array(dcm):
    # Correct DICOM pixel_array if PixelRepresentation == 1.
    pixel_array = dcm.pixel_array
    if dcm.PixelRepresentation == 1:
        bit_shift = dcm.BitsAllocated - dcm.BitsStored
        dtype = pixel_array.dtype 
        new_array = (pixel_array << bit_shift).astype(dtype) >>  bit_shift
        pixel_array = pydicom.pixel_data_handlers.util.apply_modality_lut(new_array, dcm)
    return pixel_array

def read_xray(path, fix_monochrome=True):
    dicom = pydicom.dcmread(path)
    data = standardize_pixel_array(dicom)
    data = data - np.min(data)
    data = data / (np.max(data) + 1e-5)
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = 1.0 - data
    return data

def resize_and_save(file_path):
    img = read_xray(file_path)
    h, w = img.shape[:2]  # orig hw
    img = cv2.resize(img, (config.RESIZE_DIM, config.RESIZE_DIM), cv2.INTER_LINEAR)
    img = (img * 255).astype(np.uint8)
    
    sub_path = file_path.split("/",4)[-1].split(".dcm")[0] + ".png"
    infos = sub_path.split("/")
    sub_path = file_path.split("/",4)[-1].split(".dcm")[0] + ".png"
    infos = sub_path.split("/")
    pid = infos[-3]
    sid = infos[-2]
    iid = infos[-1]; iid = iid.replace(".png","")
    new_path = os.path.join(IMAGE_DIR, sub_path)
    os.makedirs(new_path.rsplit("/",1)[0], exist_ok=True)
    cv2.imwrite(new_path, img)
    return

In [17]:
%%time

file_paths = test_df.dicom_path.tolist()
_ = Parallel(n_jobs=2, backend="threading")(
    delayed(resize_and_save)(file_path) for file_path in tqdm(file_paths, leave=True, position=0)
)

del _; gc.collect()

  0%|          | 0/3 [00:00<?, ?it/s]

CPU times: user 307 ms, sys: 50.1 ms, total: 357 ms
Wall time: 404 ms


141

## Data Pipeline /w dataloader

In [18]:
class RandomCutout(transforms.RandomApply):
    def __init__(self, p, cutout_height_factor=0.2, cutout_width_factor=0.2):
        cutout = transforms.RandomErasing(
            p=1.0, scale=(cutout_height_factor, cutout_width_factor), ratio=(1,1)
        )
        super(RandomCutout, self).__init__([cutout], p=p)

class CustomDataset(Dataset):
    def __init__(self, image_paths, image_size, transform=None):
        self.image_paths = image_paths
        self.transform = transform
        self.image_size = image_size

    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        image_path = self.image_paths[idx]

        # load and preprocess the image
        image = Image.open(image_path).convert('RGB')
        image = transforms.Resize(self.image_size)(image)
        image = transforms.ToTensor()(image)
        image = image / 255.0

        # for additional transformation
        if self.transform:
            image = self.transform(image)

        return image

In [19]:
cutout_transform = RandomCutout(p=0.5, cutout_height_factor=0.2, cutout_width_factor=0.2)
transform = transforms.Compose([cutout_transform])

def build_dataset(image_paths, batch_size, image_size, transform=None):
    dataset = CustomDataset(image_paths, image_size=image_size, transform=transform)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
    return dataloader

## Inference

In [20]:
def post_proc(pred):
    proc_pred = np.empty((pred.shape[0], 2*2 + 3*3), dtype="float32")
#     print(proc_pred.shape)

    # bowel, extravasation
    proc_pred[:, 0] = pred[:, 0]
    proc_pred[:, 1] = 1 - proc_pred[:, 0]
    proc_pred[:, 2] = pred[:, 1]
    proc_pred[:, 3] = 1 - proc_pred[:, 2]
    
    # liver, kidney, sneel
    proc_pred[:, 4:7] = pred[:, 2:5]
    proc_pred[:, 7:10] = pred[:, 5:8]
    proc_pred[:, 10:13] = pred[:, 8:11]

    return proc_pred

In [22]:
# Getting unique patient IDs from test dataset
patient_ids = test_df['patient_id'].unique()

# Initializing array to store predictions
patient_preds = np.zeros(
    shape=(len(patient_ids), 2*2 + 3*3),
    dtype="float32"
)

for pidx, patient_id in tqdm(enumerate(patient_ids), total=len(patient_ids), desc="Patients "):
    print(f'Patient ID: {patient_id}')
    
    # Query the dataframe for a particualr patient
    patient_df = test_df[test_df["patient_id"] == patient_id]
    
    # Getting image paths for a particular patient
    patient_paths = patient_df.image_path.tolist()
    
    # Bulding dataset for prediction
    dtest = build_dataset(patient_paths, config.BATCH_SIZE, config.IMAGE_SIZE)
    
    # Predicting with the model
    model.eval()
    with torch.no_grad():
        preds = []
        for image in dtest:
            image = image.to(device)
            pred_bowel, pred_extra, pred_liver, pred_kidney, pred_spleen = model(image)
            
            tensors = [F.softmax(tensor, dim=1) for tensor in [pred_bowel, pred_extra, pred_liver, pred_kidney, pred_spleen]]
            pred = torch.cat(tensors, dim=-1).float()
            
            preds.append(pred)
            
            
        preds = torch.cat(preds, dim=0)
        preds = preds[:len(patient_paths), :]
        preds = torch.mean(preds.reshape(1, len(patient_paths), -1), dim=0)
        preds = torch.max(preds, dim=0, keepdim=True)[0]
            
        patient_preds[pidx, :] = post_proc(pred.cpu().numpy())
            
        del patient_df, patient_paths, dtest, pred_bowel, pred_extra, pred_liver, pred_kidney, pred_spleen, pred
        gc.collect()

Patients :   0%|          | 0/3 [00:00<?, ?it/s]

Patient ID: 48843
Patient ID: 50046
Patient ID: 63706


In [23]:
columns = [
    'bowel_healthy',
    'bowel_injury',
    'extravasation_healthy',
    'extravasation_injury',
    'kidney_healthy',
    'kidney_low',
    'kidney_high',
    'liver_healthy',
    'liver_low',
    'liver_high',
    'spleen_healthy',
    'spleen_low',
    'spleen_high'
]

pred_df = pd.DataFrame({"patient_id":patient_ids,})
pred_df[columns] = patient_preds.astype("float32")

In [24]:
sub_df = pd.read_csv(f"{BASE_PATH}/sample_submission.csv")
sub_df = sub_df[["patient_id"]]
sub_df = sub_df.merge(pred_df, on="patient_id", how="left")

sub_df.to_csv("submission.csv",index=False, float_format='%.5f')
sub_df.head(3)

Unnamed: 0,patient_id,bowel_healthy,bowel_injury,extravasation_healthy,extravasation_injury,kidney_healthy,kidney_low,kidney_high,liver_healthy,liver_low,liver_high,spleen_healthy,spleen_low,spleen_high
0,48843,1.0,0.0,1.0,0.0,0.998065,0.000225,0.00171,0.004138,0.029599,0.966262,0.068456,0.005505,0.926038
1,50046,1.0,0.0,1.0,0.0,0.998677,0.001287,3.6e-05,0.222285,0.022886,0.754829,0.732094,0.00068,0.267226
2,63706,1.0,0.0,1.0,0.0,0.999968,2.1e-05,1.1e-05,0.607014,0.00263,0.390356,0.043677,0.000987,0.955335
