Preprocessing X-ray Images for Cardiac Detection
---
- [Dataset Source](https://www.kaggle.com/competitions/rsna-pneumonia-detection-challenge/data)
- [Label Source](https://github.com/fshnkarimi/Cardiac-Detection)

##### Import

In [1]:
from pathlib import Path
import pydicom
import numpy as np
import cv2
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches

##### Load Ground Truth

In [2]:
# Ground-truth table: one row per image holding the box (x0, y0, w, h),
# the image `name`, and the frame size the box refers to (img_shape_x/y).
LABEL_CSV = '../data/cardiac_detection/rsna_heart_detection.csv'
label_df = pd.read_csv(LABEL_CSV)

# Preview the first rows as a sanity check on the parsed columns.
label_df.head()

Unnamed: 0,label,x0,y0,w,h,name,img_shape_x,img_shape_y
0,heart,91,71,77,102,ca112a3c-b701-48b8-a94f-725ea65416a7,224,224
1,heart,70,64,89,67,e5f292ae-f1e5-497c-b0a0-68f1aa2196b2,224,224
2,heart,82,55,81,76,67af68f7-7ba0-49a0-a373-3a8801eb0faa,224,224
3,heart,93,74,78,89,814289a3-9ce8-4cba-842f-45d6a36f540d,224,224
4,heart,85,61,85,93,e508969f-c931-462b-aa90-ccde34a4d9e5,224,224


##### Set Directory

In [3]:
# Directory that holds the raw `<name>.dcm` files read by the notebook.
ROOT_PATH: Path = Path("../data/cardiac_detection/stage_2_train_images/")

# Output directory for everything this notebook writes (arrays, ids, stats).
SAVE_PATH: Path = Path("../data/cardiac_detection/processed")

##### Read Image Function

In [4]:
def dicomreadimage(name, root=ROOT_PATH, size=None):
    """Read the pixel data of one DICOM file.

    Parameters
    ----------
    name : str
        File stem of the image; the file read is ``root / f"{name}.dcm"``.
    root : pathlib.Path
        Directory containing the ``.dcm`` files.
    size : tuple of int, optional
        Target ``(width, height)`` handed to ``cv2.resize``. With the default
        ``None`` the pixel array is returned at its stored resolution.

    Returns
    -------
    numpy.ndarray
        The ``pixel_array`` of the DICOM file, resized only when ``size`` is given.
    """
    dcm = pydicom.dcmread(root / f"{name}.dcm")
    img = dcm.pixel_array
    if size is not None:
        # cv2.resize takes dsize as (width, height), not (rows, cols).
        img = cv2.resize(img, size)
    return img

##### Plot Sample Image

In [None]:
# Draw the preview rows in a single seeded call: the figure is reproducible on
# re-run and the same image cannot show up in two panels.
N_ROWS, N_COLS = 2, 2
samples = label_df.sample(n=min(N_ROWS * N_COLS, len(label_df)), random_state=42)

fig, axes = plt.subplots(N_ROWS, N_COLS, figsize=(4, 4))
for panel in axes.flat:
    panel.axis('off')

for panel, (_, row) in zip(axes.flat, samples.iterrows()):
    img = dicomreadimage(row["name"])

    # The box is expressed in an (img_shape_x, img_shape_y) frame, while `img`
    # is shown at whatever resolution the DICOM stores. Rescale the box so it
    # lands on the right pixels; the factors are 1.0 when the sizes agree.
    # NOTE(review): assumes img_shape_x is the width and img_shape_y the height.
    scale_x = img.shape[1] / row["img_shape_x"]
    scale_y = img.shape[0] / row["img_shape_y"]

    rect = patches.Rectangle(
        (row["x0"] * scale_x, row["y0"] * scale_y),
        row["w"] * scale_x,
        row["h"] * scale_y,
        linewidth=1,
        edgecolor='r',
        facecolor='none',
    )
    panel.imshow(img, cmap="gray")
    panel.add_patch(rect)

##### Preprocess Images and Save Them as NumPy Arrays

In [5]:
# Running totals of the per-image mean and per-image mean of squares; the
# "Save Mean and Std" cell turns them into dataset-level statistics.
sums, sums_squared = 0.0, 0.0
img_id = []

# Create the output directory once instead of on every iteration.
save_path = SAVE_PATH / "images"
save_path.mkdir(parents=True, exist_ok=True)

for name in label_df["name"]:
    img = dicomreadimage(name)
    # Scale to [0, 1] and store compactly.
    # NOTE(review): dividing by 255 assumes 8-bit pixel data — confirm for this dataset.
    img = (img / 255).astype(np.float16)
    img_id.append(name)

    # np.save appends the ".npy" extension to the target path.
    np.save(save_path / name, img)

    # Accumulate in float64: summing a large float16 array can overflow.
    # Using .mean() normalises by the actual pixel count of each image
    # instead of a hard-coded image size.
    pixels = img.astype(np.float64)
    sums += pixels.mean()
    sums_squared += np.square(pixels).mean()
    

##### Save Image IDs

In [None]:
# Persist the image names collected during preprocessing as one array;
# np.save appends the ".npy" extension to the target path.
img_id_file = SAVE_PATH / "img_id"
np.save(img_id_file, np.asanyarray(img_id))

##### Save Mean and Std

In [None]:
# `sums` / `sums_squared` are the running totals set up by the preprocessing
# cell and `img_id` lists the images it processed. The totals are expected to
# hold one per-image mean / mean-of-squares for every entry of `img_id`.
num_images = len(img_id)
if num_images == 0 or sums == 0:
    # Fail loudly rather than persist mean = std = 0, which would break any
    # later (x - mean) / std normalisation.
    raise ValueError(
        "No pixel statistics available: `img_id` is empty or `sums` is still 0. "
        "Run the preprocessing cell and make sure it accumulates `sums` and "
        "`sums_squared` before computing mean/std."
    )

mean = sums / num_images
# Var[X] = E[X^2] - E[X]^2, with every image weighted equally; clamp at zero
# so floating-point round-off cannot produce sqrt of a tiny negative number.
std = np.sqrt(max(sums_squared / num_images - mean**2, 0.0))

mean_std = {"mean": mean, "std": std}
# A dict is written as a pickled object array, so reading it back requires
# np.load(..., allow_pickle=True).
np.save(SAVE_PATH / "mean_std", mean_std)