In [1]:
import numpy as np 
import pandas as pd 
from tqdm import tqdm
from scipy import ndimage
# import nibabel as nib
import pydicom
import matplotlib.pyplot as plt
from tensorflow.keras import backend as K
import keras_cv
# import keras_core as keras
from tensorflow.keras.optimizers import Adadelta, Nadam ,Adam
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.utils import  plot_model ,Sequence
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import VGG16
import tensorflow as tf
from tensorflow.python.keras.losses import binary_crossentropy
import keras
from keras_cv.models import ResNetBackbone
from tensorflow.keras.utils import custom_object_scope
import os
from glob import glob  # for getting list paths of image and labels
from random import choice,sample
from matplotlib import pyplot as plt
import cv2
from sklearn.model_selection import train_test_split



Using TensorFlow backend


In [2]:
class Config:
    """Notebook-wide settings, read through the ``cfg`` instance created below."""

    # --- Kaggle input locations ---
    base_path = "/kaggle/input/rsna-2023-abdominal-trauma-detection"
    train_images_path = '/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/'
    custom_path = "/kaggle/input/images-for-training-png-rsna2023"
    data_path = "/kaggle/input/train4/"

    # --- volume dimensions (list and tuple form of the same three numbers) ---
    image_size = [256, 256, 256]
    dim = (256, 256, 256)

    # --- label columns of train.csv, one group per target ---
    # Index 0..4 = bowel, extravasation, kidney, liver, spleen.
    columns = [
        ["bowel_injury"],
        ["extravasation_injury"],
        ["kidney_healthy", "kidney_low", "kidney_high"],
        ["liver_healthy", "liver_low", "liver_high"],
        ["spleen_healthy", "spleen_low", "spleen_high"],
    ]

    # --- run settings ---
    batch_size = 19
    epochs = 100
    SEED = 65
    AUTO = tf.data.AUTOTUNE


cfg = Config()

# Seed the random generators up front so sampling and splitting are repeatable.
tf.keras.utils.set_random_seed(seed=cfg.SEED)

In [3]:
# Patient-level label table.
train = pd.read_csv(f'{cfg.base_path}/train.csv')

# Bowel: every injured row plus an equally sized random sample of healthy rows.
bowel_injury = train[train["bowel_injury"] == 1]
bowel_healthy = train[train["bowel_injury"] == 0].sample(n=len(bowel_injury), random_state=cfg.SEED)

# Extravasation: same 1:1 balancing as bowel.
extra_injury = train[train["extravasation_injury"] == 1]
extra_healthy = train[train["extravasation_injury"] == 0].sample(n=len(extra_injury), random_state=cfg.SEED)

# Kidney: every low/high row; healthy rows are sampled down to the mean of the
# two injury counts.
kidney_high = train[train["kidney_high"] == 1]
kidney_low = train[train["kidney_low"] == 1]
avg = (len(kidney_high) + len(kidney_low)) // 2
kidney_healthy = train[train["kidney_healthy"] == 1].sample(n=avg, random_state=cfg.SEED)

# Liver: as kidney, with the healthy sample reduced by one.
# NOTE(review): the reason for the -1 offset is not visible in this notebook.
liver_high = train[train["liver_high"] == 1]
liver_low = train[train["liver_low"] == 1]
avg = (len(liver_high) + len(liver_low)) // 2 - 1
liver_healthy = train[train["liver_healthy"] == 1].sample(n=avg, random_state=cfg.SEED)

# Spleen: as kidney, with the healthy sample reduced by three.
# NOTE(review): the reason for the -3 offset is not visible in this notebook.
spleen_high = train[train["spleen_high"] == 1]
spleen_low = train[train["spleen_low"] == 1]
avg = (len(spleen_high) + len(spleen_low)) // 2 - 3
spleen_healthy = train[train["spleen_healthy"] == 1].sample(n=avg, random_state=cfg.SEED)

# One balanced frame per target: healthy rows first, then the injury rows.
bowel = pd.concat([bowel_healthy, bowel_injury])
extra = pd.concat([extra_healthy, extra_injury])
kidney = pd.concat([kidney_healthy, kidney_high, kidney_low])
liver = pd.concat([liver_healthy, liver_high, liver_low])
spleen = pd.concat([spleen_healthy, spleen_high, spleen_low])

In [4]:
# Per-series metadata table; trim_dataset() joins it onto the label frames.
series_meta_csv = f'{cfg.base_path}/train_series_meta.csv'
train_sm = pd.read_csv(series_meta_csv)
def trim_dataset(df, columns, series_meta=None):
    """Attach one series to every patient and keep only id and label columns.

    The label frame is inner-joined with the series metadata on
    ``patient_id``. When a patient has several series, the one with the
    highest ``aortic_hu`` is kept.

    Args:
        df: Label frame containing ``patient_id`` and the columns in ``columns``.
        columns: Label column names to keep.
        series_meta: Frame with ``patient_id``, ``series_id`` and ``aortic_hu``.
            Defaults to the notebook-level ``train_sm`` table, so existing
            two-argument calls behave as before.

    Returns:
        A frame with columns ``patient_id``, ``series_id`` and ``columns``,
        holding one row per patient that appears in both inputs.
    """
    if series_meta is None:
        # Backward-compatible fallback to the table loaded at notebook level.
        series_meta = train_sm

    merged = df.merge(series_meta, on=["patient_id"], how="inner")
    # Sorting descending first makes drop_duplicates keep the highest-HU series.
    one_per_patient = merged.sort_values('aortic_hu', ascending=False).drop_duplicates(subset=['patient_id'])
    return one_per_patient[["patient_id", "series_id"] + list(columns)]
# Reduce every balanced frame to (patient_id, series_id, <its label columns>);
# cfg.columns is ordered bowel, extravasation, kidney, liver, spleen.
bowel, extra, kidney, liver, spleen = [
    trim_dataset(frame, label_cols)
    for frame, label_cols in zip([bowel, extra, kidney, liver, spleen], cfg.columns)
]

In [5]:
def _split_to_arrays(frame, label_cols, test_size):
    """Split ``frame`` into train/val ids and labels, returned as NumPy arrays.

    Returns a 4-tuple ``(train_ids, val_ids, train_labels, val_labels)`` where
    the id arrays hold the ``patient_id`` / ``series_id`` columns.
    """
    pieces = train_test_split(
        frame[["patient_id", "series_id"]],
        frame[label_cols],
        random_state=cfg.SEED,
        test_size=test_size,
    )
    return tuple(piece.values for piece in pieces)


# Each target uses its own validation fraction.
bowel_train_images, bowel_val_images, bowel_train_labels, bowel_val_labels = _split_to_arrays(bowel, cfg.columns[0], 0.125)
extra_train_images, extra_val_images, extra_train_labels, extra_val_labels = _split_to_arrays(extra, cfg.columns[1], 0.20)
kidney_train_images, kidney_val_images, kidney_train_labels, kidney_val_labels = _split_to_arrays(kidney, cfg.columns[2], 0.142857)
liver_train_images, liver_val_images, liver_train_labels, liver_val_labels = _split_to_arrays(liver, cfg.columns[3], 0.16665)
spleen_train_images, spleen_val_images, spleen_train_labels, spleen_val_labels = _split_to_arrays(spleen, cfg.columns[4], 0.25)

In [6]:
import pydicom
def _first_window_value(value):
    """Return the first entry of a multi-valued element, or ``value`` if scalar."""
    if isinstance(value, (str, bytes)):
        return value
    try:
        return value[0]
    except (TypeError, IndexError):
        # Scalars are not indexable; hand them back unchanged.
        return value


def standardize_pixel_array(dcm: "pydicom.dataset.FileDataset") -> np.ndarray:
    """Convert a DICOM slice to rescaled values clipped to its display window.

    Source : https://www.kaggle.com/competitions/rsna-2023-abdominal-trauma-detection/discussion/427217

    Steps:
      1. If ``PixelRepresentation == 1``, shift the pixel data left and back
         right by ``BitsAllocated - BitsStored`` bits (the correction from the
         linked discussion).
      2. Apply ``pixel * RescaleSlope + RescaleIntercept``.
      3. Clip to ``[center - width / 2, center + width / 2]``.

    ``WindowCenter`` / ``WindowWidth`` may hold several values; the first one
    is used. Previously such files raised ``TypeError`` in ``int(...)``.

    Args:
        dcm: Dataset exposing ``pixel_array``, ``PixelRepresentation``,
            ``BitsAllocated``, ``BitsStored``, ``RescaleIntercept``,
            ``RescaleSlope``, ``WindowCenter`` and ``WindowWidth``.

    Returns:
        Array with the shape of ``dcm.pixel_array`` holding the clipped values.
    """
    pixel_array = dcm.pixel_array
    # Correct DICOM pixel_array if PixelRepresentation == 1.
    if dcm.PixelRepresentation == 1:
        bit_shift = dcm.BitsAllocated - dcm.BitsStored
        dtype = pixel_array.dtype
        pixel_array = (pixel_array << bit_shift).astype(dtype) >> bit_shift

    intercept = float(dcm.RescaleIntercept)
    slope = float(dcm.RescaleSlope)
    center = int(_first_window_value(dcm.WindowCenter))
    width = int(_first_window_value(dcm.WindowWidth))
    low = center - width / 2
    high = center + width / 2

    pixel_array = (pixel_array * slope) + intercept
    return np.clip(pixel_array, low, high)

def resize_volume(img, desired_depth=128, desired_width=256, desired_height=256):
    """Rotate a 3-D volume by 90 degrees and resample it to a fixed size.

    The volume is read as ``(width, height, depth)``: axis 0 is scaled to
    ``desired_width``, axis 1 to ``desired_height`` and the last axis to
    ``desired_depth``. The rotation uses ``scipy.ndimage.rotate`` with its
    default axes, i.e. it acts on the plane of the first two axes and keeps
    the array shape (``reshape=False``).

    Args:
        img: 3-D array-like (anything ``np.asarray`` accepts).
        desired_depth: Target size of the last axis.
        desired_width: Target size of axis 0.
        desired_height: Target size of axis 1.

    Returns:
        ``np.ndarray`` of shape ``(desired_width, desired_height, desired_depth)``.

    Raises:
        ValueError: If ``img`` is not three-dimensional.
    """
    img = np.asarray(img)
    if img.ndim != 3:
        raise ValueError(f"resize_volume expects a 3-D volume, got shape {img.shape}")

    current_width, current_height, current_depth = img.shape
    zoom_factors = (
        desired_width / current_width,
        desired_height / current_height,
        desired_depth / current_depth,
    )
    # Rotate
    img = ndimage.rotate(img, 90, reshape=False)
    # Resize with linear interpolation (order=1)
    return ndimage.zoom(img, zoom_factors, order=1)


def process(patient,study, data_path=""):
    """Load one DICOM series and return it as a fixed-size 3-D volume.

    Every ``*.dcm`` file under ``{data_path}{patient}/{study}/`` is read,
    windowed with ``standardize_pixel_array``, min-max scaled to roughly
    [0, 1], inverted when the photometric interpretation is ``MONOCHROME1``
    and resized to 256x256. Slices are ordered by the last component of
    DICOM tag (0020,0032) and stacked along the LAST axis before being passed
    to ``resize_volume``.

    Stacking on the last axis matters: ``resize_volume`` reads depth from the
    last axis and rotates the plane of the first two axes. Stacking on axis 0
    (the previous behaviour) rotated slice-index against image rows and
    applied the depth resampling to image columns.

    Args:
        patient: Patient id (directory name below ``data_path``).
        study: Series id (directory name below the patient directory).
        data_path: Prefix for the patient directory; must end with a path
            separator because it is concatenated as-is.

    Returns:
        The array returned by ``resize_volume`` for the stacked slices.

    Raises:
        FileNotFoundError: If the series directory contains no ``.dcm`` files.
    """
    slice_paths = sorted(glob(data_path + f"{patient}/{study}/*.dcm"))
    if not slice_paths:
        raise FileNotFoundError(
            f"No DICOM slices found under {data_path}{patient}/{study}/"
        )

    # NOTE: an earlier revision carried a disabled skip for 3124/5842/514.dcm.
    slices_by_z = {}
    for path in slice_paths:
        dicom = pydicom.dcmread(path)

        # Last component of tag (0020,0032); used as the slice sort key.
        pos_z = dicom[(0x20, 0x32)].value[-1]

        img = standardize_pixel_array(dicom)
        # Min-max scale; the epsilon avoids division by zero on flat slices.
        img = (img - img.min()) / (img.max() - img.min() + 1e-6)

        if dicom.PhotometricInterpretation == "MONOCHROME1":
            img = 1 - img
        img = tf.image.resize(tf.expand_dims(img, axis=2), size=[256, 256])[:, :, 0]
        # A repeated position overwrites the earlier slice at that position.
        slices_by_z[pos_z] = img

    ordered_slices = [slices_by_z[z] for z in sorted(slices_by_z)]
    # (256, 256, n_slices): the layout resize_volume expects.
    scan = tf.stack(ordered_slices, axis=-1)
    return resize_volume(scan)

In [7]:
def _bytes_feature(value):
    """Serialize ``value`` as a tensor and wrap it in a one-entry bytes feature."""
    eager_tensor_type = type(tf.constant(0))
    if isinstance(value, eager_tensor_type):
        # Convert to NumPy before serializing, as the original code did.
        value = value.numpy()
    serialized = tf.io.serialize_tensor(value).numpy()
    bytes_list = tf.train.BytesList(value=[serialized])
    return tf.train.Feature(bytes_list=bytes_list)

def serialize_example(images,labels):
    """Build a ``tf.train.Example`` with 'image' and 'labels' features and return its bytes."""
    features = tf.train.Features(
        feature={
            'image': _bytes_feature(images),
            'labels': _bytes_feature(labels),
        }
    )
    return tf.train.Example(features=features).SerializeToString()

def _write_split(record_path, img_meta, img_labels, data_path):
    """Process each (patient, series) row of ``img_meta`` and write it with its labels to ``record_path``."""
    with tf.io.TFRecordWriter(record_path) as writer:
        for meta_row, labels in tqdm(zip(img_meta, img_labels), total=len(img_meta)):
            volume = process(meta_row[0], meta_row[1], data_path=data_path)
            writer.write(serialize_example(volume, labels))


def writeTFR(filename,train_img_meta, val_img_meta, train_img_labels, val_img_labels,
             data_path="/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/"):
    """Write ``{filename}_train.tfrecord`` and ``{filename}_val.tfrecord``.

    Each record holds one processed volume (feature 'image') and its label
    row (feature 'labels'), both serialized via ``serialize_example``.

    Args:
        filename: Output prefix; ``_train.tfrecord`` / ``_val.tfrecord`` is appended.
        train_img_meta: Sequence of rows whose first two entries are patient
            id and series id, for the training split.
        val_img_meta: Same as ``train_img_meta`` for the validation split.
        train_img_labels: Label rows aligned with ``train_img_meta``.
        val_img_labels: Label rows aligned with ``val_img_meta``.
        data_path: Root directory of the DICOM series, passed to ``process``.
            Defaults to the path that used to be hard-coded here.

    Raises:
        ValueError: If a meta sequence and its labels differ in length. This
            is checked before any file is written, so a mismatch does not
            surface only after the long-running train split has finished.
    """
    splits = (
        ("train", train_img_meta, train_img_labels),
        ("val", val_img_meta, val_img_labels),
    )
    for split_name, img_meta, img_labels in splits:
        if len(img_meta) != len(img_labels):
            raise ValueError(
                f"{split_name}: {len(img_meta)} series but {len(img_labels)} label rows"
            )

    # Train is written first, then validation, matching the original order.
    for split_name, img_meta, img_labels in splits:
        _write_split(f"{filename}_{split_name}.tfrecord", img_meta, img_labels, data_path)

In [8]:
# Build the spleen TFRecords (train + validation) from the spleen split arrays.
writeTFR(
    filename="spleen",
    train_img_meta=spleen_train_images,
    val_img_meta=spleen_val_images,
    train_img_labels=spleen_train_labels,
    val_img_labels=spleen_val_labels,
)
print("spleen done")

100%|██████████| 396/396 [1:14:01<00:00, 11.22s/it]
100%|██████████| 132/132 [25:08<00:00, 11.43s/it]

spleen done



