# Initializations

In [None]:
from glob import glob

import warnings
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
import seaborn as sns

import random
import numpy as np
import pandas as pd
import os
import copy

import re

import cv2

from tqdm import tqdm
import gc
import pydicom as dcm
import nibabel as nib

import torchvision
import torchvision.transforms.v2 as T
import albumentations as A
from albumentations.core.transforms_interface import DualTransform

def set_device():
    """Select the compute device, print it, and return it.

    Returns:
        str: 'cuda' when torch reports an available CUDA device, otherwise 'cpu'.
    """
    # Imported here because the notebook's import cell only pulls in
    # torchvision and never binds the name `torch`; without this the
    # function raised NameError on its first line.
    import torch

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(device)
    return device

def set_data_paths():
    """Return the directories used by the notebook, keyed by role.

    Keys: 'root', 'train', 'segmentations', 'test' (competition input
    folders) and 'masks' (a folder under /kaggle/working).
    """
    return {
        'root': '/kaggle/input/rsna-2023-abdominal-trauma-detection',
        'train': '/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images',
        'segmentations': '/kaggle/input/rsna-2023-abdominal-trauma-detection/segmentations',
        'test': '/kaggle/input/rsna-2023-abdominal-trauma-detection/test_images',
        'masks': '/kaggle/working/mask_slices',
    }

def create_segmentation_dict():
    """Return the organ-label lookup tables used for the segmentation masks.

    The result holds the class-name -> index mapping, its inverse, and the
    list of output class names.
    """
    class_to_inx = {'Background': 0, 'Liver': 1, 'Spleen': 2, 'Kidney_left': 3, 'Kidney_right': 4, 'Bowel': 5}
    # The inverse table is derived from the forward one so the two cannot drift apart.
    inx_to_class = {inx: name for name, inx in class_to_inx.items()}
    return {
        'segmentation_class_to_inx': class_to_inx,
        # Key spelling is kept as-is: it is a runtime key that callers may look up.
        'segmentaiton_inx_to_class': inx_to_class,
        'final_output': ['Background', 'Bowel', 'Kidney' , 'Liver' , 'Spleen'],
    }

# ________________________________________________________________________________________________________________ DATA PREPROCESSING
def sorted_dcm_labels(folder):
    """Return the integer file stems found in `folder`, sorted ascending.

    Each filename is split on '.' and its first part is parsed as an int
    (e.g. '12.dcm' -> 12).

    Returns:
        list[int]: the sorted stems, or an empty list when the folder cannot
        be listed or any filename does not start with an integer
        (best-effort behaviour kept from the original).
    """
    try:
        filenames = os.listdir(folder)
        return sorted(int(filename.split('.')[0]) for filename in filenames)
    except (OSError, ValueError, TypeError):
        # OSError: folder missing/unreadable; ValueError: non-numeric stem;
        # TypeError: unsupported `folder` type. A bare `except:` would also
        # have swallowed KeyboardInterrupt and SystemExit.
        return []


def standardize_pixel_array(dcm: dcm.dataset.FileDataset) -> np.ndarray:
    """Return the dataset's pixel array, corrected when PixelRepresentation == 1.

    For PixelRepresentation == 1 the array is shifted left by the number of
    allocated-but-unused bits, cast back to its original dtype, and shifted
    right again (presumably to sign-extend the stored value — confirm).
    For any other PixelRepresentation the array is returned untouched.
    """
    pixels = dcm.pixel_array
    if dcm.PixelRepresentation != 1:
        return pixels

    unused_bits = dcm.BitsAllocated - dcm.BitsStored
    shifted = (pixels << unused_bits).astype(pixels.dtype)
    return shifted >> unused_bits

def load_CT_slice(filepath, downsample_rate=1):
    """Read one DICOM slice, apply rescale and windowing, and scale it to [0, 1].

    Steps: read the file, standardize the raw pixel array, apply
    `pixel * slope + intercept`, clip to the window
    [center - width/2, center + width/2], shift the minimum to 0, divide by
    the maximum (when it is positive), then keep every `downsample_rate`-th
    row and column.

    Args:
        filepath: path of the .dcm file.
        downsample_rate: step used for the row/column subsampling.

    Returns:
        np.ndarray: the processed 2-D slice.
    """
    ds = dcm.dcmread(filepath)
    image = standardize_pixel_array(ds)

    # Rescale tags are optional. Default to the identity transform so `slope`
    # and `intercept` are always bound (they used to be undefined, and the
    # function crashed, whenever either tag was missing).
    slope, intercept = 1.0, 0.0
    if ("RescaleIntercept" in ds) and ("RescaleSlope" in ds):
        intercept = float(ds.RescaleIntercept)
        slope = float(ds.RescaleSlope)

    def _first_value(value):
        # WindowCenter / WindowWidth may hold several values; int() cannot
        # convert such a sequence, so use the first window in that case.
        if hasattr(value, '__len__') and not isinstance(value, (str, bytes)):
            return value[0]
        return value

    # find clipping params
    center = int(_first_value(ds.WindowCenter))
    width = int(_first_value(ds.WindowWidth))
    low = center - width / 2
    high = center + width / 2

    image = (image * slope) + intercept
    image = np.clip(image, low, high)
    image = image - image.min()
    peak = image.max()
    if peak > 0:
        # Skip the division for a constant slice to avoid dividing by zero.
        image = (image / peak).astype(np.float64)
    image = image[::downsample_rate, ::downsample_rate]

    return image


def get_z_acquisition_direction(session_path):
    """Compare the Z position of the lowest- and highest-numbered slice of a series.

    Returns:
        str: 'downward' when the highest-numbered slice has a smaller Z than
        the lowest-numbered one, otherwise 'upward'.
    """
    instances = sorted_dcm_labels(session_path)

    def read_z(instance):
        # Last component of ImagePositionPatient for '<session_path>/<instance>.dcm'.
        filepath = session_path + '/' + str(instance) + '.dcm'
        return dcm.dcmread( filepath ).ImagePositionPatient[-1]

    z_start = read_z(instances[0])
    z_end = read_z(instances[-1])

    return 'downward' if z_end < z_start else 'upward'
    
    
def create_3D_segmentations(filepath, downsample_rate=1):
    """Load a segmentation volume as int8, reorient it, and subsample it.

    The reorientation is: swap the first two axes, rotate by 90 degrees in
    the plane of axes (1, 2), reverse axis 0, swap the first two axes again.
    Every `downsample_rate`-th element is then kept along all three axes.
    """
    volume = nib.load(filepath).get_fdata().astype(np.int8)
    volume = volume.transpose(1, 0, 2)
    volume = np.rot90(volume, k=1, axes=(1, 2))
    volume = np.flip(volume, axis=0)
    volume = volume.transpose(1, 0, 2)
    step = slice(None, None, downsample_rate)
    return volume[step, step, step]


def CT_remove_distractions(img):
    """Keep only the two largest connected components of a grayscale slice.

    The image is min-max normalized to uint8, thresholded at 2, and labelled
    with connected components; pixels outside the two labels with the most
    pixels are set to 0.
    """
    # Gray -> BGR -> gray round trip, then stretch to the full uint8 range.
    img = cv2.cvtColor(cv2.cvtColor(img, cv2.COLOR_GRAY2BGR), cv2.COLOR_BGR2GRAY)
    img = cv2.normalize(img, None, alpha = 0, beta = 255, norm_type = cv2.NORM_MINMAX, dtype = cv2.CV_8U)

    _, thresh = cv2.threshold(img, 2, 255, cv2.THRESH_BINARY)
    _, labels, _, _ = cv2.connectedComponentsWithStats(thresh, cv2.CV_32S)

    # Pixel count per label, listed in ascending label order.
    _, pixel_counts = np.unique(labels, return_counts=True)
    good_components = np.argsort(pixel_counts)[::-1][:2]
    good_areas = np.isin(labels, good_components)

    return img * good_areas

def CT_crop_background(img):
    """Crop a 2-D image to the bounding box of its pixels brighter than 5.

    Returns:
        tuple: `(img_cropped, (i_start, i_stop, j_start, j_stop))`, where the
        four ints are slice bounds such that
        `img[i_start:i_stop, j_start:j_stop]` is the returned crop. The same
        bounds can be applied to a matching mask.

    Two fixes over the previous version:
    * the stop bounds are exclusive (`max + 1`), so the last foreground row
      and column are kept instead of being sliced off;
    * an image with no pixel above 5 is returned whole rather than raising
      ValueError on `min()` of an empty array.
    """
    rows, cols = np.where(img > 5)
    if rows.size == 0:
        height, width = img.shape[:2]
        return img, (0, height, 0, width)

    i_start, i_stop = int(rows.min()), int(rows.max()) + 1
    j_start, j_stop = int(cols.min()), int(cols.max()) + 1

    img_cropped = img[i_start:i_stop, j_start:j_stop]
    return img_cropped, (i_start, i_stop, j_start, j_stop)

class Clean_CT(DualTransform):
    """Albumentations transform that cleans, crops, resizes, and pads a CT slice.

    `apply` stores the crop bounds on the instance and `apply_to_mask` reuses
    them, so the image has to be processed before its mask(s).
    """

    def __init__(self, image_size=None, always_apply=True, p=1.0):
        super().__init__(always_apply, p)
        self.image_size = image_size
        self.borders = None  # crop bounds of the most recently processed image

    def _resize_and_pad(self, array, fill_value):
        # Shared tail of the image and mask paths: nearest-neighbour resize to
        # (0.7 * width, width - 10), then constant padding up to image_size.
        pipeline = A.Compose([
            A.Resize(int(self.image_size[1]*0.7), self.image_size[1] - 10, interpolation=cv2.INTER_NEAREST_EXACT),
            A.PadIfNeeded(min_height=self.image_size[0], min_width=self.image_size[1], p=1, border_mode=cv2.BORDER_CONSTANT, value=fill_value)
        ])
        return pipeline(image=array)['image']

    def apply(self, img, **params):
        """Remove distractions, crop the background, then resize and pad with 0."""
        cleaned = CT_remove_distractions(np.array(img.copy()))
        cropped, self.borders = CT_crop_background(cleaned)
        return self._resize_and_pad(cropped, 0)

    def apply_to_mask(self, mask, fill_value=0, **params):
        """Crop the mask with the stored bounds, then resize and pad with `fill_value`."""
        top, bottom, left, right = self.borders
        cropped = np.array(mask.copy())[top:bottom, left:right]
        return self._resize_and_pad(cropped, fill_value)

    def apply_to_masks(self, masks, **params):
        """Process each mask; the first one (background) is padded with 1.0, the rest with 0.0."""
        return [
            self.apply_to_mask(mask, fill_value=1.0 if ind == 0 else 0.0, **params)
            for ind, mask in enumerate(masks)
        ]
    
    
def create_augmentations(organ, filenames, transform_list, repetitions=1):
    """Load a set of image files and run an Albumentations pipeline over them jointly.

    Args:
        organ: when equal to 'Background', zero pixels are replaced by the
            smallest non-zero value and the image is min-max normalized to uint8.
        filenames: file names, resolved against `series_path`.
        transform_list: list of Albumentations transforms passed to A.Compose.
        repetitions: number of times the pipeline is applied.

    Returns:
        tuple: `(reals, augmentations)`. `reals` maps filename -> loaded image;
        `augmentations` is a list with one `{filename: transformed image}`
        dict per repetition.

    Raises:
        FileNotFoundError: when a file cannot be read by OpenCV.
    """
    images_dict = {}     # Albumentations target name -> image
    filenames_dict = {}  # Albumentations target name -> filename
    for ind, filename in enumerate(filenames):
        # NOTE(review): `series_path` is not a parameter; it is read from the
        # enclosing (notebook) scope and must be defined before this is called.
        filepath = os.path.join(series_path, filename)
        # IMREAD_GRAYSCALE is the imread flag for a single-channel read; the
        # previous argument (cv2.COLOR_BGRA2GRAY) is a cvtColor conversion
        # code, not an imread flag.
        img = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'could not read image: {filepath}')

        if organ == 'Background':
            img = np.where(img == 0, np.nan, img)
            img_minimum = np.nanmin(img)
            img = np.where(np.isnan(img), img_minimum, img)
            img = cv2.normalize(img, None, alpha = 0, beta = 255, norm_type = cv2.NORM_MINMAX, dtype=cv2.CV_8U)

        # The first image uses the mandatory 'image' target; later ones are
        # registered as additional targets 'image2', 'image3', ...
        image_key = 'image' if ind == 0 else 'image' + str(ind + 1)

        images_dict[image_key] = img
        filenames_dict[image_key] = filename

    # ----------- transform images
    extra_targets = {key: 'image' for key in list(images_dict)[1:]}
    transform = A.Compose(transform_list, additional_targets=extra_targets)

    augmentations = []
    for _ in range(repetitions):
        out = transform(**images_dict)
        augmentations.append({filenames_dict[key]: val for key, val in out.items()})

    reals = dict(zip(filenames, images_dict.values()))
    return reals, augmentations

def plot_augmentations(reals, augs, title=False):
    """Show the real images and each augmentation set on its own 2x15 figure.

    Args:
        reals: mapping filename -> image.
        augs: list of mappings filename -> image, one per augmentation run.
        title: when truthy, label each panel with the filename stem.
    """
    def draw_grid(images, heading):
        # One figure with 30 panels; only panels that receive an image are
        # drawn on and have their axes hidden.
        fig, ax = plt.subplots(2, 15, figsize=(20, 6))
        panels = ax.ravel()
        for ind, (filename, image) in enumerate(images.items()):
            panels[ind].imshow(image, cmap='gray')
            panels[ind].axis('off')
            if title:
                panels[ind].set_title( filename.split('.')[0] )
        fig.tight_layout(h_pad=0.2, w_pad=0.2)
        fig.suptitle(heading)

    draw_grid(reals, 'Real images')
    for aug_ind, aug in enumerate(augs):
        draw_grid(aug, 'augmentation '+str(aug_ind))

# Visual inspection of a problematic sample

In [None]:
# Show 30 evenly spaced slices of one series after cleaning and histogram equalization.
fig, ax = plt.subplots(5,6, figsize=(20, 15))
ax = ax.ravel()

series_dir = '/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/62847/16405/'
slice_numbers = np.round(np.linspace(1, 501, 30)).astype(int)

for ind, val in enumerate(slice_numbers):
    ct = load_CT_slice(series_dir + str(val) + '.dcm')
    # Clean_CT expects an 8-bit image, so scale the [0, 1] slice to 0..255 first.
    ct_uint8 = np.array(ct*255).astype(np.uint8)
    ct = Clean_CT(image_size=(128, 128))(image=ct_uint8)['image']
    ct = A.Equalize(p=1)(image=ct)['image']
    ax[ind].imshow(ct)
    ax[ind].set_title(str(val) + '.dcm')

# Define hyperparameters

In [None]:
class CFG:
    """Notebook-wide configuration: paths, label maps, and model hyperparameters.

    Instantiating the class reads train.csv to pair its label columns with
    their weights and prints the output configuration.
    """

    def __init__(self):
        self.data_path = set_data_paths()
        self.segmentation_dict = create_segmentation_dict()

        self.create_masks = 0
        self.create_patient_list = 0

        self.DS_RATE = 2 # downsamples slices in each session
        self.image_size = (128, 128)
        self.validation_size = 0.1

        # image transforms
        self.transform_dict = {}

        # ---------------- unet
        self.UNet = {
            'channel_list': [1, 32, 64, 128, 256],
            'n_epochs': 100,
            'batch_size': 64,
            'trevsky_alpha': 0.5,
            'trevsky_beta': 0.5,
            'self_spuervised_learning': 0,
            'confidence_threshold': 0.99,
            'use_unlabeled_frac': 0.1,
        }

        # ---------------- CNN
        self.CNN = {'n_epochs': 100, 'batch_size': 128}

        # ---------------- output
        # Label columns are every train.csv column except the first and the last.
        label_columns = pd.read_csv('/kaggle/input/rsna-2023-abdominal-trauma-detection/train.csv').columns[1:-1]
        label_weights = [1, 2, 1, 6, 1, 2, 4, 1, 2, 4, 1, 2, 4]
        self.output = {
            'columns_weights': dict(zip(label_columns, label_weights)),
            'organs_weights': {'bowel':3, 'extravasation':7, 'kidney':7, 'liver':7, 'spleen':7},
            'nodes': ['bowel', 'extravasation', 'kidney_healthy', 'kidney_low', 'kidney_high', 'liver_healthy', 'liver_low', 'liver_high', 'spleen_healthy', 'spleen_low', 'spleen_high'],
        }
        print(f'output nodes: {self.output["nodes"]}')
        print(f'organs: {self.output["organs_weights"]}')
        print(f'output columns: {self.output["columns_weights"]}')
        
# Run a garbage-collection pass before building the configuration.
gc.collect()
    
cfg = CFG()
# Override the mask directory returned by set_data_paths() with the
# '/kaggle/input/rsna-masks' input folder.
cfg.data_path['masks'] = '/kaggle/input/rsna-masks'

# Print numpy arrays with two decimals and without scientific notation.
np.set_printoptions(precision=2,suppress=True)

In [None]:
# Dataset root; used below to locate train_series_meta.csv and the DICOM folders.
BASE_PATH = "/kaggle/input/rsna-2023-abdominal-trauma-detection"
# NOTE(review): IMAGE_DIR is not referenced by any cell visible here — confirm it is still needed.
IMAGE_DIR = "/tmp/dataset/rsna-atd"
# Step applied to each series' sorted DICOM file list (1 keeps every file).
STRIDE = 1
# Step applied to the list of series folders (10 keeps every tenth folder).
jump_subjects = 10

# Create full patient_list database

In [None]:
# Load the per-slice patient table, keep every cfg.DS_RATE-th row, and record
# the rate used as a categorical column.
patient_list_train = (
    pd.read_parquet('/kaggle/input/patient-list-train/patient_list_train.parquet')
    .iloc[::cfg.DS_RATE, :]
    .assign(DS_RATE=cfg.DS_RATE)
    .astype({'DS_RATE': 'category'})
    .reset_index(drop=True)
)

patient_list_train.head(5)

# find histogram of number of pixels over Z dimension per organ

In [None]:
# For every fifth segmentation volume, count the pixels of each organ per
# Z slice, normalize the profile to [0, 1], resample it to 50 points, and
# average the profiles over all volumes.
filenames = os.listdir('/kaggle/input/rsna-2023-abdominal-trauma-detection/segmentations')
filepaths = [os.path.join('/kaggle/input/rsna-2023-abdominal-trauma-detection/segmentations', filename) for filename in filenames]

count_organs = {organ: [] for organ in cfg.segmentation_dict['segmentation_class_to_inx'].keys()}

for filepath in tqdm(filepaths[::5]):
    img = create_3D_segmentations(filepath, downsample_rate=2)

    # The series id is the run of digits right before '.nii' (dot escaped so
    # it only matches a literal '.').
    session_id = re.search(r'\d+(?=\.nii)', filepath)[0]
    patient_id = str(patient_list_train.loc[patient_list_train['series_id']==int(session_id), 'patient_id'].unique()[0])
    session_path = os.path.join('/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images', patient_id, session_id)

    if get_z_acquisition_direction(session_path) == 'upward':
        # Reverse axis 0 (the axis the per-slice counts run along) so every
        # volume is ordered the same way. The previous `img[::1]` was a no-op.
        img = img[::-1]

    for organ, index in cfg.segmentation_dict['segmentation_class_to_inx'].items():
        pix_count = (img == index).reshape(img.shape[0], -1).sum(-1).flatten()
        span = pix_count.max() - pix_count.min()
        if span > 0:
            pix_count = ( pix_count - pix_count.min() ) / span
        else:
            # Constant profile (e.g. organ absent from this volume): 0/0 would
            # give NaN — silently, since warnings are ignored — and turn the
            # organ's averaged profile into NaN. Use an all-zero profile.
            pix_count = np.zeros(pix_count.size)
        inds = np.linspace(0, pix_count.size-1, 50).astype(int)
        count_organs[organ].append( pix_count[inds] )

for organ, index in cfg.segmentation_dict['segmentation_class_to_inx'].items():
    count_organs[organ] = np.stack(count_organs[organ], axis=-1).mean(-1)

In [None]:
# One line per organ: mean normalized pixel count along the 50 resampled Z positions.
fig, ax = plt.subplots(1,1, figsize=(12, 5))
for organ in cfg.segmentation_dict['segmentation_class_to_inx']:
    ax.plot(count_organs[organ], marker='o', label=organ)
ax.legend()

### Take patients with an injury

In [None]:
# Rows of patients whose liver is not healthy and that have a segmentation,
# ordered by the liver injury columns.
organ = 'liver'
is_injured = patient_list_train[organ+'_healthy']==False
has_segmentation = patient_list_train['has_seg']==True
liver_injured = patient_list_train.loc[is_injured & has_segmentation, :]

organ_cols = [col for col in liver_injured.columns if organ in col and 'healthy' not in col]
liver_injured = liver_injured.sort_values(organ_cols, ascending=True)
liver_injured

In [None]:
# Grid of CT slices (rows) for the first injured patients (columns), with
# the label-1 mask overlaid in red.
num_slices = 30
num_patients = 10
patient_list = liver_injured['patient_id'].unique().tolist()
patient_list = patient_list[:num_patients]
print(patient_list)

hists = []

fig, ax = plt.subplots(num_slices, num_patients, figsize=(2*num_patients, 2*num_slices))
for h in ax.ravel():
    h.axis('off')

# tqdm wraps the list (not the enumerate object) so the bar knows its total.
for patient_counter, patient_id in enumerate(tqdm(patient_list)):
    df = liver_injured.loc[liver_injured['patient_id']==patient_id, :]
    series_id = df['series_id'].unique()[0]
    instance_numbers = df.loc[df['series_id']==series_id, 'instance_number'].values.tolist()
    # Pick num_slices instance numbers spread evenly over the series.
    instance_numbers = [instance_numbers[int(a)] for a in np.linspace(0, len(instance_numbers)-1, num_slices)]

    for instance_counter, instance_number in enumerate(instance_numbers):
        panel = ax[instance_counter, patient_counter]

        filename = os.path.join(cfg.data_path['train'], str(patient_id), str(series_id), str(instance_number)+'.dcm')
        ct = load_CT_slice( filename, downsample_rate=4)
        panel.imshow(cv2.equalizeHist((ct * 255).astype(np.uint8))/255, cmap='gray')

        file_path =  os.path.join(cfg.data_path['masks'], str(patient_id), str(series_id), str(instance_number)+'.png')
        mask = cv2.imread(file_path)
        if mask is None:
            # cv2.imread returns None for a missing/unreadable file; show the
            # CT without an overlay instead of crashing in cvtColor.
            continue
        # Same 4x subsampling as the CT slice so the overlay lines up.
        mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)[::4, ::4]
        # Keep label 1 only; NaN leaves every other pixel transparent.
        mask = np.where(mask == 1, 1, np.nan)
        panel.imshow(mask, alpha=0.3, cmap='Reds',  vmin=0, vmax=1)

#         hists.append(cv2.equalizeHist(mask * ct))

# Create patient list for EDA

In [None]:
# Build a table of sampled DICOM paths: every jump_subjects-th series, and
# within a series either all files (< 3), the interior files (< 10), or
# 19 evenly spaced files.
meta_df = pd.read_csv(f"{BASE_PATH}/train_series_meta.csv")

meta_df["dicom_folder"] = (
    BASE_PATH + "/" + "train_images"
    + "/" + meta_df.patient_id.astype(str)
    + "/" + meta_df.series_id.astype(str)
)

test_folders = meta_df.dicom_folder.tolist()
test_paths = []
# this is a corrupt file in the test set
# NOTE(review): the folders scanned here are under train_images, so this
# test_images path would not be expected to appear — confirm it is still needed.
corrupt_file = '/kaggle/input/rsna-2023-abdominal-trauma-detection/test_images/3124/5842/514.dcm'

for folder in tqdm(test_folders[::jump_subjects]):
    temp = sorted(glob(os.path.join(folder, "*dcm")))[::STRIDE]

    if corrupt_file in temp:
        temp.remove(corrupt_file)

    N = len(temp)
    if N < 3:
        test_paths.extend(temp)
    elif N < 10:
        test_paths.extend(temp[1:-1])
    else:
        inds = [np.floor(N * i/20).astype(int) for i in range(1, 20)]
        test_paths.extend([temp[ind] for ind in inds])

test_df = pd.DataFrame(test_paths, columns=["dicom_path"])
# Path layout is .../<patient_id>/<series_id>/<instance_number>.dcm
test_df["patient_id"] = test_df.dicom_path.map(lambda x: x.split("/")[-3]).astype(int)
test_df["series_id"] = test_df.dicom_path.map(lambda x: x.split("/")[-2]).astype(int)
test_df["instance_number"] = test_df.dicom_path.map(lambda x: x.split("/")[-1].replace(".dcm","")).astype(int)

test_df.head(6)

In [None]:
# Order rows by patient, series, then slice number, and renumber the index.
test_df = (
    test_df
    .sort_values(['patient_id', 'series_id', 'instance_number'])
    .reset_index(drop=True)
)
test_df

# Extract brightness histogram of images

In [None]:
# Overlay the intensity histogram of every tenth sampled slice.
for i in tqdm(range(0, test_df.shape[0], 10)):
    row = test_df.iloc[i, :]
    ct = load_CT_slice(row['dicom_path'], downsample_rate=8).flatten()
    plt.hist(ct, bins=20, alpha=0.1)

# Extract DICOM features from the first sampled slice of each patient

In [None]:
# DICOM attributes to collect, and an empty patient x attribute table to fill.
features = [
    'FrameOfReferenceUID', 'SliceThickness', 'KVP', 'PatientPosition',
    'ImagePositionPatient', 'ImageOrientationPatient', 'PhotometricInterpretation',
    'BitsStored', 'PixelRepresentation',
    'WindowCenter', 'WindowWidth', 'RescaleIntercept', 'RescaleSlope', 'RescaleType',
]
patient_list = test_df['patient_id'].unique().tolist()
out = pd.DataFrame(columns=features, index=patient_list)

In [None]:
# Fill `out` with the DICOM attributes of each patient's first sampled slice.
for patient in tqdm(patient_list):
    instance = test_df.loc[test_df['patient_id'] == patient, 'instance_number'].tolist()[0]

    # load slice
    filepath = test_df.loc[ (test_df['patient_id'] == patient) & (test_df['instance_number'] == instance), 'dicom_path'].values[0]
    ds = dcm.dcmread( filepath )

    # extract features
    for feature in features:
        try:
            # Plain attribute lookup; this used to go through eval('ds.' + feature).
            this_feature = getattr(ds, feature)
            out.loc[patient, feature] = this_feature
        except Exception:
            # Best-effort: a missing or unstorable attribute is recorded as
            # 'NOT_FOUND'. Unlike a bare `except:`, this lets
            # KeyboardInterrupt/SystemExit through.
            out.loc[patient, feature] = 'NOT_FOUND'

out = out.sort_values('PatientPosition')
out

# extract range and directionality of Z direction for all subjects

In [None]:
# For each patient, record the Z position of the first and last sampled slice
# (columns 0 and 1) and the PatientPosition (column 2); preview the first ten.
ranges = pd.DataFrame(index=patient_list, columns=[0, 1])

fig, ax = plt.subplots(2, 10, figsize=(20, 5))

train_images_dir = '/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images'

for patient_ind, patient in tqdm(enumerate(patient_list)):
    patient_rows = test_df.loc[test_df['patient_id'] == patient]
    instances = patient_rows['instance_number'].tolist()
    instances = [instances[0], instances[-1]]
    serie_id = patient_rows['series_id'].unique().tolist()[0]
    direction = get_z_acquisition_direction( os.path.join(train_images_dir, str(patient), str(serie_id)) )

    is_this_patient = ranges.index == patient

    # load slice
    for ind, instance in enumerate(instances):
        filepath = patient_rows.loc[patient_rows['instance_number'] == instance, 'dicom_path'].values[0]
        ds = dcm.dcmread( filepath )
        z = ds.ImagePositionPatient[-1]

        ranges.loc[is_this_patient, ind] = z
        ranges.loc[is_this_patient, 2] = ds.PatientPosition
        if patient_ind >= 10:
            continue
        panel = ax[ind, patient_ind]
        panel.imshow( load_CT_slice(filepath, downsample_rate=8) )
        panel.axis('off')
        panel.set_title(f'{direction}\n {ds.PatientPosition} {z:.0f}')

# Histogram of the range of Z

In [None]:
# Per patient: Z of the last sampled slice (column 1) minus Z of the first (column 0).
ranges.apply(lambda x: x[1] - x[0], axis=1).hist(bins=100)

In [None]:
# One horizontal segment per patient from first-slice Z to last-slice Z.
# Rows: 'FFS' (red) vs. any other position (blue).
# Columns: first Z greater than last Z (left) vs. not (right).
fig, ax = plt.subplots(2,2,figsize=(10, 8))
for this_axis in ax.ravel():
    this_axis.set_xlim([-2000, 2000])

for ind in range(ranges.shape[0]):
    is_ffs = ranges.iloc[ind, -1] == 'FFS'
    clr = 'r' if is_ffs else 'b'
    panel_row = 0 if is_ffs else 1
    panel_col = 0 if ranges.iloc[ind, 0] > ranges.iloc[ind, 1] else 1
    ax[panel_row, panel_col].plot(ranges.iloc[ind, :2], ind*0.5 + np.array([0, 0]), markevery=[0], marker='o', color=clr, alpha=0.2)

# plot first slices

In [None]:
# First sampled slice of 100 randomly drawn patients (drawn with replacement).
fig, ax = plt.subplots(10, 10, figsize=(20, 40))
ax = ax.ravel()

plist = [patient_list[a] for a in np.random.randint(0, len(patient_list), 100)]
train_images_dir = '/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images'

for patient_ind, patient in tqdm(enumerate(plist)):
    patient_rows = test_df.loc[test_df['patient_id'] == patient]
    instances = patient_rows['instance_number'].tolist()[0]
    serie_id = patient_rows['series_id'].unique().tolist()[0]
    direction = get_z_acquisition_direction( os.path.join(train_images_dir, str(patient), str(serie_id)) )

    # load slice
    filepath = patient_rows.loc[patient_rows['instance_number'] == instances, 'dicom_path'].values[0]
    ds = dcm.dcmread( filepath )
    z = ds.ImagePositionPatient[-1]

    panel = ax[patient_ind]
    panel.imshow( load_CT_slice(filepath, downsample_rate=8) )
    panel.axis('off')
    panel.set_title(f'{patient}{direction[:2]}\n {z:.0f}')

# Plot some patients at their last slice

In [None]:
# Last sampled slice of 100 randomly drawn patients (drawn with replacement).
fig, ax = plt.subplots(10, 10, figsize=(20, 40))
ax = ax.ravel()

plist = [patient_list[a] for a in np.random.randint(0, len(patient_list), 100)]
train_images_dir = '/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images'

for patient_ind, patient in tqdm(enumerate(plist)):
    patient_rows = test_df.loc[test_df['patient_id'] == patient]
    instances = patient_rows['instance_number'].tolist()[-1]
    serie_id = patient_rows['series_id'].unique().tolist()[0]
    direction = get_z_acquisition_direction( os.path.join(train_images_dir, str(patient), str(serie_id)) )

    # load slice
    filepath = patient_rows.loc[patient_rows['instance_number'] == instances, 'dicom_path'].values[0]
    ds = dcm.dcmread( filepath )
    z = ds.ImagePositionPatient[-1]

    panel = ax[patient_ind]
    panel.imshow( load_CT_slice(filepath, downsample_rate=8) )
    panel.axis('off')
    panel.set_title(f'{patient}{direction[:2]}\n {z:.0f}')

# Slice thickness

In [None]:
# Distribution of the SliceThickness attribute across patients.
out['SliceThickness'].hist(bins=20)
plt.xlabel('in mm')

# KVP (Peak kilo voltage output of the X-Ray generator used.)

In [None]:
# Distribution of the KVP attribute across patients.
out.KVP.hist(bins=20)
# KVP is a peak kilovoltage, so the axis unit is kV (the label used to say 'in mm').
plt.xlabel('in kV')

# PatientPosition (with respect to machine)

In [None]:
# Distribution of PatientPosition across patients.
# The codes are translated before plotting so each bar is labelled with its
# own value. The previous fixed tick labels (tick 0 = 'Head First Supine')
# did not depend on the data, and since `out` is sorted by PatientPosition
# the 'FFS' rows come before 'HFS' and would get the wrong label.
position_names = {'HFS': 'Head First Supine', 'FFS': 'Foot First Supine'}
_ = out.PatientPosition.map(lambda code: position_names.get(code, code)).hist(bins=20)

# ImagePositionPatient (specifies the x, y, and z coordinates of the upper left hand corner of the image)

> Here we pick, for each patient, the slice with the smallest number (e.g. 30.dcm), meaning that it was the first image acquired. So does the Z histogram show the acquisition direction?

In [None]:
# One histogram per coordinate of ImagePositionPatient, coloured by PatientPosition.
fig = plt.figure(figsize=(15,5))
directions = {'x': 0, 'y': 1, 'z': 2}
for key, val in directions.items():
    ax = fig.add_subplot(1, 3, val + 1)

    df = out.ImagePositionPatient.apply(lambda x: x[val]).to_frame()
    df['type'] = out['PatientPosition'].astype('category')

    sns.histplot(df, x='ImagePositionPatient', hue='type', binwidth=10, label=key, alpha=0.2, kde=True, linewidth=0, ax=ax)
    ax.set_title(key)

# ImageOrientationPatient (specifies the direction cosines of the first row and the first column with respect to the patient)

In [None]:
# Count how often each (rounded) orientation occurs.
# The cosines are rounded to the nearest integer: int() truncates toward
# zero, so a cosine such as 0.999 used to become 0 and the row appeared to
# have no 1 at all. Tuples are used because they are hashable.
rounded_orientations = out.ImageOrientationPatient.apply(lambda x: tuple(int(round(float(a))) for a in x))
print( rounded_orientations.value_counts() )

print('\nCosines are rounded to the nearest integer; any orientation still missing a 1 in the first or second triple is probably a false report')

In [None]:
# One histogram per direction cosine of ImageOrientationPatient
# (first three: row direction, last three: column direction).
fig = plt.figure(figsize=(15,5))
directions = {'xrow': 0, 'yrow': 1, 'zrow': 2, 'xcol': 3, 'ycol': 4, 'zcol': 5}
for key, val in directions.items():
    ax = fig.add_subplot(2, 3, 1+val)
    # Round to the nearest integer; astype(int) truncated toward zero, so
    # values such as 0.999 or -0.999 were all counted as 0.
    cosines = out.ImageOrientationPatient.apply(lambda x: int(round(float(x[val]))))
    sns.histplot(cosines, alpha=0.2, ax=ax)
    ax.set_xlim([-1, 2])
    ax.set_title(key)

# PixelRepresentation (0000H unsigned integer. 0001H 2's complement)

In [None]:
# Distribution of the PixelRepresentation attribute across patients.
out['PixelRepresentation'].hist(bins=20)