# Setting up :

## Importing libraries :

In [1]:
# Installing the libraries that cannot be downloaded directly (the internet is
# disabled), using the wheels stored in a local Kaggle dataset :

import importlib.util

# NOTE(review): "hydra" is not one of the packages installed below; it is only
# used as a marker for "the environment was already prepared" - confirm that
# this is the intended sentinel.
check_module = importlib.util.find_spec("hydra") is not None
if not check_module:
    import subprocess
    import sys

    for package in ('python-gdcm', 'pylibjpeg', 'pylibjpeg-libjpeg', 'pydicom', 'livelossplot'):
        # sys.executable makes pip install into the interpreter running this
        # code instead of whichever `python` comes first on PATH.
        result = subprocess.run([sys.executable, '-m', 'pip', 'install', '--no-index',
                                 '--find-links=/kaggle/input/notebook-to-download-packages',
                                 package])
        # The installation stays best-effort (no exception is raised), but a
        # failure is reported instead of being silently ignored.
        if result.returncode != 0:
            print("Warning: pip exited with code {} while installing {}".format(result.returncode, package))
    del package, result
else:
    print("Environment is already setup")

del check_module

Looking in links: /kaggle/input/notebook-to-download-packages
Processing /kaggle/input/notebook-to-download-packages/python_gdcm-3.0.19-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Installing collected packages: python-gdcm
Successfully installed python-gdcm-3.0.19




Looking in links: /kaggle/input/notebook-to-download-packages
Processing /kaggle/input/notebook-to-download-packages/pylibjpeg-1.4.0-py3-none-any.whl
Installing collected packages: pylibjpeg
Successfully installed pylibjpeg-1.4.0




Looking in links: /kaggle/input/notebook-to-download-packages
Processing /kaggle/input/notebook-to-download-packages/pylibjpeg_libjpeg-1.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Installing collected packages: pylibjpeg-libjpeg
Successfully installed pylibjpeg-libjpeg-1.3.2




Looking in links: /kaggle/input/notebook-to-download-packages




Looking in links: /kaggle/input/notebook-to-download-packages
Processing /kaggle/input/notebook-to-download-packages/livelossplot-0.5.5-py3-none-any.whl
Installing collected packages: livelossplot
Successfully installed livelossplot-0.5.5




In [2]:
import os
import glob
import gc

import numpy as np
import pandas as pd
from scipy import ndimage

from joblib import Parallel, delayed
from tqdm.notebook import tqdm

import pydicom as dicom
import gdcm

import tensorflow as tf
from tensorflow import keras
from livelossplot import PlotLossesKeras

## Defining useful features and paths :

In [3]:
## Defining each useful path :

# Where the original images are :
TRAIN_IMAGES_PATH = '/kaggle/input/rsna-2022-cervical-spine-fracture-detection/train_images/'
TEST_IMAGES_PATH = '/kaggle/input/rsna-2022-cervical-spine-fracture-detection/test_images/'

# Paths of the dataframes :
TRAIN_CSV_PATH = '/kaggle/input/rsna-2022-cervical-spine-fracture-detection/train.csv'
TEST_CSV_PATH = '/kaggle/input/rsna-2022-cervical-spine-fracture-detection/test.csv'

# Where we are going to save the preprocessed data :
TEST_OUTPUT_PATH = './test_arrays/'
# exist_ok=True makes re-running this cell safe without a separate
# "does it exist?" check beforehand.
os.makedirs(TEST_OUTPUT_PATH, exist_ok=True)

# Where the trained model is :
MODEL_PATH = "/kaggle/input/3d-conv-training-phase"

In [4]:
## Defining useful features :

# Target size of the preprocessed volumes along each axis :

desired_width, desired_height, desired_depth = 64, 64, 64

# Listing the entries of the image folders (each entry is later used as the
# folder name of one scan) :

train_images, test_images = (
    os.listdir(images_folder) for images_folder in (TRAIN_IMAGES_PATH, TEST_IMAGES_PATH)
)

# Reading the csv files as dataframes :

train_df, test_df = (
    pd.read_csv(csv_file) for csv_file in (TRAIN_CSV_PATH, TEST_CSV_PATH)
)


# Loading and transforming test images into 3D volumes :

In [5]:
### Creating a function to load dicom files, even if compressed :

def load_dicom(path: str):
    """Load a dicom file (.dcm) and return its pixel data as a numpy array.

    Compressed files are decoded by pydicom itself, which uses the optional
    decoders installed in the environment (python-gdcm, pylibjpeg) when it
    needs them, so no separate decompression step is required here.

    Parameters
    ----------
    path : str
        Path of the dicom file to read.

    Raises
    ------
    RuntimeError
        If the pixel data cannot be decoded; the original decoding error is
        chained as the cause.
    """
    dataset = dicom.dcmread(path)
    try:
        return dataset.pixel_array
    except Exception as decoding_error:
        # The previous fallback chained calls on gdcm.ImageReader() in a way
        # that could not succeed, so it only hid the real error behind an
        # AttributeError. Surface the real cause, along with the file that
        # triggered it, instead.
        raise RuntimeError(
            "Unable to decode the pixel data of {!r}".format(path)
        ) from decoding_error

In [6]:
# Preprocessing of a single slice : dicom loading, normalization and 2D resize :

def preprocessing_slice(slice_path: str):
    """Load one dicom slice, rescale it to the 0-255 range and resize it.

    The returned 2D array is zoomed so that its first axis has
    ``desired_width`` elements and its second axis ``desired_height``.
    """
    # NOTE(review): the raw pixels are cast to uint8 *before* the min/max
    # normalization, which wraps any value outside 0-255. Kept unchanged here;
    # confirm how the trained model was fed before altering it.
    pixels = load_dicom(slice_path).astype(np.uint8)

    # Min-max normalization (a constant slice is left as all zeros) :
    shifted = pixels - np.min(pixels)
    peak = np.max(shifted)
    scaled = shifted / peak if peak != 0 else shifted
    normalized = (scaled * 255).astype(np.uint8)

    # Resize (2D) with a spline interpolation of order 3 :
    zoom_factors = (
        desired_width / normalized.shape[0],
        desired_height / normalized.shape[1],
    )
    return ndimage.zoom(normalized, zoom_factors, order=3)

In [7]:
def resize_depth(numpy_volume: np.ndarray, desired_depth=desired_depth):
    """Resize a stack of slices across its first axis (the z-axis).

    Parameters
    ----------
    numpy_volume : np.ndarray
        3D array whose first axis indexes the slices.
    desired_depth : int
        Number of slices wanted in the output.

    Returns
    -------
    np.ndarray
        The rotated volume, zoomed along its first axis only.

    Raises
    ------
    ValueError
        If the volume contains no slice.
    """
    ## Get current depth
    current_depth = numpy_volume.shape[0]
    if current_depth == 0:
        # Fail with an explicit message instead of a bare division by zero.
        raise ValueError("Cannot resize the depth of an empty volume")
    ## Compute depth factor
    depth_factor = desired_depth / current_depth

    ## Resize across z-axis
    # Rotate
    # NOTE(review): no `axes` argument is given, so scipy rotates in the plane
    # of the first two axes (its default), i.e. the slice axis is involved in
    # the rotation - confirm this matches the preprocessing used for training.
    numpy_volume = ndimage.rotate(numpy_volume, 90, reshape=False)
    # Resize
    volume = ndimage.zoom(numpy_volume, (depth_factor, 1, 1), order=1) # resize with spline interpolation of order 1
    return volume

In [8]:
# Loading a whole scan, with the per-slice preprocessing spread over parallel workers :

def load_and_stack_dicom_parallel(scan_path: str):
    """Preprocess every file of a scan folder and return them as one volume.

    The files are ordered by the integer found before the first dot of their
    name, preprocessed in parallel, stacked, resized on depth and finally
    given a trailing axis of size 1.
    """
    def slice_number(file_path):
        # Integer located before the first dot of the file name.
        return int(file_path.split('/')[-1].split(".")[0])

    # Listing the files of the scan in numerical order :
    slice_paths = glob.glob(os.path.join(scan_path, "*"))
    slice_paths.sort(key=slice_number)

    # Preprocessing the slices, using every available core (n_jobs=-1) :
    run_in_parallel = Parallel(n_jobs=-1)
    images = run_in_parallel(delayed(preprocessing_slice)(slice_path) for slice_path in slice_paths)

    # Stacking the slices, resizing on depth and adding the last axis :
    stacked_volume = resize_depth(np.array(images))
    return tf.expand_dims(stacked_volume, axis=3)

In [9]:
# Function to create and save the 3D volume corresponding to a scan :

def save_3D_arrays(scan_path: str, output_path: str):
    """Create the 3D array of a scan and save it as ``<scan folder name>.npy``.

    Parameters
    ----------
    scan_path : str
        Folder containing the dicom files of the scan (a trailing separator
        is accepted).
    output_path : str
        Folder in which the ``.npy`` file is written (with or without a
        trailing separator).

    Returns
    -------
    None
    """
    # Preprocessing and creation :
    volume = load_and_stack_dicom_parallel(scan_path=scan_path)

    # normpath drops a possible trailing separator so that basename always
    # yields the scan folder name; join adds a separator only when needed.
    scan_name = os.path.basename(os.path.normpath(scan_path))
    volume_file_name = os.path.join(output_path, scan_name + '.npy')

    # Saving the numpy array :
    np.save(volume_file_name, volume)

    return None

In [10]:
# Preprocessing every test scan and saving its volume to disk :

for scan_name in tqdm(test_images):
    save_3D_arrays(TEST_IMAGES_PATH + scan_name, TEST_OUTPUT_PATH)


# Running the garbage collector to free up memory :

gc.collect()

  0%|          | 0/3 [00:00<?, ?it/s]

2022-10-23 12:26:38.609885: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-23 12:26:38.778681: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-23 12:26:38.779491: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-23 12:26:38.781780: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

1161

# Inference : loading the trained model and making predictions :

In [11]:
# Loading the trained model from its saved folder :

model_directory = os.path.join(MODEL_PATH, "InceptionV3-a-64x64x64")
model = keras.models.load_model(model_directory)

In [12]:
# Predicting on all the test scans as a single batch :

# Each volume is loaded by scan name, following the order of `test_images`, so
# that row i of `predictions` belongs to test_images[i]. The submission row ids
# are built from `test_images` too, whereas the order returned by listing the
# output folder is not guaranteed to match it.
batch_x = np.empty((len(test_images), desired_depth, desired_width, desired_height, 1))
for i_scan, scan_name in enumerate(test_images):
    batch_x[i_scan] = np.load(os.path.join(TEST_OUTPUT_PATH, scan_name + '.npy'))

predictions = model.predict(batch_x)
print(predictions)

2022-10-23 12:27:16.555586: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2022-10-23 12:27:18.571683: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


[[0.46677107 0.07674474 0.13378428 0.02854701 0.07287808 0.0864919
  0.13052486 0.19334179]
 [0.4667692  0.07673637 0.13377388 0.02854234 0.07286994 0.08648307
  0.1305145  0.19333133]
 [0.4667689  0.07673476 0.13377188 0.02854146 0.0728684  0.08648145
  0.13051249 0.19332929]]


# Creating submission :

In [13]:
# Reordering the outputs for the submission file : the first value of each
# prediction row is moved to the end ('patient_overall' comes last) :

# Shifting every row by one position to the left sends column 0 to the last
# column and keeps the other values in their original order.
submission = np.roll(predictions, -1, axis=1)

submission

array([[0.07674474, 0.13378428, 0.02854701, 0.07287808, 0.0864919 ,
        0.13052486, 0.19334179, 0.46677107],
       [0.07673637, 0.13377388, 0.02854234, 0.07286994, 0.08648307,
        0.1305145 , 0.19333133, 0.4667692 ],
       [0.07673476, 0.13377188, 0.02854146, 0.0728684 , 0.08648145,
        0.13051249, 0.19332929, 0.4667689 ]], dtype=float32)

In [14]:
## Building the submission dataframe :

# Flattening the predictions row after row into one vector (the 'fractured' column) :

fractured_col_values = np.concatenate(submission)

# Building the 'row_id' values : eight ids per scan, in the same order as the
# values of a prediction row :

row_id_suffixes = ('_C1', '_C2', '_C3', '_C4', '_C5', '_C6', '_C7', '_patient_overall')
row_id_values = [scan_name + suffix for scan_name in test_images for suffix in row_id_suffixes]

# Assembling the dataframe :

submissions_df = pd.DataFrame({"row_id": row_id_values, "fractured": fractured_col_values})

submissions_df

Unnamed: 0,row_id,fractured
0,1.2.826.0.1.3680043.22327_C1,0.076745
1,1.2.826.0.1.3680043.22327_C2,0.133784
2,1.2.826.0.1.3680043.22327_C3,0.028547
3,1.2.826.0.1.3680043.22327_C4,0.072878
4,1.2.826.0.1.3680043.22327_C5,0.086492
5,1.2.826.0.1.3680043.22327_C6,0.130525
6,1.2.826.0.1.3680043.22327_C7,0.193342
7,1.2.826.0.1.3680043.22327_patient_overall,0.466771
8,1.2.826.0.1.3680043.25399_C1,0.076736
9,1.2.826.0.1.3680043.25399_C2,0.133774


In [15]:
# Writing the submission dataframe to "submission.csv" in the working directory,
# without the index column :

submissions_df.to_csv("submission.csv", index=False)