In [5]:
from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import load_img
import numpy as np
import os
from matplotlib import pyplot as plt

import nibabel as nib

In [6]:
# Formulate train, val, test file paths for "scans" and "labels"
# Stored as filepaths, as the generator will do the file reading
def get_nifti_files_in(directory):
    """Return the sorted paths of all ".nii.gz" entries directly inside `directory`.

    Only the immediate entries reported by `os.listdir` are considered (no
    recursion). Each returned path is `directory` joined with the entry name.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A list of paths, sorted so the order does not depend on the order in
        which the file system lists the entries.
    """
    return sorted(
        os.path.join(directory, entry)
        for entry in os.listdir(directory)
        if entry.endswith(".nii.gz")
    )

# Root folder of the dataset and the volume shape used to size batches and the model input
base_path = "data"
data_dimensions = (256, 256, 128)

# Segmentation classes: Background, Body, Bone, Bladder, Rectum, Prostate
class_count = 6

# Scans and labels live in sibling folders; both listings come back sorted
scans_directory = os.path.join(base_path, "semantic_MRs_anon")
labels_directory = os.path.join(base_path, "semantic_labels_anon")
scans = get_nifti_files_in(scans_directory)
labels = get_nifti_files_in(labels_directory)

# Every scan must have a label volume to pair with
assert len(scans) == len(labels)

# Pair the i-th scan with the i-th label as rows: [[scan_filename, label_filename], ...]
# NOTE(review): pairing relies on both sorted listings being in matching order — confirm the naming scheme
data_paths = np.column_stack((scans, labels))

print(f"Raw Labelled Scans: {len(data_paths)}")

Raw Labelled Scans: 211


In [7]:
# Generator:
# The maximum scan voxel value in the dataset was above 512 but below 1023, so voxels are normalised by 1023 (2^10 - 1, the largest 10-bit value)
class Prostate3DGenerator(keras.utils.Sequence):
    """Keras `Sequence` that loads batches of (scan, one-hot label) volumes on demand.

    Only the file paths are kept on the instance; the NIfTI files are read with
    nibabel every time a batch is requested.

    Args:
        data_paths: Indexable collection of `[scan_path, label_path]` pairs.
        batch_size: Number of scan/label pairs per batch.
        data_dimensions: Shape of a single volume, used to allocate the batch arrays.
        class_count: Number of classes used for the one-hot label encoding.
    """

    def __init__(self, data_paths, batch_size, data_dimensions, class_count):
        self.data_paths = data_paths
        self.batch_size = batch_size
        self.data_dimensions = data_dimensions
        self.class_count = class_count

    def __len__(self):
        """Return the number of full batches; a trailing partial batch is not counted."""
        return len(self.data_paths) // self.batch_size

    def __getitem__(self, index):
        """Load batch number `index`.

        Returns:
            A `(scans, labels)` tuple of NumPy arrays, where `n` is the number
            of pairs in the requested slice (equal to `batch_size` for every
            index below `len(self)`):

            * `scans`: float32, shape `(n, *data_dimensions)`, voxel values
              divided by 1023.
            * `labels`: uint8 one-hot, shape `(n, *data_dimensions, class_count)`.
        """
        start_index = index * self.batch_size
        batch_data_paths = self.data_paths[start_index : start_index + self.batch_size]

        # Size the buffers by the actual slice so no uninitialised np.empty rows are ever returned
        sample_count = len(batch_data_paths)

        # float32 scans and uint8 one-hot labels (values are only 0 or 1) keep the
        # host-side batch small compared with NumPy's float64 / int64 defaults
        scans = np.empty((sample_count, *self.data_dimensions), dtype=np.float32)
        labels = np.empty((sample_count, *self.data_dimensions, self.class_count), dtype=np.uint8)

        for data_index, (scan_path, label_path) in enumerate(batch_data_paths):
            # Populate "scans": stay in NumPy; the assignment casts to float32
            scan_voxels = np.asarray(nib.load(scan_path).dataobj)
            scans[data_index] = scan_voxels / 1023.0

            # Populate "labels": one-hot encode the class index stored in each voxel
            label_voxels = np.asarray(nib.load(label_path).dataobj)
            labels[data_index] = keras.utils.to_categorical(label_voxels, num_classes=self.class_count)

        return scans, labels

In [17]:
# Create Train, Val, Test split (move into utility function)
train_split = 0.85
val_split = 0.1
test_split = 0.05

# Fixed seed so that a fresh kernel always reproduces the same split
split_seed = 42

# Compare with a tolerance: fractions such as 0.1 are not exact in binary floating point
assert np.isclose(train_split + val_split + test_split, 1.0)

source_data_length = len(data_paths)

# Randomise input pairs to get a random distribution in train, val, test.
# A seeded permutation is used to index `data_paths` rather than shuffling it in
# place, so re-running this cell produces the same buckets instead of re-shuffling.
shuffled_paths = data_paths[np.random.default_rng(split_seed).permutation(source_data_length)]

# Calculate number of data pairs for each bucket (test takes whatever remains).
# Sizes are rounded up; the product is rounded to 6 decimals first so floating
# point noise (e.g. 3.0000000000000004) cannot push a whole number up by one.
train_count = int(np.ceil(np.round(source_data_length * train_split, 6)))
val_count = int(np.ceil(np.round(source_data_length * val_split, 6)))

# Slice data paths into each bucket to be fed into different generators
train_paths = shuffled_paths[:train_count]
val_paths = shuffled_paths[train_count:train_count + val_count]
test_paths = shuffled_paths[train_count + val_count:]

# The three buckets must cover every pair exactly once
assert len(train_paths) + len(val_paths) + len(test_paths) == source_data_length

print("Train Count: " + str(len(train_paths)))
print("Val Count: " + str(len(val_paths)))
print("Test Count: " + str(len(test_paths)))

Train Count: 180
Val Count: 22
Test Count: 9


In [28]:
# Initialize generators: one per data bucket, all sharing the same batch size,
# volume shape and class count
batch_size = 1

train_generator, val_generator, test_generator = (
    Prostate3DGenerator(bucket_paths, batch_size, data_dimensions, class_count)
    for bucket_paths in (train_paths, val_paths, test_paths)
)

In [29]:
# Construct keras model: a 3D U-Net style encoder/decoder with skip connections.
# Input is one single-channel volume; output is a per-voxel softmax over `class_count` classes.
first_layer = layers.Input((*data_dimensions, 1))

# We will pass through "previous" to represent last layer
previous = first_layer

# Number of convolution filters at each resolution level (first entry = full resolution).
# NOTE: with a 256x256x128 input, one 64-channel float32 activation is
# 256*256*128*64*4 bytes = 2 GiB per sample; the recorded training run failed
# with a GPU out-of-memory error allocating exactly that tensor shape.
filters = [64, 128, 256]

# LEFT SIDE (contracting path)
downscale_layers = len(filters)
downscale_filters = filters
downscale_tails = []
for i in range(downscale_layers):
    # Two 3x3x3 convolutions per level with batch normalisation in between
    previous = layers.Conv3D(downscale_filters[i], (3, 3, 3), padding='same', activation='relu')(previous)
    previous = layers.BatchNormalization()(previous)
    previous = layers.Conv3D(downscale_filters[i], (3, 3, 3), padding='same', activation='relu')(previous)

    # The deepest level is the bottleneck: no skip connection and no pooling after it
    if i != downscale_layers - 1:
        # Keep the pre-pooling output so the matching upscale level can concatenate it
        downscale_tails.append(previous)
        previous = layers.MaxPool3D((2, 2, 2), strides=(2, 2, 2))(previous)
        previous = layers.Dropout(0.2)(previous)

# RIGHT SIDE (expanding path)
# reverse references, since upscale is looped other way
downscale_tails = list(reversed(downscale_tails))

# One upscale level per pooling step; the bottleneck filter count is skipped
upscale_layers = len(filters) - 1
upscale_filters = list(reversed(filters))[1:]
for i in range(upscale_layers):
    # Up Convolution: stride-2 transpose convolution doubles each spatial dimension
    previous = layers.Conv3DTranspose(upscale_filters[i], (2, 2, 2), strides=(2, 2, 2))(previous)
    previous = layers.Dropout(0.2)(previous)

    # Pull across the saved output of the downscale level at the same resolution
    tail = downscale_tails[i]
    previous = layers.concatenate([previous, tail])

    # Convolutions
    previous = layers.Conv3D(upscale_filters[i], (3, 3, 3), padding='same', activation='relu')(previous)
    previous = layers.BatchNormalization()(previous)
    previous = layers.Conv3D(upscale_filters[i], (3, 3, 3), padding='same', activation='relu')(previous)

# 1x1x1 convolution maps the features to per-voxel class probabilities
last_layer = layers.Conv3D(class_count, (1, 1, 1), activation='softmax')(previous)

model = keras.Model(first_layer, last_layer)

# summary() prints the table itself and returns None, so it is not wrapped in print()
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 256, 256, 12 0                                            
__________________________________________________________________________________________________
conv3d_17 (Conv3D)              (None, 256, 256, 128 1792        input_4[0][0]                    
__________________________________________________________________________________________________
batch_normalization_7 (BatchNor (None, 256, 256, 128 256         conv3d_17[0][0]                  
__________________________________________________________________________________________________
conv3d_18 (Conv3D)              (None, 256, 256, 128 110656      batch_normalization_7[0][0]      
____________________________________________________________________________________________

In [30]:
# Number of passes over the training generator
epochs = 15

# Checkpoint to "oasis.h5"; save_best_only=True restricts saving to the best result so far
callbacks = [keras.callbacks.ModelCheckpoint("oasis.h5", save_best_only=True)]

model.compile(
    loss='categorical_crossentropy',
    optimizer="adam",
    metrics=["accuracy"],
)

# To resume from an earlier checkpoint, uncomment the next line:
#model.load_weights("oasis.h5")

# Train the model, doing validation at the end of each epoch.
train_data = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=epochs,
    callbacks=callbacks,
)

Epoch 1/15


ResourceExhaustedError:  OOM when allocating tensor with shape[1,256,256,128,64] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node model_1/batch_normalization_7/moments/SquaredDifference-0-0-TransposeNCDHWToNDHWC-LayoutOptimizer}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_train_function_6633]

Function call stack:
train_function
