## DataGenerator.py

In [None]:
import numpy as np
import nibabel as nib
import keras
import cv2

class DataGenerator(keras.utils.Sequence):
    """Keras Sequence that serves batches of 2-D slices read from .nii files.

    Every sample is identified by a tuple
    ``(file_path, vol_num, slice_type, slice_num)``. For each sample the
    slice is loaded, divided by the max brightness of its volume, resized to
    ``dim`` and stored in channel 0 of the batch array.
    """

    def __init__(self, list_IDs, labels, max_brightness, batch_size=64, dim=(128,64,1), n_channels=1,
                 n_classes=10, shuffle=True):
        """Store the sample index and the batching options.

        - list_IDs: list of tuples, each tuple is
          (file_path, vol_num, slice_type, slice_num).
        - labels: dictionary keyed by (file_path, vol_num, slice_type,
          slice_num); the value is the integer label. IDs missing from the
          dictionary get label 0.
        - max_brightness: dictionary keyed by (file_path, vol_num); the value
          is the max voxel brightness of the volume, used for normalizing the
          slices of that volume. If a key is missing, the max of the slice
          itself is used instead.
        - batch_size: number of samples per batch.
        - dim: target (rows, cols) of every slice. Only the first two entries
          are used; the channel axis comes from n_channels.
        - n_channels: size of the trailing channel axis of X. Only channel 0
          is filled with slice data.
        - n_classes: number of classes for the one-hot encoding of y.
        - shuffle: whether to reshuffle the sample order after each epoch.
        """
        super().__init__()
        # Keep only the spatial part of dim: appending n_channels to a
        # 3-entry dim such as the default (128,64,1) would produce a 5-D
        # batch array that the per-sample assignment cannot fill.
        self.dim = tuple(dim)[:2]
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.max_vox_val = max_brightness
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch (remainder is dropped)."""
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Generate the batch number ``index`` as a tuple (X, y)."""
        # Positions (into list_IDs) of the samples that make up this batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Translate positions into sample IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        return self.__data_generation(list_IDs_temp)

    def on_epoch_end(self):
        """Rebuild the sample order, shuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __normalize(self, img, file_path, vol_num):
        """Normalize a slice by the vox brightness value provided in self.max_vox_val.

        Falls back to the max of the slice itself when the volume has no
        entry in the dictionary.
        """
        maxVal = self.max_vox_val.get((file_path, vol_num))
        if maxVal is None:
            # Computed only when needed instead of on every call
            maxVal = np.amax(img)
        if maxVal == 0:
            # Avoid dividing by zero (NaN/inf pixels); a zero maximum is
            # presumably an all-black volume - TODO confirm data is non-negative
            return np.zeros(np.shape(img), dtype=float)
        return img/maxVal

    def __resize(self, img):
        """Resize a slice to (dim[0], dim[1]) = (rows, cols)."""
        # cv2.resize takes dsize as (width, height), i.e. (cols, rows)
        return cv2.resize(img, (self.dim[1], self.dim[0]), interpolation=cv2.INTER_NEAREST)

    def __load_nii_slice(self, file_path, vol_num, slice_type, slice_num):
        """Load a single slice from a nii file, normalized and resized.

        slice_type selects the axis that slice_num indexes:
        0 = third axis (axial), 1 = first axis (sagittal),
        2 = second axis (coronal). Raises ValueError for any other value.
        """
        nii_file = nib.load(file_path)

        if slice_type == 0:  # Axial slice
            img = nii_file.dataobj[:,:,slice_num,vol_num]
        elif slice_type == 1:  # Sagittal slice
            img = nii_file.dataobj[slice_num,:,:,vol_num]
        elif slice_type == 2:  # Coronal slice
            img = nii_file.dataobj[:,slice_num,:,vol_num]
        else:
            raise ValueError("slice_type must be 0, 1 or 2, got {!r}".format(slice_type))

        normalized = self.__normalize(img, file_path, vol_num)

        return self.__resize(normalized)

    def __get_slice_label(self, file_path, vol_num, slice_type, slice_num):
        """Look for slice label given file_path, volume, slice_type, and slice_num,
        returns a default_label value if the label is not found in the dictionary"""
        default_label = 0
        return self.labels.get((file_path, vol_num, slice_type, slice_num), default_label)

    def __data_generation(self, list_IDs_temp):
        """Build one batch for the given IDs.

        Returns (X, y): X has shape (n_samples, *dim, n_channels) and y is
        the one-hot encoding of the labels with n_classes columns.
        """
        # Size the arrays by the IDs actually supplied and zero-fill them so
        # no uninitialised memory reaches the model (short batch, or
        # channels other than 0).
        n_samples = len(list_IDs_temp)
        X = np.zeros((n_samples, *self.dim, self.n_channels))
        y = np.zeros((n_samples), dtype=int)

        # Generate data for nii slices
        for i, ID in enumerate(list_IDs_temp):
            file_path, vol_num, slice_type, slice_num = ID
            X[i,:,:,0] = self.__load_nii_slice(file_path, vol_num, slice_type, slice_num)
            y[i] = self.__get_slice_label(file_path, vol_num, slice_type, slice_num)

        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)

## LabelGenerator.py

In [37]:
import os
import pickle

class LabelGenerator:
    """Build slice IDs, labels and per-volume max values for the .nii scans.

    After construction:
    - idList is a list of IDs (filepath, volume, direction, slice number),
      with direction 1 (sagittal) and 2 (coronal) for every slice number in
      [slice_start, slice_end) of every volume of every .nii file found.
    - labels maps each ID to 1 if its volume is listed as bad in the csv
      file, otherwise 0.
    - maxVals is the object loaded from the pickle file, re-keyed from
      (scan name, volume) to (filepath, volume).
    """

    def __init__(self, folder="../Calgary_PS_DTI_Dataset/", labelfname="Bad750Volumes.csv",
                 max_vals_fname="maxVals.pickle", n_volumes=35, slice_start=96, slice_end=160):
        """Scan ``folder`` and build idList, labels and maxVals.

        - folder: directory searched recursively for files ending in '.nii'.
        - labelfname: csv file with a header row; column 0 is the scan name,
          column 1 a ';'-separated list of 1-based bad volume numbers.
        - max_vals_fname: pickle file holding a dict keyed by
          (scan name, volume).
        - n_volumes: number of volumes enumerated per file.
        - slice_start, slice_end: half-open range of slice numbers used.

        Raises KeyError if the pickle has no entry for a (scan name, volume)
        pair that is needed.
        """
        niiFiles = list()
        sNames = dict()

        self.idList = list()
        self.labels = dict()
        self.maxVals = None

        for dirpaths, dirs, files in os.walk(folder):
            for file in files:
                if file.endswith('.nii'):
                    filePath = os.path.join(dirpaths, file)
                    niiFiles.append(filePath)
                    # Scan name = file name up to the last '_', or without
                    # the '.nii' extension when there is no '_'
                    sEnd = file.rfind('_')
                    if sEnd == -1:
                        sEnd = len(file)-4
                    sNames[filePath] = file[0:sEnd]

        #dict of bad volumes based on scan name
        print("Getting bad volumes from csv")
        badVols = dict()
        with open(labelfname) as f:
            next(f, None)  # skip the header row
            for rawLine in f:
                line = rawLine.split(',')
                if len(line) < 2:
                    # blank or malformed line: nothing to record
                    continue
                #subtract one for 0 indexing
                vols = {int(vol)-1 for vol in line[1].strip().split(';') if vol.strip() != ''}
                sName = line[0].upper().strip().replace('-','')
                badVols[sName] = vols

        print("Generating slice ids and labels")
        #ID format: (filepath, volume, direction, slice number)

        for file in niiFiles:
            badForScan = badVols.get(sNames[file], ())
            for volNum in range(n_volumes):
                label = 1 if volNum in badForScan else 0
                #with the defaults: 64 slices centered around the middle assuming size 255
                for sliceNum in range(slice_start, slice_end):
                    #sagittal, then coronal
                    for direction in (1, 2):
                        tempId = (file, volNum, direction, sliceNum)
                        self.idList.append(tempId)
                        self.labels[tempId] = label

        print("Getting max values from pickle file")

        # NOTE: pickle.load can execute arbitrary code; only use a pickle
        # file that comes from a trusted source.
        with open(max_vals_fname, "rb") as f:
            self.maxVals = pickle.load(f)
        # Re-key (scan name, vol) -> (filepath, vol). Old keys are removed
        # only after every file has been processed, so several files that
        # share one scan name can all read the same entry.
        usedKeys = set()
        for file in niiFiles:
            for vol in range(n_volumes):
                oldKey = (sNames[file], vol)
                self.maxVals[file, vol] = self.maxVals[oldKey]
                usedKeys.add(oldKey)
        for oldKey in usedKeys:
            del self.maxVals[oldKey]
        print("Done")

        ##use idList, labels, and maxVals for machine learning part

    def get_idList(self):
        """Return the list of slice IDs."""
        return self.idList

    def get_labels(self):
        """Return the dict mapping slice ID to label (0 or 1)."""
        return self.labels

    def get_maxVals(self):
        """Return the dict mapping (filepath, volume) to max voxel value."""
        return self.maxVals

In [38]:
# Build the generator once and pull out its three products
labelGenerator = LabelGenerator()
idList, labels, maxVals = (
    labelGenerator.get_idList(),
    labelGenerator.get_labels(),
    labelGenerator.get_maxVals(),
)

Getting bad volumes from csv
Generating slice ids and labels
Getting max values from pickle file
Done


In [40]:
# Display the first generated ID: (filepath, volume, direction, slice number)
idList[0]

('../Calgary_PS_DTI_Dataset/10001/PS14_006/b750/PS14_006_750.nii', 0, 1, 96)

In [41]:
# Display the label stored for the first ID
labels[idList[0]]

0

In [42]:
# Display the max value stored for volume 0 of the first ID's file
maxVals[idList[0][0], 0]

7435.0

## GeneratorTest.py

In [None]:
import numpy as np
import random
import time
from keras.models import Sequential
from keras.utils import Sequence
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, ZeroPadding2D, BatchNormalization
# from DataGenerator import DataGenerator
# from LabelGenerator import LabelGenerator
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import TensorBoard

# Collect slice IDs, labels and per-volume max values
labelGenerator = LabelGenerator()
idList, labels, maxVals = (
    labelGenerator.get_idList(),
    labelGenerator.get_labels(),
    labelGenerator.get_maxVals(),
)

# Shuffle the IDs in place with a fixed seed
random.seed(1)
random.shuffle(idList)

# The first 5% of the shuffled IDs go to training, the remainder to validation
# NOTE(review): 5% train / 95% validation looks like a quick-test setting - confirm
split_at = int(len(idList)*0.05)
train_listIDs = idList[:split_at]
val_listIDs = idList[split_at:]

# Keyword arguments shared by the training and validation generators
params = dict(
    labels=labels,
    max_brightness=maxVals,
    dim=(128,128),
    batch_size=32,
    n_classes=2,
    n_channels=1,
    shuffle=True,
)

# One generator per ID subset, both configured identically
training_generator, validation_generator = (
    DataGenerator(train_listIDs, **params),
    DataGenerator(val_listIDs, **params),
)

# Model setup: starting filter count, timestamped run name, empty model
layer_size = 16
NAME = str(int(time.time()))  # run name is the current Unix time in seconds
model = Sequential()

# Callbacks: TensorBoard logs under logs/<NAME>, checkpoint under weights/<NAME>.h5
tensorboard = TensorBoard(log_dir='logs/{}'.format(NAME))
checkpoint = ModelCheckpoint(
    'weights/{}.h5'.format(NAME),
    monitor='val_loss',
    verbose=0,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    period=1,
)
callbacks = [tensorboard, checkpoint]

#### Architecture ####
# Input block: one conv layer on the 128x128 single-channel slice, then 3x3 pooling
model.add(Conv2D(layer_size, (3,3), padding="same", activation="relu", input_shape=(128, 128, 1)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(3,3)))

# Two conv blocks (conv-BN-conv-BN-pool-dropout); the filter count doubles after each block
for _ in range(2):
    model.add(Conv2D(layer_size, (3,3), padding="same", activation="relu"))
    model.add(BatchNormalization())
    model.add(Conv2D(layer_size, (3,3), padding="same", activation="relu"))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.35))
    layer_size *= 2

model.add(Flatten())

# Dense head uses twice the width reached by the conv blocks
layer_size *= 2

for _ in range(2):
    model.add(Dense(layer_size, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.6))

# Two mutually exclusive classes with one-hot targets: softmax yields a
# probability distribution over the classes, which is what
# categorical_crossentropy expects (sigmoid scores each unit independently).
model.add(Dense(2))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
             optimizer=Adam(lr=0.008),
             metrics=['accuracy'])

# Train model on dataset
# The checkpoint callback saves under 'weights/'; create the directory first
# so that saving the first checkpoint does not fail on a missing path.
import os
os.makedirs('weights', exist_ok=True)

# Pass the callbacks list built above; without it neither TensorBoard logs
# nor checkpoints are written.
# NOTE(review): TensorBoard is imported from tensorflow.keras while the model
# comes from keras - confirm the two work together in the installed versions.
model.fit_generator(generator=training_generator,
                    validation_data=validation_generator,
                    callbacks=callbacks,
                    use_multiprocessing=True,
                    workers=6)