In [1]:
from   ipywidgets import interactive, fixed
import matplotlib.pyplot as plt
from   model.cnn import CNN
import numpy as np
import pandas as pd
import random
from   scipy.ndimage import gaussian_filter1d
from   scipy.stats import norm
from   source.dataloader import BatchgenWrapper, TrainingDataGenerator, ValidationDataGenerator
from   source.metrics import roc_auc
from   source.preprocessing import get_preprocessed_polyp_segmentation_mask_info, train_validation_split
from   source.visualization import plotter_batch, plotter_gradcam
from   tensorflow.keras.callbacks import Callback, EarlyStopping
from   tensorflow.keras.optimizers import Adam, SGD
%config Completer.use_jedi = False

#### Settings

In [2]:
data_dir = "preprocessed-data"

# Number of trained estimators in the ensemble
n_estimators = 2

# Image properties
raw_image_size = (100,100,100)
patch_size     = (50,50,50) # Field-of-view of the network
n_channels     = 1 # 1 = CT image only, 2 = CT image + manual expert segmentation mask

# Training
batch_size = 1
train_size = 0.8 # Train-Validation Split

# Model
persistance        = False
pretrained         = True
pretrained_weights = 'weights/noseg_cnn_pretraining'

# Fix global seed (as good as possible) to ensure reproducibility of results
seed = 42
np.random.seed(seed)
random.seed(seed)

### Meta information

Load information of the CT scans from 'ct_info.csv' and get a list of the preprocessed ct scans and segmentation masks that are available in 'preprocessed-data/'.

In [3]:
df_ct_info = pd.read_csv('ct_info.csv')
df_ct_info

Unnamed: 0,patient,polyp,segmentation,histopathology,class_label,position,ct_file,segmentation_file
0,1,1,1,XXX,benign,prone,demo-data/demo_ct_001.npy,demo-data/demo_seg_001.npy
1,2,2,2,XXX,benign,prone,demo-data/demo_ct_002.npy,demo-data/demo_seg_002.npy
2,3,3,3,XXX,premalignant,prone,demo-data/demo_ct_003.npy,demo-data/demo_seg_003.npy


In [4]:
df_preprocessed_info = get_preprocessed_polyp_segmentation_mask_info(data_dir)
df_preprocessed_info

Unnamed: 0,patient,polyp,segmentation,preprocessed_ct_file,preprocessed_segmentation_file
0,1,1,1,/home/philipp/Projects/deep-learning-ct-colono...,/home/philipp/Projects/deep-learning-ct-colono...
1,2,2,2,/home/philipp/Projects/deep-learning-ct-colono...,/home/philipp/Projects/deep-learning-ct-colono...
2,3,3,3,/home/philipp/Projects/deep-learning-ct-colono...,/home/philipp/Projects/deep-learning-ct-colono...


Merge information into a single dataframe

In [5]:
df_data = df_ct_info.merge(df_preprocessed_info, how='inner', on=['patient', 'polyp', 'segmentation'])
df_data

Unnamed: 0,patient,polyp,segmentation,histopathology,class_label,position,ct_file,segmentation_file,preprocessed_ct_file,preprocessed_segmentation_file
0,1,1,1,XXX,benign,prone,demo-data/demo_ct_001.npy,demo-data/demo_seg_001.npy,/home/philipp/Projects/deep-learning-ct-colono...,/home/philipp/Projects/deep-learning-ct-colono...
1,2,2,2,XXX,benign,prone,demo-data/demo_ct_002.npy,demo-data/demo_seg_002.npy,/home/philipp/Projects/deep-learning-ct-colono...,/home/philipp/Projects/deep-learning-ct-colono...
2,3,3,3,XXX,premalignant,prone,demo-data/demo_ct_003.npy,demo-data/demo_seg_003.npy,/home/philipp/Projects/deep-learning-ct-colono...,/home/philipp/Projects/deep-learning-ct-colono...


### Test training datagenerator and augmentations (sanity check)

In [6]:
train_data_generator = BatchgenWrapper(data=df_data,
                                       batch_size=batch_size,
                                       raw_image_size=raw_image_size,
                                       patch_size=patch_size,
                                       n_channels=n_channels,
                                       one_hot=False,
                                       num_processes=1,
                                       num_cached_per_queue=1)

In [7]:
test_batch   = train_data_generator[0]
X_test_batch = test_batch[0]
y_test_batch = test_batch[1]
print('X_test_batch:', X_test_batch.shape, ', y_test_batch:', y_test_batch.shape)

X_test_batch: (1, 50, 50, 50, 1) , y_test_batch: (1,)


In [8]:
interactive(plotter_batch,
            batch        = fixed(test_batch),
            sample_nr    = (0,X_test_batch.shape[0]-1),
            channel      = (0,X_test_batch.shape[4]-1),
            slice_x      = (0,X_test_batch.shape[1]-1),
            slice_y      = (0,X_test_batch.shape[2]-1),
            slice_z      = (0,X_test_batch.shape[3]-1),
            cmap         = ["gist_yarg", "cool", "inferno", "magma", "plasma", "viridis"],
            reverse_cmap = [True, False])

interactive(children=(IntSlider(value=0, description='sample_nr', max=0), IntSlider(value=0, description='chan…

#### Model

In [9]:
model = CNN(input_shape=(50, 50, 50, n_channels), classes=1, dropout=0.1, mc=False)
model.summary()

Model: "cnn"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 50, 50, 50,  0                                            
__________________________________________________________________________________________________
res1a_branch2a (Conv3D)         (None, 25, 25, 25, 1 448         input[0][0]                      
__________________________________________________________________________________________________
bn1a_branch2a (BatchNormalizati (None, 25, 25, 25, 1 64          res1a_branch2a[0][0]             
__________________________________________________________________________________________________
activation (Activation)         (None, 25, 25, 25, 1 0           bn1a_branch2a[0][0]              
________________________________________________________________________________________________

In [10]:
if pretrained:
    # Get layer names
    layer_names = [layer.name for layer in model.layers]
    print('network layers:', layer_names)

    # Freeze layers
    for layer_name in layer_names[:9]:
        model.get_layer(layer_name).trainable = True

    # Verify trainability
    for layer in model.layers:
        print('layer:', layer.name, 'trainable:', layer.trainable)

model.compile(optimizer=SGD(lr=0.01),
             loss='binary_crossentropy',
             metrics=['accuracy', roc_auc])

if pretrained:
    model.load_weights(pretrained_weights)

initial_weights = model.get_weights()

network layers: ['input', 'res1a_branch2a', 'bn1a_branch2a', 'activation', 'res1a_branch2b', 'res1a_branch1', 'bn1a_branch2b', 'bn1a_branch1', 'add', 'activation_1', 'res1b_branch2a', 'bn1b_branch2a', 'activation_2', 'res1b_branch2b', 'bn1b_branch2b', 'add_1', 'activation_3', 'res2a_branch2a', 'bn2a_branch2a', 'activation_4', 'res2a_branch2b', 'res2a_branch1', 'bn2a_branch2b', 'bn2a_branch1', 'add_2', 'activation_5', 'res2b_branch2a', 'bn2b_branch2a', 'activation_6', 'res2b_branch2b', 'bn2b_branch2b', 'add_3', 'activation_7', 'res3a_branch2a', 'bn3a_branch2a', 'activation_8', 'res3a_branch2b', 'res3a_branch1', 'bn3a_branch2b', 'bn3a_branch1', 'add_4', 'activation_9', 'res3b_branch2a', 'bn3b_branch2a', 'activation_10', 'res3b_branch2b', 'bn3b_branch2b', 'add_5', 'activation_11', 'avg_pool', 'dropout', 'fc', 'sigmoid']
layer: input trainable: True
layer: res1a_branch2a trainable: True
layer: bn1a_branch2a trainable: True
layer: activation trainable: True
layer: res1a_branch2b trainable: 

#### Callbacks

In [11]:
cb_es = EarlyStopping(monitor='val_loss',
                      mode='min',
                      patience=4*((df_data.shape[0]*train_size)//batch_size),
                      restore_best_weights=True)

#### Training and validation

In [None]:
for est in range(n_estimators):
    
    print('\nEstimator #{:d}\n'.format(est))
    
    ##########################################
    ######### Create dataset splits ##########
    ##########################################
    
    df_data_train, df_data_valid = train_validation_split(df_data, train_size=0.5, random_state=est)
    print("\nDatasets: Train set =", df_data_train.shape[0], ", Validation set =", df_data_valid.shape[0])
    
    ###############################################################
    ######### #### MIC-DKFZ Batchgenerator (for training) #########
    ###############################################################
    
    train_data_generator = BatchgenWrapper(data=df_data_train,
                                           batch_size=batch_size,
                                           raw_image_size=raw_image_size,
                                           patch_size=patch_size,
                                           n_channels=n_channels,
                                           one_hot=False,
                                           num_processes=1,
                                           num_cached_per_queue=1)
    
    ########################################################
    ######### Keras datagenerator (for validation) #########
    ########################################################
    
    valid_data_generator = ValidationDataGenerator(data=df_data_valid,
                                                   batch_size=df_data_valid.shape[0],
                                                   patch_size=patch_size,
                                                   n_channels=n_channels,
                                                   num_threads=1,
                                                   shuffle=False)
    
    ##################################
    ######### Check batches ##########
    ##################################  
    
    train_test_batch = train_data_generator[0]
    print('\nTrain batch:', train_test_batch[0].shape)
    
    valid_test_batch = valid_data_generator[0]
    print('Valid batch:', valid_test_batch[0].shape)
    
    ##############################
    ######### CNN Model ##########
    ##############################
    
    # Set initial weights
    model.set_weights(initial_weights)
    
    #############################
    ######### Training ##########
    #############################
    
    history = model.fit(train_data_generator,
                        epochs           = 1000,
                        validation_data  = valid_data_generator,
                        verbose          = 0,
                        callbacks        = [cb_es])
    history = history.history
    
    #######################################
    ######### Plot model history ##########
    #######################################
    
    keys    = ['loss','roc_auc']
    fig, ax = plt.subplots(1, len(keys), figsize=(8*len(keys),6), num=0, clear=True)
    for i, key in enumerate(keys):
        ax[i].plot(history[key], c='orange', alpha=0.5)
        ax[i].plot(history['val_'+key], c='lightblue', alpha=0.5)
        ax[i].plot(gaussian_filter1d(history[key], 3), c='red', lw=2, label='training')
        ax[i].plot(gaussian_filter1d(history['val_'+key], 3), c='blue', lw=2, label='validation')
        ax[i].legend(fontsize=16)
        ax[i].set_xlabel('epoch', fontsize=20)
        ax[i].set_ylabel(keys[i], fontsize=20)
        ax[i].tick_params(labelsize=16)
    plt.show()
    plt.close()
    
    #########################################
    ######### Validation & Testing ##########
    #########################################
    
    # Ground truth
    y_true_post_train = np.expand_dims(np.asarray(post_train_data_generator[0][1]), -1)
    y_true_valid      = np.expand_dims(np.asarray(valid_data_generator[0][1]), -1)
    
    # Model predictions
    predictions_train = np.asarray(model.predict(post_train_data_generator))
    predictions_valid = np.asarray(model.predict(valid_data_generator))
    
    eval_train = np.concatenate([predictions_train, y_true_post_train], 1)
    eval_valid = np.concatenate([predictions_valid, y_true_valid], 1)
    
    ################################
    ######### Persistance ##########
    ################################
    
    if persistance:
        weights_file    = 'weights/ensemble/{:s}_{:s}'.format(model_name, str(run+1))
        history_file    = 'results/ensemble/history/history_{:s}_{:s}.npy'.format(model_name, str(run+1))
        train_eval_file = 'results/ensemble/train_eval_{:s}_{:s}.npy'.format(model_name, str(run+1))
        valid_eval_file = 'results/ensemble/valid_eval_{:s}_{:s}.npy'.format(model_name, str(run+1))

        print('\nSave weights:               {:s}'.format(weights_file))
        print('Save history:               {:s}'.format(history_file))
        print('Save train evaluation:      {:s}'.format(train_eval_file))
        print('Save validation evaluation: {:s}'.format(valid_eval_file))

        model.save_weights(weights_file)
        np.save(history_file,    history)
        np.save(train_eval_file, eval_train)
        np.save(valid_eval_file, eval_valid)


Estimator #0

Train/validation split...
	- Train set:      [class, n] = [['benign', 1]]
	- Validation set: [class, n] = [['premalignant', 1], ['benign', 1]]

Datasets: Train set = 1 , Validation set = 2

Train batch: (1, 50, 50, 50, 1)
Valid batch: (3, 50, 50, 50, 1)






























































































































































































































































































































































































































































































































































































































































































































































































































































































































