# Model 17 Straight Net Depth

Straight residual convolutions.

How many layers deep can the network be before the GPU runs out of memory?

### Results

A network 128 residual convolutions deep (2 layers each) trained for an epoch, very slowly.  This network, with 3x3x3 convs, has a ~257 fov (field of view), possibly more than the guesstimated 201 fov of u-net.

Depth affects computation and training speed, but going 128 deep (on 128x128x128 crops) is fine in terms of GPU memory on a Tesla K80 (11GB).


### To Try

- Use small patches, small batches, batchnorm
- Make sure my loss functions work.
- Try mean squared error loss or weighted binary cross entropy
- Add dilated convolution stack to end of network (small fov increase).
- Use Dropout (try 0.1)
- A shallow u-net: Pooling once and taking advantage of the smaller volume to increase channels and layers.  This would lead to a greatly increased fov.


## Imports and Constants, etc.

In [None]:
import datetime
import importlib
import keras
from keras.layers import (Dense, SimpleRNN, Input, Conv1D, 
                          LSTM, GRU, AveragePooling3D, MaxPooling3D, GlobalMaxPooling3D,
                          Conv3D, UpSampling3D, BatchNormalization, Concatenate, Add,
                          GaussianNoise, Dropout
                         )
from keras.models import Model
import nibabel as nib
import numpy as np
import pandas as pd
from pathlib import Path
import pickle
import projd
import random
import re
import scipy
import shutil
import SimpleITK # xvertseg MetaImage files
import sys
from sklearn.model_selection import train_test_split
import uuid

import matplotlib.pyplot as plt # data viz
import seaborn as sns # data viz

import imageio # display animated volumes
from IPython.display import Image # display animated volumes

from IPython.display import SVG # visualize model
from keras.utils.vis_utils import model_to_dot # visualize model

# Put the project's local source directory ($PROJECT_ROOT/src) on the import
# path so the local modules imported below can be found.
# NOTE(review): assumes projd.cwd_token_dir('notebooks') yields the project root - confirm.
src_dir = str(Path(projd.cwd_token_dir('notebooks')).joinpath('src'))
if src_dir not in sys.path:
    # Guarded so re-running the cell does not add the same entry twice.
    sys.path.append(src_dir)

import util
import preprocessing
import datagen
import modelutil
import xvertseg
import augmentation
import metrics

# Name used for this model's log, tensorboard and checkpoint files.
MODEL_NAME = 'model_17'

# Root of all data.  expanduser() changes nothing for this absolute path; it
# keeps the line interchangeable with the home-relative alternative below.
DATA_DIR = Path('/data2').expanduser()
# DATA_DIR = Path('~/data/2018').expanduser()
# UVMMC
NORMAL_SCANS_DIR = DATA_DIR / 'uvmmc/nifti_normals'
PROJECT_DATA_DIR = DATA_DIR / 'uvm_deep_learning_project'
PP_IMG_DIR = PROJECT_DATA_DIR / 'uvmmc' / 'preprocessed' # preprocessed scans dir
PP_MD_PATH = PROJECT_DATA_DIR / 'uvmmc' / 'preprocessed_metadata.pkl'
# xVertSeg
XVERTSEG_DIR = DATA_DIR / 'xVertSeg.v1'
PP_XVERTSEG_DIR = PROJECT_DATA_DIR / 'xVertSeg.v1' / 'preprocessed' # preprocessed scans dir
PP_XVERTSEG_MD_PATH = PROJECT_DATA_DIR / 'xVertSeg.v1' / 'preprocessed_metadata.pkl'


# Output locations for checkpoints, CSV logs, tensorboard events and scratch files.
MODELS_DIR = PROJECT_DATA_DIR / 'models'
LOG_DIR = PROJECT_DATA_DIR / 'log'
TENSORBOARD_DIR = PROJECT_DATA_DIR / 'tensorboard'
TMP_DIR = DATA_DIR / 'tmp'

# Make sure the listed directories exist.  XVERTSEG_DIR is not in the list
# (presumably the raw dataset, expected to be present already - confirm).
# exist_ok=True makes the call idempotent without a separate exists() check;
# it still raises FileExistsError if a path exists but is not a directory.
for d in [DATA_DIR, NORMAL_SCANS_DIR, PROJECT_DATA_DIR, PP_IMG_DIR, MODELS_DIR, LOG_DIR, 
          TENSORBOARD_DIR, TMP_DIR, PP_MD_PATH.parent, PP_XVERTSEG_DIR, PP_XVERTSEG_MD_PATH.parent]:
    d.mkdir(parents=True, exist_ok=True)
        
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply seaborn's default plot styling.
sns.set()

# autoreload mode 2: reload modules before each cell runs, so edits to the
# local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Hyperparameters

In [None]:
# Dataset splitting (passed to xvertseg.get_xvertseg_datagens).
SEED = 25 # random seed for dataset shuffling and splitting.
VALIDATION_SPLIT = 0.2 # 3 samples for validation
TEST_SPLIT = 0.134 # 2 samples for test

# Batching and training length.
BATCH_SIZE = 1
N_BATCHES = 10 # The number of batches per epoch, or None.
NUM_SAMPLES = 1 # Show each image NUM_SAMPLES times per epoch. Ignored if N_BATCHES is set.
MAX_QUEUE_SIZE = 20 # passed to fit_generator as max_queue_size
EPOCHS = 30

# Crop shape handed to the data generators; alternatives are kept commented out.
# PATCH_SHAPE = (32, 32, 32)
# PATCH_SHAPE = (64, 64, 64) # Used to crop images for training (data augmentation, memory, speed)
PATCH_SHAPE = (128, 128, 128) # Big.  Good for visualization.
# PATCH_SHAPE = None # Full sized images

# INPUT_SHAPE = (PATCH_SHAPE + (1,)) # Model input shape adds channel dimension, but not examples dim.
INPUT_SHAPE = (None, None, None, 1) # Accept variable size volumes/images.

# Predicted values are binarized with this threshold when masks are visualized.
BINARY_MASK_THRESH = 0.5 # > threshold = 1. <= thresh = 0.

# Augmentation settings passed to the train/validation generators' config().
# NOTE(review): exact meaning (e.g. FLIP as a probability) lives in datagen - confirm there.
TRANSPOSE = False
FLIP = 0.5
GRAY_STD = 0.01

# Visualize model using the first set of hyperparams
# KERNEL_SIZE = (7, 7, 7)
# KERNEL_SIZE = (5, 5, 5)
KERNEL_SIZE = (3, 3, 3)
# Notes from earlier runs, kept for reference (they appear to predate the
# current N_A / N_B / N_R values):
# n_a = 2, n_r = 8.  NaN loss.  Why?
# straight net: (128, 128, 128) patch 4 channels * 32 residual layers too much memory
# straight net: (128, 128, 128) patch 4 channels * 16 residual blocks ok (memory wise)
# seems near the limit of memory.
# straight net: (128, 128, 128) patch 4 channels * 24 residual blocks ok (memory wise)
N_A = 4 # number of channels # 4 and 4 works for full sized testing, I think, memory-wise.
N_B = 128 + 32 # number of blocks; each block holds N_R residual convolutions (see build_model).
N_R = 1 # number of residual convolutions per block.  Older note, not for the current 3x3x3 kernel: 33 pixel field of view after 8 5x convolutions.
DROPOUT = None # dropout rate for build_model; None adds no Dropout layers.  Alternative: 0.1
NOISE = None # std dev of input GaussianNoise for build_model; None adds none.  Alternative: 0.0001

# Class weights for the weighted binary cross entropy loss (that loss is
# commented out in the visible cells, so these are currently unused there).
W0 = 1 # binary cross entropy weight for class 0
W1 = 100 # weight informed by the 1-to-0 ratio in the training data.


## Data Generation

In [None]:
def infos_func():
    '''Read the xVertSeg metadata from PP_XVERTSEG_MD_PATH.'''
    return xvertseg.read_xvertseg_metadata(PP_XVERTSEG_MD_PATH)


# Split the dataset into train / validation / test generators.
train_gen, val_gen, test_gen = xvertseg.get_xvertseg_datagens(
    infos_func,
    seed=SEED,
    validation_split=VALIDATION_SPLIT,
    test_split=TEST_SPLIT,
)

# Training: cropped patches with the augmentation hyperparameters.
train_gen.config(
    batch_size=BATCH_SIZE,
    length=N_BATCHES,
    crop_shape=PATCH_SHAPE,
    flip=FLIP,
    transpose=TRANSPOSE,
    gray_std=GRAY_STD,
    num_samples=NUM_SAMPLES,
).reindex()

# Validation: same crop and augmentation settings, default length.
val_gen.config(
    batch_size=BATCH_SIZE,
    crop_shape=PATCH_SHAPE,
    flip=FLIP,
    transpose=TRANSPOSE,
    gray_std=GRAY_STD,
).reindex()
# val_gen.config(batch_size=1).reindex() # Test full image

# Test: one image per batch, no crop shape given (evaluate using full image).
test_gen.config(batch_size=1).reindex()

## Build Model

In [None]:
def build_model(input_shape, n_a=4, n_b=4, n_r=4, dropout=None, noise=None, loss='binary_crossentropy', metrics=None,
                kernel_size=3):
    '''
    Build and compile a 3D straight (no pooling) residual convolutional segmenter.

    Architecture: optional GaussianNoise on the input, one initial Conv3D, then
    n_b blocks.  Each block is an optional Dropout followed by n_r residual
    units; a residual unit is a single 'same'-padded ReLU Conv3D whose output
    is added to its input.  A final 1x1x1 sigmoid Conv3D gives one output channel.

    input_shape: shape passed to keras Input, e.g. (None, None, None, 1).
    n_a: number of filters (channels) in every hidden convolution.
    n_b: number of blocks.
    n_r: number of residual units per block.
    dropout: proportion of activation of input to drop out. 0.0 to 1.0.
        None (the default) adds no Dropout layers.
    noise: std dev of noise added to input activation.  None or 0 adds no noise layer.
    loss: loss passed to model.compile.
    metrics: optional sequence of extra metrics; 'accuracy' is always included first.
    kernel_size: kernel size of the hidden convolutions (int or 3-tuple).

    returns: Keras model, compiled with the adam optimizer.
    '''
    # None instead of a mutable [] default; copy so the caller's list is never shared.
    extra_metrics = [] if metrics is None else list(metrics)

    x_input = Input(shape=input_shape)
    x = x_input
    
    # noise regularization
    if noise: 
        x = GaussianNoise(stddev=noise)(x)

    # Initial convolution lifts the input to n_a channels so the residual
    # additions below operate on tensors with matching channel counts.
    x = Conv3D(n_a, kernel_size=kernel_size, padding='same', activation='relu')(x)

    # Each block: optional dropout followed by n_r residual units.
    for _ in range(n_b):
        if dropout is not None:
            x = Dropout(rate=dropout)(x)
            
        for _ in range(n_r):
            x_initial = x
            # Disabled alternative: a second convolution inside each residual unit.
            # x = Conv3D(n_a, kernel_size=kernel_size, padding='same', activation='relu')(x)
            x = Conv3D(n_a, kernel_size=kernel_size, padding='same', activation='relu')(x)
            x = Add()([x_initial, x])  
        

    # Per-voxel sigmoid output with a single channel.
    y = Conv3D(1, kernel_size=(1, 1, 1), activation='sigmoid')(x)
    
    model = Model(inputs=x_input, outputs=y)
    model.compile(optimizer='adam', loss=loss, metrics=['accuracy'] + extra_metrics)
    return model

In [None]:
# Disabled alternative loss: binary cross entropy weighted per class by W0 / W1.
# weighted_binary_crossentropy_loss = metrics.weighted_binary_crossentropy_loss_func(w0=W0, w1=W1)
# Notebook-level alias for the dice loss.  The same name is used as the
# custom_objects key when saved models are loaded in later cells.
dice_coefficient_loss = metrics.dice_coefficient_loss

In [None]:
# Build the network from the hyperparameters, trained with the dice loss and
# reporting both dice metrics in addition to accuracy.
model = build_model(input_shape=INPUT_SHAPE, n_a=N_A, n_b=N_B, n_r=N_R, 
                    dropout=DROPOUT, noise=NOISE, 
#                     loss='binary_crossentropy',
                    loss=dice_coefficient_loss,
                    metrics=[metrics.dice_coefficient, metrics.binary_dice_coefficient],
                    kernel_size=KERNEL_SIZE)
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()
# Last expression of the cell: the layer graph rendered as an inline SVG.
SVG(model_to_dot(model).create(prog='dot', format='svg'))

In [None]:
# Training callbacks, each built by a modelutil helper from its output
# directory and the model name.
callbacks = [
    modelutil.get_tensorboard_callback(TENSORBOARD_DIR, MODEL_NAME),
    modelutil.get_logger_callback(LOG_DIR, MODEL_NAME),
    modelutil.get_checkpoint_callback(MODELS_DIR, MODEL_NAME),
]

# Train from the generators.  Per the original note, datagen shuffles every epoch.
history = model.fit_generator(
    train_gen,
    epochs=EPOCHS,
    validation_data=val_gen,
    callbacks=callbacks,
    max_queue_size=MAX_QUEUE_SIZE,
    use_multiprocessing=False,
    shuffle=True,
)


## Experimental Notes

## Visualize Training Progress

In [None]:
# Read metrics from this model's latest training log.
# "Latest" means last in sorted path order.
# NOTE(review): assumes log file names embed a sortable timestamp - confirm in modelutil.
log_paths = sorted(LOG_DIR.glob(f'{MODEL_NAME}*_log.csv'))
if not log_paths:
    # Explicit message instead of a bare IndexError from [-1] on an empty list.
    raise FileNotFoundError(f'No training log matching {MODEL_NAME}*_log.csv found in {LOG_DIR}')
log_path = log_paths[-1]
print(log_path)
log_data = pd.read_csv(log_path)

In [None]:
# Show every 10th logged row plus the final row.  A boolean row mask is used
# instead of concatenating two slices, so the final row is not listed twice
# when it is also a 10th row.
row_number = np.arange(len(log_data))
log_data[(row_number % 10 == 0) | (row_number == len(log_data) - 1)]

In [None]:
# Plot each training metric against its validation counterpart, one figure
# per metric.  The validation column is the training column name prefixed
# with "val_"; deriving it in the loop avoids plotting a training curve twice
# under a "Validation" label.
# Each entry: (log column, legend label, optional y-axis limits).
curves = [
    ("acc", "Accuracy", [0.0, 1.0]),  # Show results on 0..1 range
    ("loss", "Loss", None),
    ("dice_coefficient", "Dice Coefficient", None),
]
for column, label, y_limits in curves:
    if y_limits is not None:
        # Limits are set before plotting, as in the original accuracy figure.
        plt.gca().set_ylim(y_limits)
    plt.plot(log_data[column])
    plt.plot(log_data["val_" + column])
    plt.legend(["Training " + label, "Validation " + label])
    plt.show()



### Confusion Matrix Results Over Time

Visualize how the results of the model improve over time.

TODO: Why do the confusion matrices look broken for epoch 10 and 20?


In [None]:
# Run modelutil.confusion_matrix_by_epochs for epochs 1, 10 and 20 with train_gen.
# custom_objects names the project loss/metric functions (presumably needed
# to load the saved models - confirm in modelutil).
modelutil.confusion_matrix_by_epochs(
    MODELS_DIR,
    MODEL_NAME,
    [1, 10, 20],
    train_gen,
    custom_objects=dict(
        dice_coefficient_loss=dice_coefficient_loss,
        dice_coefficient=metrics.dice_coefficient,
        binary_dice_coefficient=metrics.binary_dice_coefficient,
    ),
)
    

### Visualize Masks by Epoch

In [None]:
# Epochs whose saved models are visualized.  Alternative: [1, 10, 20]
epochs = [20]
# Reconfigure train_gen: single-image batches, 10 batches, and None for crop
# shape and for flip / transpose / gray noise.  This changes the generator for
# any later cell too.
# NOTE(review): the other config() calls in this notebook chain .reindex(); confirm it is not needed here.
train_gen.config(batch_size=1, length=10, num_samples=1, crop_shape=None, flip=None, transpose=None, gray_std=None)
# Project loss/metric functions passed when loading each saved model; built once.
custom_objects = {'dice_coefficient_loss': dice_coefficient_loss,
                  'dice_coefficient': metrics.dice_coefficient,
                  'binary_dice_coefficient': metrics.binary_dice_coefficient}
for epoch in epochs:
    print('Epoch', epoch)
    model = modelutil.get_epoch_model(MODELS_DIR, MODEL_NAME, epoch,
                                      custom_objects=custom_objects)
    for i in range(len(train_gen)):
        print('Sequence', i)
        x, y = train_gen[i]
        print(x.shape)
        # Computed on first use so input and truth are still displayed before
        # prediction, then reused for every sample in the batch.
        y_pred = None
        for j in range(x.shape[0]): # batch size
            print('Input')
            display(util.animate_crop(x[j, :, :, :, 0], step=20))
            print('True')
            display(util.animate_crop(y[j, :, :, :, 0], step=20))
            if y_pred is None:
                # Predict the whole batch once rather than once per sample,
                # then binarize with the mask threshold.
                print('predicting...')
                y_pred = model.predict_on_batch(x) > BINARY_MASK_THRESH
            print('Predicted')
            display(util.animate_crop(y_pred[j, :, :, :, 0], step=20))
            

    