# Notes / TODO  

- Random seeding  
- Preproc transforms
- augmentations
   - no augmentation for validation
- Train-Validation-Test data
    - split train data 40 - 5
    - infer all test data save results
    - visualize results
- 
    
- Train workflow
    - lr find
    - more epochs ?

In [1]:
from eisen.datasets import ABCsDataset
from eisen.models.segmentation import VNet
from eisen.io import LoadITKFromFilename
from eisen.transforms import (
    ResampleITKVolumes,
    ITKToNumpy,
    CropCenteredSubVolumes,
    AddChannelDimension,
    MapValues,
    FixedMeanStdNormalization,
    LabelMapToOneHot,
    StackImagesChannelwise,
    FilterFields
)

In [2]:
from eisen.ops.losses import DiceLoss
from eisen.ops.metrics import DiceMetric
from eisen.utils import EisenModuleWrapper, EisenTransformWrapper
from eisen.utils.workflows import Training

from eisen.utils.logging import LoggingHook
from eisen.utils.logging import TensorboardSummaryHook
from eisen.utils.artifacts import SaveTorchModelHook

from torchvision.transforms import Compose
from torch.utils.data import DataLoader
from torch.optim import Adam

In [3]:
from datetime import datetime
import os
now = datetime.now() # current date and time
date_time = now.strftime("%d-%m-%Y_%H:%M:%S")

# Defining some constants
PATH_DATA = 'Data/ABCs_training_data/'  # path of data as unpacked from the challenge files
PATH_ARTIFACTS = f'./results/{date_time}'  # path for model results

if not os.path.exists(PATH_ARTIFACTS):
    os.system('mkdir' + " " + PATH_ARTIFACTS)

TASK = 'task1'
#TASK = 'task2'

NUM_EPOCHS = 100
BATCH_SIZE = 2

VOLUMES_RESOLUTION = [2, 2, 1.5]
VOLUMES_PIXEL_SIZE = [128, 128, 128]

In [4]:
if TASK == 'task1':
    n_out_chan = 5
    label_field = 'label_task1'
else:
    n_out_chan = 10
    label_field = 'label_task2'

# Preprocesses

In [5]:
"""
Define Readers and Transforms
In order to load data and prepare it for being used by the network, we need to operate 
I/O operations and define transforms to standardize data.
You can add transforms or change the existing ones by editing this
"""

# readers: for images and labels
read_tform = LoadITKFromFilename(['ct', 't1', 't2', label_field], PATH_DATA)

# image manipulation transforms

resample_tform_img = ResampleITKVolumes(
    ['ct', 't1', 't2'],
    VOLUMES_RESOLUTION,
    'linear'
)

resample_tform_lbl = ResampleITKVolumes(
    [label_field],
    VOLUMES_RESOLUTION,
    'nearest'
)

to_numpy_tform = ITKToNumpy(['ct', 't1', 't2', label_field])

crop = CropCenteredSubVolumes(fields=['ct', 't1', 't2', label_field], size=VOLUMES_PIXEL_SIZE)

map_intensities = MapValues(['t1', 't2'], min_value=0.0, max_value=1.0)

normalize_ct = FixedMeanStdNormalization(['ct'], mean=208.0, std=388.0)

if TASK == 'task1':
    map_labels = LabelMapToOneHot([label_field], [1, 2, 3, 4, 5])
else:
    map_labels = LabelMapToOneHot([label_field], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

stack_modalities = StackImagesChannelwise(['ct', 't1', 't2'], 'image')
preserve_only_fields = FilterFields(['image', label_field])

In [6]:
# create a transform to manipulate and load data
train_tform = Compose([
    read_tform,
    resample_tform_img,
    resample_tform_lbl,
    to_numpy_tform,
    crop,
    map_intensities,
    normalize_ct,
    map_labels,

])

# Custom Augmentations  
- random op needs to be constant among applied fields
- write a new wrapper for controllig random tfms
- rewrite transforms according to the source code from torchio
    - https://torchio.readthedocs.io/transforms/augmentation.html

In [7]:
import torchio
from torchio import Subject, ScalarImage, LabelMap

If you use TorchIO for your research, please cite the following paper:
Pérez-García et al., TorchIO: a Python library for efficient loading,
preprocessing, augmentation and patch-based sampling of medical images
in deep learning. Link: https://arxiv.org/abs/2003.04696



In [8]:
from torchio.transforms import (
    RandomFlip,
    RandomElasticDeformation,
    RandomAffine,
    RandomMotion,    
    RandomBiasField,
    RandomNoise,
    OneOf,
    
    RandomBlur,
    RandomSpike,
    RandomGhosting,
)

In [9]:
class EisenTransformWrapper:
    
    def __init__(self, transform, fields, label):
        super(EisenTransformWrapper, self).__init__()
        self.fields = fields
        self.transform = transform
        self.label = label

    def __call__(self, data):
        
        subject = Subject(
        ct=ScalarImage(tensor=data['ct']),  # this class is new
        t1=ScalarImage(tensor=data['t1']),
        t2=ScalarImage(tensor=data['t2']),
        label=LabelMap(tensor=data[self.label]),
        )
        
        transformed = self.transform(subject)
        
        data['ct'] = transformed['ct'].numpy()
        data['t1'] = transformed['t1'].numpy()
        data['t2'] = transformed['t2'].numpy()
        data[self.label] = transformed[self.label].numpy()
        
        return data

In [10]:
R_noise = EisenTransformWrapper(RandomNoise(), fields=['ct', 't1', 't2', label_field], label=label_field)
R_motion = EisenTransformWrapper(RandomMotion(), fields=['ct', 't1', 't2', label_field], label=label_field)
R_bias = EisenTransformWrapper(RandomBiasField(), fields=['ct', 't1', 't2', label_field], label=label_field)
R_flip = EisenTransformWrapper(RandomFlip(axes=(0,)), fields=['ct', 't1', 't2', label_field], label=label_field)
R_deform = EisenTransformWrapper(OneOf({RandomAffine(): 0.8, RandomElasticDeformation(): 0.2,}), 
                            fields=['ct', 't1', 't2', label_field], label=label_field)

In [11]:
# create a transform to manipulate and load data
train_tform = Compose([
    read_tform,
    resample_tform_img,
    resample_tform_lbl,
    to_numpy_tform,
    crop,
    map_intensities,
    normalize_ct,
    map_labels,
    R_noise,
    R_motion,
    R_bias,
    R_flip,
    R_deform,
    stack_modalities,
    preserve_only_fields
])

In [12]:
val_tform = Compose([
    read_tform,
    resample_tform_img,
    resample_tform_lbl,
    to_numpy_tform,
    crop,
    map_intensities,
    normalize_ct,
    map_labels,
    stack_modalities,
    preserve_only_fields
])

# Train-Val Split

In [13]:
# create a dataset from the training set of the ABC dataset
dataset = ABCsDataset(
    PATH_DATA,
    training=True,
    flat_dir_structure=True,  # check documentation
    transform=val_tform
)
len(dataset)

45

In [14]:
from torch.utils.data import random_split as random_split
import numpy as np
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

train_set , val_set = random_split(dataset, [train_size, test_size])
train_set.transform = train_tform

In [15]:
print('train_set', len(train_set))
print('val_set', len(val_set))

train_set 36
val_set 9


In [16]:
train_loader = DataLoader(train_set, shuffle=True, batch_size=2, num_workers=6)
val_loader = DataLoader(val_set, shuffle=True, num_workers=6)

In [17]:
# specify model and loss (building blocks)

model = EisenModuleWrapper(
    module=VNet(input_channels=3, output_channels=n_out_chan),
    input_names=['image'],
    output_names=['predictions']
)


In [18]:
# CHANGE TASK HERE if needed!!
loss = EisenModuleWrapper(
    module=DiceLoss(dim=[2, 3, 4]),
    input_names=['predictions', label_field],
    output_names=['dice_loss']
)
# CHANGE TASK HERE if needed!!
metric = EisenModuleWrapper(
    module=DiceMetric(dim=[2, 3, 4]),
    input_names=['predictions', label_field],
    output_names=['dice_metric']
)

In [19]:
optimizer = Adam(model.parameters(), 0.001)

# Notes
    - write validation workflow
    - use this Vnet as baseline model
    - Evaluation function

In [20]:
from eisen.utils.workflows import Validation

In [21]:
# join all blocks into a workflow (training workflow)
training_workflow = Training(
      model=model,
      losses=[loss],
      data_loader=train_loader,
      optimizer=optimizer,
      metrics=[metric],
      gpu=True
)

validation_workflow = Validation(
      model=model,
      losses=[loss],
      data_loader=val_loader,
      metrics=[metric],
      gpu=True
)

In [22]:
# create Hook to monitor training and save models
training_loggin_hook = LoggingHook(training_workflow.id, f'Training', PATH_ARTIFACTS)

training_summary_hook = TensorboardSummaryHook(training_workflow.id, f'Training', PATH_ARTIFACTS)

validation_summary_hook = TensorboardSummaryHook(validation_workflow.id, f'Validation', PATH_ARTIFACTS)

validation_loggin_hook = LoggingHook(validation_workflow.id, f'Validation', PATH_ARTIFACTS)

save_model_hook = SaveTorchModelHook(training_workflow.id, f'Training', PATH_ARTIFACTS)

In [23]:
import gc, torch
torch.cuda.empty_cache()
gc.collect()

22

In [24]:
from IPython.display import clear_output

for i in range(NUM_EPOCHS):
    clear_output(wait=True)
    training_workflow.run()
    validation_workflow.run()

+---------------------+---------------+-----------------+
|        Phase        | dice_loss (L) | dice_metric (M) |
+---------------------+---------------+-----------------+
|  Training - Epoch 0 |   0.9632299   |   0.038508285   |
|  Training - Epoch 1 |   0.92070115  |     0.092662    |
|  Training - Epoch 2 |   0.8414877   |    0.17976911   |
|  Training - Epoch 3 |   0.7738415   |    0.23836473   |
|  Training - Epoch 4 |   0.6958168   |    0.27483618   |
|  Training - Epoch 5 |   0.6347068   |    0.29447976   |
|  Training - Epoch 6 |   0.59051394  |    0.30309832   |
|  Training - Epoch 7 |   0.54442376  |    0.32696402   |
|  Training - Epoch 8 |   0.50977635  |    0.4241309    |
|  Training - Epoch 9 |   0.4764076   |    0.47792646   |
| Training - Epoch 10 |   0.4384547   |    0.5102421    |
| Training - Epoch 11 |   0.40803704  |    0.5383605    |
| Training - Epoch 12 |   0.3875252   |    0.54361343   |
| Training - Epoch 13 |   0.34950283  |     0.579211    |
| Training - E

In [26]:
import torch
torch.save(model.state_dict(), 'models/vnet_valid13092020.pt')