In [1]:
import numpy as np
import os
import yaml
import matplotlib.pyplot as plt
import torch
import h5py
import pathlib
import pandas as pd
from tqdm import tqdm
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from torch.nn.modules.loss import _Loss
from torch.autograd import Function, Variable
from torch.autograd import Variable
import sys
from networks.data_utils import get_imdb_data, ImdbData, RandomTransforms
from networks.relay_net import ReLayNet
from networks.net_api.losses import DiceLoss, CrossEntropyLoss2d
from solver import Solver, TrainSolver
# Expose only GPU index 5 to CUDA for this process. The index is specific to the
# machine this notebook was run on — adjust it for other hosts.
# NOTE(review): this has to be set before CUDA is first initialised to take effect.
os.environ["CUDA_VISIBLE_DEVICES"]="5"




In [2]:
def load_checkpoint(checkpoint, params):
    """Load a pretrained model, freeze its encoder layers and attach a new classifier.

    Every child module whose name contains 'encode' has the parameters of each
    of its sub-layers set to ``requires_grad = False``; one message is printed
    per frozen sub-layer. The model's ``classifier`` attribute is then replaced
    by a freshly initialised ``nn.Conv2d``.

    Args:
        checkpoint (str): location of the model checkpoint.
        params (dict): parameters from train_preclinical.yaml. The keys
            'num_class', 'kernel_c' and 'stride_conv' are read; the dictionary
            is not modified.

    Returns:
        The loaded model instance with frozen encoders and a new classifier.
    """
    # NOTE(security): torch.load unpickles the file, which can execute arbitrary
    # code. Only load checkpoints from a trusted source.
    # NOTE(review): the checkpoint is expected to hold a whole pickled model (not a
    # state_dict); recent PyTorch releases may need `weights_only=False` for that — confirm.
    relaynet_model = torch.load(checkpoint)

    layer_counter = 0
    for name, module in relaynet_model.named_children():
        if 'encode' not in name:
            continue
        for layer in module.children():
            for weight in layer.parameters():
                weight.requires_grad = False

            print('Layer "{}" in module "{}" was frozen!'.format(layer_counter, name))
            layer_counter += 1

    # Input channels of the new classifier. Kept as a local value so the caller's
    # `params` dictionary is left untouched.
    # NOTE(review): 64 is presumably the number of feature maps produced by the last
    # decoder block of the pretrained network — TODO confirm against the model definition.
    classifier_in_channels = 64
    relaynet_model.classifier = nn.Conv2d(classifier_in_channels, params['num_class'],
                                          params['kernel_c'], params['stride_conv'])
    return relaynet_model

def train_only(images, labels, wmaps, dimensions, model_path, exp_dir_name, param, checkpoint = None,
               batch_size = 16, num_workers = 4):
    """Train a model on a single dataset, without a validation pass.

    Args:
        images (numpy array): images of the training dataset
        labels (numpy array): labels of the training dataset
        wmaps (numpy array): weight maps of the training dataset
        dimensions (dict): contains height, width and number of layers;
            ``dimensions['layers']`` is passed to the solver as ``num_class``
        model_path (str): file path to save model checkpoints
        exp_dir_name (str): name of experiment
        param (dict): parameters from train_preclinical.yaml; ``param['epochs']``
            sets the number of epochs
        checkpoint (str or None): location of a pretrained checkpoint. When None
            a new ``ReLayNet(param)`` is trained from scratch.
        batch_size (int): batch size of the training loader (default 16)
        num_workers (int): number of DataLoader worker processes (default 4)

    Returns:
        None
    """
    # NOTE: `config` is not a parameter; it is read from the notebook/module scope
    # and therefore has to be defined before this function is called.
    train_dataset = ImdbData(config, images, labels, wmaps, dimensions,
                             transform = RandomTransforms(dimensions))
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                               shuffle=True, num_workers=num_workers)

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        # Say so explicitly: training on CPU is much slower and easy to miss otherwise.
        print('--------CUDA not available, using CPU--------')
        device = torch.device("cpu")

    if checkpoint is None:
        print('--------Training from scratch--------')
        relaynet_model = ReLayNet(param)
    else:
        print('--------Loading Checkpoint from pretrained model--------')
        relaynet_model = load_checkpoint(checkpoint, param)

    solver = TrainSolver(device, num_class = dimensions['layers'])
    solver.train(relaynet_model, train_loader, model_path=model_path,
                 num_epochs=param['epochs'], log_nth=1, exp_dir_name=exp_dir_name)
    
def train_and_val(train_images, train_labels, train_wmaps,
                  val_images, val_labels, val_wmaps,
                  dimensions, model_path, exp_dir_name, param,
                  checkpoint = None, batch_size = 16, num_workers = 1):
    """Train a model and validate it after training on a separate dataset.

    Args:
        train_images (numpy array): images of the training dataset
        train_labels (numpy array): labels of the training dataset
        train_wmaps (numpy array): weight maps of the training dataset
        val_images (numpy array): images of the validation dataset
        val_labels (numpy array): labels of the validation dataset
        val_wmaps (numpy array): weight maps of the validation dataset
        dimensions (dict): contains height, width and number of layers;
            ``dimensions['layers']`` is passed to the solver as ``num_class``
        model_path (str): file path to save model checkpoints
        exp_dir_name (str): name of experiment
        param (dict): parameters from train_preclinical.yaml; ``param['epochs']``
            sets the number of epochs
        checkpoint (str or None): location of a pretrained checkpoint. When None
            a new ``ReLayNet(param)`` is trained from scratch.
        batch_size (int): batch size of both loaders (default 16)
        num_workers (int): DataLoader worker processes per loader (default 1)

    Returns:
        None
    """
    # NOTE: `config` is not a parameter; it is read from the notebook/module scope
    # and therefore has to be defined before this function is called.
    # Only the training dataset gets the random transforms; validation data is used as-is.
    train_dataset = ImdbData(config, train_images, train_labels, train_wmaps, dimensions,
                             transform = RandomTransforms(dimensions))
    val_dataset = ImdbData(config, val_images, val_labels, val_wmaps, dimensions)

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                               shuffle=True, num_workers=num_workers)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size,
                                             shuffle=False, num_workers=num_workers)

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        # Say so explicitly: training on CPU is much slower and easy to miss otherwise.
        print('--------CUDA not available, using CPU--------')
        device = torch.device("cpu")

    if checkpoint is None:
        print('--------Training from scratch--------')
        relaynet_model = ReLayNet(param)
    else:
        print('--------Loading Checkpoint from pretrained model--------')
        relaynet_model = load_checkpoint(checkpoint, param)

    solver = Solver(device, num_class = dimensions['layers'])
    solver.train(relaynet_model, train_loader, val_loader, model_path=model_path,
                 num_epochs=param['epochs'], log_nth=1, exp_dir_name=exp_dir_name)
    


In [3]:
# Read the experiment configuration; every setting used below comes from this YAML file.
# NOTE(security): yaml.FullLoader can construct more than plain data types — only
# load configuration files you trust.
with open( "./train_preclinical.yaml") as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

# File locations and the run mode all live under the 'filepaths' section.
exp_dir_name, model_path, data_dir, mode = (
    config['filepaths'][key]
    for key in ('exp_dir_name', 'model_path', 'processed_data_path', 'mode')
)
# Model/training parameters and the (optional) checkpoint to start from.
param, checkpoint = config['param'], config['checkpoint']['choosen_checkpoint']


In [4]:
# Dispatch on the configured `mode`:
#   'default' -> train on the training split and validate on the validation split
#   'combine' -> concatenate both splits and train without validation
#   'train'   -> train without validation on 'training_intermediate.hdf5' from `data_dir`
if mode in ('default', 'combine'):

    train_images, train_labels, train_wmaps, val_images, val_labels, val_wmaps = get_imdb_data(data_dir)

    # Images get a singleton axis inserted at position 1 (presumably the channel
    # axis — TODO confirm); labels and weight maps are copied unchanged.
    train_images2 = np.copy(np.expand_dims(train_images, axis = 1))
    train_labels2 = np.copy(train_labels)
    train_wmaps2 = np.copy(train_wmaps)
    val_images2 = np.copy(np.expand_dims(val_images, axis = 1))
    val_labels2 = np.copy(val_labels)
    val_wmaps2 = np.copy(val_wmaps)

    # Label axes 1, 2 and 3 are read as number of layers, height and width.
    dimensions = {'height': train_labels2.shape[2], 'width':train_labels2.shape[3], 'layers': train_labels2.shape[1]}

    if mode == 'combine': # combine training & validation
        train_images3 = np.concatenate((train_images2, val_images2), axis=0)
        train_labels3 = np.concatenate((train_labels2, val_labels2), axis=0)
        train_wmaps3 = np.concatenate((train_wmaps2, val_wmaps2), axis=0)
        train_only(train_images3, train_labels3, train_wmaps3, dimensions, model_path, exp_dir_name, param, checkpoint)

    else: # mode == 'default'
        train_and_val(train_images2, train_labels2, train_wmaps2,
                      val_images2, val_labels2, val_wmaps2,
                      dimensions, model_path, exp_dir_name, param, checkpoint)

elif mode == 'train':
    # Read the three datasets fully into memory before the file is closed.
    with h5py.File(os.path.join(data_dir,'training_intermediate.hdf5'),'r') as hf:
        train_images=hf['data'][()]
        train_labels=hf['lmap'][()]
        train_wmaps=hf['wmap'][()]
    train_images2 = np.copy(np.expand_dims(train_images, axis = 1))
    train_labels2 = np.copy(train_labels)
    train_wmaps2 = np.copy(train_wmaps)
    # Label axes 1, 2 and 3 are read as number of layers, height and width.
    dimensions = {'height': train_labels2.shape[2], 'width':train_labels2.shape[3], 'layers': train_labels2.shape[1]}
    train_only(train_images2, train_labels2, train_wmaps2, dimensions, model_path, exp_dir_name, param, checkpoint)

else:
    # Fail loudly: an unrecognised mode would otherwise skip training without any message.
    raise ValueError('Unknown mode "{}": expected "default", "combine" or "train"'.format(mode))




--------Training from scratch--------




START TRAIN.


100%|█████████████████████████████████████████| 900/900 [02:47<00:00,  5.39it/s]
100%|█████████████████████████████████████████| 300/300 [00:24<00:00, 12.12it/s]

[Epoch : 0 / 6]: average train dice 0.98 	 average val dice 0.99 	 average training loss 0.02
Validation loss decreased (inf --> -0.989403).  Saving model ...
Saving model... ./models/FL_Partition_Model_2/relaynet_epoch1.model



100%|█████████████████████████████████████████| 900/900 [02:47<00:00,  5.37it/s]
100%|█████████████████████████████████████████| 300/300 [00:24<00:00, 12.16it/s]

[Epoch : 1 / 6]: average train dice 0.99 	 average val dice 0.99 	 average training loss 0.05
Validation loss decreased (-0.989403 --> -0.989647).  Saving model ...
Saving model... ./models/FL_Partition_Model_2/relaynet_epoch2.model



100%|█████████████████████████████████████████| 900/900 [02:47<00:00,  5.36it/s]
100%|█████████████████████████████████████████| 300/300 [00:24<00:00, 12.03it/s]

[Epoch : 2 / 6]: average train dice 0.99 	 average val dice 0.99 	 average training loss 0.03
EarlyStopping counter: 1 out of 2



100%|█████████████████████████████████████████| 900/900 [02:47<00:00,  5.37it/s]
100%|█████████████████████████████████████████| 300/300 [00:24<00:00, 12.02it/s]

[Epoch : 3 / 6]: average train dice 0.99 	 average val dice 0.99 	 average training loss 0.03
Validation loss decreased (-0.989647 --> -0.990527).  Saving model ...
Saving model... ./models/FL_Partition_Model_2/relaynet_epoch4.model



100%|█████████████████████████████████████████| 900/900 [02:47<00:00,  5.36it/s]
100%|█████████████████████████████████████████| 300/300 [00:25<00:00, 11.97it/s]

[Epoch : 4 / 6]: average train dice 0.99 	 average val dice 0.99 	 average training loss 0.02
EarlyStopping counter: 1 out of 2



100%|█████████████████████████████████████████| 900/900 [02:48<00:00,  5.36it/s]
100%|█████████████████████████████████████████| 300/300 [00:24<00:00, 12.08it/s]

[Epoch : 5 / 6]: average train dice 0.99 	 average val dice 0.99 	 average training loss 0.04
EarlyStopping counter: 2 out of 2
Early stopping
FINISH.



