In [1]:
import sys 
sys.path.append('..')

import os
import optuna
import mlflow
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import data
from mlflow import pytorch
from pprint import pformat
from urllib.parse import urlparse

from Deterministic_Loss import deterministic_noisy_label_loss
from Utilis import seg_score, CustomDataset_LIDC, LIDC_collate
from Deterministic_CM import UNet_DCM
from adamW import AdamW

  
  return torch._C._cuda_getDeviceCount() > 0


In [None]:
def train(options, model, device, train_loader, optimizer, epoch, num_epochs, ramp_up):
    """Run one training epoch and return the epoch-averaged score and loss.

    Args:
        options: Experiment options. Accepted for interface compatibility;
            this function does not read it.
        model: Network that returns ``(logits, confusion_matrices)`` for a
            batch of images.
        device: Torch device the batch tensors are moved to.
        train_loader: Sized iterable yielding
            ``(images, true_image, annots, imagename)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        epoch: Index of the current epoch (forwarded to the loss and printed
            as ``epoch + 1``).
        num_epochs: Total number of epochs (forwarded to the loss).
        ramp_up: Ramp-up value forwarded to the loss.

    Returns:
        tuple: ``(avg_score, avg_loss)`` where ``avg_score`` is the mean of
        the per-batch ``seg_score`` values and ``avg_loss`` is the mean
        per-batch loss as a Python float.

    Raises:
        ValueError: If ``train_loader`` contains no batches.
    """
    num_batches = len(train_loader)
    if num_batches == 0:
        # Fail with a clear message instead of a ZeroDivisionError when averaging.
        raise ValueError("train_loader contains no batches; cannot train for an epoch")

    model.train()
    running_loss = 0.0
    running_iou = 0.0
    for batch_idx, (images, true_image, annots, imagename) in enumerate(train_loader):
        # Clear gradients left over from the previous iteration.
        optimizer.zero_grad()

        # Move the batch to the target device as float32 tensors.
        images = images.to(device=device, dtype=torch.float32)
        true_image = true_image.to(device=device, dtype=torch.float32)
        annots = annots.to(device=device, dtype=torch.float32)

        outputs_logits, stochastic_cm = model(images)

        # Loss, backward pass and parameter update.
        loss = deterministic_noisy_label_loss(outputs_logits, stochastic_cm, annots, epoch, num_epochs, 'lidc', ramp_up)
        loss.backward()
        optimizer.step()

        # Everything below is only used for reporting, so no graph is needed.
        with torch.no_grad():
            b_, c_, h_, w_ = outputs_logits.size()
            # One (c, 1) column vector per pixel.
            pred_noisy = outputs_logits.view(b_, c_, h_ * w_).permute(0, 2, 1).contiguous().view(b_ * h_ * w_, c_, 1)
            # One (c, c) matrix per pixel, normalised with a softmax over dim 1.
            anti_corruption_cm = stochastic_cm.view(b_, c_ ** 2, h_ * w_).permute(0, 2, 1).contiguous().view(b_ * h_ * w_, c_, c_)
            anti_corruption_cm = torch.softmax(anti_corruption_cm, dim=1)
            # NOTE(review): the matrices are multiplied with raw logits, not
            # softmax probabilities -- confirm this is intended.
            outputs_clean = torch.bmm(anti_corruption_cm, pred_noisy).view(b_ * h_ * w_, c_)
            # Back to (b, c, h, w).
            outputs_clean = outputs_clean.view(b_, h_ * w_, c_).permute(0, 2, 1).contiguous().view(b_, c_, h_, w_)
            _, train_output = torch.max(outputs_clean, dim=1)

        train_iou = seg_score(true_image.cpu().detach().numpy(), train_output.cpu().detach().numpy())

        # Accumulate a plain float: adding the loss tensor itself would keep
        # autograd history alive for every batch of the epoch.
        running_loss += loss.item()
        running_iou += train_iou

        # Progress line, printed once per epoch after the first batch.
        if batch_idx == 0:
            print('Step [{}/{}], '
                'Train loss: {:.4f}, '
                'Train dice: {:.4f},'.format(epoch + 1, num_epochs,
                                                            running_loss / (batch_idx + 1),
                                                            running_iou / (batch_idx + 1)))

    avg_loss = running_loss / num_batches
    avg_iou = running_iou / num_batches
    return avg_iou, float(avg_loss)


In [None]:
def validate(model, device, test_loader, epoch, num_epochs, ramp_up):
    """Evaluate the model on a loader and return the averaged score and loss.

    Args:
        model: Network that returns ``(logits, confusion_matrices)`` for a
            batch of images.
        device: Torch device the batch tensors are moved to.
        test_loader: Sized iterable yielding
            ``(images, true_image, annots, imagename)`` batches.
        epoch: Index of the current epoch (forwarded to the loss).
        num_epochs: Total number of epochs (forwarded to the loss).
        ramp_up: Ramp-up value forwarded to the loss.

    Returns:
        tuple: ``(avg_dice, avg_loss)`` where ``avg_dice`` is the mean of the
        per-batch ``seg_score`` values and ``avg_loss`` is the mean per-batch
        loss as a Python float.

    Raises:
        ValueError: If ``test_loader`` contains no batches.
    """
    num_batches = len(test_loader)
    if num_batches == 0:
        # Fail with a clear message instead of a ZeroDivisionError when averaging.
        raise ValueError("test_loader contains no batches; cannot validate")

    model.eval()
    test_loss = 0.0
    test_dice = 0.0

    # Evaluation needs no gradients; skipping them avoids building (and
    # accumulating) autograd graphs for every validation batch.
    with torch.no_grad():
        for v_images, v_true_image, v_annots, v_imagename in test_loader:
            v_images = v_images.to(device=device, dtype=torch.float32)
            # Annotations must live on the same device as the model outputs,
            # exactly as train() prepares them.
            v_annots = v_annots.to(device=device, dtype=torch.float32)

            v_outputs_logits, cms = model(v_images)

            # Class prediction from the softmax over the channel dimension.
            v_probs = torch.softmax(v_outputs_logits, dim=1)
            _, v_output = torch.max(v_probs, dim=1)

            # seg_score receives numpy arrays here, as it does in train().
            test_dice += seg_score(v_true_image.cpu().detach().numpy(), v_output.cpu().detach().numpy())

            # The loss gets the raw logits, matching what train() feeds to the
            # same loss, so training and validation losses are comparable.
            loss = deterministic_noisy_label_loss(v_outputs_logits, cms, v_annots, epoch, num_epochs, data='lidc', ramp_up=ramp_up)
            test_loss += loss.item()

    avg_dice = test_dice / num_batches
    avg_loss = test_loss / num_batches
    return avg_dice, float(avg_loss)


# Main

In [None]:
# Get the local path of the active mlflow run to save artifacts to
def get_artifact_path(active_run):
    """Convert a run's artifact URI into an absolute local filesystem path.

    Args:
        active_run: Object exposing ``info.artifact_uri`` (for example the
            value returned by ``mlflow.active_run()``).

    Returns:
        str: Absolute path built from the URI's network location and path
        components, with percent-escapes (such as ``%20``) decoded.
    """
    # Imported locally so this cell does not depend on an edit to the import cell.
    from urllib.parse import unquote

    parsed_uri = urlparse(active_run.info.artifact_uri)
    # URIs percent-encode characters such as spaces; decode them so the result
    # names the real directory on disk.
    local_path = os.path.join(parsed_uri.netloc, unquote(parsed_uri.path))
    return os.path.abspath(local_path)

In [None]:
# Obtain hyperparameters for this trial
def suggest_hyperparameters(trial):
    """Ask the trial for this run's hyperparameters and log them with mlflow.

    Args:
        trial: Trial object providing ``suggest_float`` and ``params``.

    Returns:
        tuple: ``(lr, ramp_up, weight_decay)`` in that order.
    """
    suggestions = (
        # Learning rate, sampled on a logarithmic scale.
        trial.suggest_float("lr", 1e-5, 1e-2, log=True),
        # Ramp-up value, sampled from 0.0 to 1.0 in steps of 0.1.
        trial.suggest_float("ramp_up", 0.0, 1.0, step=0.1),
        # AdamW weight decay, sampled on a logarithmic scale.
        trial.suggest_float("weight_decay", 1e-5, 1e-1, log=True),
    )

    print("Suggested hyperparameters: \n" + pformat(trial.params))
    # Record everything the trial has sampled so far in the active mlflow run.
    mlflow.log_params(trial.params)

    learning_rate, ramp_up_value, weight_decay = suggestions
    return learning_rate, ramp_up_value, weight_decay

In [None]:
def getDataLoaders(train_batchsize, validate_batchsize, data_path):
    """Build the training and validation data loaders.

    Args:
        train_batchsize: Batch size of the training loader.
        validate_batchsize: Batch size of the validation loader.
        data_path: Directory containing the ``train`` and ``validate``
            sub-directories.

    Returns:
        tuple: ``(trainloader, validateloader)``.
    """
    # Settings shared by both loaders.
    shared = dict(num_workers=4, collate_fn=LIDC_collate)

    # Training split: augmented, shuffled, incomplete last batch dropped.
    train_dataset = CustomDataset_LIDC(dataset_location=data_path + '/train', augmentation=True)
    # Validation split: no augmentation, fixed order, every sample kept.
    validate_dataset = CustomDataset_LIDC(dataset_location=data_path + '/validate', augmentation=False)

    trainloader = data.DataLoader(
        train_dataset,
        batch_size=train_batchsize,
        shuffle=True,
        drop_last=True,
        **shared,
    )
    validateloader = data.DataLoader(
        validate_dataset,
        batch_size=validate_batchsize,
        shuffle=False,
        drop_last=False,
        **shared,
    )
    return trainloader, validateloader

In [None]:
def objective(trial, experiment, options=None):
    """Optuna objective: train one configuration and return its best validation score.

    Each call opens an mlflow run, samples hyperparameters from ``trial``,
    trains the network epoch by epoch, and reports the per-epoch validation
    score to optuna so unpromising trials can be pruned.

    Args:
        trial: Optuna trial used to sample hyperparameters, report
            intermediate values and check for pruning.
        experiment: Object exposing ``experiment_id``; the mlflow run is
            created under that experiment.
        options: Optional dict of experiment options. Recognised keys:
            ``use_cuda`` (default ``False``), ``save_model`` (default
            ``False``), ``batch_size`` (default ``128``), ``num_epochs``
            (default ``80``) and ``data_path`` (default
            ``'./LIDC_examples'``). The whole dict is logged to mlflow.

    Returns:
        The highest per-epoch value of ``validate()``'s first return value
        (the averaged ``seg_score``) seen during the trial. This is a score to
        be maximised, not a loss.

    Raises:
        optuna.TrialPruned: When optuna decides the trial should stop early.
    """
    # Tolerate the documented default of ``options=None``.
    options = options or {}
    batch_size = options.get("batch_size", 128)
    num_epochs = options.get("num_epochs", 80)
    data_path = options.get("data_path", './LIDC_examples')

    # Best validation score seen so far (higher is better).
    best_val_dice = 0

    with mlflow.start_run(experiment_id=experiment.experiment_id):
        # Record the experiment options with the run.
        mlflow.log_params(options)

        # Hyperparameters suggested by optuna for this trial.
        lr, ramp_up, weight_d = suggest_hyperparameters(trial)

        print("\n**************************")

        active_run = mlflow.active_run()
        print(f"Starting run {active_run.info.run_id} and trial {trial.number}")

        # Local directory of this run's artifacts.
        artifact_path = get_artifact_path(active_run)
        print(f"Artifact path for this run: {artifact_path}")

        # Use CUDA only when requested and available.
        use_cuda = options.get("use_cuda", False) and torch.cuda.is_available()
        device = torch.device("cuda" if use_cuda else "cpu")
        mlflow.log_param("device", device)

        # LIDC training and validation loaders.
        train_loader, val_loader = getDataLoaders(train_batchsize=batch_size, validate_batchsize=batch_size, data_path=data_path)

        model_seg = UNet_DCM(in_ch=1,
                            resolution=64,
                            width=36,
                            depth=3,
                            latent=512,
                            class_no=2,
                            norm='in').to(device)

        optimizer = AdamW(model_seg.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-8, weight_decay=weight_d)

        for epoch in range(num_epochs):
            # train() returns (average seg_score, average loss).
            avg_train_iou, avg_train_loss = train(options, model_seg, device, train_loader, optimizer, epoch, num_epochs=num_epochs, ramp_up=ramp_up)
            # validate() returns (average seg_score, average loss).
            avg_val_dice, avg_val_loss = validate(model_seg, device, val_loader, epoch, num_epochs, ramp_up=ramp_up)

            if avg_val_dice >= best_val_dice:
                best_val_dice = avg_val_dice

            # Metric keys are kept as they were so earlier runs stay comparable,
            # even though "avg_train_dice_loss" and "avg_val_loss" hold scores.
            # Logging happens before the prune check so the epoch that triggers
            # pruning is still recorded.
            mlflow.log_metric("avg_train_dice_loss", avg_train_iou, step=epoch)
            mlflow.log_metric("avg_train_main_loss", avg_train_loss, step=epoch)
            mlflow.log_metric("avg_val_loss", avg_val_dice, step=epoch)
            mlflow.log_metric("avg_val_main_loss", avg_val_loss, step=epoch)

            # Report the intermediate objective value and let optuna prune.
            trial.report(avg_val_dice, step=epoch)
            if trial.should_prune():
                raise optuna.TrialPruned()

            # No optimizer.step() here: parameter updates belong to train().
            # Stepping again after validation would re-apply the stale
            # gradients of the last training batch.

        # Save the final network into the run's artifact directory.
        if options.get("save_model", False):
            pytorch.save_model(model_seg, f"{artifact_path}/LIDC_model")

    return best_val_dice


In [None]:
def main():
    """Run the optuna hyperparameter study and print a summary.

    Ensures the mlflow experiment exists, runs 30 trials of ``objective`` in a
    study whose direction is "maximize", then prints trial counts and the best
    trial. The best trial's value is printed under the label "Loss".
    """
    options = {
        "experiment_name": "54GB-RAM",
        "use_cuda": False,
        "save_model": True
    }
    name = options["experiment_name"]

    # Look the experiment up, creating it first when it does not exist yet.
    experiment = mlflow.get_experiment_by_name(name)
    if experiment is None:
        mlflow.create_experiment(name)
        experiment = mlflow.get_experiment_by_name(name)
    mlflow.set_experiment(name)

    # Set optuna's log verbosity to INFO.
    optuna.logging.set_verbosity(verbosity=optuna.logging.INFO)

    def run_trial(trial):
        # Bind the experiment and options for optuna's single-argument callback.
        return objective(trial, experiment, options)

    # The study shares the experiment's name.
    study = optuna.create_study(study_name=name, direction="maximize")
    study.optimize(run_trial, n_trials=30)

    def trials_in_state(state):
        # All trials of the study that ended in the given state.
        return [t for t in study.trials if t.state == state]

    pruned_trials = trials_in_state(optuna.trial.TrialState.PRUNED)
    complete_trials = trials_in_state(optuna.trial.TrialState.COMPLETE)

    print("\n++++++++++++++++++++++++++++++++++\n")
    print("Study statistics: ")
    print("  Number of finished trials: ", len(study.trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))

    print("Best trial:")
    best = study.best_trial

    print("  Trial number: ", best.number)
    print("  Loss (trial value): ", best.value)

    print("  Params: ")
    for key, value in best.params.items():
        print(f"    {key}: {value}")

In [None]:
# Launch the hyperparameter study defined in main() (30 optuna trials).
main()