In [1]:
import os
os.chdir('..')

# Packages

In [34]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import os
import glob
import numpy as np
from dataset import Dataset_SMP, get_preprocessing, get_validation_augmentation, get_training_augmentation
import segmentation_models_pytorch as smp
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
def loguniform(a=0, b=1):
    return np.exp(np.random.uniform(np.log(a), np.log(b)))

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
torch.backends.cudnn.deterministic = True
random.seed(123456)
torch.manual_seed(123456)
torch.cuda.manual_seed(123456)
np.random.seed(123456)

# Data

In [5]:
data_root    = "C:\\Users\\gueganj\\Desktop\\My_DataBase\\database_10k\\with_one_glasses\\"
num_epochs   = 10
input_size   = 224
size_dataset = 20

In [6]:
# path
folder_data = glob.glob(os.path.join(data_root,"images\\*.jpg"))
folder_mask = glob.glob(os.path.join(data_root,"masks\\*.jpg"))
# suffle the 2 lists the same way (to be sure)
lists_shuffled = list(zip(folder_data, folder_mask))
random.shuffle(lists_shuffled)
folder_data, folder_mask = zip(*lists_shuffled)
# split in train/test
train_size = int(0.8 * size_dataset)
valid_size = int(0.1 * size_dataset)
test_size  = int(0.1 * size_dataset)
train_image_paths = folder_data[:train_size]
train_mask_paths  = folder_mask[:train_size]
valid_image_paths = folder_data[train_size:train_size+valid_size]
valid_mask_paths  = folder_mask[train_size:train_size+valid_size]

# Full Training Function

In [7]:
def train_function(search_space, num_epochs=1, input_size=224):
    # ============ SEARCH SPACE ============
    # unload dictionnary
    batch_size   = search_space['batch_size']
    lr           = search_space['lr']
    momentum     = search_space['momentum']
    weight_decay = search_space['weight_decay']
    nesterov     = search_space['nesterov']
    # ============= MODEL =============
    model = smp.Unet(encoder_name='mobilenet_v2',  encoder_weights='imagenet', activation='sigmoid')
    model.segmentation_head[0] = nn.Conv2d(16, 1, kernel_size=(1, 1), stride=(1, 1), bias=False)
    preprocessing_fn = smp.encoders.get_preprocessing_fn('mobilenet_v2', 'imagenet')
    model.to(device)
    # ============= DATALOADER =============
    train_dataset = Dataset_SMP(train_image_paths, train_mask_paths, augmentation=get_training_augmentation(input_size,input_size), preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = Dataset_SMP(valid_image_paths, valid_mask_paths, augmentation=get_validation_augmentation(input_size,input_size), preprocessing=get_preprocessing(preprocessing_fn))
    train_loader  = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
    valid_loader  = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
    # ============= OPTIMIZER =============
    params_to_update = model.parameters() # change here if we want finetune only certain layer
    optimizer = optim.SGD(params_to_update, lr=lr, momentum=momentum, weight_decay=weight_decay, nesterov=nesterov)
    # ============= LOSS =============
    loss    = smp.utils.losses.DiceLoss()
    metrics = [smp.utils.metrics.IoU(threshold=0.25)]
    # ============= TRAINING =============
    # create epoch runners
    train_epoch = smp.utils.train.TrainEpoch(model, loss=loss, metrics=metrics, optimizer=optimizer, device=device, verbose=False)
    valid_epoch = smp.utils.train.ValidEpoch(model, loss=loss, metrics=metrics, device=device, verbose=False)
    # train model
    best_score = 0
    for epoch in range(0, num_epochs):
        train_logs = train_epoch.run(train_loader)
        valid_logs = valid_epoch.run(valid_loader)
        # save checkpoint with Ray Tune
        with tune.checkpoint_dir(step=epoch) as checkpoint_dir:
            ckpt_path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save((model.state_dict(), optimizer.state_dict()), ckpt_path)
        tune.report(loss=valid_logs['dice_loss'], accuracy=valid_logs['iou_score'])
    print("Finished Training !")

# Configure Search space

In [75]:
# DO a sampling not a grid
search_space = {
    "lr":           tune.loguniform(1e-4, 1e-1),
    "batch_size":   tune.choice([1, 4, 8, 16, 32, 64]),
    "momentum":     tune.sample_from(lambda spec: 1-loguniform(1e-4, 1e-1)),
    "weight_decay": tune.loguniform(1e-5, 1e1),
    "nesterov":     tune.choice([True,False])
}

# Scheduler

In [76]:
# early stopping with asha
scheduler = ASHAScheduler(metric="loss", mode="min", max_t=num_epochs)

# Main

In [77]:
# Uncomment this to enable distributed execution
# import ray
# ray.init(address="auto")

In [78]:
num_samples = 10 # Number of times to sample from the hyperparameter space
result = tune.run(train_function, config=search_space)
best_trial = result.get_best_trial("loss", "min", "last")

2020-10-21 14:07:36,820	ERROR syncer.py:63 -- Log sync requires rsync to be installed.


Trial name,status,loc,batch_size,lr,momentum,nesterov,weight_decay
train_function_01973_00000,RUNNING,,4,0.00480808,0.997403,True,0.00358982


KeyboardInterrupt: 

In [None]:
print("Best trial config: {}".format(best_trial.config))
print("Best trial final validation loss: {}".format(best_trial.last_result["loss"]))
print("Best trial final validation accuracy: {}".format(best_trial.last_result["accuracy"]))

# reporter

In [None]:
reporter = CLIReporter(metric_columns=["loss", "accuracy", "training_iteration"])

In [None]:
import scipy