In [1]:
from thil_utils import *
torch.manual_seed(1000)
import datetime
from datetime import datetime

from ax.plot.contour import plot_contour
from ax.plot.trace import optimization_trace_single_method
from ax.service.managed_loop import optimize
from ax.utils.notebook.plotting import render
from ax.utils.tutorials.cnn_utils import train

Using cuda device
Figure directory exists.
Model saving directory exists.


In [2]:
p = create_dataset('kappa_data.pkl',
                   apply_transforms=True)
n = create_dataset_n(img_dir = '/hpc/home/srs108/thilafushi/none_sorted_images/negative',
                    apply_transforms=True)

data_train =  torch.utils.data.ConcatDataset([p, n])
train_data, val_data = train_test_split(data_train, test_size=0.2, random_state=42)

train_dl = DataLoader(train_data, batch_size=parameters.get("pos_weight", 13),shuffle=True)
val_dl = DataLoader(val_data, batch_size = 16, shuffle=False, drop_last=True)

In [4]:
# def net_train(model, dataloader, loss, opt, epoch, device):
#     model.to(device=device)
#     model.train()
#     train_ious = []
#     running_loss = 0
#     running_acc = 0
#     iou_metric = torchmetrics.IoU(num_classes=2).to(device)
    
#     for i, batch in enumerate(dataloader):
#         x = batch['img'].float().to(device=device)
#         y = batch['fpt'].unsqueeze(dim=1).to(device=device).long()   
#         output = model(x)

#         output_binary = torch.zeros(output.shape).to(device)
#         output_binary[output >= 0.5] = 1

#         iou_score = iou_metric(output_binary.float(), y)
#         train_ious.append(iou_score.item())
        
#         #Accuracy
#         acc_epoch = pixel_accuracy(output, y)
#         running_acc += acc_epoch.item()
        
#         #Binary Cross Entropy Loss
#         loss_epoch = loss(output, y.float())
#         running_loss += loss_epoch.item()

#         opt.zero_grad()
#         loss_epoch.backward()
#         opt.step()

#     return model, np.average(train_ious)


In [8]:
#function that returns a trained model. 
def net_train(net, train_loader, parameters, dtype, device):
    net.to(device=device)
    net.train()
    iou_metric = torchmetrics.IoU(num_classes=2).to(device)
    criterion = nn.BCEWithLogitsLoss(pos_weight= torch.tensor(parameters.get("pos_weight", 13)))
    optimizer  = optim.SGD(net.parameters(),
                     lr= parameters.get("lr",1e-6),
                     momentum= parameters.get("momentum",0.9))

    num_epochs = parameters.get("num_epochs", 100)
#     train_ious = []
    for _ in range(num_epochs):
        train_ious = []
        running_loss = 0
        for i, batch in enumerate(train_loader):
            x = batch['img'].float().to(device=device)
            y = batch['fpt'].unsqueeze(dim=1).to(device=device).long()   

            optimizer.zero_grad()
            output = net(x)
            
            output_binary = torch.zeros(output.shape).to(device)
            output_binary[output >= 0.5] = 1
            
            iou_score = iou_metric(output_binary.float(), y)
            train_ious.append(iou_score.item())

            loss = criterion(output, y.float())
            running_loss += loss.item()
            loss.backward()
            optimizer.step()
            
    return net, np.average(train_ious)

In [9]:
#initialize model and return network ready to train.
def init_net(parameterization):
    model = smp.Unet(encoder_name = 'resnet18', encoder_weights = 'imagenet', in_channels = 3,classes = 1, activation='sigmoid')
    return model

In [10]:
def evaluate_(net, data_loader, dtype, device):
    net.eval()
    test_ious = []
    iou_metric = torchmetrics.IoU(num_classes=2).to(device)

    with torch.no_grad():
        for i, batch in enumerate(data_loader):
            x = batch['img'].to(device=device).float()
            y = batch['fpt'].unsqueeze(dim=1).to(device=device).long()
            output = net(x)
            
            output_binary = torch.zeros(output.shape).to(device)
            output_binary[output >= 0.5] = 1
            
            iou_score = iou_metric(output_binary.float(), y)
            test_ious.append(iou_score.item())

    return np.average(test_ious)

In [11]:
#functions that the Bayesian optimizer calls on every run.
def train_evaluate(parameterization):
    train_loader = DataLoader(data_train,
                             batch_size = parameterization.get("batchsize", 32),
                             shuffle=True)
    
    untrained_net = init_net(parameterization)
    
    trained_net, train_iou = net_train(net=untrained_net,
                                    train_loader=train_dl,
                                    parameters=parameterization,
                                    dtype=dtype,
                                    device=device)
    
    return evaluate_(
            net=trained_net,
            data_loader = val_dl,
            dtype=dtype,
            device=device)

In [None]:
#optimization. Specify hyperparameters 
# start = time.time()
dtype = torch.float
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

best_parameters, values, experiment, model = optimize(
    parameters=[
        {"name": "lr", "type":"range", "bounds":[1e-9, 1e-3]},
        {"name": "batchsize", "type": "range", "bounds": [16, 128]},
        {"name": "momentum", "type": "range", "bounds": [0.0, 1.0]},
        {"name": "num_epochs", "type":"range", "bounds": [1, 300]},
        {"name": "pos_weight", "type":"range", "bounds": [1, 30]},
    ],
    evaluation_function = train_evaluate,
    objective_name='iou',
)
print(best_parameters)
means, covariances = values
print(means)
print(covariances)
# print(f"Training time: {(time.time() - start)/3600}h")

[INFO 06-03 18:01:45] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter lr. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 06-03 18:01:45] ax.service.utils.instantiation: Inferred value type of ParameterType.INT for parameter batchsize. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 06-03 18:01:45] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter momentum. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 06-03 18:01:45] ax.service.utils.instantiation: Inferred value type of ParameterType.INT for parameter num_epochs. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter d

In [26]:

best_objectives = np.array([[trial.objective_mean*100 for trial in experiment.trials.values()]])

best_objective_plot = optimization_trace_single_method(
    y=np.maximum.accumulate(best_objectives, axis=1),
    title="Model performance vs. # of iterations",
    ylabel="IoU, %",
)
render(best_objective_plot)

render(plot_contour(model=model, param_x='pos_weight', param_y='lr', metric_name='iou'))

In [27]:
render(plot_contour(model=model, param_x='pos_weight', param_y='num_epochs', metric_name='iou'))

In [28]:
render(plot_contour(model=model, param_x='pos_weight', param_y='batchsize', metric_name='iou'))

In [29]:
render(plot_contour(model=model, param_x='pos_weight', param_y='lr', metric_name='iou'))