In [4]:
from classification_utils import *
torch.manual_seed(1000)
import datetime
from datetime import datetime

# import botorch
from ax.plot.contour import plot_contour
from ax.plot.trace import optimization_trace_single_method
from ax.service.managed_loop import optimize
from ax.utils.notebook.plotting import render
from ax.utils.tutorials.cnn_utils import train #, evaluate

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [5]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.resnet_pretrained = resnet50(pretrained=True)
        self.resnet_pretrained.conv1 = nn.Conv2d(3, 64, kernel_size=(3, 3),
                              stride=(2, 2),padding=(3, 3), bias=False)
        
        self.fc1 = nn.Linear(self.resnet_pretrained.fc.out_features, 256)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(256,128)
        self.fc3 = nn.Linear(128, 1)  # output size changed to 2
        self.dropout = nn.Dropout(p=0.15)
    def forward(self, image):
        img_features = self.resnet_pretrained(image)
        img_features = torch.flatten(img_features, 1)
        img_features = self.fc1(img_features)
        x = self.relu(img_features)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc3(x)
        return x

In [6]:
batch_size = 32
data_pos = create_dataset(datadir= '/hpc/home/srs108/thilafushi/none_sorted_images/positive')
data_neg = create_dataset(datadir= '/hpc/home/srs108/thilafushi/none_sorted_images/negative')
data_all =  torch.utils.data.ConcatDataset([data_pos, data_neg])

# # Initialize data loaders
train_data, test_data = train_test_split(data_all, test_size=0.2, random_state=42)

train_dl = DataLoader(train_data, batch_size=batch_size, num_workers=4, pin_memory=True)
val_dl = DataLoader(test_data, batch_size=batch_size)

In [7]:
#function that returns a trained model. 
def net_train(net, train_loader, parameters, dtype, device):
    net.to(device=device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer  = optim.SGD(net.parameters(),
                     lr= parameters.get("lr",1e-6),
                     momentum= parameters.get("momentum",0.9))
    num_epochs = parameters.get("num_epochs", 100)
    train_ious = []
    for _ in range(num_epochs):
        running_acc = 0
        for i, batch in enumerate(train_loader):
            inputs = batch['img'].float().to(device=device)
            labels = batch['lbl'].float().to(device=device)
            optimizer.zero_grad()
            output = net(inputs)
            
            output_binary = np.zeros(output.shape)
            output_binary[output.cpu().detach().numpy() >= 0] = 1
            
            loss_epoch = loss(output, labels.reshape(-1, 1))
            loss.backward()
            optimizer.step()
    return net

In [8]:
#initialize model and return network ready to train.
def init_net(parameterization):
    model = CNN()
    return model

In [9]:
def evaluate(net, data_loader, dtype, device):
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for i, batch in enumerate(data_loader):
            inputs = batch['img'].float().to(device=device)
            labels = batch['lbl'].float().to(device=device)
            outputs = net(inputs)

            output_binary = np.zeros(outputs.shape)
            output_binary[outputs.cpu().detach().numpy() >= 0] = 1
            
            total += labels.size(0)
            correct += (output_binary == labels).sum().item()
    return correct / total

In [10]:
#functions that the Bayesian optimizer calls on every run.
def train_evaluate(parameterization):
    train_loader = DataLoader(train_data,
                             batch_size = parameterization.get("batchsize", 32),
                             shuffle=True)
    
    untrained_net = init_net(parameterization)
    
    trained_net = net_train(net=untrained_net,
                            train_loader=train_dl,
                            parameters=parameterization,
                            dtype=dtype,
                            device=device)
    testloader = DataLoader(test_data, batch_size=32,
                                         shuffle=False)
    return evaluate(
        net=trained_net,
        data_loader=testloader,
        dtype=dtype,
        device=device
    )

In [8]:
#optimization. Specify hyperparameters 
start = time.time()
dtype = torch.float
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

best_parameters, values, experiment, model = optimize(
    parameters=[
        {"name": "lr", "type":"range", "bounds":[1e-9, 1e-3]},
        {"name": "batchsize", "type": "range", "bounds": [16, 128]},
        {"name": "momentum", "type": "range", "bounds": [0.0, 1.0]},
        {"name": "num_epochs", "type":"range", "bounds": [1, 500]},
    ],
    evaluation_function = train_evaluate,
    objective_name='accuracy',
)
print(best_parameters)
means, covariances = values
print(means)
print(covariances)
print(f"Training time: {(time.time() - start)/3600}h")

[INFO 03-03 11:34:58] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter lr. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 03-03 11:34:58] ax.service.utils.instantiation: Inferred value type of ParameterType.INT for parameter batchsize. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 03-03 11:34:58] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter momentum. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 03-03 11:34:58] ax.service.utils.instantiation: Inferred value type of ParameterType.INT for parameter num_epochs. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter d

[INFO 03-03 11:36:42] ax.service.managed_loop: Running optimization trial 1...
[ERROR 03-03 11:36:49] ax.service.managed_loop: Encountered exception during optimization: 
Traceback (most recent call last):
  File "/hpc/home/srs108/.local/lib/python3.7/site-packages/ax/utils/common/executils.py", line 169, in handle_exceptions_in_retries
    yield  # Perform action within the context manager.
  File "/hpc/home/srs108/.local/lib/python3.7/site-packages/ax/utils/common/executils.py", line 147, in actual_wrapper
    return func(*args, **kwargs)
  File "/hpc/home/srs108/.local/lib/python3.7/site-packages/ax/service/managed_loop.py", line 206, in run_trial
    for arm, weight in self._get_weights_by_arm(trial)
  File "/hpc/home/srs108/.local/lib/python3.7/site-packages/ax/service/managed_loop.py", line 206, in <dictcomp>
    for arm, weight in self._get_weights_by_arm(trial)
  File "/hpc/home/srs108/.local/lib/python3.7/site-packages/ax/service/managed_loop.py", line 144, in _call_evaluation

ValueError: Cannot identify best point if experiment contains no data.