# GENERATE CANDIDATES ITERATIVELY

In this notebook we iteratively generate candidates, execute them, and append their results to the training data, so that each iteration starts from an enlarged dataset.

In [1]:
%load_ext autoreload
%autoreload 2

Imports needed:

In [2]:
import sys, os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.join(os.getcwd()), '..')))
from bayesopt_core.bayesian_handler import BayesianOptimizer
from bayesopt_core.config import OptimizationConfig
import bayesopt_core.helpers.results_to_csv as csvres
import etl.extractors.provenance_extractor as pe
import torch, torchvision, yprov4ml
import torch.optim as optim
from tqdm import tqdm
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import netCDF4 as nc

Initialization: retrieve the training data and configure the Bayesian optimization.

In [3]:
# Names of the parameters (inputs) and metrics (outputs) requested from the extractor.
data_needed = {
    'input': ['DROPOUT', 'BATCH_SIZE', 'LR'],
    'output': ['accuracy', 'emissions']
}
# Read the previously executed runs stored under ../test/small_prov.
extractor = pe.ProvenanceExtractor('../test/small_prov', data_needed)
inp, out = extractor.extract_all()      # cols are parameters/metrics, rows are runs

# NOTE(review): the three positional arguments look like (metric names, parameter names,
# per-metric direction), i.e. maximize accuracy and minimize emissions -- confirm against
# the OptimizationConfig signature.
bayesopt = BayesianOptimizer(OptimizationConfig(
    data_needed['output'],
    data_needed['input'],
    ['MAX', 'MIN'],
    n_candidates=1,
    n_restarts=10,
    raw_samples=200,
    optimizers='optimize_acqf',
    acqf='ucb',
    beta=1.5,
    verbose=True
))

# Training set passed to bayesopt.run(); the iteration cell appends every executed
# candidate to these two collections.
data = {
    'parameters': inp,
    'metrics': out
}

Definition of the classes and functions used to train and evaluate each candidate while tracking the run with yprov4ml

In [4]:
class Net(nn.Module):
    """Convolutional classifier: two 3x3 convolutions followed by three linear layers.

    Args:
        model_size: "small" or "medium" select hidden widths (64, 32) or (512, 256);
            any other value selects (1024, 256).
        dropout: drop probability shared by both dropout modules.

    ``fc1`` expects 12544 flattened features, i.e. 64 channels x 14 x 14, which is what
    a 32x32 input produces after the two unpadded convolutions and the 2x2 max-pool.
    ``forward`` returns log-probabilities over 10 classes.
    """

    def __init__(self, model_size, dropout):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        # Widths of the two hidden fully connected layers for the requested size.
        if model_size == "small":
            hidden_a, hidden_b = 64, 32
        elif model_size == "medium":
            hidden_a, hidden_b = 512, 256
        else:
            hidden_a, hidden_b = 1024, 256

        self.fc1 = nn.Linear(12544, hidden_a)
        self.fc2 = nn.Linear(hidden_a, hidden_b)
        self.fc3 = nn.Linear(hidden_b, 10)

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities."""
        # Convolutional feature extractor.
        feature_maps = F.relu(self.conv1(x))
        feature_maps = F.relu(self.conv2(feature_maps))
        feature_maps = self.dropout1(F.max_pool2d(feature_maps, 2))

        # Classifier head on the flattened feature maps.
        flat = torch.flatten(feature_maps, 1)
        hidden = self.dropout2(F.relu(self.fc1(flat)))
        # dropout1 is applied a second time here; it has the same p as dropout2.
        hidden = self.dropout1(F.relu(self.fc2(hidden)))
        return F.log_softmax(self.fc3(hidden), dim=1)

def train(lr, epochs, batch_size, dropout, model_size):
    """Train a fresh ``Net`` on the CIFAR10 training split and return it.

    Args:
        lr: learning rate for the Adam optimizer.
        epochs: number of full passes over the training split.
        batch_size: mini-batch size passed to the DataLoader (must be an int).
        dropout: drop probability forwarded to ``Net``.
        model_size: size label forwarded to ``Net``.

    Returns:
        The trained model, left on the device used for training (CUDA when available)
        and still in training mode.

    After the last epoch the carbon metrics are logged once to yprov4ml under the
    TRAINING context. NOTE(review): presumably a yprov4ml run must already be active
    when this is called -- confirm against the yprov4ml API.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
    # NOTE(review): no shuffle argument is given, so batches come in dataset order every
    # epoch. Left unchanged to stay comparable with previously recorded runs.
    trainloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)

    model = Net(model_size, dropout=dropout).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss().to(device)

    model.train()

    # The per-step loss is not collected: nothing consumed it, and calling loss.item()
    # on every step forces a host/device synchronisation.
    for _ in range(epochs):
        for batch in tqdm(trainloader):
            inputs, labels = batch[0].to(device), batch[1].to(device)
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()
    yprov4ml.log_carbon_metrics(yprov4ml.Context.TRAINING, step=0)
    return model

def emissions_(expdir_path):
    """Read the training emissions recorded for one executed experiment.

    Args:
        expdir_path: experiment directory, relative to the current working directory.

    Returns:
        The full content of the ``values`` variable stored in
        ``metrics_GR0/emissions_Context.TRAINING_GR0.nc`` inside that directory.
    """
    fp = f'./{expdir_path}/metrics_GR0/emissions_Context.TRAINING_GR0.nc'
    # Close the NetCDF file as soon as the values are read; this is called once per
    # candidate, so an unclosed Dataset would leak one file handle per run.
    with nc.Dataset(fp) as dataset:
        return dataset["values"][:]

def validate(model, batch_size=128):
    """Compute the accuracy of ``model`` on the CIFAR10 test split.

    Args:
        model: network to evaluate; it is switched to eval mode and is not moved, so it
            must already live on the device selected here (CUDA when available).
        batch_size: mini-batch size used while iterating over the test split.

    Returns:
        Fraction of correctly classified test images, as a float in [0, 1].
    """
    normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    test_set = torchvision.datasets.CIFAR10(
        root='./data',
        train=False,
        download=True,
        transform=transforms.Compose([transforms.ToTensor(), normalize]),
    )
    testloader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model.eval()
    n_correct, n_seen = 0, 0
    with torch.no_grad():
        for batch in tqdm(testloader):
            images = batch[0].to(device)
            labels = batch[1].to(device)
            # Index of the highest score along the class dimension is the prediction.
            predicted = torch.max(model(images).data, 1)[1]
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()

    return n_correct / n_seen

def handle_modelsize(n: float | str):
    if isinstance(n, float):
        if n <= 3700506.0:      #small
            return 'small'
        elif n <= 9853386.0:    #medium
            return 'medium'
        else:                   #large
            return 'large'
    else:
        if n == 'small':
            return 824682.0
        elif n == 'medium':
            return 6576330.0
        else: 
            return 13130442.0

Setup of the iteration number and iterative execution of:
- candidate generation
- candidate execution
- candidate storing
- candidate integration in the training dataset for next iteration

NB: in this case the MODEL_SIZE and EPOCHS parameters are not optimized; they are fixed to `'small'` and `4` to keep the runs short

In [5]:
n = 3                       # number of optimization iterations
EPOCHS = 4                  # fixed (not optimized) to keep the runs short
MODEL_SIZE = 'small'        # fixed (not optimized) to keep the runs short
PROV_DIR = "../test/prov_iterative_candidates_executed"     # change the folder in which you want to save the candidate executed

for it in range(n):
    # Outer double quotes: reusing the same quote inside an f-string expression is a
    # SyntaxError before Python 3.12.
    print(f"Number of training dataset runs: {len(data['parameters'])}")
    res = bayesopt.run(data)

    exec_res = []
    for candidate in res.candidates:
        # Candidate columns follow the order DROPOUT, BATCH_SIZE, LR.
        dropout, lr = candidate[0], candidate[2]
        # The optimizer proposes a real-valued batch size; the run uses the truncated
        # integer, so that integer is what gets logged and fed back as training data.
        batch_size = int(candidate[1])
        # Built once so the run name and the path read back below cannot diverge.
        experiment_name = f"{round(lr, 4)}_{EPOCHS}_{batch_size}_{round(dropout, 2)}_{MODEL_SIZE}"

        yprov4ml.start_run(
            prov_user_namespace="www.example.org",
            experiment_name=experiment_name,
            provenance_save_dir=PROV_DIR,
            save_after_n_logs=100,
            collect_all_processes=False,
            disable_codecarbon=False,
            metrics_file_type=yprov4ml.MetricsType.NETCDF,
        )

        yprov4ml.log_param("MODEL_SIZE", MODEL_SIZE, yprov4ml.Context.TRAINING)
        yprov4ml.log_param("DROPOUT", dropout, yprov4ml.Context.TRAINING)
        yprov4ml.log_param("BATCH_SIZE", batch_size, yprov4ml.Context.TRAINING)
        yprov4ml.log_param("EPOCHS", EPOCHS, yprov4ml.Context.TRAINING)
        yprov4ml.log_param("LR", lr, yprov4ml.Context.TRAINING)

        trained_model = train(lr, EPOCHS, batch_size, dropout, MODEL_SIZE)
        acc = validate(trained_model, batch_size)

        yprov4ml.log_param("accuracy", acc, yprov4ml.Context.TESTING)

        yprov4ml.end_run(
            create_graph=False,
            create_svg=False,
            crate_ro_crate=False
        )

        print(f'Accuracy: {100 * acc} %')
        # NOTE(review): the "_0" suffix is presumably the run index yprov4ml appends to
        # the experiment name; a repeated experiment name would need another index.
        em = emissions_(f"{PROV_DIR}/{experiment_name}_0")[0]
        print(f'Emissions: {em}')

        exec_res.append([acc, em])

        # Feed the executed candidate back into the training set for the next iteration.
        data['parameters'].append([dropout, batch_size, lr])
        data['metrics'].append([acc, em])

    csv_saver = csvres.CSVResults(
        {'parameters': ['DROPOUT', 'BATCH_SIZE', 'LR'],
        'metrics': ['accuracy', 'emissions']},
        './candidates_iterative_executed.csv')
    csv_saver.log_candidates(res, bayesopt.config, exec_res)

print(f"executed {n*bayesopt.config.n_candidates} candidates")

Number of training dataset runs: 242
   -> Starting Bayesian Optimization
   -> Data transformed
   -> Bounds generated
   -> Data normalized
   -> Model trained
   -> Candidates obtained
   -> Candidates denormalized
   -> Bayesian Optimization finished, took 1.203s
┌───────────┬──────────────┬──────────┐
│   DROPOUT │   BATCH_SIZE │       LR │
├───────────┼──────────────┼──────────┤
│  0.092000 │    15.040000 │ 0.000183 │
└───────────┴──────────────┴──────────┘
   -> Estimating candidates
CANDIDATE 1
┌───────────┬──────────┬──────────┐
│ METRIC    │     MEAN │      STD │
├───────────┼──────────┼──────────┤
│ accuracy  │ 0.689782 │ 0.075515 │
├───────────┼──────────┼──────────┤
│ emissions │ 0.005612 │ 0.000293 │
└───────────┴──────────┴──────────┘ 



100%|██████████| 3334/3334 [01:20<00:00, 41.32it/s]
100%|██████████| 3334/3334 [01:19<00:00, 42.06it/s]
100%|██████████| 3334/3334 [01:24<00:00, 39.63it/s]
100%|██████████| 3334/3334 [01:29<00:00, 37.33it/s]
100%|██████████| 667/667 [00:06<00:00, 98.25it/s] 


Accuracy: 64.64999999999999 %
Emissions: 0.011190276592969894
Number of training dataset runs: 243
   -> Starting Bayesian Optimization
   -> Data transformed
   -> Bounds generated
   -> Data normalized
   -> Model trained
   -> Candidates obtained
   -> Candidates denormalized
   -> Bayesian Optimization finished, took 1.072s
┌───────────┬──────────────┬──────────┐
│   DROPOUT │   BATCH_SIZE │       LR │
├───────────┼──────────────┼──────────┤
│  0.165169 │    64.979200 │ 0.000921 │
└───────────┴──────────────┴──────────┘
   -> Estimating candidates
CANDIDATE 1
┌───────────┬───────────┬──────────┐
│ METRIC    │      MEAN │      STD │
├───────────┼───────────┼──────────┤
│ accuracy  │  0.663099 │ 0.077890 │
├───────────┼───────────┼──────────┤
│ emissions │ -0.003222 │ 0.002500 │
└───────────┴───────────┴──────────┘ 



100%|██████████| 782/782 [01:14<00:00, 10.49it/s]
100%|██████████| 782/782 [01:14<00:00, 10.47it/s]
100%|██████████| 782/782 [01:14<00:00, 10.48it/s]
100%|██████████| 782/782 [01:14<00:00, 10.50it/s]
100%|██████████| 157/157 [00:06<00:00, 23.03it/s]

Accuracy: 67.75999999999999 %
Emissions: 0.009664514102041721
Number of training dataset runs: 244
   -> Starting Bayesian Optimization
   -> Data transformed
   -> Bounds generated
   -> Data normalized





   -> Model trained
   -> Candidates obtained
   -> Candidates denormalized
   -> Bayesian Optimization finished, took 1.124s
┌───────────┬──────────────┬──────────┐
│   DROPOUT │   BATCH_SIZE │       LR │
├───────────┼──────────────┼──────────┤
│  0.508160 │    65.977984 │ 0.000787 │
└───────────┴──────────────┴──────────┘
   -> Estimating candidates
CANDIDATE 1
┌───────────┬──────────┬──────────┐
│ METRIC    │     MEAN │      STD │
├───────────┼──────────┼──────────┤
│ accuracy  │ 0.621479 │ 0.092438 │
├───────────┼──────────┼──────────┤
│ emissions │ 0.003870 │ 0.002523 │
└───────────┴──────────┴──────────┘ 



100%|██████████| 770/770 [01:12<00:00, 10.64it/s]
100%|██████████| 770/770 [01:14<00:00, 10.32it/s]
100%|██████████| 770/770 [01:14<00:00, 10.32it/s]
100%|██████████| 770/770 [01:14<00:00, 10.37it/s]
100%|██████████| 154/154 [00:05<00:00, 25.79it/s]

Accuracy: 58.84 %
Emissions: 0.00966402143239975
executed 3 candidates



