# BASIC CANDIDATE GENERATION

Import the necessary modules and libraries.

In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to the project's source files
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys, os
# NOTE(review): the star import hides which names come from `bayesopt`;
# presumably it supplies BayesianOptimizer and OptimizationConfig used further
# down -- confirm before switching to an explicit import list.
# NOTE(review): this import runs before the sys.path change below, so
# `bayesopt` must already be resolvable without it.
from bayesopt import *
# Put the parent of the current working directory at the front of sys.path.
# Presumably this is what makes the `etl` package importable when the notebook
# is launched from its own folder -- TODO confirm. The result depends on the
# kernel's working directory, not on the notebook file's location.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.join(os.getcwd()), '..')))
import etl.extractors.provenance_extractor as pe

In the next block we will:
1. define the metrics and parameters to use (manually)
2. extract the data from the training dataset with the `etl` module
3. build the dictionary of parameters and metrics to pass to the `BayesianOptimizer`
4. call the `run` method of `BayesianOptimizer` to execute the entire Bayesian optimization pipeline
5. finally print results

In [None]:
# Names of the columns to pull out of the provenance data: 'input' lists the
# parameters, 'output' lists the metrics.
data_needed = {
    'input': ['DROPOUT', 'BATCH_SIZE', 'EPOCHS', 'LR'],
    'output': ['accuracy', 'emissions'],
}

# Read the recorded runs; columns are parameters/metrics, rows are runs.
extractor = pe.ProvenanceExtractor('../test/prov_25', data_needed)
inp, out = extractor.extract_all()

# Payload handed to the optimizer's run() call below.
data = {'parameters': inp, 'metrics': out}

# Optimizer settings. The first three positional arguments are the metric
# names, the parameter names and one 'MAX'/'MIN' entry per metric
# (presumably: maximise accuracy, minimise emissions -- confirm against
# OptimizationConfig).
opt_config = OptimizationConfig(
    data_needed['output'],
    data_needed['input'],
    ['MAX', 'MIN'],
    ground_truth_dim=len(inp),  # presumably the number of extracted runs -- TODO confirm
    n_candidates=1,
    n_restarts=10,
    raw_samples=1000,
    optimizers='optimize_acqf',
    acqf='ucb',
    beta=1.5,
    verbose=True,
)
bayesopt = BayesianOptimizer(opt_config)

# Run the optimization on the extracted data and print the returned result.
res = bayesopt.run(data)
print(res)

   -> Starting Bayesian Optimization
   -> Data transformed
   -> Bounds generated
   -> Data normalized
   -> Model trained
   -> Candidates obtained
   -> Candidates denormalized
   -> Bayesian Optimization finished, took 1.884s
┌───────────┬──────────────┬───────────┬──────────┐
│   DROPOUT │   BATCH_SIZE │    EPOCHS │       LR │
├───────────┼──────────────┼───────────┼──────────┤
│  0.508000 │    31.966104 │ 10.817805 │ 0.108222 │
└───────────┴──────────────┴───────────┴──────────┘
   -> Estimating candidates
CANDIDATE 1
┌───────────┬──────────┬──────────┐
│ METRIC    │     MEAN │      STD │
├───────────┼──────────┼──────────┤
│ accuracy  │ 0.688375 │ 0.034825 │
├───────────┼──────────┼──────────┤
│ emissions │ 0.005073 │ 0.001023 │
└───────────┴──────────┴──────────┘ 

OptimizationResults(candidates=[[0.508, 31.966103816624212, 10.817805013086787, 0.10822194276684086]], acq_values=0.3629633489746321, time=1.8843865001108497, posterior=<botorch.posteriors.gpytorch.GPyTorchPosterior