In [1]:
%%capture
%cd ..
from pprint import pprint
import matplotlib.pyplot as plt

In [2]:
import json

# Importing

In [3]:
from api import Benchmark

In [4]:
# JSON file to load; the path is relative to the working directory set by `%cd ..` in the first cell.
bench_dir = "data/six_datasets_lw.json"
# cache=False: the data is read from the JSON file rather than from cached data.
bench = Benchmark(bench_dir, cache=False)

==> Loading data...
==> No cached data found or cache set to False.
==> Reading json data...
==> Done.


# Data generation

### Datasets

In [5]:
# Names of the datasets contained in the loaded benchmark.
dataset_names = bench.get_dataset_names()
# OpenML task ids as reported by the benchmark (not used by the data generation cells shown here).
openml_task_ids = bench.get_openml_task_ids()

print(dataset_names)

['Fashion-MNIST', 'adult', 'higgs', 'jasmine', 'vehicle', 'volkert']


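Hyperparameter ranges of the configurations, listed in the same order as the limits used for the transformations below:

- Batch size: [16, 512], on log-scale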
- Dropout: [0.0, 1.0]
- Maximum number of units per layer: [64, 1024], on log-scale
- Number of layers: [1, 5]
- Learning rate: [1e-4, 1e-1], on log-scale
- Momentum: [0.1, 0.99]
- Weight decay: [1e-5, 1e-1]

### Transformations

In [6]:
def create_trafo(lims):
    """Build min-max scaling parameters from a list of ``[lower, upper]`` limits.

    Returns one ``[offset, scale]`` pair per entry of ``lims`` where
    ``offset = lower`` and ``scale = 1 / (upper - lower)``, so that
    ``(value - offset) * scale`` maps ``lower`` to 0 and ``upper`` to 1.
    A zero-width numeric range (``lower == upper``) raises ``ZeroDivisionError``.
    """
    params = []
    for bounds in lims:
        lower = bounds[0]
        upper = bounds[1]
        params.append([lower, 1/(upper-lower)])
    return params

# (lower, upper) limits, one entry per hyperparameter. The order has to match
# the order in which the hyperparameters are read when the inputs are built.
trafos = create_trafo([
    (16, 512),     # batch size
    (0, 1),        # dropout
    (64, 1024),    # maximum number of units per layer
    (1, 5),        # number of layers
    (1e-4, 1e-1),  # learning rate
    (0.1, 0.99),   # momentum
    (1e-5, 1e-1),  # weight decay
])

In [7]:
def apply_trafo(features, trafos):
    """Scale each feature with the ``[offset, scale]`` pair at the same position.

    Computes ``(features[i] - trafos[i][0]) * trafos[i][1]`` for every feature
    and returns the results as a new list. ``trafos`` may be longer than
    ``features`` (extra entries are ignored); if it is shorter, indexing
    raises ``IndexError``.
    """
    scaled = []
    for position, value in enumerate(features):
        trafo = trafos[position]
        scaled.append((value - trafo[0]) * trafo[1])
    return scaled

### Generating outputs

Add budget as hyperparameter and transform inputs to $[0, 1]^{(d_{configs}+1)}$

$X \in \mathbb{R}^{(n_{configs}\cdot \frac{budget}{step}) \times (d_{configs}+1)}$
with
$n_{configs} = 2000, d_{configs}=7$

In [8]:
budget = 50
step = 2

# Hyperparameters read from each configuration. The order has to match the
# order of the limits that were passed to create_trafo.
config_keys = ['batch_size',
               'max_dropout',
               'max_units',
               'num_layers',
               'learning_rate',
               'momentum',
               'weight_decay']
n_configs = bench.get_number_of_configs(dataset_name=dataset_names[0])

# Fetch and scale every configuration exactly once. (Looking the config up
# inside the budget loop repeats the same get_config call for every key and
# every budget value.) Configurations are taken from the first dataset only.
# NOTE(review): this assumes get_config is a pure lookup and that a config id
# denotes the same hyperparameters for every dataset -- confirm.
scaled_configs = [
    apply_trafo([config[key] for key in config_keys], trafos)
    for config in (bench.get_config(dataset_name=dataset_names[0], config_id=str(i))
                   for i in range(n_configs))
]

# One row per (config, budget) pair, config-major and budget-minor; the last
# column is the budget value b scaled by 1 / budget.
X = [scaled + [float(b)/budget]
     for scaled in scaled_configs
     for b in range(0, budget, step)]

$Y \in \mathbb{R}^{n_{datasets} \times (n_{configs}\cdot \frac{budget}{step})}$

In [9]:
# One row per dataset. Within a row the entries are ordered config-major and
# budget-minor, i.e. one entry per (config, b) pair with b in range(0, budget, step).
# Each curve is queried once per configuration and then indexed at every
# selected position, instead of repeating the same query for every position.
# NOTE(review): assumes bench.query is a pure lookup returning an indexable
# sequence per configuration -- confirm.
Y = [[curve[b]
      for curve in (bench.query(dataset_name=d, tag="Train/val_balanced_accuracy", config_id=i)
                    for i in range(bench.get_number_of_configs(dataset_name=d)))
      for b in range(0, budget, step)]
     for d in dataset_names]

$C \in \mathbb{R}^{n_{datasets} \times (n_{configs}\cdot \frac{budget}{step})}$

In [10]:
# One row per dataset holding the values of the "time" tag. Within a row the
# entries are ordered config-major and budget-minor, one entry per (config, b)
# pair with b in range(0, budget, step).
# The stride uses `step` (not a hard-coded 2) so that C stays aligned with the
# other generated arrays when `step` is changed.
# Each curve is queried once per configuration and then indexed at every
# selected position, instead of repeating the same query for every position.
# NOTE(review): assumes bench.query is a pure lookup returning an indexable
# sequence per configuration -- confirm.
C = [[curve[b]
      for curve in (bench.query(dataset_name=d, tag="time", config_id=i)
                    for i in range(bench.get_number_of_configs(dataset_name=d)))
      for b in range(0, budget, step)]
     for d in dataset_names]

In [11]:
# Serialize the generated arrays into a single JSON object with the keys
# 'X', 'Y' and 'C'; the file is written relative to the current working directory.
with open('data_lcbench.json', 'w') as file:
    file.write(
        json.dumps(dict(X=X, Y=Y, C=C))
    )