In [7]:
from functools import partial

from ConfigSpace import Configuration
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter, UniformIntegerHyperparameter
from smac import MultiFidelityFacade, RunHistory, Scenario
from smac.intensifier.hyperband import Hyperband
from smac.model.gaussian_process.gaussian_process import GaussianProcess
from smac.model.gaussian_process.kernels import MaternKernel, ConstantKernel, RBFKernel
from smac.runhistory.dataclasses import TrialValue
# from train_gpt2_hpo import train_eval_hpo, setup_logger, print_with_tabs


In [8]:
def train_eval_hpo(
    config_space: "Configuration",
    seed: int,
    budget: float,
    input_bin: str = "dev/data/fineweb10B/fineweb_train_*.bin",
    input_val_bin: str = "dev/data/fineweb10B/fineweb_val_*.bin",
    model: str = "d6",
    batch_size: int = 4,
    # sequence_length: int = 1024,
    total_batch_size: int = -1,
    # learning_rate: float = 1e-4,
    warmup_iters: int = 700, #0,
    learning_rate_decay_frac: float = 0.0,
) -> float:
    """Stand-in target function for exercising the SMAC ask/tell loop.

    No training happens here: the cost is simply the product of the three
    sampled hyperparameter values, which makes the optimisation loop cheap
    to run end to end.

    Args:
        config_space: One sampled configuration (the callers in this notebook
            pass ``info.config``), indexed by hyperparameter name. Despite the
            parameter name it is not the search space itself; the name is kept
            so existing keyword callers continue to work.
        seed: Seed supplied with the trial. Not used by this stand-in.
        budget: Fidelity supplied with the trial; the values handed out are
            floats. Not used by this stand-in.
        input_bin, input_val_bin, model, batch_size, total_batch_size,
        warmup_iters, learning_rate_decay_frac: Accepted but not used by this
            stand-in. NOTE(review): presumably these mirror the signature of
            the real training entry point - confirm.

    Returns:
        ``learning_rate * weight_decay * sequence_length``.

    Raises:
        KeyError: If one of the three hyperparameters is missing (assuming a
            mapping-like ``config_space``).
    """
    learning_rate = config_space["learning_rate"]
    weight_decay = config_space["weight_decay"]
    sequence_length = config_space["sequence_length"]

    # Multiplication order is kept as-is so floating-point results do not shift.
    return learning_rate * weight_decay * sequence_length

# Callable handed to SMAC as the target function. It is kept under its own name
# so that extra arguments can later be bound with functools.partial, e.g.
# partial(train_eval_hpo, batch_size=4).
partial_function = train_eval_hpo

# Search space: two log-scaled float hyperparameters and one integer one.
learning_rate = UniformFloatHyperparameter(
    name="learning_rate", lower=1e-6, upper=1e-3, default_value=1e-4, log=True
)
weight_decay = UniformFloatHyperparameter(
    name="weight_decay", lower=1e-6, upper=0.1, default_value=0.01, log=True
)
sequence_length = UniformIntegerHyperparameter(
    name="sequence_length", lower=256, upper=1024, default_value=1024
)

cs = ConfigurationSpace()
# Left as the final expression of the cell so the notebook echoes what was registered.
cs.add_hyperparameters([learning_rate, weight_decay, sequence_length])


[learning_rate, Type: UniformFloat, Range: [1e-06, 0.001], Default: 0.0001, on log-scale,
 weight_decay, Type: UniformFloat, Range: [1e-06, 0.1], Default: 0.01, on log-scale,
 sequence_length, Type: UniformInteger, Range: [256, 1024], Default: 1024]

In [9]:
# Optimisation scenario: search space `cs`, one worker, fixed seed, deterministic target.
scenario = Scenario(
    configspace=cs,
    name="SMAC_trial_2",
    # objectives=["val_loss", "train_time"],
    walltime_limit=23*60*60,  # 23 h written out as 23 * 60 * 60
    n_trials=500,  # evaluate at most 500 trials
    # Smallest and largest fidelity handed to a trial (72.0 and 576.0).
    # NOTE(review): the h*60*60 form suggests seconds of training - confirm the unit.
    min_budget=0.02*60*60,
    max_budget=0.16*60*60,
    n_workers=1,
    seed=0,
    deterministic=True,
)

# Surrogate model: Gaussian process with a Matern (nu=2.5) kernel multiplied by a
# constant kernel whose value is held at 1.0 (bounds "fixed").
kernel = MaternKernel(nu=2.5) * ConstantKernel(constant_value=1.0, constant_value_bounds="fixed")
model = GaussianProcess(configspace=cs, kernel=kernel)

# Five initial-design configurations, then Hyperband with eta=2.
initial_design = MultiFidelityFacade.get_initial_design(scenario=scenario, n_configs=5)
intensifier = Hyperband(scenario=scenario, eta=2)

# NOTE(review): with overwrite=False SMAC looks for an earlier run stored under the
# same scenario name/seed - confirm whether continuing from it or starting fresh is intended.
smac = MultiFidelityFacade(
    scenario=scenario,
    target_function=partial_function,
    initial_design=initial_design,
    intensifier=intensifier,
    overwrite=False,
    model=model,
    # acquisition_function=acquisition_function,
)

[INFO][abstract_initial_design.py:82] Using `n_configs` and ignoring `n_configs_per_hyperparameter`.
[INFO][abstract_initial_design.py:147] Using 5 initial design configurations and 0 additional configurations.
[INFO][smbo.py:514] Found old run in `smac3_output/SMAC_trial_2/0` but it is not the same as the current one:
['scenario.min_budget: 72.0 != 144.0']


In [10]:
# Drive the optimiser by hand for 20 trials: ask for a trial, evaluate it, report back.
for i in range(20):
    info = smac.ask()
    assert info.seed is not None
    print(info)

    # Evaluate the suggested configuration with the seed and budget SMAC chose for it.
    cost = train_eval_hpo(
        config_space=info.config,
        seed=info.seed,
        budget=info.budget,
    )

    # The reported runtime is a fixed placeholder (0.5), not a measurement.
    value = TrialValue(cost=cost, time=0.5)
    smac.tell(info=info, value=value)

[INFO][abstract_intensifier.py:305] Using only one seed for deterministic scenario.
[INFO][successive_halving.py:164] Successive Halving uses budget type BUDGETS with eta 2, min budget 72.0, and max budget 576.0.
[INFO][successive_halving.py:323] Number of configs in stage:
[INFO][successive_halving.py:325] --- Bracket 0: [8, 4, 2, 1]
[INFO][successive_halving.py:325] --- Bracket 1: [6, 3, 1]
[INFO][successive_halving.py:325] --- Bracket 2: [4, 2]
[INFO][successive_halving.py:325] --- Bracket 3: [4]
[INFO][successive_halving.py:327] Budgets in stage:
[INFO][successive_halving.py:329] --- Bracket 0: [72.0, 144.0, 288.0, 576.0]
[INFO][successive_halving.py:329] --- Bracket 1: [144.0, 288.0, 576.0]
[INFO][successive_halving.py:329] --- Bracket 2: [288.0, 576.0]
[INFO][successive_halving.py:329] --- Bracket 3: [576.0]
TrialInfo(config=Configuration(values={
  'learning_rate': 4.430375245218267e-05,
  'sequence_length': 752,
  'weight_decay': 0.009091283280651459,
}), instance=None, seed=20

In [11]:
# Drive the optimiser by hand for a further 10 trials: ask, evaluate, report back.
for i in range(10):
    info = smac.ask()
    assert info.seed is not None
    print(info)

    # Evaluate the suggested configuration with the seed and budget SMAC chose for it.
    cost = train_eval_hpo(
        config_space=info.config,
        seed=info.seed,
        budget=info.budget,
    )

    # The reported runtime is a fixed placeholder (0.5), not a measurement.
    value = TrialValue(cost=cost, time=0.5)
    smac.tell(info=info, value=value)

TrialInfo(config=Configuration(values={
  'learning_rate': 0.00017079555233284266,
  'sequence_length': 580,
  'weight_decay': 0.0005502465320399705,
}), instance=None, seed=209652396, budget=576.0)
TrialInfo(config=Configuration(values={
  'learning_rate': 4.296470325829798e-06,
  'sequence_length': 372,
  'weight_decay': 1.6104370112900418e-05,
}), instance=None, seed=209652396, budget=576.0)
TrialInfo(config=Configuration(values={
  'learning_rate': 0.00038102138995106796,
  'sequence_length': 697,
  'weight_decay': 0.0012442399617969707,
}), instance=None, seed=209652396, budget=576.0)
TrialInfo(config=Configuration(values={
  'learning_rate': 1.1707290598004148e-05,
  'sequence_length': 988,
  'weight_decay': 0.0012883714749354577,
}), instance=None, seed=209652396, budget=576.0)
TrialInfo(config=Configuration(values={
  'learning_rate': 1.9989337487330563e-06,
  'sequence_length': 615,
  'weight_decay': 0.002286585658178784,
}), instance=None, seed=209652396, budget=576.0)


TrialInfo(config=Configuration(values={
  'learning_rate': 7.358944742530792e-06,
  'sequence_length': 876,
  'weight_decay': 1.9169646011359978e-06,
}), instance=None, seed=209652396, budget=72.0)
[INFO][abstract_intensifier.py:593] Added config 115b37 and rejected config 8007f3 as incumbent because it is not better than the incumbents on 1 instances:
TrialInfo(config=Configuration(values={
  'learning_rate': 5.477869867464094e-05,
  'sequence_length': 377,
  'weight_decay': 0.029473913826019556,
}), instance=None, seed=209652396, budget=72.0)
TrialInfo(config=Configuration(values={
  'learning_rate': 1.6063104405528003e-06,
  'sequence_length': 606,
  'weight_decay': 0.053204147833493816,
}), instance=None, seed=209652396, budget=72.0)
TrialInfo(config=Configuration(values={
  'learning_rate': 4.904119431164885e-06,
  'sequence_length': 489,
  'weight_decay': 1.0000651217332357e-06,
}), instance=None, seed=209652396, budget=72.0)
[INFO][abstract_intensifier.py:593] Added config 5fb21