In [1]:
from utils.scenario_runner import ScenarioRunner
from utils.bayesian_optimisation import (
    SEARCH_FIDELITIES,
    BAYESOPT_INITIALIZATION_RATIO,
    SEARCH_TYPES,
    do_search,
    logger,
    FIDELITY_RANGE,
    preprocess_features,
    get_random_scenario_seed,
    get_candidate_solutions,
    regression_pipeline,
    get_mean_and_std_from_model,
    get_next_scenario_seed_from_aq,
    expected_improvement,
    upper_confidence_bound,
    pick_next_fidelity,
    DEFAULT_SEARCH_BUDGET,
    HDD_PATH,
    get_training_data,
    random_search_iteration,
    set_seed,
)
from pathlib import Path
import shutil

## Bayes Opt Iteration


In [2]:
def bayes_opt_iteration(train_df, aq_type="ei", fidelity="multifidelity") -> tuple[int, int]:
    """
    Performs a single iteration of Bayesian Otpimisation
    Returns next scenario seed, and next fidelity to run.

    """

    logger.info(f"Entering Bayesian Opt Iteration with parameters:")
    logger.info(f"N training samples {len(train_df)}, {aq_type = }, {fidelity = }")
    target_fidelity = fidelity
    if fidelity == "multifidelity":
        target_fidelity = max(FIDELITY_RANGE)

    # PREPARE TRAINING DATA
    X_train = preprocess_features(train_df)
    y_train = train_df["eval.driving_score"]

    if target_fidelity not in train_df.index.get_level_values("fid.ads_fps"):
        logger.warning(f"Target fidelity is not present in training set.")
        logger.warning(f"Will run target fidelity now!")
        return get_random_scenario_seed(get_candidate_solutions()), target_fidelity

    current_best = y_train.xs(target_fidelity).min()
    logger.info(f"Current best score is: {current_best:.3f}")

    # TRAIN THE MODEL
    pipe = regression_pipeline(X_train)
    logger.info(f"Training using {len(X_train.columns)} features")
    # pipe.set_params(regressor__n_jobs=16)
    model = pipe.fit(X_train, y_train)
    logger.debug(f"Model trained")

    # PREPARE TEST DATA
    candidate_scenarios = get_candidate_solutions()
    # Exclude scenarios that have been evaluated (in any fidelity)
    candidate_scenarios = candidate_scenarios[
        ~candidate_scenarios.index.isin(train_df.index.get_level_values("def.seed"))
    ]
    logger.debug(f"Considering next scenario from {len(candidate_scenarios)} candidates.")

    X_test = preprocess_features(candidate_scenarios)
    # test candidates must be casted to target fidelity
    X_test["fid.ads_fps"] = target_fidelity
    X_test = X_test[X_train.columns]

    # PREDICT DSCORE FOR HIGHFIDELITY
    dscore_predictions, std = get_mean_and_std_from_model(model, X_test)
    logger.info(f"Best from model: {dscore_predictions.min():.3f}")

    match aq_type:
        case "ei":
            aq = expected_improvement(dscore_predictions, std, current_best)
        case "ucb":
            aq = upper_confidence_bound(dscore_predictions, std)
        case _:
            raise ValueError("Invalid acquisition function")

    next_seed = int(get_next_scenario_seed_from_aq(aq, candidate_scenarios))
    logger.info(f"Next seed to evaluate: {next_seed}")

    if fidelity != "multifidelity":
        return next_seed, target_fidelity

    logger.debug(f"Multifidelity enabled")

    next_cadidate = candidate_scenarios.loc[[next_seed]]
    next_fidelity = pick_next_fidelity(next_cadidate, X_train.columns, model)
    assert next_fidelity in FIDELITY_RANGE
    return next_seed, next_fidelity

## Do search


In [3]:
# This function should run in a separate process
from itertools import count


def do_search(
    repetition,
    search_type="randomsearch",
    fidelity="multifidelity",
    smoketest=False,
    search_root_dir=HDD_PATH,
):

    SEARCH_DIR = Path(search_root_dir) / ("searches_smoketest" if smoketest else "searches")
    SEARCH_DIR.mkdir(exist_ok=True, parents=True)

    rep_path = SEARCH_DIR / search_type / str(fidelity) / str(repetition)
    if rep_path.exists():
        logger.info(f"Search already finished for {rep_path}, skipping")
        return

    logger.info(
        f"Starting {"smoke" if smoketest else "real"} {search_type} search for: {repetition = } in {fidelity = }"
    )

    # set random seed from rep and search type
    set_seed(repetition, search_type, fidelity)

    # Initialize the search budget
    SEARCH_BUDGET = 15 if smoketest else DEFAULT_SEARCH_BUDGET
    logger.info(f"Search budget: {SEARCH_BUDGET}")
    current_budget = SEARCH_BUDGET

    for it in count():
        logger.info(f"Starting iteration {it = }")

        match search_type.split("_"):
            case ["randomsearch"]:
                logger.info("Random search iteration!")
                next_seed, next_fid = random_search_iteration(fidelity)

            case ["bayesopt", aq_type]:
                logger.info(f"{aq_type.upper()} Baysian optimisation iteration")
                if current_budget > BAYESOPT_INITIALIZATION_RATIO * SEARCH_BUDGET:
                    logger.info(f"Still initializing BayesOpt, using RS iteration")
                    next_seed, next_fid = random_search_iteration(fidelity)
                else:
                    logger.info(f"Doing BayesOpt iteration")
                    train_df = get_training_data(rep_path=rep_path)
                    next_seed, next_fid = bayes_opt_iteration(train_df, aq_type, fidelity)
            case _:
                raise ValueError(f"Invalid search type: {search_type}")

        logger.info(f"Next seed: {next_seed}, fidelity: {next_fid}")
        it_path = rep_path / str(it)
        runner = ScenarioRunner(it_path, next_seed, next_fid)
        runner.run_scenario(repeat=True)
        cost = runner.get_evaluation_cost()
        del runner

        logger.info(f"Running this scenario cost: {cost}")
        current_budget -= cost

        logger.info(f"Current budget: {current_budget}")

        if current_budget <= 0:
            logger.info(f"Budget finished!")
            with open(SEARCH_DIR / "checkpoints.txt", "a") as file:
                file.write(f"Search of {rep_path} finished successfully!\n")

            break

In [4]:
# Scratch root directory for the pipeline check in the next cell. Leftovers of
# an earlier run are removed first, because do_search skips a repetition whose
# output directory already exists.
search_dir = Path("/tmp/pipeline_check")
shutil.rmtree(search_dir, ignore_errors=True)

In [None]:
rep = 0

# Smoke-test run of a single UCB Bayesian-optimisation repetition in
# multifidelity mode, written under the scratch directory.
do_search(
    repetition=rep,
    search_type="bayesopt_ucb",
    fidelity="multifidelity",
    smoketest=True,
    search_root_dir=search_dir,
)

[2025-04-15 16:05:44,203] [MainProcess] [3917788267.py:21] [INFO]: Starting smoke bayesopt_ucb search for: repetition = 0 in fidelity = 'multifidelity'
[2025-04-15 16:05:44,203] [MainProcess] [bayesian_optimisation.py:52] [INFO]: Setting a random seed: 5030000
[2025-04-15 16:05:44,203] [MainProcess] [3917788267.py:30] [INFO]: Search budget: 15
[2025-04-15 16:05:44,204] [MainProcess] [3917788267.py:34] [INFO]: Starting iteration it = 0
[2025-04-15 16:05:44,204] [MainProcess] [3917788267.py:42] [INFO]: UCB Baysian optimisation iteration
[2025-04-15 16:05:44,204] [MainProcess] [3917788267.py:44] [INFO]: Still initializing BayesOpt, using RS iteration
[2025-04-15 16:05:44,374] [MainProcess] [3917788267.py:53] [INFO]: Next seed: 1008311, fidelity: 10
[2025-04-15 16:05:44,375] [MainProcess] [scenario_runner.py:60] [INFO]: Saving data to /tmp/pipeline_check/searches_smoketest/bayesopt_ucb/multifidelity/0/0
[2025-04-15 16:05:44,572] [MainProcess] [scenario_runner.py:236] [INFO]: Launching the 