In [1]:
import sys
import pandas as pd

sys.path.append("/home/olek/Documents/dev/metadrive-multifidelity-data/notebooks")
from utils.parse_metadrive import get_scenarios_df, process_scenario_df  # type: ignore


from utils.scenario_runner import ScenarioRunner
from utils.bayesian_optimisation import (
    SEARCH_FIDELITIES,
    BAYESOPT_INITIALIZATION_RATIO,
    SEARCH_TYPES,
    do_search,
    logger,
    FIDELITY_RANGE,
    preprocess_features,
    get_random_scenario_seed,
    get_candidate_solutions,
    regression_pipeline,
    get_mean_and_std_from_model,
    get_next_scenario_seed_from_aq,
    expected_improvement,
    upper_confidence_bound,
    pick_next_fidelity,
    DEFAULT_SEARCH_BUDGET,
    HDD_PATH,
    get_training_data,
    random_search_iteration,
    set_seed,
)
from pathlib import Path
import shutil
from itertools import count

## Bayes Opt Iteration


In [2]:
def pick_next_fidelity(
    next_cadidate: pd.DataFrame,
    scenario_features,
    trained_model,
    allowed_fidelity_range=FIDELITY_RANGE,
    epsilon=0.01,
) -> int:
    """
    Pick the lowest allowed fidelity whose predicted driving score is close
    enough to the prediction at the highest allowed fidelity.

    The candidate row is replicated once per allowed fidelity, the model
    predicts a driving score for each copy, and the prediction at the highest
    allowed fidelity serves as the reference. Fidelities are then visited in
    increasing order and the first one whose absolute difference from the
    reference is below ``epsilon`` is returned.

    Args:
        next_cadidate: Frame holding the chosen scenario; it must contain
            exactly one row, because one fidelity value is assigned per copy.
        scenario_features: Column labels selected as model input.
        trained_model: Model handed to ``get_mean_and_std_from_model``.
        allowed_fidelity_range: Fidelities that may be chosen. The reference
            is the maximum of *this* collection, which can be lower than
            ``max(FIDELITY_RANGE)`` when the caller narrows the range.
        epsilon: Exclusive upper bound on the accepted absolute error.

    Returns:
        The selected fidelity.

    Raises:
        ValueError: If ``allowed_fidelity_range`` is empty, or if no fidelity
            has an error below ``epsilon`` (e.g. NaN predictions or a
            non-positive ``epsilon``).
    """
    logger.info("Picking next fidelity!")

    # Sort explicitly so "lowest acceptable fidelity first" does not depend on
    # the order in which the caller happened to pass the fidelities.
    fidelities = sorted(allowed_fidelity_range)
    if not fidelities:
        raise ValueError("allowed_fidelity_range must contain at least one fidelity")

    # One copy of the candidate per fidelity, differing only in fid.ads_fps
    mf_candidates = pd.concat([next_cadidate] * len(fidelities))
    mf_candidates["fid.ads_fps"] = fidelities

    mf_X_test = mf_candidates.reset_index()[scenario_features]

    # predict dscore for each fidelity
    predicted_dscore, _ = get_mean_and_std_from_model(trained_model, mf_X_test)

    predictions = dict(zip(fidelities, predicted_dscore))

    # Reference value: prediction at the highest allowed fidelity
    hf_prediction = predictions[fidelities[-1]]
    logger.info(f"Predicted dscore for high fidelity: {hf_prediction:.3f}")
    logger.info(str(predictions))

    # go into increasing fidelity order
    for fid, dscore in predictions.items():
        # maximum absolute error
        error = abs(dscore - hf_prediction)
        logger.info(f"Considering {fid} FPS with predicted {dscore = :.3f}, {error = :.3f}")

        if error < epsilon:
            logger.info(f"Picking fidelity {fid} with dscore error of {error:.3f}")
            return fid

    # The highest fidelity always has error 0, so this is only reached when the
    # comparison itself cannot succeed (NaN predictions or epsilon <= 0).
    logger.warning("Could not find fidelity with acceptable error")
    raise ValueError("No fidelity with acceptable error found")

In [None]:
def bayes_opt_iteration(train_df, aq_type="ei", fidelity="multifidelity") -> tuple[int, int]:
    """
    Performs a single iteration of Bayesian Otpimisation
    Returns next scenario seed, and next fidelity to run.

    """

    logger.info(f"Entering Bayesian Opt Iteration with parameters:")
    logger.info(f"N training samples {len(train_df)}, {aq_type = }, {fidelity = }")
    target_fidelity = fidelity
    if fidelity == "multifidelity":
        target_fidelity = max(FIDELITY_RANGE)

    # Check if target fidelity is present in training set
    if len(train_df) < 2 or target_fidelity not in train_df["fid.ads_fps"].unique():
        logger.warning(f"Target fidelity is not present in training set.")
        logger.warning(f"Will run target fidelity now!")
        return get_random_scenario_seed(get_candidate_solutions()), target_fidelity

    # Determine the current best score for the target fidelity
    hf_train_df = train_df[train_df["fid.ads_fps"] == target_fidelity]
    current_best = hf_train_df["eval.driving_score"].min()
    logger.info(f"Current best score is: {current_best:.3f}")

    # TRAIN THE MODEL
    X_train = preprocess_features(train_df)
    pipe = regression_pipeline(X_train)
    logger.info(f"Training using {len(X_train.columns)} features")
    # pipe.set_params(regressor__n_jobs=16)
    y_train = train_df["eval.driving_score"]
    model = pipe.fit(X_train, y_train)
    logger.debug(f"Model trained")

    # PREPARE TEST DATA
    candidate_scenarios = get_candidate_solutions()

    # Exclude scenarios that have been evaluated (in any fidelity)
    if fidelity == "multifidelity":
        # Exclude scenarios that have been evaluated (in all fidelity settings)
        seeds_to_exclude = [
            seed
            for seed, df in train_df.groupby("def.seed")
            if set(df["fid.ads_fps"].unique()) == set(FIDELITY_RANGE)
        ]
    else:
        # Exclude scenarios that have been evaluated
        seeds_to_exclude = train_df["def.seed"].unique()

    logger.info(
        f"Excluding {len(seeds_to_exclude)} seeds from candidates {seeds_to_exclude = }"
    )

    candidate_scenarios = candidate_scenarios[
        ~candidate_scenarios.index.isin(seeds_to_exclude)
    ]
    logger.info(f"Considering next scenario from {len(candidate_scenarios)} candidates.")

    X_test = preprocess_features(candidate_scenarios)
    # test candidates must be casted to target fidelity
    X_test["fid.ads_fps"] = target_fidelity
    X_test = X_test[X_train.columns]

    # PREDICT DSCORE FOR HIGHFIDELITY
    dscore_predictions, std = get_mean_and_std_from_model(model, X_test)
    logger.info(f"Best from model: {dscore_predictions.min():.3f}")

    match aq_type:
        case "ei":
            aq = expected_improvement(dscore_predictions, std, current_best)
        case "ucb":
            aq = upper_confidence_bound(dscore_predictions, std)
        case _:
            raise ValueError("Invalid acquisition function")

    next_seed = int(get_next_scenario_seed_from_aq(aq, candidate_scenarios))
    logger.info(f"Next seed to evaluate: {next_seed}")

    if fidelity != "multifidelity":
        return next_seed, target_fidelity

    logger.debug(f"Multifidelity enabled")

    next_cadidate = candidate_scenarios.loc[[next_seed]]
    used_fidelities = train_df[train_df["def.seed"] == next_seed]["fid.ads_fps"].unique()
    available_fidelities = sorted(list(set(FIDELITY_RANGE) - set(used_fidelities)))
    logger.debug(f"Available fidelities for {next_seed}: {available_fidelities}")
    next_fidelity = pick_next_fidelity(
        next_cadidate, X_train.columns, model, allowed_fidelity_range=available_fidelities
    )

    return next_seed, next_fidelity

In [4]:
# Locate one stored multifidelity UCB repetition and load it as training data.
p = Path("/home/olek/Documents/dev/metadrive-multifidelity-data/data/experiments/allow_repeat")
rep_path = p.joinpath("searches", "bayesopt_ucb", "multifidelity", "13")

# Fail early if the repetition directory is missing on this machine
assert rep_path.exists()
train_df = get_training_data(rep_path=rep_path)

[2025-05-03 20:10:59,681] [MainProcess] [bayesian_optimisation.py:87] [INFO]: Loading search data from /home/olek/Documents/dev/metadrive-multifidelity-data/data/experiments/allow_repeat/searches/bayesopt_ucb/multifidelity/13
100%|██████████| 556/556 [00:26<00:00, 20.64it/s]


In [5]:
# Toy training frame in which some seeds have been run at every fidelity
# and others only at a subset.
test_df_1 = pd.DataFrame(
    {
        "def.seed": [1000000] * 4 + [1000001] * 2 + [1000003] * 5,
        "fid.ads_fps": [60, 10, 20, 30, 10, 60, 10, 20, 20, 30, 60],
        "eval.driving_score": [0.8, 0.9, 0.7, 0.85, 0.7, 0.75, 0.6, 0.65, 0.7, 0.75, 0.8],
    }
)

# Seeds whose set of evaluated fidelities equals FIDELITY_RANGE
seeds_to_exclude = [
    seed
    for seed, df in test_df_1.groupby("def.seed")
    if set(df["fid.ads_fps"].unique()) == set(FIDELITY_RANGE)
]
seeds_to_exclude

[1000000, 1000003]

In [6]:
# Small candidate set for the experiment: label-based slice of the index up to 1000004.
candidate_scenarios = get_candidate_solutions().loc[:1000004]

In [7]:
# Try the seed-exclusion step on the toy frame.
train_df = test_df_1.copy()
using_multifidelity = True
if not using_multifidelity:
    # Single fidelity: every evaluated seed is excluded
    seeds_to_exclude = train_df["def.seed"].unique()
else:
    # Multifidelity: only seeds evaluated in all fidelity settings are excluded
    seeds_to_exclude = [
        seed
        for seed, df in train_df.groupby("def.seed")
        if set(df["fid.ads_fps"].unique()) == set(FIDELITY_RANGE)
    ]

# Drop the excluded seeds from the candidate set and display what remains
candidate_scenarios = candidate_scenarios.loc[~candidate_scenarios.index.isin(seeds_to_exclude)]
candidate_scenarios

Unnamed: 0_level_0,fid.ads_fps,fid.world_fps,def.spawn_lane_index,def.distance,def.max_steps,time.init_time,time.agent_time,time.scenario_time,time.closing_time,def.map_seq.0.id,...,def.vehicles_data.vehicle_36_position_x,def.vehicles_data.vehicle_36_position_y,def.vehicles_data.vehicle_36_position_z,def.vehicles_data.vehicle_36_type,def.vehicles_data.vehicle_36_heading_theta,def.vehicles_data.vehicle_36_length,def.vehicles_data.vehicle_36_width,def.vehicles_data.vehicle_36_height,def.vehicles_data.vehicle_36_spawn_road,def.vehicles_data.vehicle_36_destination
def.seed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000001,60,60,0,615.214675,18456,0.391575,28.178873,50.854427,0.23332,I,...,,,,,,,,,,
1000002,60,60,1,500.323549,15010,0.47172,24.169583,46.084278,0.203616,I,...,,,,,,,,,,
1000004,60,60,1,644.202527,19326,0.53526,31.104951,84.788868,0.337844,I,...,,,,,,,,,,


In [8]:
# Hand-pick a seed that is still among the candidates and select it with
# double brackets so the result stays a one-row DataFrame.
next_seed = 1000001
next_cadidate = candidate_scenarios.loc[[next_seed]]

# Display the toy training frame for reference
train_df

Unnamed: 0,def.seed,fid.ads_fps,eval.driving_score
0,1000000,60,0.8
1,1000000,10,0.9
2,1000000,20,0.7
3,1000000,30,0.85
4,1000001,10,0.7
5,1000001,60,0.75
6,1000003,10,0.6
7,1000003,20,0.65
8,1000003,20,0.7
9,1000003,30,0.75


In [9]:
# Show the excluded seeds next to the full fidelity range they were compared against.
print("Test Case 1 - Seeds to Exclude:", seeds_to_exclude)
FIDELITY_RANGE

Test Case 1 - Seeds to Exclude: [1000000, 1000003]


[10, 20, 30, 60]

In [None]:
# train_df was replaced by the toy frame in an earlier cell, so reload the stored
# repetition before running one multifidelity UCB iteration on it.
train_df = get_training_data(rep_path=rep_path)
next_seed, next_fid = bayes_opt_iteration(train_df, "ucb", "multifidelity")
next_seed, next_fid

[2025-05-03 20:11:26,955] [MainProcess] [bayesian_optimisation.py:87] [INFO]: Loading search data from /home/olek/Documents/dev/metadrive-multifidelity-data/data/experiments/allow_repeat/searches/bayesopt_ucb/multifidelity/13
100%|██████████| 556/556 [00:00<00:00, 860528.79it/s]
[2025-05-03 20:11:27,013] [MainProcess] [1192918923.py:10] [INFO]: Entering Bayesian Opt Iteration with parameters:
[2025-05-03 20:11:27,013] [MainProcess] [1192918923.py:11] [INFO]: N training samples 556, aq_type = 'ucb', fidelity = 'multifidelity'
[2025-05-03 20:11:27,014] [MainProcess] [1192918923.py:25] [INFO]: Current best score is: 0.121
[2025-05-03 20:11:27,024] [MainProcess] [1192918923.py:30] [INFO]: Training using 354 features
[2025-05-03 20:11:27,143] [MainProcess] [1192918923.py:52] [INFO]: Excluding 0 seeds from candidates seeds_to_exclude = []
[2025-05-03 20:11:27,222] [MainProcess] [1192918923.py:59] [INFO]: Considering next scenario from 100000 candidates.
[2025-05-03 20:11:36,553] [MainProcess

(1041605, 20)

## Do search


In [None]:
# This function should run in a separate process


def do_search(
    repetition,
    search_type="randomsearch",
    fidelity="multifidelity",
    smoketest=False,
    search_root_dir=HDD_PATH,
):

    SEARCH_DIR = Path(search_root_dir) / ("searches_smoketest" if smoketest else "searches")
    SEARCH_DIR.mkdir(exist_ok=True, parents=True)

    rep_path = SEARCH_DIR / search_type / str(fidelity) / str(repetition)
    if rep_path.exists():
        logger.info(f"Search already finished for {rep_path}, skipping")
        return

    logger.info(
        f"Starting {"smoke" if smoketest else "real"} {search_type} search for: {repetition = } in {fidelity = }"
    )

    # set random seed from rep and search type
    set_seed(repetition, search_type, fidelity)

    # Initialize the search budget
    SEARCH_BUDGET = 15 if smoketest else DEFAULT_SEARCH_BUDGET
    logger.info(f"Search budget: {SEARCH_BUDGET}")
    current_budget = SEARCH_BUDGET

    for it in count():
        logger.info(f"Starting iteration {it = }")

        match search_type.split("_"):
            case ["randomsearch"]:
                logger.info("Random search iteration!")
                next_seed, next_fid = random_search_iteration(fidelity)

            case ["bayesopt", aq_type]:
                logger.info(f"{aq_type.upper()} Baysian optimisation iteration")
                if current_budget > BAYESOPT_INITIALIZATION_RATIO * SEARCH_BUDGET:
                    logger.info(f"Still initializing BayesOpt, using RS iteration")
                    next_seed, next_fid = random_search_iteration(fidelity)
                else:
                    logger.info(f"Doing BayesOpt iteration")
                    train_df = get_training_data(rep_path=rep_path)
                    next_seed, next_fid = bayes_opt_iteration(train_df, aq_type, fidelity)
            case _:
                raise ValueError(f"Invalid search type: {search_type}")

        logger.info(f"Next seed: {next_seed}, fidelity: {next_fid}")
        it_path = rep_path / str(it)
        runner = ScenarioRunner(it_path, next_seed, next_fid)
        runner.run_scenario(repeat=True)
        cost = runner.get_evaluation_cost()
        del runner

        logger.info(f"Running this scenario cost: {cost}")
        current_budget -= cost

        logger.info(f"Current budget: {current_budget}")

        if current_budget <= 0:
            logger.info(f"Budget finished!")
            with open(SEARCH_DIR / "checkpoints.txt", "a") as file:
                file.write(f"Search of {rep_path} finished successfully!\n")

            break

In [12]:
# Start from a clean scratch directory: do_search returns early when the repetition
# directory already exists. Errors during removal are ignored.
search_dir = Path("/tmp/pipeline_check")
shutil.rmtree(search_dir, ignore_errors=True)

In [None]:
# Smoke-test the whole pipeline: UCB Bayesian optimisation at a fixed fidelity of 60,
# written below the scratch directory.
rep = 0


do_search(
    rep,
    search_type="bayesopt_ucb",
    fidelity=60,
    search_root_dir=search_dir,
    smoketest=True,
)

[2025-05-03 20:11:36,642] [MainProcess] [925087489.py:20] [INFO]: Starting smoke bayesopt_ucb search for: repetition = 0 in fidelity = 60
[2025-05-03 20:11:36,642] [MainProcess] [bayesian_optimisation.py:52] [INFO]: Setting a random seed: 4030000
[2025-05-03 20:11:36,642] [MainProcess] [925087489.py:29] [INFO]: Search budget: 15
[2025-05-03 20:11:36,642] [MainProcess] [925087489.py:33] [INFO]: Starting iteration it = 0
[2025-05-03 20:11:36,642] [MainProcess] [925087489.py:41] [INFO]: UCB Baysian optimisation iteration
[2025-05-03 20:11:36,643] [MainProcess] [925087489.py:43] [INFO]: Still initializing BayesOpt, using RS iteration
[2025-05-03 20:11:36,644] [MainProcess] [925087489.py:54] [INFO]: Next seed: 1008842, fidelity: 60
[2025-05-03 20:11:36,644] [MainProcess] [scenario_runner.py:60] [INFO]: Saving data to /tmp/pipeline_check/searches_smoketest/bayesopt_ucb/60/0/0
[2025-05-03 20:11:36,645] [MainProcess] [base_env.py:327] [INFO]: Environment: MetaDriveEnv
[2025-05-03 20:11:36,645]

In [15]:
# Load what the smoke-test search wrote: the path is
# search_dir / "searches_smoketest" / search_type / fidelity / repetition from the call above.
rep_path = Path("/tmp/pipeline_check/searches_smoketest/bayesopt_ucb/60/0")
train_df = get_training_data(rep_path=rep_path)
candidate_scenarios = get_candidate_solutions()
train_df

[2025-05-03 20:36:29,620] [MainProcess] [bayesian_optimisation.py:87] [INFO]: Loading search data from /tmp/pipeline_check/searches_smoketest/bayesopt_ucb/60/0
100%|██████████| 3/3 [00:00<00:00, 79.87it/s]


Unnamed: 0,fid.ads_fps,fid.world_fps,def.seed,def.spawn_lane_index,def.distance,def.max_steps,time.init_time,time.agent_time,time.scenario_time,time.closing_time,...,def.vehicles_data.vehicle_21_position_y,def.vehicles_data.vehicle_21_position_z,def.vehicles_data.vehicle_21_type,def.vehicles_data.vehicle_21_heading_theta,def.vehicles_data.vehicle_21_length,def.vehicles_data.vehicle_21_width,def.vehicles_data.vehicle_21_height,def.vehicles_data.vehicle_21_spawn_road,def.vehicles_data.vehicle_21_destination,eval.driving_score
0,60,60,1008842,0,715.587907,21468,0.224855,14.6024,25.960283,0.194825,...,,,,,,,,,,1.0
1,60,60,1020629,1,485.407791,14562,0.181667,10.817329,19.228711,0.172202,...,,,,,,,,,,1.0
2,60,60,1093287,2,567.270663,17018,0.235134,13.788057,28.870845,0.179069,...,-128.887451,0.925,LVehicle,3.075271,4.87,2.046,1.85,"['-5T2_1_', '-5T2_0_']","['->>>', '->>']",1.0


In [16]:
# Parse the repetition directory directly and show the driving score
# indexed by (fidelity, seed).
df = process_scenario_df(get_scenarios_df(rep_path, multiprocessed=False))
df = df.set_index(["fid.ads_fps", "def.seed"])
df["eval.driving_score"].to_frame()

100%|██████████| 3/3 [00:00<00:00, 103991.01it/s]


Unnamed: 0_level_0,Unnamed: 1_level_0,eval.driving_score
fid.ads_fps,def.seed,Unnamed: 2_level_1
60,1008842,1.0
60,1020629,1.0
60,1093287,1.0


In [17]:
# Driving-score column of the frame returned by get_training_data
train_df["eval.driving_score"]

0    1.0
1    1.0
2    1.0
Name: eval.driving_score, dtype: float64

In [18]:
# Display the full training frame loaded from the smoke-test repetition
train_df

Unnamed: 0,fid.ads_fps,fid.world_fps,def.seed,def.spawn_lane_index,def.distance,def.max_steps,time.init_time,time.agent_time,time.scenario_time,time.closing_time,...,def.vehicles_data.vehicle_21_position_y,def.vehicles_data.vehicle_21_position_z,def.vehicles_data.vehicle_21_type,def.vehicles_data.vehicle_21_heading_theta,def.vehicles_data.vehicle_21_length,def.vehicles_data.vehicle_21_width,def.vehicles_data.vehicle_21_height,def.vehicles_data.vehicle_21_spawn_road,def.vehicles_data.vehicle_21_destination,eval.driving_score
0,60,60,1008842,0,715.587907,21468,0.224855,14.6024,25.960283,0.194825,...,,,,,,,,,,1.0
1,60,60,1020629,1,485.407791,14562,0.181667,10.817329,19.228711,0.172202,...,,,,,,,,,,1.0
2,60,60,1093287,2,567.270663,17018,0.235134,13.788057,28.870845,0.179069,...,-128.887451,0.925,LVehicle,3.075271,4.87,2.046,1.85,"['-5T2_1_', '-5T2_0_']","['->>>', '->>']",1.0
