## General thoughts
- Use [qNIPV](https://github.com/facebook/Ax/issues/930) as acquisition fn to minimize
  model error
- store the raw sensor channel data, single-objective as the sum of all channels (maybe
  best as ScalarizedObjective) or could be multi-objective
- Run repeat experiments for each requested parameter set
- Pass the standard deviation to Ax
- For later, maybe base the hold-out performance on the input RGB differences instead of the target outputs
- Using qNIPV means each datapoint is more valuable from a modeling perspective (OK that
  it takes longer since it will likely be small compared to a real-world scenario)

## Estimating distribution of targets
- KDE, then fit a model that interpolates for a given quantile
- fit quantiles across all experiments (model for lowest, model for 2nd lowest, etc.)
- Is there a KDE for multiple inputs and a single response?
- Use a heteroskedastic GP (probably run into scaling issues)

In [1]:
num_sobol = 10
num_gpei = 0
num_qnipv = 290
num_repeats = 10 # i.e. how many times an individual experiment is repeated
num_campaigns = 1 # i.e. how many campaigns are run (each with a different target)
num_all_campaign_trials = (num_qnipv + num_gpei) * num_campaigns * num_repeats
print(f"Number of trials in all campaigns: {num_all_campaign_trials}")
time_per_trial_s = 10
estimated_campaign_time = num_all_campaign_trials * time_per_trial_s / 3600
print(f"Estimated campaign time: {estimated_campaign_time:.1f} hours")

Number of trials in all campaigns: 2900
Estimated campaign time: 8.1 hours


In [2]:
from numpy.random import default_rng
import json
with open("secrets.json", "r") as f:
    secrets = json.load(f)
MAIN_SEED = secrets["MAIN_SEED"]
rng = default_rng(seed=MAIN_SEED)
SEEDS = rng.integers(low=0, high=1000000, size=num_campaigns)

In [3]:
from self_driving_lab_demo import SelfDrivingLabDemoLight, mqtt_observe_sensor_data

sdls = [
    SelfDrivingLabDemoLight(
        target_seed=seed,
        observe_sensor_data_fn=mqtt_observe_sensor_data,
        observe_sensor_data_kwargs=dict(pico_id="test", session_id="hackathon-dev"),
    )
    for seed in SEEDS
]
sdl = sdls[0]
parameters = sdls[0].parameters[0:3]
parameters


[{'name': 'R', 'type': 'range', 'bounds': [0, 89]},
 {'name': 'G', 'type': 'range', 'bounds': [0, 89]},
 {'name': 'B', 'type': 'range', 'bounds': [0, 89]}]

In [4]:
from typing import Any, Dict, Optional

from botorch.acquisition.active_learning import (
    MCSampler,
    qNegIntegratedPosteriorVariance,
)

from botorch.acquisition.input_constructors import (
    MaybeDict,
    acqf_input_constructor,
    construct_inputs_mc_base,
)

from botorch.models.model import Model
from botorch.utils.datasets import SupervisedDataset
from torch import Tensor

from botorch.acquisition.objective import AcquisitionObjective

@acqf_input_constructor(qNegIntegratedPosteriorVariance)
def construct_inputs_qNIPV(
    model: Model,
    mc_points: Tensor,
    training_data: MaybeDict[SupervisedDataset],
    objective: Optional[AcquisitionObjective] = None,
    X_pending: Optional[Tensor] = None,
    sampler: Optional[MCSampler] = None,
    **kwargs: Any,
) -> Dict[str, Any]:

    if model.num_outputs == 1:
        objective = None

    base_inputs = construct_inputs_mc_base(
        model=model,
        training_data=training_data,
        sampler=sampler,
        X_pending=X_pending,
        objective=objective,
    )

    return {**base_inputs, "mc_points": mc_points}

In [6]:
from typing import Any, Dict, Optional

import torch

from ax.modelbridge import get_sobol
from ax.modelbridge.generation_strategy import GenerationStep, GenerationStrategy
from ax.modelbridge.registry import Models
from ax.models.torch.botorch_modular.surrogate import Surrogate
from ax.service.ax_client import AxClient
from botorch.models.gp_regression import SingleTaskGP
from ax.service.utils.instantiation import ObjectiveProperties

torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ax_client_tmp = AxClient(torch_device=torch_device)
ax_client_tmp.create_experiment(parameters=parameters)
sobol = get_sobol(ax_client_tmp.experiment.search_space)
mc_points = sobol.gen(1024).param_df.values
mcp = torch.tensor(mc_points)

model_kwargs_val = {
    "surrogate": Surrogate(SingleTaskGP),
    "botorch_acqf_class": qNegIntegratedPosteriorVariance,
    "acquisition_options": {"mc_points": mcp},
}

gs = GenerationStrategy(
    steps=[
        GenerationStep(model=Models.SOBOL, num_trials=num_sobol),
        GenerationStep(
            model=Models.BOTORCH_MODULAR,
            num_trials=num_qnipv,
            model_kwargs=model_kwargs_val,
        ),
        # GenerationStep(model=Models.GPEI, num_trials=num_gpei),
    ]
)

ax_client = AxClient(generation_strategy=gs)
ax_client.create_experiment(
    name="clslab-light-experiment",
    parameters=parameters,
    objectives={
        ch_name: ObjectiveProperties(minimize=True) for ch_name in sdl.channel_names
    },
)


[INFO 12-10 10:32:51] ax.service.ax_client: Starting optimization with verbose logging. To disable logging, set the `verbose_logging` argument to `False`. Note that float values in the logs are rounded to 6 decimal points.
[INFO 12-10 10:32:51] ax.service.utils.instantiation: Inferred value type of ParameterType.INT for parameter R. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 12-10 10:32:51] ax.service.utils.instantiation: Inferred value type of ParameterType.INT for parameter G. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 12-10 10:32:51] ax.service.utils.instantiation: Inferred value type of ParameterType.INT for parameter B. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 12-10 10:32:51] ax.service.utils.instan

In [12]:
def evaluate(parameters):
    data = sdl.observe_sensor_data(parameters)
    new_data = {ch_name: data[ch_name] for ch_name in sdl.channel_names}
    new_data["onboard_temperature_K"] = data["onboard_temperature_K"]
    new_data["utc_timestamp"] = data["utc_timestamp"]
    return new_data

for _ in range(20):
    trial_params, trial_index = ax_client.get_next_trial()
    data = evaluate(trial_params)
    ax_client.complete_trial(
        trial_index=trial_index, raw_data=data
    )


[INFO 12-10 10:40:11] ax.service.ax_client: Generated new trial 2 with parameters {'R': 85, 'G': 74, 'B': 38}.
[INFO 12-10 10:40:15] ax.service.ax_client: Data was logged for metric onboard_temperature_K that was not yet tracked on the experiment. Please specify `tracking_metric_names` argument in AxClient.create_experiment to add tracking metrics to the experiment. Without those, all data users specify is still attached to the experiment, but will not be fetched in `experiment.fetch_data()`, but you can still use `experiment.lookup_data_for_trial` to get all attached data.
[INFO 12-10 10:40:15] ax.service.ax_client: Data was logged for metric utc_timestamp that was not yet tracked on the experiment. Please specify `tracking_metric_names` argument in AxClient.create_experiment to add tracking metrics to the experiment. Without those, all data users specify is still attached to the experiment, but will not be fetched in `experiment.fetch_data()`, but you can still use `experiment.lookup

UnsupportedError: qNegIntegratedPosteriorVariance only supports ScalarizedObjective (DEPRECATED) type objectives.