In [None]:
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi = False

This notebook is expected to be run with `ScientificValueAgent/figures` as the working directory (relative paths such as `..` and `../results` below depend on it).

In [None]:
import sys
sys.path.append("..")

In [None]:
from collections import Counter
from copy import deepcopy
from itertools import product
from pathlib import Path

import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pickle
from scipy.spatial import distance_matrix
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm
import warnings

In [None]:
from sva import utils

Set some plotting defaults.

In [None]:
# Apply the project's plotting defaults (`sva.utils.set_defaults`) before any figure is created.
utils.set_defaults()

# Multi-phase one-dimensional XRD results

In [None]:
from sva.postprocessing import read_data, parse_results_by_acquisition_function
from sva.truth.xrd1dim import (
    _get_1d_phase_data,
    residual_1d_phase_relative_mae,
    xrd1dim_compute_metrics_all_acquisition_functions_and_LTB,
    _get_1d_phase_fractions,
    truth_xrd1dim,
    residual_1d_phase_mse
)

## Core manuscript figures

In [None]:
# Load the stored xrd1dim campaign results from disk (path is relative to this notebook's directory).
# NOTE(review): the "_Adam" suffix presumably means the GPs were fit with Adam — confirm against the run config.
results_Adam = read_data("../results/results_23-05-02-xrd1dim/")

In [None]:
# Group the loaded results by acquisition function. Later cells index this mapping with names such as
# "UpperConfidenceBound10" and iterate over the experiments stored under each key.
results_by_acqf_Adam = parse_results_by_acquisition_function(results_Adam)

In [None]:
# Directory (relative to the working directory) holding pickled metric caches; created on first use.
cache = Path("cache")
cache.mkdir(exist_ok=True)

### Subfigure (a) and (b)

In [None]:
# Acquisition function shown in the main-text figure.
acquisition_function = "UpperConfidenceBound10"
# Stack the sampled x coordinates of every experiment run with that acquisition function.
# Indexed below as [experiment, sample], i.e. each `data.X.squeeze()` is expected to be 1D.
all_results_Adam = np.array([xx.data.X.squeeze() for xx in results_by_acqf_Adam[acquisition_function]])

Get the phases...

In [None]:
# Dense composition grid on which the ground-truth phase fractions are evaluated.
x_grid = np.linspace(0, 100, 1000)
# Transposed so that column k holds the fraction of phase k+1 along x_grid (indexed as phases[:, k] below).
phases = _get_1d_phase_fractions(x_grid).T

Resolve by the experiment iteration...

In [None]:
# For every experiment size N = 3 ... n_samples, pool (over all experiments) the first N sampled x values.
# Entry k of the list therefore corresponds to N = k + 3.
all_results_Adam_n_resolved = [all_results_Adam[:, :nn].flatten() for nn in range(3, all_results_Adam.shape[1] + 1)]

In [None]:
# Build an (n_total, 2) array of (x, N) pairs for the 2D histogram: column 0 holds the
# pooled sampled x values, column 1 the index of the N-resolved list they came from.
all_results_Adam_n_resolved_coordinates = np.concatenate(
    [
        np.column_stack([pooled_x, np.full(len(pooled_x), list_index, dtype=int)])
        for list_index, pooled_x in enumerate(all_results_Adam_n_resolved)
    ],
    axis=0,
)
# List index 0 corresponds to N = 3, so shift the index column to hold N itself.
all_results_Adam_n_resolved_coordinates[:, 1] += 3

In [None]:
# Upper colour limit (raw counts) shared by all 2D sampling histograms in this notebook;
# the colorbar cell labels it as vmax / 300.
vmax = 1500

In [None]:
fig, axs = plt.subplots(2, 1, figsize=(3, 3), gridspec_kw={'height_ratios':[1, 2]}, sharex=True)
ax_phase, ax_hist = axs

# --- (a) ground-truth phase fractions along x -------------------------------
for phase_idx in range(4):
    ax_phase.plot(x_grid, phases[:, phase_idx], label=str(phase_idx + 1))
# Freeze the autoscaled y-limits so the shaded bands below cannot change them.
axlims = ax_phase.get_ylim()
ax_phase.text(1.05, 1.0, "Phase", ha="left", va="bottom", transform=ax_phase.transAxes)
ax_phase.text(0.025, 0.9, "(a)", ha="left", va="top", transform=ax_phase.transAxes)
ax_phase.legend(frameon=False, bbox_to_anchor=(1.0, 0.5), loc="center left")

# Grey bands marking the three x intervals highlighted in the figure.
for band_left, band_right in [(10, 50), (60, 80), (88.5, 91.5)]:
    ax_phase.fill_betweenx(
        np.linspace(*axlims, 10), band_left, band_right, color="black", alpha=0.1, linewidth=0
    )

utils.set_grids(ax_phase)
ax_phase.set_ylabel("$p(x)$")
ax_phase.set_ylim(*axlims)

# --- (b) where the experiments sampled, resolved by experiment size N -------
sampled_x = all_results_Adam_n_resolved_coordinates[:, 0]
sampled_n = all_results_Adam_n_resolved_coordinates[:, 1]
ax_hist.hist2d(sampled_x, sampled_n, bins=[100, 247], cmap="viridis", vmax=vmax, rasterized=True)
ax_hist.set_yticks([3, 50, 150, 250])
ax_hist.set_xticks([0, 20, 40, 60, 80, 100])
utils.set_grids(ax_hist)
ax_hist.tick_params(which="minor", left=False, right=False)
ax_hist.set_ylabel(r"$N$")
ax_hist.set_xlabel("$x$~[a.u.]")
ax_hist.text(0.025, 0.9, "(b)", ha="left", va="top", transform=ax_hist.transAxes, color="white")

# plt.savefig("figures_xrd1dim/xrd1dim_subfigure_a.svg", dpi=300, bbox_inches="tight")
plt.show()

### Subfigure (a) colorbar

In [None]:
# Stand-alone colorbar for the 2D sampling histogram: the histogram is drawn only to
# obtain a mappable with the right colour scale, then its axes are removed.
fig, ax = plt.subplots(1, 1, figsize=(2, 2), sharex=True, sharey=True)

im = ax.hist2d(all_results_Adam_n_resolved_coordinates[:, 0], all_results_Adam_n_resolved_coordinates[:, 1], bins=[150, 247], cmap="viridis", vmax=vmax)

# hist2d returns (counts, xedges, yedges, image); the image is the mappable.
cbar = utils.add_colorbar(im[-1], aspect=20)
cbar.set_ticks([0, vmax])
# Raw string: "\g" is not a valid escape sequence, so a non-raw literal triggers a
# SyntaxWarning on recent Python versions. The rendered label is unchanged.
# NOTE(review): 300 is presumably the number of experiments being averaged over — confirm.
cbar.set_ticklabels([0, r"$\geq$ %i" % int(vmax / 300)])
cbar.set_label(r"Average Counts", labelpad=-10)

ax.remove()

# plt.savefig("figures_xrd1dim/xrd1dim_cbar.svg", dpi=300, bbox_inches="tight")
plt.show()

### Subfigure (c): select metrics

The metrics for this part take a long time to calculate, so we cache them.

In [None]:
# Maps the keys used in the metrics dictionaries to the short labels used in figure legends.
# "LTB" is shown as "Grid" by the plotting cells of this section.
# NOTE: this name is re-bound with different contents in later sections of the notebook.
acquisition_function_name_maps = {
    "Linear": "LTB",
    "UpperConfidenceBound10": "UCB(10)",
    "ExpectedImprovement": "EI",
    "UpperConfidenceBound1": "UCB(1)",
    "UpperConfidenceBound20": "UCB(20)",
    "UpperConfidenceBound100": "UCB(100)"
}

In [None]:
# Experiment sizes N at which the metric is evaluated: 3, 13, ..., 243.
metrics_grid = list(range(3, 251, 10))
# Passed as `grid_points` to the metric computation below.
linspace_points = 10000

In [None]:
# Full-range (x in [0, 100]) MSE metrics for every acquisition function, cached on disk
# because they are slow to compute. Delete the pickle to force a recalculation.
path = cache / "xrd1dim_all.pkl"
if not path.exists():
    print("Recalculating...")
    _m = xrd1dim_compute_metrics_all_acquisition_functions_and_LTB(
        results_by_acqf_Adam,
        metrics_grid=metrics_grid,
        metrics_grid_linear=metrics_grid,
        metric="mse",
        grid_points=linspace_points,
        disable_pbar=False,
        xmin=0.0,
        xmax=100.0,
    )
    all_metrics = _m["metrics"]
    # Context manager guarantees the file is flushed and closed.
    with open(path, "wb") as cache_file:
        pickle.dump(all_metrics, cache_file, protocol=pickle.HIGHEST_PROTOCOL)
else:
    # NOTE: pickle can execute arbitrary code on load; only load caches produced by this notebook.
    with open(path, "rb") as cache_file:
        all_metrics = pickle.load(cache_file)

In [None]:
# Legend labels (values of `acquisition_function_name_maps`) to draw; None draws everything.
only_plot = ["LTB", "EI", "UCB(10)"]

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(2, 3))

# One curve per selected acquisition function: mean of ln(MSE) over axis 1 with a
# band of +/- two standard deviations.
for acquisition_function_name, label in acquisition_function_name_maps.items():
    values = all_metrics[acquisition_function_name]
    if only_plot is not None and label not in only_plot:
        continue
    log_values = np.log(values)
    mu = np.nanmean(log_values, axis=1)
    sd = 2 * np.nanstd(log_values, axis=1)
    legend_label = "Grid" if label == "LTB" else label
    ax.plot(metrics_grid, mu, label=legend_label)
    ax.fill_between(metrics_grid, mu - sd, mu + sd, linewidth=0, alpha=0.3)

utils.set_grids(ax)
ax.tick_params(which="minor", bottom=False, top=False)
ax.set_xticks([3, 50, 150, 250])

ax.legend(frameon=False, loc="upper right")
ax.text(0.1, 0.95, r"(c)", ha="left", va="top", transform=ax.transAxes)

# The y axis is linear in ln(MSE); tick labels are typeset in math mode.
yticks = np.array([-2, -5, -8, -11, -14])
ax.set_yticks(yticks.tolist())
ax.set_yticklabels([f"${tick}$" for tick in yticks])
ax.tick_params(axis='y', which='minor', left=True, right=True)

ax.set_xlabel(r"$N$")
ax.set_ylabel(r"$\ln$(MSE)")

# plt.savefig("figures_xrd1dim/xrd1dim_subfigure_c.svg", dpi=300, bbox_inches="tight")
plt.show()

## Supplemental

### Bayes clustering

In [None]:
# Settings for the clustering-based baseline below.
max_queries = 250    # total number of points each simulated experiment ends with
grid_points = 10000  # passed as `linspace_points` to residual_1d_phase_mse
N_exp = 10           # number of independent repetitions per cluster count

In [None]:
# Baseline: cluster the measured spectra with k-means, fit a logistic-regression classifier
# x -> cluster label, and query next where the predicted class distribution has the highest
# Shannon entropy. The resulting metrics are stored in `all_metrics` under the keys
# "Bayesian_3", "Bayesian_4" and "Bayesian_5", each of shape (N_exp, len(grid)).
np.random.seed(123)

# Candidate locations at which the classifier entropy is evaluated (loop-invariant).
linspace = np.linspace(0, 100, 1000).reshape(-1, 1)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    for k_clusters in [3, 4, 5]:
        # Deliberately NOT named `metrics_grid`: overwriting that global would leave it at
        # range(5, ...) and shift the x axis of the acquisition-function curves plotted from
        # `all_metrics`, which were computed on the range(3, ...) grid.
        bayes_metrics_grid = list(range(k_clusters, max_queries + 1, 10))
        all_metrics[f"Bayesian_{k_clusters}"] = []

        for exp in tqdm(range(N_exp)):
            k_means = KMeans(k_clusters, n_init="auto")
            clf = LogisticRegression()
            # Start from k_clusters equally spaced points.
            x = list(np.linspace(0, 100, k_clusters))
            # truth_xrd1dim is always called on the full array of sampled points
            # (per the original note, it does not accept single items).
            y = truth_xrd1dim(np.array(x))

            for _ in range(k_clusters + 1, max_queries + 1):
                labels = k_means.fit_predict(y)
                clf.fit(np.array(x).reshape(-1, 1), labels)
                proby = clf.predict_proba(linspace)
                # p * ln(1/p) is NaN/inf when p underflows to 0; its limit is 0. Without this
                # guard a single NaN would win the argmax below (warnings are silenced here).
                entropy_terms = proby * np.log(1 / proby)
                entropy_terms = np.where(np.isfinite(entropy_terms), entropy_terms, 0.0)
                shannon = np.sum(entropy_terms, axis=-1)
                # Index the scalar explicitly; float() on a 1-element array is deprecated in NumPy.
                max_entropy_loc = float(linspace[np.argmax(shannon), 0])

                x.append(max_entropy_loc)
                y = truth_xrd1dim(np.array(x))

            # Evaluate the reconstruction error using only the first N sampled points.
            _metrics = []
            for N in bayes_metrics_grid:
                res = residual_1d_phase_mse(
                    np.array(x)[:N].reshape(-1, 1),
                    linspace_points=grid_points,
                    use_only=None,
                )
                _metrics.append(res)
            all_metrics[f"Bayesian_{k_clusters}"].append(_metrics)

for k_clusters in [3, 4, 5]:
    all_metrics[f"Bayesian_{k_clusters}"] = np.array(all_metrics[f"Bayesian_{k_clusters}"])

In [None]:
# N grids of the clustering baseline, one per cluster count (3, 4, 5), in that order.
# Each grid starts at its cluster count, matching the grids used when the metrics were computed.
metric_grids_bayesian = [
    list(range(k_clusters, max_queries + 1, 10)) for k_clusters in [3, 4, 5]
]

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(2, 3))

# --- acquisition-function curves (metrics arrays reduced over axis 1) ---------
# NOTE(review): `metrics_grid` is read from notebook state; it must still be the grid
# that was used to compute `all_metrics` when this cell runs.
for acquisition_function_name, label in acquisition_function_name_maps.items():
    values = all_metrics[acquisition_function_name]
    if only_plot is not None and label not in only_plot:
        continue
    log_values = np.log(values)
    mu = np.nanmean(log_values, axis=1)
    sd = 2 * np.nanstd(log_values, axis=1)
    ax.plot(metrics_grid, mu, label="Grid" if label == "LTB" else label)
    ax.fill_between(metrics_grid, mu - sd, mu + sd, linewidth=0, alpha=0.3)

# --- clustering baseline curves (metrics arrays reduced over axis 0) ----------
acquisition_function_name_maps_bayesian = {
    f"Bayesian_{k_clusters}": f"Bayesian_{k_clusters}_clusters" for k_clusters in [3, 4, 5]
}
bayesian_items = acquisition_function_name_maps_bayesian.items()
for bayes_grid, (acquisition_function_name, label) in zip(metric_grids_bayesian, bayesian_items):
    log_values = np.log(all_metrics[acquisition_function_name])
    mu = np.nanmean(log_values, axis=0)
    sd = 2 * np.nanstd(log_values, axis=0)
    ax.plot(bayes_grid, mu, label=label.replace("_", " "))
    ax.fill_between(bayes_grid, mu - sd, mu + sd, linewidth=0, alpha=0.3)

utils.set_grids(ax)
ax.tick_params(which="minor", bottom=False, top=False)
ax.set_xticks([3, 50, 150, 250])

ax.legend(frameon=False, loc="center left", bbox_to_anchor=(1, 0.5))

# The y axis is linear in ln(MSE); tick labels are typeset in math mode.
yticks = np.array([-2, -5, -8, -11, -14])
ax.set_yticks(yticks.tolist())
ax.set_yticklabels([f"${tick}$" for tick in yticks])
ax.tick_params(axis='y', which='minor', left=True, right=True)

ax.set_xlabel(r"$N$")
ax.set_ylabel(r"$\ln$(MSE)")

# plt.savefig("figures_xrd1dim/SI_xrd1dim_Bayesian.svg", dpi=300, bbox_inches="tight")
plt.show()

### Average value of the GP modeling SVF

In [None]:
# Re-create the phase-fraction curves so this section does not depend on the main-figure cells.
x_grid = np.linspace(0, 100, 1000)
# Column k holds the fraction of phase k+1 along x_grid.
phases = _get_1d_phase_fractions(x_grid).T

In [None]:
from sva.experiments import Experiment, Data
from sva.value import default_asymmetric_value_function
from sva.truth.xrd1dim import truth_xrd1dim

EI

In [None]:
# Initial data set on x in [0, 100]. Every experiment cell in this section uses these same
# arguments (including seed=124), so the runs differ only in the acquisition function.
data = Data.from_initial_conditions(
    truth=truth_xrd1dim,
    value=default_asymmetric_value_function,
    seed=124,
    how="random",
    xmin=0.0,
    xmax=100.0,
    ndim=1,
    value_kwargs={"sd": None, "multiplier": 1.0},
)
# Expected Improvement, no extra acquisition-function arguments.
experiment_EI = Experiment(
    data=data,
    acqf_signature="botorch.acquisition:ExpectedImprovement",
    acqf_kwargs={},
)
# record_gp_every=1: the plotting cell below reads one "mu" entry per element of `_record`.
experiment_EI.run(max_n_dat=250, pbar=True, record_gp_every=1, points_per_dimension_full_grid=100, fit_with_Adam=True)

UCB(1)

In [None]:
# Same initial conditions (seed=124) as the other experiment cells in this section.
data = Data.from_initial_conditions(
    truth=truth_xrd1dim,
    value=default_asymmetric_value_function,
    seed=124,
    how="random",
    xmin=0.0,
    xmax=100.0,
    ndim=1,
    value_kwargs={"sd": None, "multiplier": 1.0},
)
# Upper Confidence Bound with beta = 1.
experiment_UCB1 = Experiment(
    data=data,
    acqf_signature="botorch.acquisition:UpperConfidenceBound",
    acqf_kwargs={"beta": 1.0},
)
# record_gp_every=1: the plotting cell below reads one "mu" entry per element of `_record`.
experiment_UCB1.run(max_n_dat=250, pbar=True, record_gp_every=1, points_per_dimension_full_grid=100, fit_with_Adam=True)

UCB(10)

In [None]:
# Same initial conditions (seed=124) as the other experiment cells in this section.
data = Data.from_initial_conditions(
    truth=truth_xrd1dim,
    value=default_asymmetric_value_function,
    seed=124,
    how="random",
    xmin=0.0,
    xmax=100.0,
    ndim=1,
    value_kwargs={"sd": None, "multiplier": 1.0},
)
# Upper Confidence Bound with beta = 10.
experiment_UCB10 = Experiment(
    data=data,
    acqf_signature="botorch.acquisition:UpperConfidenceBound",
    acqf_kwargs={"beta": 10.0},
)
# record_gp_every=1: the plotting cell below reads one "mu" entry per element of `_record`.
experiment_UCB10.run(max_n_dat=250, pbar=True, record_gp_every=1, points_per_dimension_full_grid=100, fit_with_Adam=True)

UCB(20)

In [None]:
# Same initial conditions (seed=124) as the other experiment cells in this section.
data = Data.from_initial_conditions(
    truth=truth_xrd1dim,
    value=default_asymmetric_value_function,
    seed=124,
    how="random",
    xmin=0.0,
    xmax=100.0,
    ndim=1,
    value_kwargs={"sd": None, "multiplier": 1.0},
)
# Upper Confidence Bound with beta = 20.
experiment_UCB20 = Experiment(
    data=data,
    acqf_signature="botorch.acquisition:UpperConfidenceBound",
    acqf_kwargs={"beta": 20.0},
)
# record_gp_every=1: the plotting cell below reads one "mu" entry per element of `_record`.
experiment_UCB20.run(max_n_dat=250, pbar=True, record_gp_every=1, points_per_dimension_full_grid=100, fit_with_Adam=True)

UCB(100)

In [None]:
# Same initial conditions (seed=124) as the other experiment cells in this section.
data = Data.from_initial_conditions(
    truth=truth_xrd1dim,
    value=default_asymmetric_value_function,
    seed=124,
    how="random",
    xmin=0.0,
    xmax=100.0,
    ndim=1,
    value_kwargs={"sd": None, "multiplier": 1.0},
)
# Upper Confidence Bound with beta = 100.
experiment_UCB100 = Experiment(
    data=data,
    acqf_signature="botorch.acquisition:UpperConfidenceBound",
    acqf_kwargs={"beta": 100.0},
)
# record_gp_every=1: the plotting cell below reads one "mu" entry per element of `_record`.
experiment_UCB100.run(max_n_dat=250, pbar=True, record_gp_every=1, points_per_dimension_full_grid=100, fit_with_Adam=True)

Random

In [None]:
# Same initial conditions (seed=124) as the other experiment cells in this section.
data = Data.from_initial_conditions(
    truth=truth_xrd1dim,
    value=default_asymmetric_value_function,
    seed=124,
    how="random",
    xmin=0.0,
    xmax=100.0,
    ndim=1,
    value_kwargs={"sd": None, "multiplier": 1.0},
)
# Control run using the "random" acquisition signature (no acquisition-function arguments).
experiment_random = Experiment(
    data=data,
    acqf_signature="random",
)
# The GP is still recorded at every step so the run can be plotted like the others.
experiment_random.run(max_n_dat=250, pbar=True, record_gp_every=1, points_per_dimension_full_grid=100, fit_with_Adam=True)

Plot everything

In [None]:
def _plot_normalized_svf(ax, experiment, label, n_first=3, n_ticks=(3, 50, 150, 249)):
    """Draw the recorded GP mean of one experiment as an image of x (horizontal) vs N (vertical).

    Each row is one entry of ``experiment._record`` (its ``"mu"`` array), divided by that
    row's maximum. N increases upwards, as in the previous version of this figure.

    The previous version flipped the rows, used ``extent=(..., 247, 3)`` and placed ticks
    at ``labels - 3`` with the labels reversed. That is only right for the two end points:
    the ticks labelled 150 and 50 actually sat near N ~ 203 and N ~ 103, and the tick at 0
    lay outside the image. Here the y axis is expressed directly in N, so ticks need no
    relabelling.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    experiment : object exposing ``_record``, a sequence of dicts with a ``"mu"`` entry.
    label : str, written in white in the upper-left corner of the panel.
    n_first : int, N of the first record.
        NOTE(review): assumes the first record was taken at N=3 and one record was added
        per acquired point (record_gp_every=1) — confirm against Experiment.run.
    n_ticks : N values to mark on the y axis; values outside the recorded range are skipped.
    """
    svf = np.array([record["mu"] for record in experiment._record])
    svf = svf / svf.max(axis=1, keepdims=True)  # normalise every row to a maximum of 1

    n_last = n_first + svf.shape[0] - 1
    ax.imshow(
        svf,
        cmap="viridis",
        rasterized=True,
        aspect="auto",
        origin="lower",  # row 0 (first record, smallest N) at the bottom
        extent=(-0.1, 100.1, n_first - 0.5, n_last + 0.5),  # rows centred on their N
    )
    ax.set_yticks([n for n in n_ticks if n_first <= n <= n_last])
    ax.set_xticks([0, 20, 40, 60, 80, 100])
    ax.text(0.05, 0.9, label, ha="left", va="top", transform=ax.transAxes, color="white")


# Panels below the phase diagram, top to bottom.
svf_panels = [
    ("EI", experiment_EI),
    ("UCB(1)", experiment_UCB1),
    ("UCB(10)", experiment_UCB10),
    ("UCB(20)", experiment_UCB20),
    ("UCB(100)", experiment_UCB100),
    ("Random", experiment_random),
]

fig, axs = plt.subplots(
    len(svf_panels) + 1, 1,
    figsize=(4, 8),
    gridspec_kw={'height_ratios': [1] + [1.75] * len(svf_panels)},
    sharex=True,
)

# Top panel: ground-truth phase fractions, for reference against the SVF images.
ax = axs[0]
for phase_idx in range(4):
    ax.plot(x_grid, phases[:, phase_idx], label=str(phase_idx + 1))
ax.set_ylabel(r"$p(x)$")

for ax, (panel_label, experiment) in zip(axs[1:], svf_panels):
    _plot_normalized_svf(ax, experiment, panel_label)

# Axis labels only on the bottom panel.
axs[-1].set_xlabel(r"$x$~[a.u.]")
axs[-1].set_ylabel(r"$N$")

# plt.savefig("figures_xrd1dim/SI_xrd1dim_SVF.svg", dpi=300, bbox_inches="tight")
plt.show()

### Noise ablation experiment

In [None]:
from sva.experiments import Experiment, Data
from sva.value import default_asymmetric_value_function
from sva.truth.xrd1dim import truth_xrd1dim

In [None]:
# Finished noise-ablation runs are collected here, in the order of `noise_values`.
experiments = list()
# Noise levels 0.1, 0.2, ..., 0.7 followed by 10.0, a deliberately extreme control.
noise_values = [round(0.1 + step * 0.1, 1) for step in range(7)]
noise_values.append(10.0)

We run a noise ablation experiment in which the observation is re-drawn with fresh random noise _every time_ the truth function is called. This mimics irreducible measurement noise during an experiment.

In [None]:
# One Expected-Improvement experiment per noise level; results are appended to `experiments`
# in the order of `noise_values`. Re-running this cell without resetting `experiments`
# appends a second set of runs.
for noise in noise_values:

    # noise_seed=None is passed through to the truth function together with the noise level;
    # per the note above, the observation is then random on every call.
    data = Data.from_initial_conditions(
        truth=truth_xrd1dim,
        value=default_asymmetric_value_function,
        seed=124,
        how="random",
        xmin=0.0,
        xmax=100.0,
        ndim=1,
        value_kwargs={"sd": None, "multiplier": 1.0},
        truth_kwargs={"noise": noise, "noise_seed": None},
    )

    e = Experiment(
        data=data,
        acqf_signature="botorch.acquisition:ExpectedImprovement",
        acqf_kwargs={},
    )

    # Shorter runs than in the SVF section (max_n_dat=50 instead of 250).
    e.run(max_n_dat=50, pbar=True, record_gp_every=1, points_per_dimension_full_grid=100, fit_with_Adam=True)

    experiments.append(e)

In [None]:
# Composition grid and ground-truth phase fractions (column k = phase k+1).
x_grid = np.linspace(0, 100, 1000)
phases = _get_1d_phase_fractions(x_grid).T
# Q axis against which spectra are plotted below.
# NOTE(review): assumes truth_xrd1dim returns 1000 intensities per x on Q in [0, 9] — confirm.
Q_grid = np.linspace(0, 9, 1000)

In [None]:
# Left column: normalised GP mean (SVF) versus x and N for each noise level.
# Right column: two noisy spectra at that noise level, at x = 88 and x = 92.
# The last experiment (noise = 10.0, the extreme control) is not shown.
shown_experiments = experiments[:-1]
n_shown = len(shown_experiments)
extent = (-0.1, 100.1, 47, 0)

fig, axs = plt.subplots(
    n_shown + 1, 2,
    figsize=(7, 8),
    gridspec_kw={'height_ratios': [1] + [1.75] * n_shown}
)

# Top-left: ground-truth phase fractions for reference; top-right is unused.
ax = axs[0, 0]
for phase_idx in range(4):
    ax.plot(x_grid, phases[:, phase_idx], label=str(phase_idx + 1))
ax.set_ylabel(r"$p(x)$")
ax.set_xticks([])
axs[0, 1].set_xticks([])
ax.set_xlim(0, 100)

# 101 points give x_integer_grid[k] == k, so rows 88 and 92 of the truth array really are
# x = 88 and x = 92 as stated in the legend. With the previous 100-point grid they were
# x ~ 88.9 (inside the shaded 88.5-91.5 band of the phase diagram) and x ~ 92.9.
x_integer_grid = np.linspace(0, 100, 101)

for ii, (exp, noise) in enumerate(zip(shown_experiments, noise_values)):
    is_bottom_row = ii == n_shown - 1

    # --- left: SVF image, rows flipped so that N increases upwards -------------
    ax = axs[ii + 1, 0]
    svf = np.array([v["mu"] for v in exp._record])
    svf = svf[::-1, :]
    svf = svf / svf.max(axis=1, keepdims=True)

    ax.imshow(svf, cmap="viridis", rasterized=True, aspect="auto", extent=extent)
    # Only the two end points are marked, so reversing the labels is consistent with the flip.
    ytick_labels = np.array([3, 50])
    ax.set_yticks(ytick_labels - 3)
    ax.set_yticklabels(ytick_labels[::-1])
    ax.set_xticks([0, 20, 40, 60, 80, 100] if is_bottom_row else [])

    # --- right: example noisy spectra ------------------------------------------
    ax = axs[ii + 1, 1]
    ax.text(1.04, 0.5, r"$\sigma=%.01f$" % noise, ha="left", va="center", transform=ax.transAxes, color="black", rotation=90)
    noisy_truth = truth_xrd1dim(x_integer_grid, noise=noise, noise_seed=None)

    # Legend entries only on the first row; None (not 0) means "no label".
    label1 = r"$x=88$" if ii == 0 else None
    label2 = r"$x=92$" if ii == 0 else None
    ax.plot(Q_grid, noisy_truth[88, :], label=label1)
    ax.plot(Q_grid, noisy_truth[92, :], label=label2)
    if ii == 0:
        ax.legend(frameon=False, fontsize=8)

    if is_bottom_row:
        ax.set_xticks([0, 9])
        ax.set_xlabel(r"$Q$~[\AA$^{-1}$]")
    else:
        ax.set_xticks([])

# Shared y labels go on the middle row (row 4 for the seven noise levels shown).
middle_row = (n_shown + 1) // 2
axs[middle_row, 0].set_ylabel(r"$N$")
axs[middle_row, 1].set_ylabel(r"$I(Q)$~[a.u.]")
axs[-1, 0].set_xlabel(r"$x$~[a.u.]")

axs[0, 1].set_axis_off()

plt.subplots_adjust(wspace=0.3)

# plt.savefig("figures_xrd1dim/SI_xrd1dim_noise_ablation.pdf", dpi=300, bbox_inches="tight")
plt.show()

### Noisy observations

In [None]:
# One panel per noise level showing the four pure-phase patterns with that noise applied.
# A fixed noise_seed is passed so the figure is the same on every run.
noise_values = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
Q_grid = np.linspace(0, 9, 1000)

n_levels = len(noise_values)
fig, axs = plt.subplots(n_levels, 1, figsize=(3, n_levels), sharex=True, sharey=True)

for row, noise in enumerate(noise_values):
    ax = axs[row]
    phases = _get_1d_phase_data(noise=noise, noise_seed=124)
    for phase_number in (1, 2, 3, 4):
        ax.plot(Q_grid, phases[phase_number - 1, :], label=f"Phase {phase_number}")
    utils.set_grids(ax)
    # The legend is identical for every panel, so show it once.
    if row == 0:
        ax.legend(frameon=False, fontsize=6)
    ax.text(0.5, 0.9, r"$\sigma=%.01f$" % noise, ha="center", va="top", transform=ax.transAxes)

# Shared axis labels: y on the middle panel, x on the bottom panel.
axs[n_levels // 2].set_ylabel(r"$I(Q)$~[a.u.]")

bottom_ax = axs[-1]
bottom_ax.set_xticks([0, 9])
bottom_ax.set_xlabel(r"$Q$~[\AA$^{-1}$]")

# plt.savefig("figures_xrd1dim/SI_xrd1dim_noisy_phases.pdf", dpi=300, bbox_inches="tight")
plt.show()

### Sigma=0.1 (S1)

In [None]:
# Pure-phase patterns with the function's default noise settings (row k = phase k+1, as indexed below).
# NOTE: this re-binds `phases`, which held phase *fractions* in earlier sections.
phases = _get_1d_phase_data()
Q_grid = np.linspace(0, 9, 1000)

In [None]:
# Single-panel figure of the four pure-phase patterns.
fig, ax = plt.subplots(1, 1, figsize=(3, 1))

for phase_number in (1, 2, 3, 4):
    pattern = phases[phase_number - 1, :]
    ax.plot(Q_grid, pattern, label=f"Phase {phase_number}")

ax.legend(frameon=False, fontsize=6)
ax.set_xticks([0, 9])
ax.set_xlabel(r"$Q$~[\AA$^{-1}$]")
ax.set_ylabel(r"$I(Q)$~[a.u.]")

# plt.savefig("figures_xrd1dim/SI_xrd1dim_pure_phases_xrd_patterns.pdf", dpi=300, bbox_inches="tight")
plt.show()

### Compare Adam and L-BFGS

In [None]:
# Legacy campaign used as the comparison set against the Adam results loaded at the top.
# NOTE(review): "LGBFS" in the names presumably refers to the L-BFGS optimiser — confirm.
results_LGBFS = read_data("../legacy/Legacy_Figures_v2/results/results_22-12-21_xrd1dim")

In [None]:
# Same grouping by acquisition function as for the Adam results, so both can be indexed by the same keys.
results_by_acqf_LGBFS = parse_results_by_acquisition_function(results_LGBFS)

In [None]:
# Acquisition functions compared in this section (result key -> column title).
# NOTE: re-binds the global mapping used by earlier sections; "Linear" is not included here.
acquisition_function_name_maps = {
    "ExpectedImprovement": "EI",
    "UpperConfidenceBound1": "UCB(1)",
    "UpperConfidenceBound10": "UCB(10)",
    "UpperConfidenceBound20": "UCB(20)",
    "UpperConfidenceBound100": "UCB(100)"
}

In [None]:
# Histograms of all sampled x values per acquisition function:
# top row = legacy (L-BFGS) results, bottom row = Adam results.
bins = 50

fig, axs = plt.subplots(2, len(acquisition_function_name_maps), figsize=(7, 2), sharex=True)

for ii, (key, value) in enumerate(acquisition_function_name_maps.items()):

    # Local names on purpose: the original loop re-bound the global `all_results_Adam`,
    # which the main-figure cells build their N-resolved histogram from.
    sampled_x_lbfgs = np.array([xx.data.X.squeeze() for xx in results_by_acqf_LGBFS[key]])
    sampled_x_adam = np.array([xx.data.X.squeeze() for xx in results_by_acqf_Adam[key]])

    ax = axs[0, ii]
    ax.hist(sampled_x_lbfgs.flatten(), bins=bins)
    ax.set_title(value)

    ax = axs[1, ii]
    ax.hist(sampled_x_adam.flatten(), bins=bins)

for ax in axs.flatten():
    utils.set_grids(ax)
    ax.set_yticks([])

# Row labels on the right-hand side. "LGBFS" was a misspelling of the optimiser name.
axs[0, -1].text(1.1, 0.5, "L-BFGS", ha="left", va="center", transform=axs[0, -1].transAxes, rotation=90)
axs[1, -1].text(1.1, 0.5, "Adam", ha="left", va="center", transform=axs[1, -1].transAxes, rotation=90)

# Invisible full-figure axes that only carry the shared x/y labels.
ax = fig.add_subplot(111, frameon=False)
# Booleans, not the strings 'off': a non-empty string is truthy and no longer means "hidden".
ax.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
ax.set_xticks([])
ax.set_yticks([])
ax.set_ylabel("Counts")
ax.set_xlabel(r"$x$~[a.u.]", labelpad=12)

plt.subplots_adjust(hspace=0.05, wspace=0.15)

# plt.savefig("SI_xrd1d_compare_Adam_LGBFS.pdf", dpi=300, bbox_inches="tight")
plt.show()

### Phase-resolved metrics

In [None]:
# Result key -> legend label for the phase-resolved metric figure.
# Unlike the main-figure mapping, "Linear" is labelled "Grid" directly here.
acquisition_function_name_maps = {
    "Linear": "Grid",
    "ExpectedImprovement": "EI",
    "UpperConfidenceBound1": "UCB(1)",
    "UpperConfidenceBound10": "UCB(10)",
    "UpperConfidenceBound20": "UCB(20)",
    "UpperConfidenceBound100": "UCB(100)"
}

In [None]:
# Experiment sizes N at which the metric is evaluated (3, 13, ..., 243); re-defined here so
# this section does not depend on earlier cells.
metrics_grid = list(range(3, 251, 10))
# Passed as `grid_points` to the metric computations below.
linspace_points = 10000

In [None]:
# MSE metrics restricted to x in [9, 51] (plotted under the title "linear"), cached on disk.
# Delete the pickle to force a recalculation.
path = cache / "xrd1dim_linear.pkl"
if not path.exists():
    print("Recalculating...")
    _m = xrd1dim_compute_metrics_all_acquisition_functions_and_LTB(
        results_by_acqf_Adam,
        metrics_grid=metrics_grid,
        metrics_grid_linear=metrics_grid,
        metric="mse",
        grid_points=linspace_points,
        disable_pbar=False,
        xmin=9.0,
        xmax=51.0,
    )
    all_metrics_linear = _m["metrics"]
    # Context manager guarantees the file is flushed and closed.
    with open(path, "wb") as cache_file:
        pickle.dump(all_metrics_linear, cache_file, protocol=pickle.HIGHEST_PROTOCOL)
else:
    # NOTE: pickle can execute arbitrary code on load; only load caches produced by this notebook.
    with open(path, "rb") as cache_file:
        all_metrics_linear = pickle.load(cache_file)

In [None]:
# MSE metrics restricted to x in [59, 81] (plotted under the title "quadratic"), cached on disk.
# Delete the pickle to force a recalculation.
path = cache / "xrd1dim_quad.pkl"
if not path.exists():
    print("Recalculating...")
    _m = xrd1dim_compute_metrics_all_acquisition_functions_and_LTB(
        results_by_acqf_Adam,
        metrics_grid=metrics_grid,
        metrics_grid_linear=metrics_grid,
        metric="mse",
        grid_points=linspace_points,
        disable_pbar=False,
        xmin=59.0,
        xmax=81.0,
    )
    all_metrics_quad = _m["metrics"]
    # Context manager guarantees the file is flushed and closed.
    with open(path, "wb") as cache_file:
        pickle.dump(all_metrics_quad, cache_file, protocol=pickle.HIGHEST_PROTOCOL)
else:
    # NOTE: pickle can execute arbitrary code on load; only load caches produced by this notebook.
    with open(path, "rb") as cache_file:
        all_metrics_quad = pickle.load(cache_file)

In [None]:
# MSE metrics restricted to x in [88, 92] (plotted under the title "sharp"), cached on disk.
# Delete the pickle to force a recalculation.
path = cache / "xrd1dim_sharp.pkl"
if not path.exists():
    print("Recalculating...")
    _m = xrd1dim_compute_metrics_all_acquisition_functions_and_LTB(
        results_by_acqf_Adam,
        metrics_grid=metrics_grid,
        metrics_grid_linear=metrics_grid,
        metric="mse",
        grid_points=linspace_points,
        disable_pbar=False,
        xmin=88.0,
        xmax=92.0,
    )
    all_metrics_sharp = _m["metrics"]
    # Context manager guarantees the file is flushed and closed.
    with open(path, "wb") as cache_file:
        pickle.dump(all_metrics_sharp, cache_file, protocol=pickle.HIGHEST_PROTOCOL)
else:
    # NOTE: pickle can execute arbitrary code on load; only load caches produced by this notebook.
    with open(path, "rb") as cache_file:
        all_metrics_sharp = pickle.load(cache_file)

In [None]:
# Legend labels to draw in the phase-resolved figure (all entries of the mapping above).
only_plot = ["Grid", "EI", "UCB(1)", "UCB(10)", "UCB(20)", "UCB(100)"]

In [None]:
# ln(MSE) versus N, evaluated separately on the three x regions cached above.
fig, axs = plt.subplots(1, 3, figsize=(6, 3), sharex=True, sharey=True)

# (title, metrics) per panel, left to right.
region_panels = [
    ("linear", all_metrics_linear),
    ("quadratic", all_metrics_quad),
    ("sharp", all_metrics_sharp),
]

# The loop variable is deliberately not called `all_metrics`: re-binding that global would
# replace the full-range metrics dictionary (including the "Bayesian_*" entries) that
# earlier cells rely on when they are re-run.
for ii, (region_title, region_metrics) in enumerate(region_panels):
    ax = axs[ii]
    utils.set_grids(ax)
    ax.tick_params(which="minor", bottom=False, top=False)
    ax.set_xticks([3, 50, 150, 250])

    for acquisition_function_name in acquisition_function_name_maps.keys():
        values = region_metrics[acquisition_function_name]
        label = acquisition_function_name_maps[acquisition_function_name]
        if only_plot is None or label in only_plot:
            v = np.log(values)
            mu = np.nanmean(v, axis=1)
            sd = np.nanstd(v, axis=1) * 2  # band of +/- two standard deviations
            ax.plot(metrics_grid, mu, label=label)
            ax.fill_between(metrics_grid, mu - sd, mu + sd, linewidth=0, alpha=0.3)

    ax.set_title(region_title)

axs[2].legend(frameon=False, bbox_to_anchor=(1, 0.5), loc="center left")

axs[0].text(0.05, 0.05, r"(a)", ha="left", va="bottom", transform=axs[0].transAxes)
axs[1].text(0.05, 0.05, r"(b)", ha="left", va="bottom", transform=axs[1].transAxes)
axs[2].text(0.05, 0.05, r"(c)", ha="left", va="bottom", transform=axs[2].transAxes)

axs[1].set_xlabel(r"$N$")
axs[0].set_ylabel(r"$\ln$(MSE)")

# plt.savefig("figures_xrd1dim/SI_xrd1dim_phase_resolved_metric.pdf", dpi=300, bbox_inches="tight")
plt.show()

### Hist

In [None]:
# Acquisition functions shown in the histogram figure (result key -> panel label); one panel each.
acquisition_function_name_maps = {
    "ExpectedImprovement": "EI",
    "UpperConfidenceBound1": "UCB(1)",
    "UpperConfidenceBound10": "UCB(10)",
    "UpperConfidenceBound20": "UCB(20)",
    "UpperConfidenceBound100": "UCB(100)"
}

In [None]:
# Sampled x coordinates of every experiment, stacked into one array per acquisition function.
all_points = {
    acqf_key: np.array([experiment.data.X for experiment in acqf_experiments])
    for acqf_key, acqf_experiments in results_by_acqf_Adam.items()
}

In [None]:
# Phase fractions on a dense x grid for the reference panel (column k = phase k+1).
# Needed again because `phases` was re-bound to pure-phase patterns in an earlier section.
x_grid = np.linspace(0, 100, 1000)
phases = _get_1d_phase_fractions(x_grid).T

In [None]:
# Reference phase diagram on top, then one N-resolved 2D sampling histogram per acquisition
# function. `vmax` is the shared colour limit defined for the main figure.
L = len(acquisition_function_name_maps) + 1

fig, axs = plt.subplots(L, 1, figsize=(3, L), sharex=True, sharey=False)

# --- top panel: ground-truth phase fractions ------------------------------------
ax = axs[0]
for phase_idx in range(4):
    ax.plot(x_grid, phases[:, phase_idx], label=str(phase_idx + 1))
# Freeze the autoscaled limits so the shaded bands cannot change them.
axlims = ax.get_ylim()

for band_left, band_right in [(10, 50), (60, 80), (88.5, 91.5)]:
    ax.fill_betweenx(np.linspace(*axlims, 10), band_left, band_right, color="black", alpha=0.1, linewidth=0)

utils.set_grids(ax)
ax.set_ylabel("$p(x)$")
ax.set_ylim(*axlims)

# --- one histogram panel per acquisition function --------------------------------
# The original cell reused `ii` for both the panel loop and the inner N loop and overwrote
# the globals `all_results_Adam_n_resolved*` that the main-figure cells plot from. Local
# names avoid both problems; the histogrammed data are the same.
for panel_idx, (acquisition_function_name, label) in enumerate(acquisition_function_name_maps.items(), start=1):

    ax = axs[panel_idx]

    value = all_points[acquisition_function_name].squeeze()  # indexed as [experiment, sample]
    n_values = range(3, value.shape[1] + 1)

    # For every N, pool the first N samples of all experiments and tag them with N.
    sampled_x = np.concatenate([value[:, :nn].flatten() for nn in n_values])
    sampled_n = np.concatenate([np.full(value[:, :nn].size, float(nn)) for nn in n_values])

    ax.hist2d(
        sampled_x,
        sampled_n,
        bins=[100, 247], cmap="viridis", vmax=vmax, rasterized=True
    )

    utils.set_grids(ax)
    ax.set_yticks([3, 100, 250])
    ax.text(1.05, 0.5, label, ha="left", va="center", transform=ax.transAxes, rotation=90)

# Shared axis labels: x on the bottom panel, N on the middle panel.
axs[-1].set_xlabel("$x$~[a.u.]")
axs[L // 2].set_ylabel(r"$N$")

# Panel letters (a), (b), ...; white on the dark histogram panels, default colour on the first.
for idx, panel_ax in enumerate(axs):
    text_kwargs = {} if idx == 0 else {"color": "white"}
    panel_ax.text(
        0.025, 0.5, f"({chr(ord('a') + idx)})",
        ha="left", va="center", transform=panel_ax.transAxes, **text_kwargs
    )

plt.subplots_adjust(hspace=0.4, wspace=0.03)

# plt.savefig("SI_xrd1d_hist.pdf", dpi=300, bbox_inches="tight")
plt.show()

### Length scales

In [None]:
# Acquisition functions shown in the length-scale figure (result key -> legend label).
acquisition_function_name_maps = {
    "ExpectedImprovement": "EI",
    "UpperConfidenceBound1": "UCB(1)",
    "UpperConfidenceBound10": "UCB(10)",
    "UpperConfidenceBound20": "UCB(20)",
    "UpperConfidenceBound100": "UCB(100)"
}

In [None]:
# Rebuild the per-acquisition-function arrays of sampled x coordinates so that this section
# can be run without the histogram section above.
all_points = {}
for acqf_key, acqf_experiments in results_by_acqf_Adam.items():
    all_points[acqf_key] = np.array([experiment.data.X for experiment in acqf_experiments])

In [None]:
# Experiment sizes N = 3 ... 250 at which the mean nearest-neighbour distance is evaluated.
n_grid = list(range(3, 251))

In [None]:
# For every acquisition function, experiment and experiment size N: the mean distance from
# each of the first N sampled points to its nearest distinct neighbour.
# Result: all_length_scales[name] has shape (n_experiments, len(n_grid)).
all_length_scales = dict()
for acquisition_function_name, values in all_points.items():
    # Indexed as [experiment, sample] below. NOTE(review): assumes squeeze() leaves a 2D
    # array, i.e. more than one experiment and 1D inputs — confirm for new result sets.
    sampled_x = values.squeeze()
    tmp_jj = []

    # jj is the experiment index; use the actual number of experiments instead of a
    # hard-coded 300 so result sets of any size work.
    for jj in tqdm(range(sampled_x.shape[0])):

        tmp_jj_ii = []

        # ii is the number of points considered
        for ii in n_grid:

            p = sampled_x[jj, :ii].reshape(-1, 1)
            distance = distance_matrix(p, p)  # fresh array, safe to modify in place
            # Exclude zero distances (the diagonal, and any exactly repeated points) so the
            # row-wise minimum is the distance to the nearest distinct point.
            distance[distance == 0.0] = np.inf
            tmp_jj_ii.append(distance.min(axis=1).mean())
        tmp_jj.append(tmp_jj_ii)
    all_length_scales[acquisition_function_name] = np.array(tmp_jj)

In [None]:
# Mean nearest-neighbour distance <l> versus N on log-log axes: mean over experiments with a
# band of +/- one standard deviation. The unused power-law fit (np.polyfit) of the original
# cell was removed; its result was never plotted.
fig, ax = plt.subplots(1, 1, figsize=(2, 1.5))

for acquisition_function_name, label in acquisition_function_name_maps.items():
    values = all_length_scales[acquisition_function_name]
    mu = values.mean(axis=0)
    sd = values.std(axis=0)
    ax.plot(n_grid, mu, label=label)
    ax.fill_between(n_grid, mu - sd, mu + sd, alpha=0.2)

ax.set_yscale('log')
ax.set_xscale('log')
ax.set_ylabel(r"$\langle l \rangle$")
ax.set_xlabel(r"$N$")

utils.set_grids(ax)

ax.legend(frameon=False, bbox_to_anchor=(1.05, 0.5), loc="center left")

# plt.savefig("figures_xrd1dim/SI_xrd1dim_sigma_with_N.pdf", bbox_inches="tight", dpi=300)
plt.show()