Merge pull request #186 from kiudee/183_graph_bug
Fix performance and optima plot glitches when reducing parameter ranges
kiudee committed Feb 19, 2022
2 parents 4cf3207 + 8e496b3 commit a7ed5eb
Showing 6 changed files with 102 additions and 13 deletions.
9 changes: 6 additions & 3 deletions tests/test_local.py
@@ -159,7 +159,10 @@ def test_initialize_data(tmp_path):
noise_in = np.array([0.3, 0.2, 0.5])
optima_in = np.array([[0.3]])
performance_in = np.array([[2.0, 30.0, 20.0]])
np.savez_compressed(testfile, X_in, y_in, noise_in, optima_in, performance_in)
iteration_in = np.array([5])
np.savez_compressed(
testfile, X_in, y_in, noise_in, optima_in, performance_in, iteration_in
)

# Check if resume=False is recognized correctly
# (outputs should be empty despite data_path being given):
@@ -172,7 +175,7 @@ def test_initialize_data(tmp_path):
X, y, noise, iteration, optima, performance = initialize_data(
parameter_ranges=[(0.0, 1.0)], data_path=testfile, resume=True,
)
assert iteration == 3
assert int(iteration) == 5
assert np.allclose(X, X_in)
assert np.allclose(y, y_in)
assert np.allclose(noise, noise_in)
@@ -183,7 +186,7 @@ def test_initialize_data(tmp_path):
X, y, noise, iteration, _, _ = initialize_data(
parameter_ranges=[(0.0, 0.5)], data_path=testfile, resume=True,
)
assert iteration == 2
assert int(iteration) == 5
assert np.allclose(X, np.array([[0.0], [0.5]]))
assert np.allclose(y, np.array([1.0, -1.0]))
assert np.allclose(noise, np.array([0.3, 0.2]))
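The updated assertions reflect the new behaviour: when resuming, the iteration counter is read back from the data file (here 5) instead of being recomputed from the number of points that survive a reduced parameter range. A standalone sketch of that distinction, not part of this commit's diff (plain NumPy, simplified relative to initialize_data):

    import numpy as np

    # Data saved by an earlier run: three points, iteration counter at 5.
    X = np.array([[0.0], [0.5], [1.0]])
    y = np.array([1.0, -1.0, 0.0])
    noise = np.array([0.3, 0.2, 0.5])
    iteration = np.array([5])

    # Resuming with the reduced range (0.0, 0.5) drops points outside it:
    lower, upper = 0.0, 0.5
    mask = (X[:, 0] >= lower) & (X[:, 0] <= upper)
    X_reduced, y_reduced, noise_reduced = X[mask], y[mask], noise[mask]

    print(len(X_reduced))     # 2 -> only two points remain
    print(int(iteration[0]))  # 5 -> the stored counter is unaffected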
30 changes: 30 additions & 0 deletions tests/test_utils.py
@@ -0,0 +1,30 @@
"""Test utility functions of the project."""
import numpy as np
import pytest
from numpy.testing import assert_allclose

from tune.utils import latest_iterations


def test_latest_iterations():
iterations = np.array([1.0, 2.0, 3.0, 3.0, 4.0])
expected_indices = [0, 1, 3, 4]
result = latest_iterations(iterations)
assert len(result) == 1
assert_allclose(result, (iterations[expected_indices],))
array = np.array([0.0, 0.1, 0.2, 0.3, 0.4])
result = latest_iterations(iterations, array)
assert len(result) == 2
assert_allclose(result[0], iterations[expected_indices])
assert_allclose(result[1], array[expected_indices])

# Test if inconsistent lengths cause an exception
array = np.array([0.0, 0.1])
with pytest.raises(ValueError):
latest_iterations(iterations, array)

# Test an empty input:
iterations = np.array([])
result = latest_iterations(iterations)
assert len(result) == 1
assert_allclose(result, (iterations,))
6 changes: 4 additions & 2 deletions tune/cli.py
@@ -418,7 +418,7 @@ def local( # noqa: C901
confidence=settings.get("confidence", confidence),
)
optima.append(current_optimum)
performance.append([iteration, estimated_elo, estimated_std])
performance.append((int(iteration), estimated_elo, estimated_std))
except ValueError:
pass
plot_every_n = settings.get("plot_every", plot_every)
@@ -432,6 +432,7 @@ def local( # noqa: C901
plot_path=settings.get("plot_path", plot_path),
parameter_names=list(param_ranges.keys()),
confidence=settings.get("confidence", confidence),
current_iteration=iteration,
)

# Ask optimizer for next point:
@@ -489,7 +490,7 @@ def local( # noqa: C901
X.append(point)
y.append(score)
noise.append(error_variance)
iteration = len(X)
iteration += 1

with AtomicWriter(data_path, mode="wb", overwrite=True).open() as f:
np.savez_compressed(
@@ -499,6 +500,7 @@ def local( # noqa: C901
np.array(noise),
np.array(optima),
np.array(performance),
np.array(iteration),
)
with AtomicWriter(model_path, mode="wb", overwrite=True).open() as f:
dill.dump(opt, f)
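The tuner now writes the running iteration counter as a sixth positional array into the compressed data file and increments it once per evaluated point, instead of deriving it from len(X). A hedged sketch of the resulting file layout, not part of this commit's diff (the file name data.npz is a placeholder):

    import numpy as np

    X = [[0.0], [0.5]]
    y = [1.0, -1.0]
    noise = [0.3, 0.2]
    optima = [[0.3]]
    performance = [(5, 30.0, 20.0)]
    iteration = 7  # counts evaluated points, survives later range reductions

    np.savez_compressed(
        "data.npz",
        np.array(X), np.array(y), np.array(noise),
        np.array(optima), np.array(performance), np.array(iteration),
    )

    with np.load("data.npz") as data:
        print(data.files)          # ['arr_0', 'arr_1', ..., 'arr_5']
        print(int(data["arr_5"]))  # 7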
18 changes: 14 additions & 4 deletions tune/local.py
@@ -268,6 +268,10 @@ def initialize_data(
optima = importa["arr_3"].tolist()
if "arr_4" in importa:
performance = importa["arr_4"].tolist()
if "arr_5" in importa:
iteration = importa["arr_5"]
else:
iteration = len(X)
if len(X[0]) != space.n_dims:
raise ValueError(
f"Number of parameters ({len(X[0])}) are not matching "
@@ -292,7 +296,6 @@ def initialize_data(
X = X_reduced
y = y_reduced
noise = noise_reduced
iteration = len(X)
return X, y, noise, iteration, optima, performance


@@ -524,6 +527,7 @@ def plot_results(
plot_path: str,
parameter_names: Sequence[str],
confidence: float = 0.9,
current_iteration: Optional[int] = None,
) -> None:
"""Plot the current results of the optimizer.
@@ -545,12 +549,18 @@ def plot_results(
Names of the parameters to use for plotting.
confidence : float
The confidence level of the normal distribution to plot in the 1d plot.
current_iteration : int, default=None
The current iteration of the optimization process.
If None, the current iteration is assumed to be the number of points collected.
"""
logger = logging.getLogger(LOGGER)
logger.debug("Starting to compute the next plot.")
timestr = time.strftime("%Y%m%d-%H%M%S")
dark_gray = "#36393f"

if current_iteration is None:
current_iteration = len(optimizer.Xi)

# First save the landscape:
save_params = dict()
if optimizer.space.n_dims == 1:
@@ -575,7 +585,7 @@ def plot_results(
plotpath = pathlib.Path(plot_path)
for subdir in ["landscapes", "elo", "optima"]:
(plotpath / subdir).mkdir(parents=True, exist_ok=True)
full_plotpath = plotpath / f"landscapes/landscape-{timestr}-{len(optimizer.Xi)}.png"
full_plotpath = plotpath / f"landscapes/landscape-{timestr}-{current_iteration}.png"
dpi = 150 if optimizer.space.n_dims == 1 else 300
plt.savefig(full_plotpath, dpi=dpi, facecolor=dark_gray, **save_params)
logger.info(f"Saving a plot to {full_plotpath}.")
@@ -588,15 +598,15 @@ def plot_results(
space=optimizer.space,
parameter_names=parameter_names,
)
full_plotpath = plotpath / f"optima/optima-{timestr}-{len(optimizer.Xi)}.png"
full_plotpath = plotpath / f"optima/optima-{timestr}-{current_iteration}.png"
fig.savefig(full_plotpath, dpi=150, facecolor=dark_gray)
plt.close(fig)

# Plot the predicted Elo performance of the optima:
fig, ax = plot_performance(
performance=np.hstack([iterations[:, None], elos]), confidence=confidence
)
full_plotpath = plotpath / f"elo/elo-{timestr}-{len(optimizer.Xi)}.png"
full_plotpath = plotpath / f"elo/elo-{timestr}-{current_iteration}.png"
fig.savefig(full_plotpath, dpi=150, facecolor=dark_gray)
plt.close(fig)

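initialize_data now returns the counter read from the file (falling back to len(X) only for older files without a sixth array), and plot_results names its output files after the passed-in iteration rather than len(optimizer.Xi). A small sketch of the fallback and naming scheme, not part of the diff (the "plots" directory name is illustrative):

    import pathlib
    import time
    from typing import Optional

    def landscape_filename(
        plot_path: str, n_points: int, current_iteration: Optional[int] = None
    ) -> pathlib.Path:
        # Same fallback as plot_results: without an explicit iteration,
        # use the number of points the optimizer has collected so far.
        if current_iteration is None:
            current_iteration = n_points
        timestr = time.strftime("%Y%m%d-%H%M%S")
        plotpath = pathlib.Path(plot_path)
        return plotpath / f"landscapes/landscape-{timestr}-{current_iteration}.png"

    print(landscape_filename("plots", n_points=40, current_iteration=57))
    # plots/landscapes/landscape-<timestamp>-57.png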
9 changes: 5 additions & 4 deletions tune/plots.py
@@ -10,7 +10,7 @@
from skopt.plots import _format_scatter_plot_axes
from skopt.space import Space

from tune.utils import confidence_to_mult, expected_ucb
from tune.utils import confidence_to_mult, expected_ucb, latest_iterations

__all__ = [
"partial_dependence",
@@ -510,9 +510,10 @@ def plot_optima(
- if the number of iterations is not matching the number of optima
- if a fig, but no ax is passed
"""
n_points, n_parameters = optima.shape
if n_points != len(iterations):
if optima.shape[0] != len(iterations):
raise ValueError("Iteration array does not match optima array.")
iterations, optima = latest_iterations(iterations, optima)
n_points, n_parameters = optima.shape
if parameter_names is not None and len(parameter_names) != n_parameters:
raise ValueError(
"Number of parameter names does not match the number of parameters."
@@ -673,7 +674,7 @@ def plot_performance(
- if the number of iterations is not matching the number of optima
- if a fig, but no ax is passed
"""
iterations, elo, elo_std = performance.T
iterations, elo, elo_std = latest_iterations(*performance.T)
if colors is None:
colors = plt.cm.get_cmap("Set3").colors
if fig is None:
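Both plotting helpers now pass their data through latest_iterations before drawing. This matters because, after resuming with reduced parameter ranges, the stored optima and performance arrays can contain several rows for the same iteration number — the source of the plot glitches this commit fixes. A short usage sketch of the unpacking used in plot_performance (it relies on the latest_iterations helper added below in tune/utils.py, with made-up numbers):

    import numpy as np

    from tune.utils import latest_iterations

    # Columns: iteration, estimated Elo, estimated standard deviation.
    performance = np.array([
        [10.0, 12.0, 4.0],
        [20.0, 15.0, 3.5],
        [20.0, 16.0, 3.2],  # duplicate iteration written after a resume
        [30.0, 18.0, 3.0],
    ])

    iterations, elo, elo_std = latest_iterations(*performance.T)
    print(iterations)  # [10. 20. 30.]
    print(elo)         # [12. 16. 18.] -> only the latest row for iteration 20 is kept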
43 changes: 43 additions & 0 deletions tune/utils.py
@@ -1,6 +1,7 @@
import itertools
from collections import namedtuple
from decimal import Decimal
from typing import Tuple

import numpy as np
from scipy.optimize import minimize
@@ -12,6 +13,7 @@
"parse_timecontrol",
"TimeControl",
"TimeControlBag",
"latest_iterations",
]


@@ -138,3 +140,44 @@ def confidence_to_mult(confidence: float) -> float:
if confidence < 0 or confidence > 1:
raise ValueError("Confidence level must be in the range [0, 1].")
return erfinv(confidence) * np.sqrt(2)


def latest_iterations(
iterations: np.ndarray, *arrays: np.ndarray
) -> Tuple[np.ndarray, ...]:
"""Remove rows with duplicate iteration numbers and only keep the latest.
Example
-------
>>> iterations = np.array([1, 2, 3, 3, 5, 6])
>>> arrays = (np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6]), )
>>> latest_iterations(iterations, *arrays)
(array([1, 2, 3, 5, 6]), array([0.1, 0.2, 0.4, 0.5, 0.6]))
Parameters
----------
iterations: np.ndarray
The array containing the iteration numbers.
*arrays: np.ndarray
Additional arrays of the same length which correspond to the rows of data.
Returns
-------
Tuple[np.ndarray, ...]
The arrays with the duplicate rows removed.
"""
# First check that all arrays have the same length
for array in arrays:
if array.shape[0] != iterations.shape[0]:
raise ValueError("Arrays must have the same length.")
unique_iterations = np.unique(iterations)
if len(unique_iterations) == len(iterations):
return (iterations, *arrays)
else:
# Compute the indices of the latest unique iterations:
indices = np.searchsorted(iterations, unique_iterations, side="right") - 1
return (
iterations[indices],
*(a[indices] for a in arrays),
)
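
latest_iterations relies on the iteration numbers being stored in non-decreasing order: np.searchsorted(..., side="right") - 1 then yields, for every unique value, the index of its last occurrence, so the most recently written row wins. A standalone sketch of just that index computation, not part of the diff:

    import numpy as np

    iterations = np.array([1, 2, 3, 3, 5, 6])
    unique_iterations = np.unique(iterations)  # [1 2 3 5 6]

    # Insertion point from the right, minus one, is the index of the
    # last occurrence of each unique value in the sorted input:
    indices = np.searchsorted(iterations, unique_iterations, side="right") - 1
    print(indices)              # [0 1 3 4 5]
    print(iterations[indices])  # [1 2 3 5 6]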
