Merge pull request #186 from kiudee/183_graph_bug
Fix performance and optima plot glitches when reducing parameter ranges
kiudee committed Feb 19, 2022
2 parents 4cf3207 + 8e496b3 commit a7ed5eb
Showing 6 changed files with 102 additions and 13 deletions.
9 changes: 6 additions & 3 deletions tests/test_local.py
@@ -159,7 +159,10 @@ def test_initialize_data(tmp_path):
noise_in = np.array([0.3, 0.2, 0.5])
optima_in = np.array([[0.3]])
performance_in = np.array([[2.0, 30.0, 20.0]])
np.savez_compressed(testfile, X_in, y_in, noise_in, optima_in, performance_in)
iteration_in = np.array([5])
np.savez_compressed(
testfile, X_in, y_in, noise_in, optima_in, performance_in, iteration_in
)

# Check if resume=False is recognized correctly
# (outputs should be empty despite data_path being given):
@@ -172,7 +175,7 @@ def test_initialize_data(tmp_path):
X, y, noise, iteration, optima, performance = initialize_data(
parameter_ranges=[(0.0, 1.0)], data_path=testfile, resume=True,
)
assert iteration == 3
assert int(iteration) == 5
assert np.allclose(X, X_in)
assert np.allclose(y, y_in)
assert np.allclose(noise, noise_in)
@@ -183,7 +186,7 @@ def test_initialize_data(tmp_path):
X, y, noise, iteration, _, _ = initialize_data(
parameter_ranges=[(0.0, 0.5)], data_path=testfile, resume=True,
)
assert iteration == 2
assert int(iteration) == 5
assert np.allclose(X, np.array([[0.0], [0.5]]))
assert np.allclose(y, np.array([1.0, -1.0]))
assert np.allclose(noise, np.array([0.3, 0.2]))
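The updated assertions reflect the new behaviour: when resuming, the iteration counter is read back from the data file (here 5) instead of being recomputed from the number of points that survive a reduced parameter range. A standalone sketch of that distinction, not part of this commit's diff (plain NumPy, simplified relative to initialize_data):

    import numpy as np

    # Data saved by an earlier run: three points, iteration counter at 5.
    X = np.array([[0.0], [0.5], [1.0]])
    y = np.array([1.0, -1.0, 0.0])
    noise = np.array([0.3, 0.2, 0.5])
    iteration = np.array([5])

    # Resuming with the reduced range (0.0, 0.5) drops points outside it:
    lower, upper = 0.0, 0.5
    mask = (X[:, 0] >= lower) & (X[:, 0] <= upper)
    X_reduced, y_reduced, noise_reduced = X[mask], y[mask], noise[mask]

    print(len(X_reduced))     # 2 -> only two points remain
    print(int(iteration[0]))  # 5 -> the stored counter is unaffected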
30 changes: 30 additions & 0 deletions tests/test_utils.py
@@ -0,0 +1,30 @@
"""Test utility functions of the project."""
import numpy as np
import pytest
from numpy.testing import assert_allclose

from tune.utils import latest_iterations


def test_latest_iterations():
iterations = np.array([1.0, 2.0, 3.0, 3.0, 4.0])
expected_indices = [0, 1, 3, 4]
result = latest_iterations(iterations)
assert len(result) == 1
assert_allclose(result, (iterations[expected_indices],))
array = np.array([0.0, 0.1, 0.2, 0.3, 0.4])
result = latest_iterations(iterations, array)
assert len(result) == 2
assert_allclose(result[0], iterations[expected_indices])
assert_allclose(result[1], array[expected_indices])

# Test if inconsistent lengths cause an exception
array = np.array([0.0, 0.1])
with pytest.raises(ValueError):
latest_iterations(iterations, array)

# Test an empty input:
iterations = np.array([])
result = latest_iterations(iterations)
assert len(result) == 1
assert_allclose(result, (iterations,))
6 changes: 4 additions & 2 deletions tune/cli.py
@@ -418,7 +418,7 @@ def local( # noqa: C901
confidence=settings.get("confidence", confidence),
)
optima.append(current_optimum)
performance.append([iteration, estimated_elo, estimated_std])
performance.append((int(iteration), estimated_elo, estimated_std))
except ValueError:
pass
plot_every_n = settings.get("plot_every", plot_every)
@@ -432,6 +432,7 @@ def local( # noqa: C901
plot_path=settings.get("plot_path", plot_path),
parameter_names=list(param_ranges.keys()),
confidence=settings.get("confidence", confidence),
current_iteration=iteration,
)

# Ask optimizer for next point:
@@ -489,7 +490,7 @@ def local( # noqa: C901
X.append(point)
y.append(score)
noise.append(error_variance)
iteration = len(X)
iteration += 1

with AtomicWriter(data_path, mode="wb", overwrite=True).open() as f:
np.savez_compressed(
@@ -499,6 +500,7 @@ def local( # noqa: C901
np.array(noise),
np.array(optima),
np.array(performance),
np.array(iteration),
)
with AtomicWriter(model_path, mode="wb", overwrite=True).open() as f:
dill.dump(opt, f)
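The tuner now writes the running iteration counter as a sixth positional array into the compressed data file and increments it once per evaluated point, instead of deriving it from len(X). A hedged sketch of the resulting file layout, not part of this commit's diff (the file name data.npz is a placeholder):

    import numpy as np

    X = [[0.0], [0.5]]
    y = [1.0, -1.0]
    noise = [0.3, 0.2]
    optima = [[0.3]]
    performance = [(5, 30.0, 20.0)]
    iteration = 7  # counts evaluated points, survives later range reductions

    np.savez_compressed(
        "data.npz",
        np.array(X), np.array(y), np.array(noise),
        np.array(optima), np.array(performance), np.array(iteration),
    )

    with np.load("data.npz") as data:
        print(data.files)          # ['arr_0', 'arr_1', ..., 'arr_5']
        print(int(data["arr_5"]))  # 7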
18 changes: 14 additions & 4 deletions tune/local.py
@@ -268,6 +268,10 @@ def initialize_data(
optima = importa["arr_3"].tolist()
if "arr_4" in importa:
performance = importa["arr_4"].tolist()
if "arr_5" in importa:
iteration = importa["arr_5"]
else:
iteration = len(X)
if len(X[0]) != space.n_dims:
raise ValueError(
f"Number of parameters ({len(X[0])}) are not matching "
@@ -292,7 +296,6 @@ def initialize_data(
X = X_reduced
y = y_reduced
noise = noise_reduced
iteration = len(X)
return X, y, noise, iteration, optima, performance


@@ -524,6 +527,7 @@ def plot_results(
plot_path: str,
parameter_names: Sequence[str],
confidence: float = 0.9,
current_iteration: Optional[int] = None,
) -> None:
"""Plot the current results of the optimizer.
@@ -545,12 +549,18 @@ def plot_results(
Names of the parameters to use for plotting.
confidence : float
The confidence level of the normal distribution to plot in the 1d plot.
current_iteration : int, default=None
The current iteration of the optimization process.
If None, the current iteration is assumed to be the number of points collected.
"""
logger = logging.getLogger(LOGGER)
logger.debug("Starting to compute the next plot.")
timestr = time.strftime("%Y%m%d-%H%M%S")
dark_gray = "#36393f"

if current_iteration is None:
current_iteration = len(optimizer.Xi)

# First save the landscape:
save_params = dict()
if optimizer.space.n_dims == 1:
@@ -575,7 +585,7 @@ def plot_results(
plotpath = pathlib.Path(plot_path)
for subdir in ["landscapes", "elo", "optima"]:
(plotpath / subdir).mkdir(parents=True, exist_ok=True)
full_plotpath = plotpath / f"landscapes/landscape-{timestr}-{len(optimizer.Xi)}.png"
full_plotpath = plotpath / f"landscapes/landscape-{timestr}-{current_iteration}.png"
dpi = 150 if optimizer.space.n_dims == 1 else 300
plt.savefig(full_plotpath, dpi=dpi, facecolor=dark_gray, **save_params)
logger.info(f"Saving a plot to {full_plotpath}.")
@@ -588,15 +598,15 @@ def plot_results(
space=optimizer.space,
parameter_names=parameter_names,
)
full_plotpath = plotpath / f"optima/optima-{timestr}-{len(optimizer.Xi)}.png"
full_plotpath = plotpath / f"optima/optima-{timestr}-{current_iteration}.png"
fig.savefig(full_plotpath, dpi=150, facecolor=dark_gray)
plt.close(fig)

# Plot the predicted Elo performance of the optima:
fig, ax = plot_performance(
performance=np.hstack([iterations[:, None], elos]), confidence=confidence
)
full_plotpath = plotpath / f"elo/elo-{timestr}-{len(optimizer.Xi)}.png"
full_plotpath = plotpath / f"elo/elo-{timestr}-{current_iteration}.png"
fig.savefig(full_plotpath, dpi=150, facecolor=dark_gray)
plt.close(fig)

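initialize_data now returns the counter read from the file (falling back to len(X) only for older files without a sixth array), and plot_results names its output files after the passed-in iteration rather than len(optimizer.Xi). A small sketch of the fallback and naming scheme, not part of the diff (the "plots" directory name is illustrative):

    import pathlib
    import time
    from typing import Optional

    def landscape_filename(
        plot_path: str, n_points: int, current_iteration: Optional[int] = None
    ) -> pathlib.Path:
        # Same fallback as plot_results: without an explicit iteration,
        # use the number of points the optimizer has collected so far.
        if current_iteration is None:
            current_iteration = n_points
        timestr = time.strftime("%Y%m%d-%H%M%S")
        plotpath = pathlib.Path(plot_path)
        return plotpath / f"landscapes/landscape-{timestr}-{current_iteration}.png"

    print(landscape_filename("plots", n_points=40, current_iteration=57))
    # plots/landscapes/landscape-<timestamp>-57.png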
9 changes: 5 additions & 4 deletions tune/plots.py
@@ -10,7 +10,7 @@
from skopt.plots import _format_scatter_plot_axes
from skopt.space import Space

from tune.utils import confidence_to_mult, expected_ucb
from tune.utils import confidence_to_mult, expected_ucb, latest_iterations

__all__ = [
"partial_dependence",
@@ -510,9 +510,10 @@ def plot_optima(
- if the number of iterations is not matching the number of optima
- if a fig, but no ax is passed
"""
n_points, n_parameters = optima.shape
if n_points != len(iterations):
if optima.shape[0] != len(iterations):
raise ValueError("Iteration array does not match optima array.")
iterations, optima = latest_iterations(iterations, optima)
n_points, n_parameters = optima.shape
if parameter_names is not None and len(parameter_names) != n_parameters:
raise ValueError(
"Number of parameter names does not match the number of parameters."
@@ -673,7 +674,7 @@ def plot_performance(
- if the number of iterations is not matching the number of optima
- if a fig, but no ax is passed
"""
iterations, elo, elo_std = performance.T
iterations, elo, elo_std = latest_iterations(*performance.T)
if colors is None:
colors = plt.cm.get_cmap("Set3").colors
if fig is None:
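Both plotting helpers now pass their data through latest_iterations before drawing. This matters because, after resuming with reduced parameter ranges, the stored optima and performance arrays can contain several rows for the same iteration number — the source of the plot glitches this commit fixes. A short usage sketch of the unpacking used in plot_performance (it relies on the latest_iterations helper added below in tune/utils.py, with made-up numbers):

    import numpy as np

    from tune.utils import latest_iterations

    # Columns: iteration, estimated Elo, estimated standard deviation.
    performance = np.array([
        [10.0, 12.0, 4.0],
        [20.0, 15.0, 3.5],
        [20.0, 16.0, 3.2],  # duplicate iteration written after a resume
        [30.0, 18.0, 3.0],
    ])

    iterations, elo, elo_std = latest_iterations(*performance.T)
    print(iterations)  # [10. 20. 30.]
    print(elo)         # [12. 16. 18.] -> only the latest row for iteration 20 is kept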
43 changes: 43 additions & 0 deletions tune/utils.py
@@ -1,6 +1,7 @@
import itertools
from collections import namedtuple
from decimal import Decimal
from typing import Tuple

import numpy as np
from scipy.optimize import minimize
@@ -12,6 +13,7 @@
"parse_timecontrol",
"TimeControl",
"TimeControlBag",
"latest_iterations",
]


@@ -138,3 +140,44 @@ def confidence_to_mult(confidence: float) -> float:
if confidence < 0 or confidence > 1:
raise ValueError("Confidence level must be in the range [0, 1].")
return erfinv(confidence) * np.sqrt(2)


def latest_iterations(
iterations: np.ndarray, *arrays: np.ndarray
) -> Tuple[np.ndarray, ...]:
"""Remove rows with duplicate iteration numbers and only keep the latest.
Example
-------
>>> iterations = np.array([1, 2, 3, 3, 5, 6])
>>> arrays = (np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6]), )
>>> latest_iterations(iterations, *arrays)
(array([1, 2, 3, 5, 6]), array([0.1, 0.2, 0.4, 0.5, 0.6]))
Parameters
----------
iterations: np.ndarray
The array containing the iteration numbers.
*arrays: np.ndarray
Additional arrays of the same length which correspond to the rows of data.
Returns
-------
Tuple[np.ndarray, ...]
The arrays with the duplicate rows removed.
"""
# First check that all arrays have the same length
for array in arrays:
if array.shape[0] != iterations.shape[0]:
raise ValueError("Arrays must have the same length.")
unique_iterations = np.unique(iterations)
if len(unique_iterations) == len(iterations):
return (iterations, *arrays)
else:
# Compute the indices of the latest unique iterations:
indices = np.searchsorted(iterations, unique_iterations, side="right") - 1
return (
iterations[indices],
*(a[indices] for a in arrays),
)
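
latest_iterations relies on the iteration numbers being stored in non-decreasing order: np.searchsorted(..., side="right") - 1 then yields, for every unique value, the index of its last occurrence, so the most recently written row wins. A standalone sketch of just that index computation, not part of the diff:

    import numpy as np

    iterations = np.array([1, 2, 3, 3, 5, 6])
    unique_iterations = np.unique(iterations)  # [1 2 3 5 6]

    # Insertion point from the right, minus one, is the index of the
    # last occurrence of each unique value in the sorted input:
    indices = np.searchsorted(iterations, unique_iterations, side="right") - 1
    print(indices)              # [0 1 3 4 5]
    print(iterations[indices])  # [1 2 3 5 6]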
