diff --git a/src/spotoptim/optimizer/steady_state.py b/src/spotoptim/optimizer/steady_state.py
index f113195..d7c60a7 100644
--- a/src/spotoptim/optimizer/steady_state.py
+++ b/src/spotoptim/optimizer/steady_state.py
@@ -322,6 +322,23 @@ def _batch_ready() -> bool:
                             optimizer._update_storage_steady(xi, yi)
                             optimizer.n_iter_ += 1
 
+                        # TensorBoard: this result loop runs only in the
+                        # parent main thread (workers carry tb_writer=None,
+                        # search threads never touch the writer), so the
+                        # single SummaryWriter needs no lock. update_stats()
+                        # refreshes ``counter`` (the TB step) which the
+                        # steady-state loop otherwise never advances; it is
+                        # kept inside the guard so the no-TB path stays
+                        # byte-identical. Mirrors the sequential per-eval
+                        # logging in the main optimize loop.
+                        if optimizer.tb_writer is not None:
+                            optimizer.update_stats()
+                            for xi, yi in zip(X_done, y_done):
+                                optimizer._write_tensorboard_hparams(
+                                    np.asarray(xi, dtype=float), float(yi)
+                                )
+                            optimizer._write_tensorboard_scalars()
+
                         if optimizer.verbose:
                             if optimizer.max_time != np.inf:
                                 prog_val = (
diff --git a/tests/test_tensorboard_parallel.py b/tests/test_tensorboard_parallel.py
index d60faf4..3ba372a 100644
--- a/tests/test_tensorboard_parallel.py
+++ b/tests/test_tensorboard_parallel.py
@@ -2,8 +2,12 @@
 #
 # SPDX-License-Identifier: AGPL-3.0-or-later
 
+import os
+
 import numpy as np
 import pytest
+from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
+
 from spotoptim import SpotOptim
 
 
@@ -16,6 +20,22 @@ def dummy_func(X):
     return np.sum(X**2, axis=1)
 
 
+def _count_scalar_steps(logdir, tag="success_rate"):
+    """Return the number of distinct steps logged for a scalar tag.
+
+    add_scalar events land in the run root; add_hparams events land in
+    per-call subdirectories, so every event file under ``logdir`` is read.
+    """
+    steps = set()
+    for root, _dirs, files in os.walk(str(logdir)):
+        if any(f.startswith("events.out.tfevents") for f in files):
+            acc = EventAccumulator(root)
+            acc.Reload()
+            if tag in acc.Tags().get("scalars", []):
+                steps.update(e.step for e in acc.Scalars(tag))
+    return len(steps)
+
+
 def test_tensorboard_enabled_in_parallel(capsys):
     """Test that TensorBoard is ENABLED when n_jobs > 1 (steady-state)."""
     opt = SpotOptim(
@@ -57,6 +77,56 @@ def test_tensorboard_enabled_in_parallel(capsys):
     opt.optimize()
 
 
+def test_parallel_logs_infill_evals(tmp_path):
+    """Steady-state parallel runs log scalars beyond the initial design.
+
+    Regression test for the gap where workers carry ``tb_writer=None`` and
+    the parent result loop never wrote per-eval scalars, so parallel runs
+    logged only the initial design (one step).
+    """
+    n_initial, max_iter = 4, 12
+    path = str(tmp_path / "tb_parallel")
+    opt = SpotOptim(
+        fun=dummy_func,
+        bounds=[(-5, 5)],
+        n_initial=n_initial,
+        max_iter=max_iter,
+        n_jobs=2,
+        tensorboard_log=True,
+        tensorboard_path=path,
+        seed=0,
+        verbose=False,
+    )
+    opt.optimize()
+
+    assert len(opt.y_) > n_initial, "expected infill evaluations beyond n_initial"
+    # Before the fix, success_rate appeared at a single step (initial design
+    # only). With per-eval parent-side logging it advances with each batch.
+    assert _count_scalar_steps(path, "success_rate") > 1
+
+
+def test_parallel_no_tensorboard_regression(tmp_path, monkeypatch):
+    """tensorboard_log=False parallel run is unaffected: no writer, no runs dir."""
+    # Run from tmp_path so that a relative ``runs`` directory (presumably the
+    # default log location), if created, lands where the last assert looks.
+    monkeypatch.chdir(tmp_path)
+    opt = SpotOptim(
+        fun=dummy_func,
+        bounds=[(-5, 5)],
+        n_initial=4,
+        max_iter=10,
+        n_jobs=2,
+        tensorboard_log=False,
+        seed=0,
+        verbose=False,
+    )
+    res = opt.optimize()
+
+    assert opt.tb_writer is None
+    assert res.success is True
+    assert not (tmp_path / "runs").exists()
+
+
 def test_tensorboard_enabled_in_sequential():
     """Test that TensorBoard IS enabled when n_jobs = 1."""
     opt = SpotOptim(