Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions src/spotoptim/optimizer/steady_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,23 @@ def _batch_ready() -> bool:
optimizer._update_storage_steady(xi, yi)
optimizer.n_iter_ += 1

# TensorBoard: this result loop runs only in the
# parent main thread (workers carry tb_writer=None,
# search threads never touch the writer), so the
# single SummaryWriter needs no lock. update_stats()
# refreshes ``counter`` (the TB step) which the
# steady-state loop otherwise never advances; it is
# kept inside the guard so the no-TB path stays
# byte-identical. Mirrors the sequential per-eval
# logging in the main optimize loop.
if optimizer.tb_writer is not None:
optimizer.update_stats()
for xi, yi in zip(X_done, y_done):
optimizer._write_tensorboard_hparams(
np.asarray(xi, dtype=float), float(yi)
)
optimizer._write_tensorboard_scalars()

if optimizer.verbose:
if optimizer.max_time != np.inf:
prog_val = (
Expand Down
67 changes: 67 additions & 0 deletions tests/test_tensorboard_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@
#
# SPDX-License-Identifier: AGPL-3.0-or-later

import os

import numpy as np
import pytest
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

from spotoptim import SpotOptim


Expand All @@ -16,6 +20,22 @@ def dummy_func(X):
return np.sum(X**2, axis=1)


def _count_scalar_steps(logdir, tag="success_rate"):
    """Count the distinct steps at which a scalar tag was logged.

    The whole tree below ``logdir`` is walked and every directory holding a
    TensorBoard event file is loaded: add_scalar events are written to the
    run root, whereas add_hparams events end up in per-call subdirectories.

    Args:
        logdir: Root directory of the TensorBoard run (converted with ``str``).
        tag: Name of the scalar tag whose steps are counted.

    Returns:
        Number of unique step values recorded for ``tag`` across all event
        directories; 0 when no event file or no matching tag is found.
    """
    seen_steps = set()
    for dirpath, _subdirs, filenames in os.walk(str(logdir)):
        # Only directories that contain at least one event file are loaded.
        has_events = False
        for filename in filenames:
            if filename.startswith("events.out.tfevents"):
                has_events = True
                break
        if not has_events:
            continue

        accumulator = EventAccumulator(dirpath)
        accumulator.Reload()
        scalar_tags = accumulator.Tags().get("scalars", [])
        if tag not in scalar_tags:
            continue

        # A set de-duplicates steps that show up in more than one directory.
        for event in accumulator.Scalars(tag):
            seen_steps.add(event.step)
    return len(seen_steps)


def test_tensorboard_enabled_in_parallel(capsys):
"""Test that TensorBoard is ENABLED when n_jobs > 1 (steady-state)."""
opt = SpotOptim(
Expand Down Expand Up @@ -57,6 +77,53 @@ def test_tensorboard_enabled_in_parallel(capsys):
opt.optimize()


def test_parallel_logs_infill_evals(tmp_path):
    """Parallel steady-state runs must log scalars past the initial design.

    Guards against the earlier gap: workers carry ``tb_writer=None`` and the
    parent result loop wrote no per-eval scalars, so a parallel run logged
    only the initial design (a single step).
    """
    initial_points = 4
    budget = 12
    log_dir = str(tmp_path / "tb_parallel")

    optimizer = SpotOptim(
        fun=dummy_func,
        bounds=[(-5, 5)],
        n_initial=initial_points,
        max_iter=budget,
        n_jobs=2,
        tensorboard_log=True,
        tensorboard_path=log_dir,
        seed=0,
        verbose=False,
    )
    optimizer.optimize()

    evaluations = len(optimizer.y_)
    assert evaluations > initial_points, "expected infill evaluations beyond n_initial"

    # Prior to the fix ``success_rate`` was present at exactly one step (the
    # initial design); parent-side per-eval logging makes it advance per batch.
    logged_steps = _count_scalar_steps(log_dir, "success_rate")
    assert logged_steps > 1


def test_parallel_no_tensorboard_regression(tmp_path, monkeypatch):
    """tensorboard_log=False parallel run is unaffected: no writer, no runs dir.

    The run executes with ``tmp_path`` as the working directory. Previously
    the optimizer was never pointed at ``tmp_path``, so the final check
    inspected a directory the run never used and could not fail.
    """
    # NOTE(review): presumably the default TensorBoard output is a relative
    # ``runs`` directory under the current working directory -- confirm
    # against SpotOptim. Running from tmp_path makes the check below real and
    # keeps any stray output out of the repository checkout.
    monkeypatch.chdir(tmp_path)

    opt = SpotOptim(
        fun=dummy_func,
        bounds=[(-5, 5)],
        n_initial=4,
        max_iter=10,
        n_jobs=2,
        tensorboard_log=False,
        seed=0,
        verbose=False,
    )
    res = opt.optimize()

    # No writer may be attached when logging is disabled.
    assert opt.tb_writer is None
    assert res.success is True
    # With cwd == tmp_path, a created ``runs`` directory would show up here.
    assert not (tmp_path / "runs").exists()


def test_tensorboard_enabled_in_sequential():
"""Test that TensorBoard IS enabled when n_jobs = 1."""
opt = SpotOptim(
Expand Down
Loading