diff --git a/pyproject.toml b/pyproject.toml index bf287089..82bcf8af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,9 @@ dependencies = [ "funcy", "gto", "ruamel.yaml", - "scmrepo>=3,<4" + "scmrepo>=3,<4", + "psutil", + "pynvml" ] [project.optional-dependencies] @@ -51,7 +53,9 @@ tests = [ "pytest-cov>=3.0.0,<4.0", "pytest-mock>=3.8.2,<4.0", "dvclive[image,plots,markdown]", - "ipython" + "ipython", + "pytest_voluptuous", + "dpath" ] dev = [ "dvclive[all,tests]", diff --git a/src/dvclive/live.py b/src/dvclive/live.py index 0cc0b17a..0116a3f5 100644 --- a/src/dvclive/live.py +++ b/src/dvclive/live.py @@ -7,9 +7,11 @@ import os import shutil import tempfile + from pathlib import Path, PurePath from typing import Any, Dict, List, Optional, Set, Tuple, Union, TYPE_CHECKING, Literal + if TYPE_CHECKING: import numpy as np import pandas as pd @@ -41,6 +43,7 @@ from .report import BLANK_NOTEBOOK_REPORT, make_report from .serialize import dump_json, dump_yaml, load_yaml from .studio import get_dvc_studio_config, post_to_studio +from .monitor_system import _SystemMonitor from .utils import ( StrPath, catch_and_warn, @@ -81,6 +84,7 @@ def __init__( cache_images: bool = False, exp_name: Optional[str] = None, exp_message: Optional[str] = None, + monitor_system: bool = False, ): """ Initializes a DVCLive logger. A `Live()` instance is required in order to log @@ -119,6 +123,8 @@ def __init__( provided string will be passed to `dvc exp save --message`. If DVCLive is used inside `dvc exp run`, the option will be ignored, use `dvc exp run --message` instead. + monitor_system (bool): if `True`, DVCLive will monitor GPU, CPU, ram, and + disk usage. Defaults to `False`. 
""" self.summary: Dict[str, Any] = {} @@ -165,6 +171,10 @@ def __init__( self._dvc_studio_config: Dict[str, Any] = {} self._init_studio() + self._system_monitor: Optional[_SystemMonitor] = None # Monitoring thread + if monitor_system: + self.monitor_system() + def _init_resume(self): self._read_params() self.summary = self.read_latest() @@ -370,6 +380,43 @@ def step(self, value: int) -> None: self._step = value logger.debug(f"Step: {self.step}") + def monitor_system( + self, + interval: float = 0.05, # seconds + num_samples: int = 20, + directories_to_monitor: Optional[Dict[str, str]] = None, + ) -> None: + """Monitor GPU, CPU, ram, and disk resources and log them to DVC Live. + + Args: + interval (float): the time interval between samples in seconds. To keep the + sampling interval small, the maximum value allowed is 0.1 seconds. + Default to 0.05. + num_samples (int): the number of samples to collect before the aggregation. + The value should be between 1 and 30 samples. Default to 20. + directories_to_monitor (Optional[Dict[str, str]]): a dictionary with the + information about which directories to monitor. The `key` would be the + name of the metric and the `value` is the path to the directory. + The metric tracked concerns the partition that contains the directory. + Default to `{"main": "/"}`. + + Raises: + ValueError: if the keys in `directories_to_monitor` contains invalid + characters as defined by `os.path.normpath`. 
+ """ + if directories_to_monitor is None: + directories_to_monitor = {"main": "/"} + + if self._system_monitor is not None: + self._system_monitor.end() + + self._system_monitor = _SystemMonitor( + live=self, + interval=interval, + num_samples=num_samples, + directories_to_monitor=directories_to_monitor, + ) + def sync(self): self.make_summary() @@ -857,6 +904,11 @@ def end(self): # If next_step called before end, don't want to update step number if "step" in self.summary: self.step = self.summary["step"] + + # Kill threads that monitor the system metrics + if self._system_monitor is not None: + self._system_monitor.end() + self.sync() if self._inside_dvc_exp and self._dvc_repo: diff --git a/src/dvclive/monitor_system.py b/src/dvclive/monitor_system.py new file mode 100644 index 00000000..f6d26d83 --- /dev/null +++ b/src/dvclive/monitor_system.py @@ -0,0 +1,240 @@ +import logging +import os +from typing import Dict, Union, Tuple + +import psutil +from statistics import mean +from threading import Event, Thread +from funcy import merge_with + +try: + from pynvml import ( + nvmlInit, + nvmlDeviceGetCount, + nvmlDeviceGetHandleByIndex, + nvmlDeviceGetMemoryInfo, + nvmlDeviceGetUtilizationRates, + nvmlShutdown, + NVMLError, + ) + + GPU_AVAILABLE = True +except ImportError: + GPU_AVAILABLE = False + +logger = logging.getLogger("dvclive") +GIGABYTES_DIVIDER = 1024.0**3 + +MINIMUM_CPU_USAGE_TO_BE_ACTIVE = 20 + +METRIC_CPU_COUNT = "system/cpu/count" +METRIC_CPU_USAGE_PERCENT = "system/cpu/usage (%)" +METRIC_CPU_PARALLELIZATION_PERCENT = "system/cpu/parallelization (%)" + +METRIC_RAM_USAGE_PERCENT = "system/ram/usage (%)" +METRIC_RAM_USAGE_GB = "system/ram/usage (GB)" +METRIC_RAM_TOTAL_GB = "system/ram/total (GB)" + +METRIC_DISK_USAGE_PERCENT = "system/disk/usage (%)" +METRIC_DISK_USAGE_GB = "system/disk/usage (GB)" +METRIC_DISK_TOTAL_GB = "system/disk/total (GB)" + +METRIC_GPU_COUNT = "system/gpu/count" +METRIC_GPU_USAGE_PERCENT = "system/gpu/usage (%)" 
# Per-GPU memory metric names (the remaining metric-name constants of this module).
METRIC_VRAM_USAGE_PERCENT = "system/vram/usage (%)"
METRIC_VRAM_USAGE_GB = "system/vram/usage (GB)"
METRIC_VRAM_TOTAL_GB = "system/vram/total (GB)"


class _SystemMonitor:
    """Sample GPU, CPU, RAM and disk metrics on a background thread.

    Every `num_samples` samples (taken `interval` seconds apart) the per-metric
    averages are logged through `live.log_metric`. The thread is started by
    `__init__` and stopped by `end()`.
    """

    # Metrics whose name starts with one of these prefixes are logged with
    # `plot=None` instead of `plot=True`.
    _plot_blacklist_prefix: Tuple[str, ...] = (
        METRIC_CPU_COUNT,
        METRIC_RAM_TOTAL_GB,
        METRIC_DISK_TOTAL_GB,
        METRIC_GPU_COUNT,
        METRIC_VRAM_TOTAL_GB,
    )

    def __init__(
        self,
        live,
        interval: float,  # seconds
        num_samples: int,
        directories_to_monitor: Dict[str, str],
    ):
        """Validate the settings and start the monitoring thread.

        Args:
            live: object exposing `log_metric`; receives the averaged metrics.
            interval: seconds between two samples, capped at 0.1.
            num_samples: samples per aggregation, clamped to [1, 30].
            directories_to_monitor: metric name -> directory path.

        Raises:
            ValueError: if a key of `directories_to_monitor` is not normalized
                according to `os.path.normpath`.
        """
        self._live = live
        self._interval = self._check_interval(interval, max_interval=0.1)
        self._num_samples = self._check_num_samples(
            num_samples, min_num_samples=1, max_num_samples=30
        )
        self._disks_to_monitor = self._check_directories_to_monitor(
            directories_to_monitor
        )
        # Each kind of failure is reported once, not on every sample.
        self._warn_cpu_problem = True
        self._warn_gpu_problem = True
        self._warn_disk_doesnt_exist: Dict[str, bool] = {}
        self._metrics: Dict[str, Union[float, int]] = {}

        self._shutdown_event = Event()
        # Daemon thread: a caller that never reaches `end()` must not keep the
        # interpreter alive forever. The handle is kept so `end()` can join it.
        self._thread = Thread(target=self._monitoring_loop, daemon=True)
        self._thread.start()

    def _check_interval(self, interval: float, max_interval: float) -> float:
        """Return `interval`, capped (with a warning) at `max_interval`."""
        if interval > max_interval:
            logger.warning(
                f"System monitoring `interval` should be less than {max_interval} "
                f"seconds. Setting `interval` to {max_interval} seconds."
            )
            return max_interval
        return interval

    def _check_num_samples(
        self, num_samples: int, min_num_samples: int, max_num_samples: int
    ) -> int:
        """Return `num_samples` clamped to the inclusive range given.

        A warning is logged only when the value actually had to be clamped;
        the bounds themselves are valid values.
        """
        if min_num_samples <= num_samples <= max_num_samples:
            return num_samples
        clamped = max(min(num_samples, max_num_samples), min_num_samples)
        logger.warning(
            f"System monitoring `num_samples` should be between {min_num_samples} "
            f"and {max_num_samples}. Setting `num_samples` to {clamped}."
        )
        return clamped

    def _check_directories_to_monitor(
        self, directories_to_monitor: Dict[str, str]
    ) -> Dict[str, str]:
        """Return a copy of the mapping after validating its keys.

        Raises:
            ValueError: if a key differs from its `os.path.normpath` form.
        """
        disks_to_monitor = {}
        for disk_name, disk_path in directories_to_monitor.items():
            # The name becomes the last component of the metric name.
            if disk_name != os.path.normpath(disk_name):
                raise ValueError(  # noqa: TRY003
                    "Keys for `directories_to_monitor` should be a valid name"
                    f", but got '{disk_name}'."
                )
            disks_to_monitor[disk_name] = disk_path
        return disks_to_monitor

    @staticmethod
    def _gpu_error_types() -> Tuple[type, ...]:
        """Return the exception types that signal an NVML failure.

        `NVMLError` is only bound when the `pynvml` import succeeded, so it
        must not be named unconditionally in an `except` clause.
        """
        return (NVMLError,) if GPU_AVAILABLE else ()

    def _sample_metrics(self) -> Dict[str, Union[float, int]]:
        """Take one sample, skipping any subsystem that fails.

        Each collector is guarded separately so that, for example, an NVML
        failure does not discard the CPU, RAM and disk readings.
        """
        sample: Dict[str, Union[float, int]] = {}
        collectors = (
            self._get_gpu_info,
            self._get_cpu_info,
            self._get_ram_info,
            self._get_disk_info,
        )
        for collect in collectors:
            try:
                sample.update(collect())
            except psutil.Error:
                if self._warn_cpu_problem:
                    logger.exception("Failed to monitor CPU metrics")
                    self._warn_cpu_problem = False
            except self._gpu_error_types():
                if self._warn_gpu_problem:
                    logger.exception("Failed to monitor GPU metrics")
                    self._warn_gpu_problem = False
        return sample

    def _monitoring_loop(self):
        """Thread body: aggregate samples and log their averages until stopped."""
        while not self._shutdown_event.is_set():
            self._metrics = {}
            # Number of samples that contributed to each metric. This can be
            # lower than `_num_samples` after a failure or an early shutdown.
            counts: Dict[str, int] = {}
            for _ in range(self._num_samples):
                sample = self._sample_metrics()
                self._metrics = merge_with(sum, self._metrics, sample)
                for name in sample:
                    counts[name] = counts.get(name, 0) + 1
                # `wait` doubles as an interruptible sleep.
                self._shutdown_event.wait(self._interval)
                if self._shutdown_event.is_set():
                    break
            for name, total in self._metrics.items():
                blacklisted = name.startswith(self._plot_blacklist_prefix)
                self._live.log_metric(
                    name,
                    total / counts[name],
                    timestamp=True,
                    plot=None if blacklisted else True,
                )

    def _get_metrics(self) -> Dict[str, Union[float, int]]:
        """Return one unguarded sample of every metric (GPU, CPU, RAM, disk)."""
        return {
            **self._get_gpu_info(),
            **self._get_cpu_info(),
            **self._get_ram_info(),
            **self._get_disk_info(),
        }

    def _get_ram_info(self) -> Dict[str, Union[float, int]]:
        """Return RAM usage (percent and GB) and total RAM (GB)."""
        ram_info = psutil.virtual_memory()
        return {
            METRIC_RAM_USAGE_PERCENT: ram_info.percent,
            METRIC_RAM_USAGE_GB: ram_info.used / GIGABYTES_DIVIDER,
            METRIC_RAM_TOTAL_GB: ram_info.total / GIGABYTES_DIVIDER,
        }

    def _get_cpu_info(self) -> Dict[str, Union[float, int]]:
        """Return CPU count, mean per-CPU usage and the share of active CPUs.

        A CPU counts as active when its usage is at least
        `MINIMUM_CPU_USAGE_TO_BE_ACTIVE` percent.
        """
        cpus_percent = psutil.cpu_percent(percpu=True)
        # `psutil.cpu_count()` may return None; fall back to the number of
        # per-CPU readings so the division below stays defined.
        num_cpus = psutil.cpu_count() or len(cpus_percent)
        num_active = sum(
            1 for percent in cpus_percent if percent >= MINIMUM_CPU_USAGE_TO_BE_ACTIVE
        )
        return {
            METRIC_CPU_COUNT: num_cpus,
            METRIC_CPU_USAGE_PERCENT: mean(cpus_percent),
            METRIC_CPU_PARALLELIZATION_PERCENT: num_active * 100 / num_cpus,
        }

    def _get_disk_info(self) -> Dict[str, Union[float, int]]:
        """Return usage (percent and GB) and total size (GB) per monitored directory.

        A directory for which `psutil.disk_usage` raises `OSError` is skipped
        and reported once.
        """
        result = {}
        for disk_name, disk_path in self._disks_to_monitor.items():
            try:
                disk_info = psutil.disk_usage(disk_path)
            except OSError:
                if self._warn_disk_doesnt_exist.get(disk_name, True):
                    logger.warning(
                        f"Couldn't find directory '{disk_path}', ignoring it."
                    )
                    self._warn_disk_doesnt_exist[disk_name] = False
                continue
            disk_metrics = {
                f"{METRIC_DISK_USAGE_PERCENT}/{disk_name}": disk_info.percent,
                f"{METRIC_DISK_USAGE_GB}/{disk_name}": disk_info.used
                / GIGABYTES_DIVIDER,
                f"{METRIC_DISK_TOTAL_GB}/{disk_name}": disk_info.total
                / GIGABYTES_DIVIDER,
            }
            result.update({k.rstrip("/"): v for k, v in disk_metrics.items()})
        return result

    def _get_gpu_info(self) -> Dict[str, Union[float, int]]:
        """Return the GPU count plus per-GPU usage and memory metrics.

        Returns an empty dict when `GPU_AVAILABLE` is false.
        """
        if not GPU_AVAILABLE:
            return {}

        nvmlInit()
        try:
            num_gpus = nvmlDeviceGetCount()
            gpu_metrics = {
                METRIC_GPU_COUNT: num_gpus,
            }

            for gpu_idx in range(num_gpus):
                gpu_handle = nvmlDeviceGetHandleByIndex(gpu_idx)
                memory_info = nvmlDeviceGetMemoryInfo(gpu_handle)
                usage_info = nvmlDeviceGetUtilizationRates(gpu_handle)

                gpu_metrics.update(
                    {
                        # NOTE(review): this is the ratio of the `memory` rate
                        # to the `gpu` rate, not the `gpu` rate itself.
                        # Confirm this is the intended "usage (%)".
                        f"{METRIC_GPU_USAGE_PERCENT}/{gpu_idx}": (
                            100 * usage_info.memory / usage_info.gpu
                            if usage_info.gpu
                            else 0
                        ),
                        f"{METRIC_VRAM_USAGE_PERCENT}/{gpu_idx}": (
                            100 * memory_info.used / memory_info.total
                        ),
                        f"{METRIC_VRAM_USAGE_GB}/{gpu_idx}": (
                            memory_info.used / GIGABYTES_DIVIDER
                        ),
                        f"{METRIC_VRAM_TOTAL_GB}/{gpu_idx}": (
                            memory_info.total / GIGABYTES_DIVIDER
                        ),
                    }
                )
        finally:
            # Pair every successful `nvmlInit` with a shutdown, even when a
            # query above raises.
            nvmlShutdown()
        return gpu_metrics

    def end(self):
        """Signal the monitoring thread to stop and wait until it has exited."""
        # Local import: the module imports only `Event` and `Thread`.
        from threading import current_thread

        self._shutdown_event.set()
        # Wait for the final (partial) aggregate so that no `log_metric` call
        # is still in flight when the caller proceeds. Joining the current
        # thread would raise RuntimeError, hence the guard.
        if self._thread is not current_thread():
            self._thread.join()
import time
from pathlib import Path
import pytest

import dpath
from pytest_voluptuous import S

from dvclive import Live
from dvclive.monitor_system import (
    _SystemMonitor,
    METRIC_CPU_COUNT,
    METRIC_CPU_USAGE_PERCENT,
    METRIC_CPU_PARALLELIZATION_PERCENT,
    METRIC_RAM_USAGE_PERCENT,
    METRIC_RAM_USAGE_GB,
    METRIC_RAM_TOTAL_GB,
    METRIC_DISK_USAGE_PERCENT,
    METRIC_DISK_USAGE_GB,
    METRIC_DISK_TOTAL_GB,
    METRIC_GPU_COUNT,
    METRIC_GPU_USAGE_PERCENT,
    METRIC_VRAM_USAGE_PERCENT,
    METRIC_VRAM_USAGE_GB,
    METRIC_VRAM_TOTAL_GB,
    GIGABYTES_DIVIDER,
)
from dvclive.utils import parse_metrics


def mock_psutil_cpu(mocker):
    """Patch psutil so that 6 CPUs are reported, 3 of them at >= 40% usage."""
    mocker.patch(
        "dvclive.monitor_system.psutil.cpu_percent",
        return_value=[10, 10, 10, 40, 50, 60],
    )
    mocker.patch("dvclive.monitor_system.psutil.cpu_count", return_value=6)


def mock_psutil_ram(mocker):
    """Patch psutil so that 2 GB of 4 GB RAM (50%) are reported as used."""
    mocked_ram = mocker.MagicMock()
    mocked_ram.percent = 50
    mocked_ram.used = 2 * GIGABYTES_DIVIDER
    mocked_ram.total = 4 * GIGABYTES_DIVIDER
    mocker.patch(
        "dvclive.monitor_system.psutil.virtual_memory", return_value=mocked_ram
    )


def _make_disk_usage(mocker):
    """Return a fake disk-usage result: 16 GB of 32 GB (50%) used."""
    mocked_disk = mocker.MagicMock()
    mocked_disk.percent = 50
    mocked_disk.used = 16 * GIGABYTES_DIVIDER
    mocked_disk.total = 32 * GIGABYTES_DIVIDER
    return mocked_disk


def mock_psutil_disk(mocker):
    """Patch psutil so that every path reports the same fake disk usage."""
    mocker.patch(
        "dvclive.monitor_system.psutil.disk_usage",
        return_value=_make_disk_usage(mocker),
    )


def mock_psutil_disk_with_oserror(mocker, existing_paths=("/",)):
    """Patch psutil so that only `existing_paths` report disk usage.

    Any other path raises `OSError`. The outcome depends on the path, not on
    the call order: the monitoring thread calls `disk_usage` concurrently with
    the test, so a positional `side_effect` list would be consumed in an
    unpredictable order and could run out.
    """
    mocked_disk = _make_disk_usage(mocker)

    def _disk_usage(path):
        if path in existing_paths:
            return mocked_disk
        raise OSError

    mocker.patch(
        "dvclive.monitor_system.psutil.disk_usage",
        side_effect=_disk_usage,
    )


def mock_pynvml(mocker, num_gpus=2):
    """Patch the NVML bindings to report `num_gpus` identical GPUs.

    `GPU_AVAILABLE` is patched to `bool(num_gpus)`.
    """
    prefix = "dvclive.monitor_system"
    mocker.patch(f"{prefix}.GPU_AVAILABLE", bool(num_gpus))
    mocker.patch(f"{prefix}.nvmlDeviceGetCount", return_value=num_gpus)
    mocker.patch(f"{prefix}.nvmlInit", return_value=None)
    mocker.patch(f"{prefix}.nvmlShutdown", return_value=None)
    mocker.patch(f"{prefix}.nvmlDeviceGetHandleByIndex", return_value=None)

    vram_info = mocker.MagicMock()
    vram_info.used = 3 * 1024**3
    vram_info.total = 6 * 1024**3

    gpu_usage = mocker.MagicMock()
    gpu_usage.memory = 5
    gpu_usage.gpu = 10

    mocker.patch(f"{prefix}.nvmlDeviceGetMemoryInfo", return_value=vram_info)
    mocker.patch(f"{prefix}.nvmlDeviceGetUtilizationRates", return_value=gpu_usage)


def _mock_system(mocker, num_gpus=0):
    """Apply the CPU, RAM, disk and NVML mocks shared by most tests."""
    mock_psutil_cpu(mocker)
    mock_psutil_ram(mocker)
    mock_psutil_disk(mocker)
    mock_pynvml(mocker, num_gpus=num_gpus)


def _wait_for_system_metrics(live):
    """Block until the first aggregate has been logged to `live.summary`.

    METRIC_DISK_TOTAL_GB is the last metric to be logged. The loop has no
    timeout of its own; callers rely on `pytest.mark.timeout`.
    """
    while len(dpath.search(live.summary, METRIC_DISK_TOTAL_GB)) == 0:
        time.sleep(0.001)


@pytest.fixture()
def cpu_metrics():
    """Expected nested summary for the mocked CPU, RAM and disk values."""
    content = {
        METRIC_CPU_COUNT: 6,
        METRIC_CPU_USAGE_PERCENT: 30.0,
        METRIC_CPU_PARALLELIZATION_PERCENT: 50.0,
        METRIC_RAM_USAGE_PERCENT: 50.0,
        METRIC_RAM_USAGE_GB: 2.0,
        METRIC_RAM_TOTAL_GB: 4.0,
        f"{METRIC_DISK_USAGE_PERCENT}/main": 50.0,
        f"{METRIC_DISK_USAGE_GB}/main": 16.0,
        f"{METRIC_DISK_TOTAL_GB}/main": 32.0,
    }
    result = {}
    for name, value in content.items():
        dpath.new(result, name, value)
    return result


def _timeserie_schema(name, value):
    """Schema for the rows of one timeseries file, all at step 0."""
    return [{name: str(value), "timestamp": str, "step": "0"}]


@pytest.fixture()
def cpu_timeseries():
    """Expected timeseries files for the mocked CPU, RAM and disk values."""
    return {
        f"{METRIC_CPU_USAGE_PERCENT}.tsv": _timeserie_schema(
            METRIC_CPU_USAGE_PERCENT.split("/")[-1], 30.0
        ),
        f"{METRIC_CPU_PARALLELIZATION_PERCENT}.tsv": _timeserie_schema(
            METRIC_CPU_PARALLELIZATION_PERCENT.split("/")[-1], 50.0
        ),
        f"{METRIC_RAM_USAGE_PERCENT}.tsv": _timeserie_schema(
            METRIC_RAM_USAGE_PERCENT.split("/")[-1], 50.0
        ),
        f"{METRIC_RAM_USAGE_GB}.tsv": _timeserie_schema(
            METRIC_RAM_USAGE_GB.split("/")[-1], 2.0
        ),
        f"{METRIC_DISK_USAGE_PERCENT}/main.tsv": _timeserie_schema("main", 50.0),
        f"{METRIC_DISK_USAGE_GB}/main.tsv": _timeserie_schema("main", 16.0),
    }


@pytest.fixture()
def gpu_timeseries():
    """Expected timeseries files for the two mocked GPUs."""
    return {
        f"{METRIC_GPU_USAGE_PERCENT}/0.tsv": _timeserie_schema("0", 50.0),
        f"{METRIC_GPU_USAGE_PERCENT}/1.tsv": _timeserie_schema("1", 50.0),
        f"{METRIC_VRAM_USAGE_PERCENT}/0.tsv": _timeserie_schema("0", 50.0),
        f"{METRIC_VRAM_USAGE_PERCENT}/1.tsv": _timeserie_schema("1", 50.0),
        f"{METRIC_VRAM_USAGE_GB}/0.tsv": _timeserie_schema("0", 3.0),
        f"{METRIC_VRAM_USAGE_GB}/1.tsv": _timeserie_schema("1", 3.0),
    }


def test_monitor_system_is_false(tmp_dir, mocker):
    _mock_system(mocker, num_gpus=0)
    system_monitor_mock = mocker.patch(
        "dvclive.live._SystemMonitor", spec=_SystemMonitor
    )
    Live(tmp_dir, save_dvc_exp=False, monitor_system=False)
    system_monitor_mock.assert_not_called()


def test_monitor_system_is_true(tmp_dir, mocker):
    _mock_system(mocker, num_gpus=0)
    system_monitor_mock = mocker.patch(
        "dvclive.live._SystemMonitor", spec=_SystemMonitor
    )

    Live(tmp_dir, save_dvc_exp=False, monitor_system=True)
    system_monitor_mock.assert_called_once()


def test_all_threads_close(tmp_dir, mocker):
    _mock_system(mocker, num_gpus=0)

    with Live(
        tmp_dir,
        save_dvc_exp=False,
        monitor_system=True,
    ) as live:
        first_end_spy = mocker.spy(live._system_monitor, "end")
        first_end_spy.assert_not_called()

        # Restarting the monitor must stop the previous one.
        live.monitor_system(interval=0.01)
        first_end_spy.assert_called_once()

        second_end_spy = mocker.spy(live._system_monitor, "end")

    # check the monitoring thread is stopped
    second_end_spy.assert_called_once()


def test_ignore_non_existent_directories(tmp_dir, mocker):
    mock_psutil_cpu(mocker)
    mock_psutil_ram(mocker)
    mock_psutil_disk_with_oserror(mocker)
    mock_pynvml(mocker, num_gpus=0)
    with Live(
        tmp_dir,
        save_dvc_exp=False,
        monitor_system=False,
    ) as live:
        non_existent_disk = "/non-existent"
        system_monitor = _SystemMonitor(
            live=live,
            interval=0.1,
            num_samples=4,
            directories_to_monitor={"main": "/", "non-existent": non_existent_disk},
        )
        metrics = system_monitor._get_metrics()
        system_monitor.end()

    assert not Path(non_existent_disk).exists()

    assert f"{METRIC_DISK_USAGE_PERCENT}/non-existent" not in metrics
    assert f"{METRIC_DISK_USAGE_GB}/non-existent" not in metrics
    assert f"{METRIC_DISK_TOTAL_GB}/non-existent" not in metrics


@pytest.mark.timeout(2)
def test_monitor_system_metrics(tmp_dir, cpu_metrics, mocker):
    _mock_system(mocker, num_gpus=0)
    with Live(
        tmp_dir,
        save_dvc_exp=False,
        monitor_system=False,
    ) as live:
        live.monitor_system(interval=0.05, num_samples=4)
        _wait_for_system_metrics(live)
        live.next_step()

        _, latest = parse_metrics(live)

    schema = {"step": 0, **cpu_metrics}
    assert latest == S(schema)


@pytest.mark.timeout(2)
def test_monitor_system_timeseries(tmp_dir, cpu_timeseries, mocker):
    _mock_system(mocker, num_gpus=0)
    with Live(
        tmp_dir,
        save_dvc_exp=False,
        monitor_system=False,
    ) as live:
        live.monitor_system(interval=0.05, num_samples=4)
        _wait_for_system_metrics(live)
        live.next_step()

        timeseries, _ = parse_metrics(live)

    prefix = Path(tmp_dir) / "plots/metrics"
    schema = {str(prefix / name): value for name, value in cpu_timeseries.items()}
    assert timeseries == S(schema)


@pytest.mark.timeout(2)
def test_monitor_system_metrics_with_gpu(tmp_dir, cpu_metrics, mocker):
    _mock_system(mocker, num_gpus=2)
    with Live(
        tmp_dir,
        save_dvc_exp=False,
        monitor_system=False,
    ) as live:
        live.monitor_system(interval=0.05, num_samples=4)
        _wait_for_system_metrics(live)
        live.next_step()

        _, latest = parse_metrics(live)

    schema = {"step": 0, **cpu_metrics}
    gpu_content = {
        METRIC_GPU_COUNT: 2,
        f"{METRIC_GPU_USAGE_PERCENT}": {"0": 50.0, "1": 50.0},
        f"{METRIC_VRAM_USAGE_PERCENT}": {"0": 50.0, "1": 50.0},
        f"{METRIC_VRAM_USAGE_GB}": {"0": 3.0, "1": 3.0},
        f"{METRIC_VRAM_TOTAL_GB}": {"0": 6.0, "1": 6.0},
    }
    for name, value in gpu_content.items():
        dpath.new(schema, name, value)
    assert latest == S(schema)


@pytest.mark.timeout(2)
def test_monitor_system_timeseries_with_gpu(
    tmp_dir, cpu_timeseries, gpu_timeseries, mocker
):
    _mock_system(mocker, num_gpus=2)
    with Live(
        tmp_dir,
        save_dvc_exp=False,
        monitor_system=False,
    ) as live:
        live.monitor_system(interval=0.05, num_samples=4)
        _wait_for_system_metrics(live)
        live.next_step()

        timeseries, _ = parse_metrics(live)

    prefix = Path(tmp_dir) / "plots/metrics"
    schema = {str(prefix / name): value for name, value in cpu_timeseries.items()}
    schema.update({str(prefix / name): value for name, value in gpu_timeseries.items()})
    assert timeseries == S(schema)