diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 620488301a..2272188618 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,6 +15,7 @@ New Metrics New Trackers ~~~~~~~~~~~~ - Console Tracker (https://github.com/pykeen/pykeen/pull/440) +- Tensorboard Tracker (https://github.com/pykeen/pykeen/pull/416; thanks @sbonner0) New Models ~~~~~~~~~~ diff --git a/README.md b/README.md index 86957e1b3f..a52caf9cf1 100644 --- a/README.md +++ b/README.md @@ -244,16 +244,17 @@ or the URL for the dataset if neither of the first two are available. | Mean Reciprocal Rank (MRR) | The inverse of the harmonic mean over all ranks, on (0, 1]. Higher is better. | | Median Rank | The median over all ranks, on [1, inf). Lower is better. | -### Trackers (6) - -| Name | Reference | Description | -|---------|---------------------------------------------------------------------------------------------------------------------------------|------------------------------------------| -| console | [`pykeen.trackers.ConsoleResultTracker`](https://pykeen.readthedocs.io/en/latest/api/pykeen.trackers.ConsoleResultTracker.html) | A class that directly prints to console. | -| csv | [`pykeen.trackers.CSVResultTracker`](https://pykeen.readthedocs.io/en/latest/api/pykeen.trackers.CSVResultTracker.html) | Tracking results to a CSV file. | -| json | [`pykeen.trackers.JSONResultTracker`](https://pykeen.readthedocs.io/en/latest/api/pykeen.trackers.JSONResultTracker.html) | Tracking results to a JSON lines file. | -| mlflow | [`pykeen.trackers.MLFlowResultTracker`](https://pykeen.readthedocs.io/en/latest/api/pykeen.trackers.MLFlowResultTracker.html) | A tracker for MLflow. | -| neptune | [`pykeen.trackers.NeptuneResultTracker`](https://pykeen.readthedocs.io/en/latest/api/pykeen.trackers.NeptuneResultTracker.html) | A tracker for Neptune.ai. | -| wandb | [`pykeen.trackers.WANDBResultTracker`](https://pykeen.readthedocs.io/en/latest/api/pykeen.trackers.WANDBResultTracker.html) | A tracker for Weights and Biases. 
| +### Trackers (7) + +| Name | Reference | Description | +|-------------|-----------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------| +| console | [`pykeen.trackers.ConsoleResultTracker`](https://pykeen.readthedocs.io/en/latest/api/pykeen.trackers.ConsoleResultTracker.html) | A class that directly prints to console. | +| csv | [`pykeen.trackers.CSVResultTracker`](https://pykeen.readthedocs.io/en/latest/api/pykeen.trackers.CSVResultTracker.html) | Tracking results to a CSV file. | +| json | [`pykeen.trackers.JSONResultTracker`](https://pykeen.readthedocs.io/en/latest/api/pykeen.trackers.JSONResultTracker.html) | Tracking results to a JSON lines file. | +| mlflow | [`pykeen.trackers.MLFlowResultTracker`](https://pykeen.readthedocs.io/en/latest/api/pykeen.trackers.MLFlowResultTracker.html) | A tracker for MLflow. | +| neptune | [`pykeen.trackers.NeptuneResultTracker`](https://pykeen.readthedocs.io/en/latest/api/pykeen.trackers.NeptuneResultTracker.html) | A tracker for Neptune.ai. | +| tensorboard | [`pykeen.trackers.TensorBoardResultTracker`](https://pykeen.readthedocs.io/en/latest/api/pykeen.trackers.TensorBoardResultTracker.html) | A tracker for TensorBoard. | +| wandb | [`pykeen.trackers.WANDBResultTracker`](https://pykeen.readthedocs.io/en/latest/api/pykeen.trackers.WANDBResultTracker.html) | A tracker for Weights and Biases. | ## Hyper-parameter Optimization diff --git a/docs/source/installation.rst b/docs/source/installation.rst index f01fe37f72..7029f7704c 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -108,12 +108,13 @@ of the ``setup.cfg``. They can be included with installation using the bracket n ``pip install pykeen[docs]`` or ``pip install -e .[docs]``. Several can be listed, comma-delimited like in ``pip install pykeen[docs,plotting]``. 
-============== ======================================================= -Name Description -============== ======================================================= -``plotting`` Plotting with ``seaborn`` and generation of word clouds -``mlflow`` Tracking of results with ``mlflow`` -``wandb`` Tracking of results with ``wandb`` -``docs`` Building of the documentation -``templating`` Building of templated documentation, like the README -============== ======================================================= +=============== ============================================================================== +Name Description +=============== ============================================================================== +``plotting`` Plotting with ``seaborn`` and generation of word clouds +``mlflow`` Tracking of results with ``mlflow`` +``wandb`` Tracking of results with ``wandb`` +``tensorboard`` Tracking of results with :mod:`tensorboard` via :mod:`torch.utils.tensorboard` +``docs`` Building of the documentation +``templating`` Building of templated documentation, like the README +=============== ============================================================================== diff --git a/docs/source/tutorial/trackers/index.rst b/docs/source/tutorial/trackers/index.rst index 252516c31d..52d74064de 100644 --- a/docs/source/tutorial/trackers/index.rst +++ b/docs/source/tutorial/trackers/index.rst @@ -6,4 +6,5 @@ Tracking Results during Training using_mlflow using_neptune using_wandb + using_tensorboard using_file diff --git a/docs/source/tutorial/trackers/using_tensorboard.rst b/docs/source/tutorial/trackers/using_tensorboard.rst new file mode 100644 index 0000000000..1b9dbf0d60 --- /dev/null +++ b/docs/source/tutorial/trackers/using_tensorboard.rst @@ -0,0 +1,110 @@ +Using Tensorboard +================= +`Tensorboard <https://www.tensorflow.org/tensorboard/>`_ is a service for tracking experimental results +during or after training. It is part of the larger Tensorflow project but can be used independently of it.
+ +Installing Tensorboard +---------------------- +The :mod:`tensorboard` package can either be installed directly with ``pip install tensorboard`` +or with PyKEEN by using the ``tensorboard`` extra in ``pip install pykeen[tensorboard]``. + +.. note:: + + Tensorboard logs can be created without actually installing tensorboard itself. + However, if you want to view and interact with the data created via the tracker, it must be installed. + +Starting Tensorboard +-------------------- +The :mod:`tensorboard` web application can be started from the command line with + +.. code-block:: shell + + $ tensorboard --logdir=~/.data/pykeen/logs/tensorboard/ + +where the value passed to ``--logdir`` is the location of the log directory. By default, PyKEEN logs to +``~/.data/pykeen/logs/tensorboard/``, but this is configurable. +Tensorboard can then be accessed via a browser at: http://localhost:6006/ + +.. note:: + + It is not required for the Tensorboard process to be running while the training is happening. Indeed, + it only needs to be started once you want to interact with and view the logs. It can be stopped at any + time and the logs will persist in the filesystem. + +Minimal Pipeline Example +------------------------ +The tensorboard tracker can be used during training with the :func:`pykeen.pipeline.pipeline` as follows: + +.. code-block:: python + + from pykeen.pipeline import pipeline + + pipeline_result = pipeline( + model='RotatE', + dataset='Kinships', + result_tracker='tensorboard', + ) + +The log is placed in a subdirectory of the :mod:`pystow` default data directory of PyKEEN called ``tensorboard``, +which will likely be at ``~/.data/pykeen/logs/tensorboard`` on your system. The file is named based on the +current time if no alternative is provided. + +Specifying a Log Name +--------------------- +If you want to specify the name of the log file in the default directory, use the ``experiment_name`` keyword +argument like: + +.. 
code-block:: python + + from pykeen.pipeline import pipeline + + pipeline_result = pipeline( + model='RotatE', + dataset='Kinships', + result_tracker='tensorboard', + result_tracker_kwargs=dict( + experiment_name='rotate-kinships', + ), + ) + +Specifying a Custom Log Directory +--------------------------------- +If you want to specify a custom directory to store the tensorboard logs, use the ``experiment_path`` keyword +argument like: + +.. code-block:: python + + from pykeen.pipeline import pipeline + + pipeline_result = pipeline( + model='RotatE', + dataset='Kinships', + result_tracker='tensorboard', + result_tracker_kwargs=dict( + experiment_path='tb-logs/rotate-kinships', + ), + ) + +.. warning:: + + Please be aware that if you re-run an experiment using the same directory, then the logs will be combined. + It is advisable to use a unique sub-directory for each experiment to allow for easy comparison. + +Minimal HPO Pipeline Example +---------------------------- +Tensorboard tracking can also be used in conjunction with an HPO pipeline as follows: + +.. code-block:: python + + from pykeen.hpo import hpo_pipeline + + hpo_pipeline_result = hpo_pipeline( + n_trials=30, + dataset='Nations', + model='TransE', + result_tracker='tensorboard', + ) + +This provides a way to compare directly between different trials and parameter configurations. Please note that it +is recommended to leave the experiment name as the default value here to allow for a directory to be created per +trial.
diff --git a/setup.cfg b/setup.cfg
index a3b065abac..ef38c189f6 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -91,6 +91,8 @@ wandb =
     wandb
 neptune =
     neptune-client
+tensorboard =
+    tensorboard
 tests =
     unittest-templates>=0.0.5
     coverage
diff --git a/src/pykeen/trackers/__init__.py b/src/pykeen/trackers/__init__.py
index 37987cd457..613cd2c8cd 100644
--- a/src/pykeen/trackers/__init__.py
+++ b/src/pykeen/trackers/__init__.py
@@ -8,6 +8,7 @@
 from .file import CSVResultTracker, FileResultTracker, JSONResultTracker
 from .mlflow import MLFlowResultTracker
 from .neptune import NeptuneResultTracker
+from .tensorboard import TensorBoardResultTracker
 from .wandb import WANDBResultTracker
 
 __all__ = [
@@ -20,6 +21,7 @@
     'WANDBResultTracker',
     'JSONResultTracker',
     'CSVResultTracker',
+    'TensorBoardResultTracker',
     'ConsoleResultTracker',
     # Utilities
     'tracker_resolver',
diff --git a/src/pykeen/trackers/tensorboard.py b/src/pykeen/trackers/tensorboard.py
new file mode 100644
index 0000000000..56949b6780
--- /dev/null
+++ b/src/pykeen/trackers/tensorboard.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+
+"""An adapter for TensorBoard."""
+
+import pathlib
+import time
+from typing import Any, Dict, Mapping, Optional, TYPE_CHECKING, Union
+
+from .base import ResultTracker
+from ..constants import PYKEEN_LOGS
+from ..utils import flatten_dictionary
+
+if TYPE_CHECKING:
+    import torch.utils.tensorboard
+
+__all__ = [
+    'TensorBoardResultTracker',
+]
+
+
+class TensorBoardResultTracker(ResultTracker):
+    """A tracker for TensorBoard."""
+
+    #: The writer which receives all logged metrics and parameters
+    writer: 'torch.utils.tensorboard.SummaryWriter'
+    #: The resolved directory passed to the writer as its log directory
+    path: pathlib.Path
+
+    def __init__(
+        self,
+        experiment_path: Union[None, str, pathlib.Path] = None,
+        experiment_name: Optional[str] = None,
+    ):
+        """
+        Initialize result tracking via Tensorboard.
+
+        :param experiment_path:
+            The experiment path. A custom path at which the tensorboard logs will be saved.
+        :param experiment_name:
+            The name of the experiment, will be used as a sub directory name for the logging. If no name is given,
+            the current time is used. If ``experiment_path`` is set, this argument has no effect.
+        """
+        # imported here rather than at module level, where it is only imported for type checking
+        import torch.utils.tensorboard
+
+        if experiment_path is None:
+            if experiment_name is None:
+                experiment_name = time.strftime('%Y-%m-%d-%H-%M-%S')
+            path = PYKEEN_LOGS.joinpath("tensorboard", experiment_name)
+        else:
+            # normalizes str and Path alike, and also honors other path-like objects
+            path = pathlib.Path(experiment_path)
+
+        # store the location, so the annotated ``path`` attribute is actually available on instances
+        self.path = path.resolve()
+        self.writer = torch.utils.tensorboard.SummaryWriter(log_dir=str(self.path))
+
+    def log_metrics(
+        self,
+        metrics: Mapping[str, float],
+        step: Optional[int] = None,
+        prefix: Optional[str] = None,
+    ) -> None:  # noqa: D102
+        metrics = flatten_dictionary(dictionary=metrics, prefix=prefix)
+        for key, value in metrics.items():
+            self.writer.add_scalar(tag=key, scalar_value=value, global_step=step)
+        self.writer.flush()
+
+    def log_params(self, params: Mapping[str, Any], prefix: Optional[str] = None) -> None:  # noqa: D102
+        params = flatten_dictionary(dictionary=params, prefix=prefix)
+        for key, value in params.items():
+            # values are logged through their string representation
+            self.writer.add_text(tag=str(key), text_string=str(value))
+        self.writer.flush()
+
+    def end_run(self) -> None:  # noqa: D102
+        self.writer.flush()
+        self.writer.close()
diff --git a/tests/test_trackers.py b/tests/test_trackers.py
index f3229d4641..7e459016a9 100644
--- a/tests/test_trackers.py
+++ b/tests/test_trackers.py
@@ -2,6 +2,7 @@
 
 """Tests for result trackers."""
 
+from pykeen.trackers import TensorBoardResultTracker
 from pykeen.trackers.base import ConsoleResultTracker
 from pykeen.trackers.file import CSVResultTracker, JSONResultTracker
 from tests import cases
@@ -23,3 +24,9 @@ class ConsoleResultTrackerTests(cases.ResultTrackerTests):
     """Tests for console tracker."""
 
     cls = ConsoleResultTracker
+
+
+class TensorboardTrackerTests(cases.ResultTrackerTests):
+    """Tests for tensorboard tracker."""
+
+    cls = TensorBoardResultTracker