Skip to content

Commit

Permalink
[tune] tbx logger (#6133)
Browse files Browse the repository at this point in the history
* tbx

* add_hparams

* fix_hparams

* ok

* ok

* fix

* ok

* fix
  • Loading branch information
richardliaw committed Nov 15, 2019
1 parent 8ff393a commit 62cbc04
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 4 deletions.
18 changes: 17 additions & 1 deletion doc/source/tune-usage.rst
Expand Up @@ -598,13 +598,29 @@ You can pass in your own logging mechanisms to output logs in custom formats as
from ray.tune.logger import DEFAULT_LOGGERS
tune.run(
MyTrainableClass
MyTrainableClass,
name="experiment_name",
loggers=DEFAULT_LOGGERS + (CustomLogger1, CustomLogger2)
)
These loggers will be called along with the default Tune loggers. All loggers must inherit the `Logger interface <tune-package-ref.html#ray.tune.logger.Logger>`__. Tune enables default loggers for Tensorboard, CSV, and JSON formats. You can also check out `logger.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/logger.py>`__ for implementation details. An example can be found in `logging_example.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/logging_example.py>`__.

.. warning:: If you run into issues for TensorBoard logging, consider using the TensorBoardX Logger (``from ray.tune.logger import TBXLogger``)

TBXLogger (TensorboardX)
~~~~~~~~~~~~~~~~~~~~~~~~

Tune provides a logger using `TensorBoardX <https://github.com/lanpa/tensorboardX>`_. You can install tensorboardX via ``pip install tensorboardX``. This logger automatically produces output similar to the default TensorFlow logging format, which is convenient if you are undergoing a TF1-to-TF2 transition. By default, it will log any scalar value provided via the result dictionary along with HParams information.

.. code-block:: python
from ray.tune.logger import TBXLogger
tune.run(
MyTrainableClass,
name="experiment_name",
loggers=[TBXLogger]
)
MLFlow
~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion docker/examples/Dockerfile
Expand Up @@ -14,6 +14,6 @@ RUN pip install -U h5py # Mutes FutureWarnings
RUN pip install --upgrade bayesian-optimization
RUN pip install --upgrade hyperopt==0.1.2
RUN pip install ConfigSpace==0.4.10
RUN pip install --upgrade sigopt nevergrad scikit-optimize hpbandster lightgbm xgboost torch torchvision
RUN pip install --upgrade sigopt nevergrad scikit-optimize hpbandster lightgbm xgboost torch torchvision tensorboardX
RUN pip install -U tabulate mlflow
RUN pip install -U pytest-remotedata>=0.3.1
2 changes: 1 addition & 1 deletion docker/tune_test/Dockerfile
Expand Up @@ -17,7 +17,7 @@ RUN pip install gym[atari]==0.10.11 opencv-python-headless tensorflow lz4 keras
RUN pip install --upgrade bayesian-optimization
RUN pip install --upgrade hyperopt==0.1.2
RUN pip install ConfigSpace==0.4.10
RUN pip install --upgrade sigopt nevergrad scikit-optimize hpbandster lightgbm xgboost
RUN pip install --upgrade sigopt nevergrad scikit-optimize hpbandster lightgbm xgboost tensorboardX
RUN pip install -U mlflow
RUN pip install -U pytest-remotedata>=0.3.1

Expand Down
57 changes: 57 additions & 0 deletions python/ray/tune/logger.py
Expand Up @@ -316,6 +316,63 @@ def close(self):
self._file.close()


class TBXLogger(Logger):
    """TensorBoardX Logger.

    Writes scalar results to TensorBoard event files via tensorboardX,
    and records HParams (trial config + final metrics) on close.

    Automatically flattens nested dicts to show on TensorBoard:
        {"a": {"b": 1, "c": 2}} -> {"a/b": 1, "a/c": 2}
    """

    def _init(self):
        # Lazy import so tune remains usable without tensorboardX installed
        # unless this logger is actually selected.
        try:
            from tensorboardX import SummaryWriter
        except ImportError:
            logger.error("pip install tensorboardX to see TensorBoard files.")
            raise
        self._file_writer = SummaryWriter(self.logdir, flush_secs=30)
        self.last_result = None

    def on_result(self, result):
        # Prefer total timesteps as the global step, falling back to the
        # training iteration only when timesteps are absent.  An explicit
        # None check is required: `result.get(...) or ...` would wrongly
        # discard a legitimate step value of 0.
        step = result.get(TIMESTEPS_TOTAL)
        if step is None:
            step = result[TRAINING_ITERATION]

        tmp = result.copy()
        for k in [
                "config", "pid", "timestamp", TIME_TOTAL_S, TRAINING_ITERATION
        ]:
            if k in tmp:
                del tmp[k]  # not useful to log these

        flat_result = flatten_dict(tmp, delimiter="/")
        path = ["ray", "tune"]
        # Keep only values TensorBoard can plot as scalars.  Exact type
        # match (not isinstance) preserves the original filtering behavior.
        valid_result = {
            "/".join(path + [attr]): value
            for attr, value in flat_result.items()
            if type(value) in VALID_SUMMARY_TYPES
        }

        for attr, value in valid_result.items():
            self._file_writer.add_scalar(attr, value, global_step=step)
        self.last_result = valid_result
        self._file_writer.flush()

    def flush(self):
        # getattr guards against _init having failed (ImportError) before
        # the writer attribute was ever created.
        if getattr(self, "_file_writer", None) is not None:
            self._file_writer.flush()

    def close(self):
        if getattr(self, "_file_writer", None) is not None:
            if self.trial and self.trial.evaluated_params and self.last_result:
                # Emit HParams summaries so TensorBoard's HPARAMS tab can
                # associate this trial's config with its final metrics.
                from tensorboardX.summary import hparams
                experiment_tag, session_start_tag, session_end_tag = hparams(
                    hparam_dict=self.trial.evaluated_params,
                    metric_dict=self.last_result)
                self._file_writer.file_writer.add_summary(experiment_tag)
                self._file_writer.file_writer.add_summary(session_start_tag)
                self._file_writer.file_writer.add_summary(session_end_tag)
            self._file_writer.close()


DEFAULT_LOGGERS = (JsonLogger, CSVLogger, tf2_compat_logger)


Expand Down
10 changes: 9 additions & 1 deletion python/ray/tune/tests/test_logger.py
Expand Up @@ -7,7 +7,7 @@
import tempfile
import shutil

from ray.tune.logger import tf2_compat_logger, JsonLogger, CSVLogger
from ray.tune.logger import tf2_compat_logger, JsonLogger, CSVLogger, TBXLogger

Trial = namedtuple("MockTrial", ["evaluated_params", "trial_id"])

Expand Down Expand Up @@ -54,6 +54,14 @@ def testJSON(self):
logger.on_result(result(2, 4))
logger.close()

def testTBX(self):
    """Smoke test: TBXLogger accepts repeated results and closes cleanly."""
    params = {"a": 2, "b": 5}
    trial = Trial(evaluated_params=params, trial_id="tbx")
    tbx_logger = TBXLogger(config=params, logdir=self.test_dir, trial=trial)
    for _ in range(2):
        tbx_logger.on_result(result(2, 4))
    tbx_logger.close()


if __name__ == "__main__":
unittest.main(verbosity=2)

0 comments on commit 62cbc04

Please sign in to comment.