Skip to content

Commit

Permalink
Merge pull request #59 from neptune-ai/cli-exporter
Browse files Browse the repository at this point in the history
cli exporter
  • Loading branch information
Raalsky committed Jun 19, 2023
2 parents 47dbb2a + df1f0d2 commit 5d4c79f
Show file tree
Hide file tree
Showing 7 changed files with 178 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .github/actions/e2e/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ runs:

- name: Run tests
working-directory: ${{ inputs.working_directory }}
run: pytest -n auto -v
run: pytest -v
shell: bash
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## (UNRELEASED) neptune-tensorboard 1.0.0.pre3

### Changes
- Add support for exporting existing TensorBoard logs via a CLI plugin ([#59](https://github.com/neptune-ai/neptune-tensorboard/pull/59/))

## neptune-tensorboard 1.0.0.pre2

### Changes
Expand Down
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ importlib-metadata = { version = "*", python = "<3.8" }
tensorflow = { version = ">=2.0.0", optional = true }
torch = { version = ">=1.9.0", optional = true }
tensorboardX = { version = ">=2.2.0", optional = true }
tbparse = { version = "*", optional = true }

# dev
pre-commit = { version = "*", optional = true }
Expand Down Expand Up @@ -99,3 +100,6 @@ force_grid_wrap = 2
[tool.flake8]
max-line-length = 120
extend-ignore = "E203"

[tool.poetry.plugins."neptune.plugins"]
"tensorboard" = "neptune_tensorboard_plugin:sync"
3 changes: 3 additions & 0 deletions src/neptune_tensorboard/sync/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""Public interface of the ``sync`` package: re-exports :class:`DataSync`."""
__all__ = ["DataSync"]

from neptune_tensorboard.sync.sync_impl import DataSync
98 changes: 98 additions & 0 deletions src/neptune_tensorboard/sync/sync_impl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import hashlib
import pathlib
import traceback

import click
import neptune
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

try:
    import tbparse
except ModuleNotFoundError as e:
    # Re-raise with an actionable install hint; chain with `from e` so the
    # original import failure is shown as the direct cause rather than as an
    # unrelated error raised "during handling" of it.
    raise ModuleNotFoundError(
        "neptune-tensorboard: require `tbparse` for exporting logs (pip install tbparse)"
    ) from e


def compute_md5_hash(path):
    """Return the 32-character hexadecimal MD5 digest of the string *path*."""
    digest = hashlib.md5(path.encode())
    return digest.hexdigest()


class DataSync:
    """Exports existing TensorBoard event files into Neptune runs.

    Every event file found under ``path`` becomes one Neptune run. The MD5
    hash of the file path is used as the run's ``custom_run_id``, so a second
    sync of the same directory skips files that were already exported.
    """

    def __init__(self, project, api_token, path):
        # project/api_token may be None -- neptune then presumably falls back
        # to its environment-variable configuration (NOTE(review): confirm).
        self._project = project
        self._api_token = api_token
        self._path = path

    def run(self):
        """Export every readable TensorBoard event file under ``self._path``."""
        # NOTE: Fetching custom_run_ids is not a trivial operation, so
        # we cache the custom_run_ids here.
        self._existing_custom_run_ids = self._get_existing_neptune_custom_run_ids()
        # Inspect if files correspond to EventFiles.
        for path in pathlib.Path(self._path).glob("**/*tfevents*"):
            try:
                # methods below expect path to be str.
                str_path = str(path)

                # only try export for valid files i.e. files which EventAccumulator
                # can actually read.
                if self._is_valid_tf_event_file(str_path):
                    self._export_to_neptune_run(str_path)
            except Exception as e:
                click.echo(f"Cannot load run from file '{path}'. Error: {e}")
                # BUG FIX: the original called `traceback.print_exc(e)`, which
                # passes the exception where an int `limit` is expected; that
                # call itself raised and was swallowed by a bare `except:`, so
                # no traceback was ever printed. `print_exc()` takes no
                # exception argument -- it prints the active one.
                traceback.print_exc()

    def _is_valid_tf_event_file(self, path):
        """Return True if ``path`` is an event file EventAccumulator can read."""
        accumulator = EventAccumulator(path)
        accumulator.Reload()
        try:
            accumulator.FirstEventTimestamp()
        except ValueError:
            # No events could be read -- not a usable event file.
            return False
        return True

    def _get_existing_neptune_custom_run_ids(self):
        """Return the set of ``custom_run_id`` values already in the project."""
        with neptune.init_project(project=self._project, api_token=self._api_token) as project:
            try:
                return set(project.fetch_runs_table().to_pandas()["sys/custom_run_id"].to_list())
            except KeyError:
                # empty project -- the "sys/custom_run_id" column doesn't exist yet.
                return set()

    def _experiment_exists(self, hash_run_id, run_path):
        """Return True if a run with this custom_run_id was already exported.

        ``run_path`` is currently unused; kept for interface stability.
        """
        return hash_run_id in self._existing_custom_run_ids

    def _export_to_neptune_run(self, path):
        """Create a Neptune run for the event file at ``path`` (or skip it)."""
        # custom_run_id supports str with max length of 32; an MD5 hex digest
        # is exactly 32 characters.
        hash_run_id = compute_md5_hash(path)

        # CONSISTENCY FIX: the original passed `self._project` as `run_path`;
        # pass the actual path (the argument is unused, so behavior is equal).
        if self._experiment_exists(hash_run_id, path):
            click.echo(f"{path} was already synced")
            return

        with neptune.init_run(custom_run_id=hash_run_id, project=self._project, api_token=self._api_token) as run:
            run["tensorboard_path"] = path

            namespace_handler = run["tensorboard"]

            # parse events file
            reader = tbparse.SummaryReader(path)

            # Read scalars
            for scalar in reader.scalars.itertuples():
                namespace_handler["scalar"][scalar.tag].append(scalar.value)

            # Read images (and figures)
            for image in reader.images.itertuples():
                namespace_handler["image"][image.tag].append(neptune.types.File.as_image(image.value))

            # Read text
            for text in reader.text.itertuples():
                namespace_handler["text"][text.tag].append(text.value)

            # Read hparams
            for hparam in reader.hparams.itertuples():
                namespace_handler["hparams"][hparam.tag].append(hparam.value)

        click.echo(f"{path} was exported with run_id: {hash_run_id}")
18 changes: 18 additions & 0 deletions src/neptune_tensorboard_plugin/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import os

import click


@click.command("tensorboard")
@click.option("--project", help="Neptune Project name")
@click.option("--api_token", help="Neptune API token")
@click.argument("log_dir", required=True)
def sync(project, api_token, log_dir):
    """Export existing TensorBoard logs from LOG_DIR into Neptune runs."""
    if os.path.exists(log_dir):
        # Import lazily so that invocations made purely for shell
        # autocompletion don't pay for (or fail on) the sync dependencies.
        from neptune_tensorboard.sync import DataSync

        DataSync(project, api_token, log_dir).run()
    else:
        click.echo("ERROR: Provided `log_dir` path doesn't exist", err=True)
49 changes: 49 additions & 0 deletions tests/test_exporter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import hashlib
import os
import shutil
import uuid
import time

import neptune
import torch
from tensorboardX.writer import SummaryWriter

from neptune_tensorboard.sync.sync_impl import DataSync


def test_exporter():
    """End-to-end: write tensorboardX events, sync them, verify the Neptune run."""
    log_dir = str(uuid.uuid4())

    writer = SummaryWriter(log_dir=log_dir)
    writer.add_scalar("tensorboardX_scalar", 0.5)
    writer.add_image("zero", torch.zeros(12, 12, 3), dataformats="HWC")
    writer.add_images("zeros", torch.zeros(4, 12, 12, 3), dataformats="NHWC")
    writer.add_text("my_text", "Hello World")
    writer.add_text("my_text", "Hello World 2")
    writer.flush()
    writer.close()

    DataSync(project=None, api_token=None, path=log_dir).run()

    # let the data sync (we can't call run.sync() as we don't have
    # access to run)
    time.sleep(10)

    # The exporter keys runs by the MD5 of the event-file path; recompute it
    # for the first file the writer produced.
    event_path = os.path.join(log_dir, os.listdir(log_dir)[0])
    hash_run_id = hashlib.md5(event_path.encode()).hexdigest()

    with neptune.init_project() as project:
        runs_df = project.fetch_runs_table().to_pandas()
        custom_run_id_map = dict(zip(runs_df["sys/custom_run_id"], runs_df["sys/id"]))
        run_id = custom_run_id_map[hash_run_id]

    with neptune.init_run(with_id=run_id) as run:
        assert run.exists("tensorboard_path")
        assert run.exists("tensorboard/image")
        assert run.exists("tensorboard/scalar")
        assert run.exists("tensorboard/text")

    shutil.rmtree(log_dir)

0 comments on commit 5d4c79f

Please sign in to comment.