-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #59 from neptune-ai/cli-exporter
cli exporter
- Loading branch information
Showing
7 changed files
with
178 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
__all__ = ["DataSync"] | ||
|
||
from neptune_tensorboard.sync.sync_impl import DataSync |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
import hashlib | ||
import pathlib | ||
import traceback | ||
|
||
import click | ||
import neptune | ||
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator | ||
|
||
try: | ||
import tbparse | ||
except ModuleNotFoundError: | ||
raise ModuleNotFoundError("neptune-tensorboard: require `tbparse` for exporting logs (pip install tbparse)") | ||
|
||
|
||
def compute_md5_hash(path):
    """Return the hex MD5 digest of *path* (used as a Neptune custom_run_id)."""
    digest = hashlib.md5(path.encode())
    return digest.hexdigest()
|
||
|
||
class DataSync:
    """Export TensorBoard event files found under ``path`` to Neptune runs.

    Each event file is mapped onto a Neptune run whose ``custom_run_id`` is
    the MD5 hash of the file path, so re-running the export skips files that
    were already synced.
    """

    def __init__(self, project, api_token, path):
        self._project = project
        self._api_token = api_token
        self._path = path

    def run(self):
        """Walk ``self._path`` recursively and export every valid event file."""
        # NOTE: Fetching custom_run_ids is not a trivial operation, so
        # we cache the custom_run_ids here.
        self._existing_custom_run_ids = self._get_existing_neptune_custom_run_ids()
        # Inspect if files correspond to EventFiles.
        for path in pathlib.Path(self._path).glob("**/*tfevents*"):
            try:
                # methods below expect path to be str.
                str_path = str(path)

                # only try export for valid files i.e. files which EventAccumulator
                # can actually read.
                if self._is_valid_tf_event_file(str_path):
                    self._export_to_neptune_run(str_path)
            except Exception as e:
                # Best effort: report the failure and keep processing the
                # remaining event files.
                click.echo("Cannot load run from file '{}'. ".format(path) + "Error: " + str(e))
                # BUG FIX: traceback.print_exc() takes no exception argument
                # (its first parameter is `limit`), so the old call
                # `traceback.print_exc(e)` raised a TypeError that a bare
                # `except` silently swallowed -- the traceback was never
                # printed. Calling it with no arguments prints the traceback
                # of the exception currently being handled.
                traceback.print_exc()

    def _is_valid_tf_event_file(self, path):
        """Return True if ``path`` is an event file EventAccumulator can read."""
        accumulator = EventAccumulator(path)
        accumulator.Reload()
        try:
            # Raises ValueError when the file contains no events at all.
            accumulator.FirstEventTimestamp()
        except ValueError:
            return False
        return True

    def _get_existing_neptune_custom_run_ids(self):
        """Return the set of ``sys/custom_run_id`` values already in the project."""
        with neptune.init_project(project=self._project, api_token=self._api_token) as project:
            try:
                return set(project.fetch_runs_table().to_pandas()["sys/custom_run_id"].to_list())
            except KeyError:
                # The column is absent when the project has no runs yet.
                return set()

    def _experiment_exists(self, hash_run_id, run_path):
        """Return True if a run with this custom_run_id was already exported.

        ``run_path`` is accepted for interface compatibility but not used.
        """
        return hash_run_id in self._existing_custom_run_ids

    def _export_to_neptune_run(self, path):
        """Create a Neptune run for the event file at ``path`` and upload its data."""
        # custom_run_id supports str with max length of 32 -- an MD5 hex
        # digest is exactly 32 characters.
        hash_run_id = compute_md5_hash(path)

        if self._experiment_exists(hash_run_id, self._project):
            click.echo(f"{path} was already synced")
            return

        with neptune.init_run(custom_run_id=hash_run_id, project=self._project, api_token=self._api_token) as run:
            run["tensorboard_path"] = path

            namespace_handler = run["tensorboard"]

            # parse events file
            reader = tbparse.SummaryReader(path)

            # Read scalars
            for scalar in reader.scalars.itertuples():
                namespace_handler["scalar"][scalar.tag].append(scalar.value)

            # Read images (and figures)
            for image in reader.images.itertuples():
                namespace_handler["image"][image.tag].append(neptune.types.File.as_image(image.value))

            # Read text
            for text in reader.text.itertuples():
                namespace_handler["text"][text.tag].append(text.value)

            # Read hparams
            for hparam in reader.hparams.itertuples():
                namespace_handler["hparams"][hparam.tag].append(hparam.value)

            click.echo(f"{path} was exported with run_id: {hash_run_id}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import os | ||
|
||
import click | ||
|
||
|
||
@click.command("tensorboard") | ||
@click.option("--project", help="Neptune Project name") | ||
@click.option("--api_token", help="Neptune API token") | ||
@click.argument("log_dir", required=True) | ||
def sync(project, api_token, log_dir): | ||
if not os.path.exists(log_dir): | ||
click.echo("ERROR: Provided `log_dir` path doesn't exist", err=True) | ||
return | ||
|
||
# We do not want to import anything if process was executed for autocompletion purposes. | ||
from neptune_tensorboard.sync import DataSync | ||
|
||
DataSync(project, api_token, log_dir).run() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import hashlib | ||
import os | ||
import shutil | ||
import uuid | ||
import time | ||
|
||
import neptune | ||
import torch | ||
from tensorboardX.writer import SummaryWriter | ||
|
||
from neptune_tensorboard.sync.sync_impl import DataSync | ||
|
||
|
||
def test_exporter():
    """End-to-end check: write a tiny TensorBoard log, sync it, verify the Neptune run.

    Requires Neptune credentials in the environment (picked up by
    ``neptune.init_project`` / ``neptune.init_run``).
    """
    log_dir = str(uuid.uuid4())
    writer = SummaryWriter(log_dir=log_dir)

    writer.add_scalar("tensorboardX_scalar", 0.5)
    writer.add_image("zero", torch.zeros(12, 12, 3), dataformats="HWC")
    writer.add_images("zeros", torch.zeros(4, 12, 12, 3), dataformats="NHWC")
    writer.add_text("my_text", "Hello World")
    writer.add_text("my_text", "Hello World 2")

    writer.flush()
    writer.close()

    # FIX: the temporary log dir was leaked whenever the sync or an assertion
    # failed, because rmtree was only reached on full success. try/finally
    # guarantees cleanup.
    try:
        DataSync(project=None, api_token=None, path=log_dir).run()

        # let the data sync (we can't call run.sync() as we don't have
        # access to run)
        time.sleep(10)

        # The writer produced a single event file; its path hash is the
        # custom_run_id that DataSync assigned to the exported run.
        fname = os.listdir(log_dir)[0]
        path = os.path.join(log_dir, fname)
        hash_run_id = hashlib.md5(path.encode()).hexdigest()

        with neptune.init_project() as project:
            runs_df = project.fetch_runs_table().to_pandas()
            custom_run_id_map = dict(zip(runs_df["sys/custom_run_id"], runs_df["sys/id"]))
            run_id = custom_run_id_map[hash_run_id]

        with neptune.init_run(with_id=run_id) as run:
            assert run.exists("tensorboard_path")
            assert run.exists("tensorboard/image")
            assert run.exists("tensorboard/scalar")
            assert run.exists("tensorboard/text")
    finally:
        shutil.rmtree(log_dir)