From 5c330582da368b01eb2aecf1ddbfb2c1e3f805c9 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Thu, 28 Mar 2019 01:40:47 +0800 Subject: [PATCH 01/32] initial commit --- tensorboard/compat/BUILD | 1 + tensorboard/compat/proto/BUILD | 3 + tensorboard/compat/proto/layout.proto | 1 + .../compat/proto/plugin_pr_curve.proto | 1 + tensorboard/compat/proto/plugin_text.proto | 1 + tensorboard/writer/BUILD | 22 ++ tensorboard/writer/__init__.py | 37 +++ tensorboard/writer/crc32c.py | 138 +++++++++++ tensorboard/writer/event_file_writer.py | 231 ++++++++++++++++++ tensorboard/writer/record_writer.py | 158 ++++++++++++ 10 files changed, 593 insertions(+) create mode 120000 tensorboard/compat/proto/layout.proto create mode 120000 tensorboard/compat/proto/plugin_pr_curve.proto create mode 120000 tensorboard/compat/proto/plugin_text.proto create mode 100644 tensorboard/writer/BUILD create mode 100644 tensorboard/writer/__init__.py create mode 100644 tensorboard/writer/crc32c.py create mode 100644 tensorboard/writer/event_file_writer.py create mode 100644 tensorboard/writer/record_writer.py diff --git a/tensorboard/compat/BUILD b/tensorboard/compat/BUILD index 9a12c44bdb..04aa332df1 100644 --- a/tensorboard/compat/BUILD +++ b/tensorboard/compat/BUILD @@ -44,6 +44,7 @@ py_library( deps = [ ":compat", "//tensorboard/compat/tensorflow_stub", + "//tensorboard/writer", ], ) diff --git a/tensorboard/compat/proto/BUILD b/tensorboard/compat/proto/BUILD index 647829f982..f5504b24e9 100644 --- a/tensorboard/compat/proto/BUILD +++ b/tensorboard/compat/proto/BUILD @@ -25,9 +25,12 @@ tb_proto_library( "event.proto", "function.proto", "graph.proto", + "layout.proto", "meta_graph.proto", "node_def.proto", "op_def.proto", + "plugin_pr_curve.proto", + "plugin_text.proto", "resource_handle.proto", "rewriter_config.proto", "saver.proto", diff --git a/tensorboard/compat/proto/layout.proto b/tensorboard/compat/proto/layout.proto new file mode 120000 index 0000000000..14e484a998 --- /dev/null +++ 
b/tensorboard/compat/proto/layout.proto @@ -0,0 +1 @@ +../../plugins/custom_scalar/layout.proto \ No newline at end of file diff --git a/tensorboard/compat/proto/plugin_pr_curve.proto b/tensorboard/compat/proto/plugin_pr_curve.proto new file mode 120000 index 0000000000..320b7e557f --- /dev/null +++ b/tensorboard/compat/proto/plugin_pr_curve.proto @@ -0,0 +1 @@ +../../plugins/pr_curve/plugin_data.proto \ No newline at end of file diff --git a/tensorboard/compat/proto/plugin_text.proto b/tensorboard/compat/proto/plugin_text.proto new file mode 120000 index 0000000000..2ccbacb3d2 --- /dev/null +++ b/tensorboard/compat/proto/plugin_text.proto @@ -0,0 +1 @@ +../../plugins/text/plugin_data.proto \ No newline at end of file diff --git a/tensorboard/writer/BUILD b/tensorboard/writer/BUILD new file mode 100644 index 0000000000..f90d280a02 --- /dev/null +++ b/tensorboard/writer/BUILD @@ -0,0 +1,22 @@ +# Description: +# TensorBoard, a dashboard for investigating TensorFlow + +package(default_visibility = ["//tensorboard:internal"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +py_library( + name = "writer", + srcs = glob([ + "*.py", + ]), + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], + deps = [ + "//tensorboard:expect_numpy_installed", + "//tensorboard/compat/proto:protos_all_py_pb2", + "@org_pythonhosted_six", + ], +) \ No newline at end of file diff --git a/tensorboard/writer/__init__.py b/tensorboard/writer/__init__.py new file mode 100644 index 0000000000..1014fa8d1b --- /dev/null +++ b/tensorboard/writer/__init__.py @@ -0,0 +1,37 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Compatibility interfaces for TensorBoard.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +USING_TF = True + +# Don't attempt to use TF at all if this import exists due to build rules. +try: + from tensorboard.compat import notf + USING_TF = False +except ImportError: + pass + +if USING_TF: + try: + import tensorflow as tf + except ImportError: + USING_TF = False + +if not USING_TF: + from tensorboard.compat import tensorflow_stub as tf diff --git a/tensorboard/writer/crc32c.py b/tensorboard/writer/crc32c.py new file mode 100644 index 0000000000..461334cd15 --- /dev/null +++ b/tensorboard/writer/crc32c.py @@ -0,0 +1,138 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import array + + +CRC_TABLE = ( + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, + 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, + 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, + 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, + 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, + 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, + 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, + 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, + 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, + 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, + 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, + 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, + 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, + 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, + 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, + 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, + 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, + 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, + 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, + 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, + 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, + 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, + 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, + 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, + 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, + 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, + 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, + 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, + 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, + 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, + 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, + 0x55326b08, 0xa759e80b, 
0xb4091bff, 0x466298fc, + 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, + 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, + 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, + 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, + 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, + 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, + 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, + 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, + 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, + 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, + 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, + 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, + 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, + 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, + 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, + 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, + 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, + 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, + 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, + 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, + 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, + 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351, +) + + +CRC_INIT = 0 + +_MASK = 0xFFFFFFFF + + +def crc_update(crc, data): + """Update CRC-32C checksum with data. + + Args: + crc: 32-bit checksum to update as long. + data: byte array, string or iterable over bytes. + + Returns: + 32-bit updated CRC-32C as long. + """ + + if type(data) != array.array or data.itemsize != 1: + buf = array.array("B", data) + else: + buf = data + + crc ^= _MASK + for b in buf: + table_index = (crc ^ b) & 0xff + crc = (CRC_TABLE[table_index] ^ (crc >> 8)) & _MASK + return crc ^ _MASK + + +def crc_finalize(crc): + """Finalize CRC-32C checksum. + + This function should be called as last step of crc calculation. 
+ + Args: + crc: 32-bit checksum as long. + + Returns: + finalized 32-bit checksum as long + """ + return crc & _MASK + + +def crc32c(data): + """Compute CRC-32C checksum of the data. + + Args: + data: byte array, string or iterable over bytes. + + Returns: + 32-bit CRC-32C checksum of data as long. + """ + return crc_finalize(crc_update(CRC_INIT, data)) diff --git a/tensorboard/writer/event_file_writer.py b/tensorboard/writer/event_file_writer.py new file mode 100644 index 0000000000..59097a9abb --- /dev/null +++ b/tensorboard/writer/event_file_writer.py @@ -0,0 +1,231 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Writes events to disk in a logdir.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os.path +import socket +import threading +import time + +import six + +from ..proto import event_pb2 +from .record_writer import RecordWriter, directory_check + + +class EventsWriter(object): + '''Writes `Event` protocol buffers to an event file.''' + + def __init__(self, file_prefix, filename_suffix=''): + ''' + Events files have a name of the form + '/some/file/path/events.out.tfevents.[timestamp].[hostname]' + ''' + self._file_name = file_prefix + ".out.tfevents." + str(time.time())[:10] + "." 
+\ + socket.gethostname() + filename_suffix + + self._num_outstanding_events = 0 + + self._py_recordio_writer = RecordWriter(self._file_name) + + # Initialize an event instance. + self._event = event_pb2.Event() + + self._event.wall_time = time.time() + + self._lock = threading.Lock() + + self.write_event(self._event) + + def write_event(self, event): + '''Append "event" to the file.''' + + # Check if event is of type event_pb2.Event proto. + if not isinstance(event, event_pb2.Event): + raise TypeError("Expected an event_pb2.Event proto, " + " but got %s" % type(event)) + return self._write_serialized_event(event.SerializeToString()) + + def _write_serialized_event(self, event_str): + with self._lock: + self._num_outstanding_events += 1 + self._py_recordio_writer.write(event_str) + + def flush(self): + '''Flushes the event file to disk.''' + with self._lock: + self._num_outstanding_events = 0 + self._py_recordio_writer.flush() + return True + + def close(self): + '''Call self.flush().''' + return_value = self.flush() + with self._lock: + self._py_recordio_writer.close() + return return_value + + +class EventFileWriter(object): + """Writes `Event` protocol buffers to an event file. + The `EventFileWriter` class creates an event file in the specified directory, + and asynchronously writes Event protocol buffers to the file. The Event file + is encoded using the tfrecord format, which is similar to RecordIO. + @@__init__ + @@add_event + @@flush + @@close + """ + + def __init__(self, logdir, max_queue=10, flush_secs=120, filename_suffix=''): + """Creates a `EventFileWriter` and an event file to write to. + On construction the summary writer creates a new event file in `logdir`. + This event file will contain `Event` protocol buffers, which are written to + disk via the add_event method. + The other arguments to the constructor control the asynchronous writes to + the event file: + * `flush_secs`: How often, in seconds, to flush the added summaries + and events to disk. 
+ * `max_queue`: Maximum number of summaries or events pending to be + written to disk before one of the 'add' calls block. + Args: + logdir: A string. Directory where event file will be written. + max_queue: Integer. Size of the queue for pending events and summaries. + flush_secs: Number. How often, in seconds, to flush the + pending events and summaries to disk. + """ + self._logdir = logdir + directory_check(self._logdir) + self._event_queue = six.moves.queue.Queue(max_queue) + self._ev_writer = EventsWriter(os.path.join( + self._logdir, "events"), filename_suffix) + self._flush_secs = flush_secs + self._closed = False + self._worker = _EventLoggerThread(self._event_queue, self._ev_writer, + flush_secs) + + self._worker.start() + + def get_logdir(self): + """Returns the directory where event file will be written.""" + return self._logdir + + def reopen(self): + """Reopens the EventFileWriter. + Can be called after `close()` to add more events in the same directory. + The events will go into a new events file and a new write/flush worker + is created. Does nothing if the EventFileWriter was not closed. + """ + if self._closed: + self._closed = False + self._worker = _EventLoggerThread( + self._event_queue, self._ev_writer, self._flush_secs + ) + self._worker.start() + + def add_event(self, event): + """Adds an event to the event file. + Args: + event: An `Event` protocol buffer. + """ + if not self._closed: + self._event_queue.put(event) + + def flush(self): + """Flushes the event file to disk. + Call this method to make sure that all pending events have been written to + disk. + """ + if not self._closed: + self._event_queue.join() + self._ev_writer.flush() + + def close(self): + """Performs a final flush of the event file to disk, stops the + write/flush worker and closes the file. Call this method when you do not + need the summary writer anymore. 
+ """ + if not self._closed: + self.flush() + self._worker.stop() + self._ev_writer.close() + self._closed = True + + +class _EventLoggerThread(threading.Thread): + """Thread that logs events.""" + + def __init__(self, queue, ev_writer, flush_secs): + """Creates an _EventLoggerThread. + Args: + queue: A Queue from which to dequeue events. + ev_writer: An event writer. Used to log brain events for + the visualizer. + flush_secs: How often, in seconds, to flush the + pending file to disk. + """ + threading.Thread.__init__(self) + self.daemon = True + self._queue = queue + self._ev_writer = ev_writer + self._flush_secs = flush_secs + # The first event will be flushed immediately. + self._next_event_flush_time = 0 + self._has_pending_events = False + self._shutdown_signal = object() + + def stop(self): + self._queue.put(self._shutdown_signal) + self.join() + + def run(self): + # Here wait on the queue until an event appears, or till the next + # time to flush the writer, whichever is earlier. If we have an + # event, write it. If not, an empty queue exception will be raised + # and we can proceed to flush the writer. + while True: + now = time.time() + queue_wait_duration = self._next_event_flush_time - now + event = None + try: + if queue_wait_duration > 0: + event = self._queue.get(True, queue_wait_duration) + else: + event = self._queue.get(False) + + if event == self._shutdown_signal: + return + self._ev_writer.write_event(event) + self._has_pending_events = True + except six.moves.queue.Empty: + pass + finally: + if event: + self._queue.task_done() + + now = time.time() + if now > self._next_event_flush_time: + if self._has_pending_events: + # Small optimization - if there are no pending events, + # there's no need to flush, since each flush can be + # expensive (e.g. uploading a new file to a server). + self._ev_writer.flush() + self._has_pending_events = False + # Do it again in flush_secs. 
+ self._next_event_flush_time = now + self._flush_secs diff --git a/tensorboard/writer/record_writer.py b/tensorboard/writer/record_writer.py new file mode 100644 index 0000000000..cca51f8317 --- /dev/null +++ b/tensorboard/writer/record_writer.py @@ -0,0 +1,158 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +""" +To write tf_record into file. Here we use it for tensorboard's event writting. +The code was borrowed from https://github.com/TeamHG-Memex/tensorboard_logger +""" + +import copy +import io +import os.path +import re +import struct +try: + import boto3 + S3_ENABLED = True +except ImportError: + S3_ENABLED = False + +from .crc32c import crc32c + + +_VALID_OP_NAME_START = re.compile('^[A-Za-z0-9.]') +_VALID_OP_NAME_PART = re.compile('[A-Za-z0-9_.\\-/]+') + +# Registry of writer factories by prefix backends. +# +# Currently supports "s3://" URLs for S3 based on boto and falls +# back to local filesystem. 
+REGISTERED_FACTORIES = {} + + +def register_writer_factory(prefix, factory): + if ':' in prefix: + raise ValueError('prefix cannot contain a :') + REGISTERED_FACTORIES[prefix] = factory + + +def directory_check(path): + '''Initialize the directory for log files.''' + try: + prefix = path.split(':')[0] + factory = REGISTERED_FACTORIES[prefix] + return factory.directory_check(path) + except KeyError: + if not os.path.exists(path): + os.makedirs(path) + + +def open_file(path): + '''Open a writer for outputting event files.''' + try: + prefix = path.split(':')[0] + factory = REGISTERED_FACTORIES[prefix] + return factory.open(path) + except KeyError: + return open(path, 'wb') + + +class S3RecordWriter(object): + """Writes tensorboard protocol buffer files to S3.""" + + def __init__(self, path): + if not S3_ENABLED: + raise ImportError("boto3 must be installed for S3 support.") + self.path = path + self.buffer = io.BytesIO() + + def __del__(self): + self.close() + + def bucket_and_path(self): + path = self.path + if path.startswith("s3://"): + path = path[len("s3://"):] + bp = path.split("/") + bucket = bp[0] + path = path[1 + len(bucket):] + return bucket, path + + def write(self, val): + self.buffer.write(val) + + def flush(self): + s3 = boto3.client('s3') + bucket, path = self.bucket_and_path() + upload_buffer = copy.copy(self.buffer) + upload_buffer.seek(0) + s3.upload_fileobj(upload_buffer, bucket, path) + + def close(self): + self.flush() + + +class S3RecordWriterFactory(object): + """Factory for event protocol buffer files to S3.""" + + def open(self, path): + return S3RecordWriter(path) + + def directory_check(self, path): + # S3 doesn't need directories created before files are added + # so we can just skip this check + pass + + +register_writer_factory("s3", S3RecordWriterFactory()) + + +class RecordWriter(object): + def __init__(self, path): + self._name_to_tf_name = {} + self._tf_names = set() + self.path = path + self._writer = None + self._writer = 
open_file(path) + + def write(self, event_str): + w = self._writer.write + header = struct.pack('Q', len(event_str)) + w(header) + w(struct.pack('I', masked_crc32c(header))) + w(event_str) + w(struct.pack('I', masked_crc32c(event_str))) + + def flush(self): + self._writer.flush() + + def close(self): + self._writer.close() + + +def masked_crc32c(data): + x = u32(crc32c(data)) + return u32(((x >> 15) | u32(x << 17)) + 0xa282ead8) + + +def u32(x): + return x & 0xffffffff + + +def make_valid_tf_name(name): + if not _VALID_OP_NAME_START.match(name): + # Must make it valid somehow, but don't want to remove stuff + name = '.' + name + return '_'.join(_VALID_OP_NAME_PART.findall(name)) From fdbc2d1f5fae77c1e569914575416ca2f7bca1ec Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Thu, 28 Mar 2019 02:08:26 +0800 Subject: [PATCH 02/32] dummy unit test --- tensorboard/writer/BUILD | 20 ++ tensorboard/writer/event_file_writer_test.py | 200 +++++++++++++++++++ 2 files changed, 220 insertions(+) create mode 100644 tensorboard/writer/event_file_writer_test.py diff --git a/tensorboard/writer/BUILD b/tensorboard/writer/BUILD index f90d280a02..72b9e74f6c 100644 --- a/tensorboard/writer/BUILD +++ b/tensorboard/writer/BUILD @@ -19,4 +19,24 @@ py_library( "//tensorboard/compat/proto:protos_all_py_pb2", "@org_pythonhosted_six", ], +) + + +py_test( + name = "event_file_writer_test", + size = "small", + srcs = ["event_file_writer_test.py"], + main = "event_file_writer_test.py", + srcs_version = "PY2AND3", + deps = [ + ":event_file_writer", + # "//tensorboard:expect_tensorflow_installed", + # "//tensorboard/backend:application", + # "//tensorboard/backend/event_processing:event_accumulator", + # "//tensorboard/backend/event_processing:event_multiplexer", + # "//tensorboard/plugins:base_plugin", + # "//tensorboard/util:test_util", + # "@org_pocoo_werkzeug", + # "@org_pythonhosted_six", + ], # TODO: I am not sure what to put here ) \ No newline at end of file diff --git 
a/tensorboard/writer/event_file_writer_test.py b/tensorboard/writer/event_file_writer_test.py new file mode 100644 index 0000000000..7ff4ab0ccd --- /dev/null +++ b/tensorboard/writer/event_file_writer_test.py @@ -0,0 +1,200 @@ +# -*- coding: utf-8 -*- +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +if __name__ == '__main__': + return + +"""Integration tests for the Writer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import os.path + +import six +from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf + +from tensorboard.backend.event_processing import plugin_event_accumulator as event_accumulator # pylint: disable=line-too-long +from tensorboard.backend.event_processing import plugin_event_multiplexer as event_multiplexer # pylint: disable=line-too-long +from tensorboard.plugins import base_plugin +from tensorboard.plugins.histogram import histograms_plugin +from tensorboard.plugins.histogram import summary +from tensorboard.util import test_util + +tf.compat.v1.disable_v2_behavior() + + +class HistogramsPluginTest(tf.test.TestCase): + + _STEPS = 99 + + _LEGACY_HISTOGRAM_TAG = 'my-ancient-histogram' + _HISTOGRAM_TAG = 'my-favorite-histogram' + _SCALAR_TAG = 'my-boring-scalars' + + _DISPLAY_NAME = 
'Important production statistics' + _DESCRIPTION = 'quod *erat* scribendum' + _HTML_DESCRIPTION = '

quod erat scribendum

' + + _RUN_WITH_LEGACY_HISTOGRAM = '_RUN_WITH_LEGACY_HISTOGRAM' + _RUN_WITH_HISTOGRAM = '_RUN_WITH_HISTOGRAM' + _RUN_WITH_SCALARS = '_RUN_WITH_SCALARS' + + def __init__(self, *args, **kwargs): + super(HistogramsPluginTest, self).__init__(*args, **kwargs) + self.logdir = None + self.plugin = None + + def set_up_with_runs(self, run_names): + self.logdir = self.get_temp_dir() + for run_name in run_names: + self.generate_run(run_name) + multiplexer = event_multiplexer.EventMultiplexer(size_guidance={ + # don't truncate my test data, please + event_accumulator.TENSORS: self._STEPS, + }) + multiplexer.AddRunsFromDirectory(self.logdir) + multiplexer.Reload() + context = base_plugin.TBContext(logdir=self.logdir, multiplexer=multiplexer) + self.plugin = histograms_plugin.HistogramsPlugin(context) + + def generate_run(self, run_name): + tf.compat.v1.reset_default_graph() + sess = tf.compat.v1.Session() + placeholder = tf.compat.v1.placeholder(tf.float32, shape=[3]) + + if run_name == self._RUN_WITH_LEGACY_HISTOGRAM: + tf.compat.v1.summary.histogram(self._LEGACY_HISTOGRAM_TAG, placeholder) + elif run_name == self._RUN_WITH_HISTOGRAM: + summary.op(self._HISTOGRAM_TAG, placeholder, + display_name=self._DISPLAY_NAME, + description=self._DESCRIPTION) + elif run_name == self._RUN_WITH_SCALARS: + tf.compat.v1.summary.scalar(self._SCALAR_TAG, tf.reduce_mean(input_tensor=placeholder)) + else: + assert False, 'Invalid run name: %r' % run_name + summ = tf.compat.v1.summary.merge_all() + + subdir = os.path.join(self.logdir, run_name) + with test_util.FileWriterCache.get(subdir) as writer: + writer.add_graph(sess.graph) + for step in xrange(self._STEPS): + feed_dict = {placeholder: [1 + step, 2 + step, 3 + step]} + s = sess.run(summ, feed_dict=feed_dict) + writer.add_summary(s, global_step=step) + + def test_routes_provided(self): + """Tests that the plugin offers the correct routes.""" + self.set_up_with_runs([self._RUN_WITH_SCALARS]) + routes = self.plugin.get_plugin_apps() + 
self.assertIsInstance(routes['/histograms'], collections.Callable) + self.assertIsInstance(routes['/tags'], collections.Callable) + + def test_index(self): + self.set_up_with_runs([self._RUN_WITH_SCALARS, + self._RUN_WITH_LEGACY_HISTOGRAM, + self._RUN_WITH_HISTOGRAM]) + self.assertEqual({ + self._RUN_WITH_SCALARS: {}, + self._RUN_WITH_LEGACY_HISTOGRAM: { + self._LEGACY_HISTOGRAM_TAG: { + 'displayName': self._LEGACY_HISTOGRAM_TAG, + 'description': '', + }, + }, + self._RUN_WITH_HISTOGRAM: { + '%s/histogram_summary' % self._HISTOGRAM_TAG: { + 'displayName': self._DISPLAY_NAME, + 'description': self._HTML_DESCRIPTION, + }, + }, + }, self.plugin.index_impl()) + + def _test_histograms(self, run_name, tag_name, should_work=True): + self.set_up_with_runs([self._RUN_WITH_SCALARS, + self._RUN_WITH_LEGACY_HISTOGRAM, + self._RUN_WITH_HISTOGRAM]) + if should_work: + self._check_histograms_result(tag_name, run_name, downsample=False) + self._check_histograms_result(tag_name, run_name, downsample=True) + else: + with six.assertRaisesRegex(self, ValueError, 'No histogram tag'): + self.plugin.histograms_impl(self._HISTOGRAM_TAG, run_name) + + def _check_histograms_result(self, tag_name, run_name, downsample): + if downsample: + downsample_to = 50 + expected_length = 50 + else: + downsample_to = None + expected_length = self._STEPS + + (data, mime_type) = self.plugin.histograms_impl(tag_name, run_name, + downsample_to=downsample_to) + self.assertEqual('application/json', mime_type) + self.assertEqual(expected_length, len(data), + 'expected %r, got %r (downsample=%r)' + % (expected_length, len(data), downsample)) + last_step_seen = None + for (i, datum) in enumerate(data): + [_unused_wall_time, step, buckets] = datum + if last_step_seen is not None: + self.assertGreater(step, last_step_seen) + last_step_seen = step + if not downsample: + self.assertEqual(i, step) + self.assertEqual(1 + step, buckets[0][0]) # first left-edge + self.assertEqual(3 + step, buckets[-1][1]) # last 
right-edge + self.assertAlmostEqual( + 3, # three items across all buckets + sum(bucket[2] for bucket in buckets)) + + def test_histograms_with_scalars(self): + self._test_histograms(self._RUN_WITH_SCALARS, self._HISTOGRAM_TAG, + should_work=False) + + def test_histograms_with_legacy_histogram(self): + self._test_histograms(self._RUN_WITH_LEGACY_HISTOGRAM, + self._LEGACY_HISTOGRAM_TAG) + + def test_histograms_with_histogram(self): + self._test_histograms(self._RUN_WITH_HISTOGRAM, + '%s/histogram_summary' % self._HISTOGRAM_TAG) + + def test_active_with_legacy_histogram(self): + self.set_up_with_runs([self._RUN_WITH_LEGACY_HISTOGRAM]) + self.assertTrue(self.plugin.is_active()) + + def test_active_with_histogram(self): + self.set_up_with_runs([self._RUN_WITH_HISTOGRAM]) + self.assertTrue(self.plugin.is_active()) + + def test_active_with_scalars(self): + self.set_up_with_runs([self._RUN_WITH_SCALARS]) + self.assertFalse(self.plugin.is_active()) + + def test_active_with_all(self): + self.set_up_with_runs([self._RUN_WITH_SCALARS, + self._RUN_WITH_LEGACY_HISTOGRAM, + self._RUN_WITH_HISTOGRAM]) + self.assertTrue(self.plugin.is_active()) + + +if __name__ == '__main__': + return + tf.test.main() From 7a4b52866bf74f682ee6e6c34ecbfb0fe49c95a2 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Thu, 28 Mar 2019 09:03:37 +0800 Subject: [PATCH 03/32] addresses code review comment (the trivial ones) --- tensorboard/writer/BUILD | 13 +++++++----- tensorboard/writer/__init__.py | 37 ---------------------------------- 2 files changed, 8 insertions(+), 42 deletions(-) diff --git a/tensorboard/writer/BUILD b/tensorboard/writer/BUILD index 72b9e74f6c..65c66bb254 100644 --- a/tensorboard/writer/BUILD +++ b/tensorboard/writer/BUILD @@ -9,9 +9,12 @@ exports_files(["LICENSE"]) py_library( name = "writer", - srcs = glob([ - "*.py", - ]), + srcs = [ + "__init__.py", + "crc32c.py", + "event_file_writer.py", + "record_writer.py", + ], srcs_version = "PY2AND3", visibility = 
["//visibility:public"], deps = [ @@ -29,7 +32,7 @@ py_test( main = "event_file_writer_test.py", srcs_version = "PY2AND3", deps = [ - ":event_file_writer", + ":writer", # "//tensorboard:expect_tensorflow_installed", # "//tensorboard/backend:application", # "//tensorboard/backend/event_processing:event_accumulator", @@ -39,4 +42,4 @@ py_test( # "@org_pocoo_werkzeug", # "@org_pythonhosted_six", ], # TODO: I am not sure what to put here -) \ No newline at end of file +) diff --git a/tensorboard/writer/__init__.py b/tensorboard/writer/__init__.py index 1014fa8d1b..e69de29bb2 100644 --- a/tensorboard/writer/__init__.py +++ b/tensorboard/writer/__init__.py @@ -1,37 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Compatibility interfaces for TensorBoard.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -USING_TF = True - -# Don't attempt to use TF at all if this import exists due to build rules. -try: - from tensorboard.compat import notf - USING_TF = False -except ImportError: - pass - -if USING_TF: - try: - import tensorflow as tf - except ImportError: - USING_TF = False - -if not USING_TF: - from tensorboard.compat import tensorflow_stub as tf From fe809157b991ce301f26f368ed2604ce87af8999 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Thu, 28 Mar 2019 13:03:05 +0800 Subject: [PATCH 04/32] need license even if no code. 
--- tensorboard/writer/__init__.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tensorboard/writer/__init__.py b/tensorboard/writer/__init__.py index e69de29bb2..88675df059 100644 --- a/tensorboard/writer/__init__.py +++ b/tensorboard/writer/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. From 10fbb485bb2b0e9605da75a1fef664083a3b3f84 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Thu, 28 Mar 2019 19:00:48 +0800 Subject: [PATCH 05/32] fix dummy test --- tensorboard/writer/event_file_writer_test.py | 354 ++++++++++--------- 1 file changed, 179 insertions(+), 175 deletions(-) diff --git a/tensorboard/writer/event_file_writer_test.py b/tensorboard/writer/event_file_writer_test.py index 7ff4ab0ccd..f10e978226 100644 --- a/tensorboard/writer/event_file_writer_test.py +++ b/tensorboard/writer/event_file_writer_test.py @@ -13,188 +13,192 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -if __name__ == '__main__': - return +# if __name__ == '__main__': +# return -"""Integration tests for the Writer.""" +# """Integration tests for the Writer.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +# from __future__ import absolute_import +# from __future__ import division +# from __future__ import print_function -import collections -import os.path +# import collections +# import os.path -import six -from six.moves import xrange # pylint: disable=redefined-builtin -import tensorflow as tf +# import six +# from six.moves import xrange # pylint: disable=redefined-builtin +# import tensorflow as tf -from tensorboard.backend.event_processing import plugin_event_accumulator as event_accumulator # pylint: disable=line-too-long -from tensorboard.backend.event_processing import plugin_event_multiplexer as event_multiplexer # pylint: disable=line-too-long -from tensorboard.plugins import base_plugin -from tensorboard.plugins.histogram import histograms_plugin -from tensorboard.plugins.histogram import summary -from tensorboard.util import test_util +# from tensorboard.backend.event_processing import plugin_event_accumulator as event_accumulator # pylint: disable=line-too-long +# from tensorboard.backend.event_processing import plugin_event_multiplexer as event_multiplexer # pylint: disable=line-too-long +# from tensorboard.plugins import base_plugin +# from tensorboard.plugins.histogram import histograms_plugin +# from tensorboard.plugins.histogram import summary +# from tensorboard.util import test_util -tf.compat.v1.disable_v2_behavior() +# tf.compat.v1.disable_v2_behavior() +import tensorflow as tf class HistogramsPluginTest(tf.test.TestCase): - - _STEPS = 99 - - _LEGACY_HISTOGRAM_TAG = 'my-ancient-histogram' - _HISTOGRAM_TAG = 'my-favorite-histogram' - _SCALAR_TAG = 'my-boring-scalars' - - _DISPLAY_NAME = 'Important 
production statistics' - _DESCRIPTION = 'quod *erat* scribendum' - _HTML_DESCRIPTION = '

quod erat scribendum

' - - _RUN_WITH_LEGACY_HISTOGRAM = '_RUN_WITH_LEGACY_HISTOGRAM' - _RUN_WITH_HISTOGRAM = '_RUN_WITH_HISTOGRAM' - _RUN_WITH_SCALARS = '_RUN_WITH_SCALARS' - - def __init__(self, *args, **kwargs): - super(HistogramsPluginTest, self).__init__(*args, **kwargs) - self.logdir = None - self.plugin = None - def set_up_with_runs(self, run_names): - self.logdir = self.get_temp_dir() - for run_name in run_names: - self.generate_run(run_name) - multiplexer = event_multiplexer.EventMultiplexer(size_guidance={ - # don't truncate my test data, please - event_accumulator.TENSORS: self._STEPS, - }) - multiplexer.AddRunsFromDirectory(self.logdir) - multiplexer.Reload() - context = base_plugin.TBContext(logdir=self.logdir, multiplexer=multiplexer) - self.plugin = histograms_plugin.HistogramsPlugin(context) - - def generate_run(self, run_name): - tf.compat.v1.reset_default_graph() - sess = tf.compat.v1.Session() - placeholder = tf.compat.v1.placeholder(tf.float32, shape=[3]) - - if run_name == self._RUN_WITH_LEGACY_HISTOGRAM: - tf.compat.v1.summary.histogram(self._LEGACY_HISTOGRAM_TAG, placeholder) - elif run_name == self._RUN_WITH_HISTOGRAM: - summary.op(self._HISTOGRAM_TAG, placeholder, - display_name=self._DISPLAY_NAME, - description=self._DESCRIPTION) - elif run_name == self._RUN_WITH_SCALARS: - tf.compat.v1.summary.scalar(self._SCALAR_TAG, tf.reduce_mean(input_tensor=placeholder)) - else: - assert False, 'Invalid run name: %r' % run_name - summ = tf.compat.v1.summary.merge_all() - - subdir = os.path.join(self.logdir, run_name) - with test_util.FileWriterCache.get(subdir) as writer: - writer.add_graph(sess.graph) - for step in xrange(self._STEPS): - feed_dict = {placeholder: [1 + step, 2 + step, 3 + step]} - s = sess.run(summ, feed_dict=feed_dict) - writer.add_summary(s, global_step=step) - - def test_routes_provided(self): - """Tests that the plugin offers the correct routes.""" - self.set_up_with_runs([self._RUN_WITH_SCALARS]) - routes = self.plugin.get_plugin_apps() - 
self.assertIsInstance(routes['/histograms'], collections.Callable) - self.assertIsInstance(routes['/tags'], collections.Callable) - - def test_index(self): - self.set_up_with_runs([self._RUN_WITH_SCALARS, - self._RUN_WITH_LEGACY_HISTOGRAM, - self._RUN_WITH_HISTOGRAM]) - self.assertEqual({ - self._RUN_WITH_SCALARS: {}, - self._RUN_WITH_LEGACY_HISTOGRAM: { - self._LEGACY_HISTOGRAM_TAG: { - 'displayName': self._LEGACY_HISTOGRAM_TAG, - 'description': '', - }, - }, - self._RUN_WITH_HISTOGRAM: { - '%s/histogram_summary' % self._HISTOGRAM_TAG: { - 'displayName': self._DISPLAY_NAME, - 'description': self._HTML_DESCRIPTION, - }, - }, - }, self.plugin.index_impl()) - - def _test_histograms(self, run_name, tag_name, should_work=True): - self.set_up_with_runs([self._RUN_WITH_SCALARS, - self._RUN_WITH_LEGACY_HISTOGRAM, - self._RUN_WITH_HISTOGRAM]) - if should_work: - self._check_histograms_result(tag_name, run_name, downsample=False) - self._check_histograms_result(tag_name, run_name, downsample=True) - else: - with six.assertRaisesRegex(self, ValueError, 'No histogram tag'): - self.plugin.histograms_impl(self._HISTOGRAM_TAG, run_name) - - def _check_histograms_result(self, tag_name, run_name, downsample): - if downsample: - downsample_to = 50 - expected_length = 50 - else: - downsample_to = None - expected_length = self._STEPS - - (data, mime_type) = self.plugin.histograms_impl(tag_name, run_name, - downsample_to=downsample_to) - self.assertEqual('application/json', mime_type) - self.assertEqual(expected_length, len(data), - 'expected %r, got %r (downsample=%r)' - % (expected_length, len(data), downsample)) - last_step_seen = None - for (i, datum) in enumerate(data): - [_unused_wall_time, step, buckets] = datum - if last_step_seen is not None: - self.assertGreater(step, last_step_seen) - last_step_seen = step - if not downsample: - self.assertEqual(i, step) - self.assertEqual(1 + step, buckets[0][0]) # first left-edge - self.assertEqual(3 + step, buckets[-1][1]) # last 
right-edge - self.assertAlmostEqual( - 3, # three items across all buckets - sum(bucket[2] for bucket in buckets)) - - def test_histograms_with_scalars(self): - self._test_histograms(self._RUN_WITH_SCALARS, self._HISTOGRAM_TAG, - should_work=False) - - def test_histograms_with_legacy_histogram(self): - self._test_histograms(self._RUN_WITH_LEGACY_HISTOGRAM, - self._LEGACY_HISTOGRAM_TAG) - - def test_histograms_with_histogram(self): - self._test_histograms(self._RUN_WITH_HISTOGRAM, - '%s/histogram_summary' % self._HISTOGRAM_TAG) - - def test_active_with_legacy_histogram(self): - self.set_up_with_runs([self._RUN_WITH_LEGACY_HISTOGRAM]) - self.assertTrue(self.plugin.is_active()) - - def test_active_with_histogram(self): - self.set_up_with_runs([self._RUN_WITH_HISTOGRAM]) - self.assertTrue(self.plugin.is_active()) - - def test_active_with_scalars(self): - self.set_up_with_runs([self._RUN_WITH_SCALARS]) - self.assertFalse(self.plugin.is_active()) - - def test_active_with_all(self): - self.set_up_with_runs([self._RUN_WITH_SCALARS, - self._RUN_WITH_LEGACY_HISTOGRAM, - self._RUN_WITH_HISTOGRAM]) - self.assertTrue(self.plugin.is_active()) - - -if __name__ == '__main__': - return - tf.test.main() + return + +# class HistogramsPluginTest(tf.test.TestCase): + +# _STEPS = 99 + +# _LEGACY_HISTOGRAM_TAG = 'my-ancient-histogram' +# _HISTOGRAM_TAG = 'my-favorite-histogram' +# _SCALAR_TAG = 'my-boring-scalars' + +# _DISPLAY_NAME = 'Important production statistics' +# _DESCRIPTION = 'quod *erat* scribendum' +# _HTML_DESCRIPTION = '

quod erat scribendum

' + +# _RUN_WITH_LEGACY_HISTOGRAM = '_RUN_WITH_LEGACY_HISTOGRAM' +# _RUN_WITH_HISTOGRAM = '_RUN_WITH_HISTOGRAM' +# _RUN_WITH_SCALARS = '_RUN_WITH_SCALARS' + +# def __init__(self, *args, **kwargs): +# super(HistogramsPluginTest, self).__init__(*args, **kwargs) +# self.logdir = None +# self.plugin = None + +# def set_up_with_runs(self, run_names): +# self.logdir = self.get_temp_dir() +# for run_name in run_names: +# self.generate_run(run_name) +# multiplexer = event_multiplexer.EventMultiplexer(size_guidance={ +# # don't truncate my test data, please +# event_accumulator.TENSORS: self._STEPS, +# }) +# multiplexer.AddRunsFromDirectory(self.logdir) +# multiplexer.Reload() +# context = base_plugin.TBContext(logdir=self.logdir, multiplexer=multiplexer) +# self.plugin = histograms_plugin.HistogramsPlugin(context) + +# def generate_run(self, run_name): +# tf.compat.v1.reset_default_graph() +# sess = tf.compat.v1.Session() +# placeholder = tf.compat.v1.placeholder(tf.float32, shape=[3]) + +# if run_name == self._RUN_WITH_LEGACY_HISTOGRAM: +# tf.compat.v1.summary.histogram(self._LEGACY_HISTOGRAM_TAG, placeholder) +# elif run_name == self._RUN_WITH_HISTOGRAM: +# summary.op(self._HISTOGRAM_TAG, placeholder, +# display_name=self._DISPLAY_NAME, +# description=self._DESCRIPTION) +# elif run_name == self._RUN_WITH_SCALARS: +# tf.compat.v1.summary.scalar(self._SCALAR_TAG, tf.reduce_mean(input_tensor=placeholder)) +# else: +# assert False, 'Invalid run name: %r' % run_name +# summ = tf.compat.v1.summary.merge_all() + +# subdir = os.path.join(self.logdir, run_name) +# with test_util.FileWriterCache.get(subdir) as writer: +# writer.add_graph(sess.graph) +# for step in xrange(self._STEPS): +# feed_dict = {placeholder: [1 + step, 2 + step, 3 + step]} +# s = sess.run(summ, feed_dict=feed_dict) +# writer.add_summary(s, global_step=step) + +# def test_routes_provided(self): +# """Tests that the plugin offers the correct routes.""" +# self.set_up_with_runs([self._RUN_WITH_SCALARS]) +# 
routes = self.plugin.get_plugin_apps() +# self.assertIsInstance(routes['/histograms'], collections.Callable) +# self.assertIsInstance(routes['/tags'], collections.Callable) + +# def test_index(self): +# self.set_up_with_runs([self._RUN_WITH_SCALARS, +# self._RUN_WITH_LEGACY_HISTOGRAM, +# self._RUN_WITH_HISTOGRAM]) +# self.assertEqual({ +# self._RUN_WITH_SCALARS: {}, +# self._RUN_WITH_LEGACY_HISTOGRAM: { +# self._LEGACY_HISTOGRAM_TAG: { +# 'displayName': self._LEGACY_HISTOGRAM_TAG, +# 'description': '', +# }, +# }, +# self._RUN_WITH_HISTOGRAM: { +# '%s/histogram_summary' % self._HISTOGRAM_TAG: { +# 'displayName': self._DISPLAY_NAME, +# 'description': self._HTML_DESCRIPTION, +# }, +# }, +# }, self.plugin.index_impl()) + +# def _test_histograms(self, run_name, tag_name, should_work=True): +# self.set_up_with_runs([self._RUN_WITH_SCALARS, +# self._RUN_WITH_LEGACY_HISTOGRAM, +# self._RUN_WITH_HISTOGRAM]) +# if should_work: +# self._check_histograms_result(tag_name, run_name, downsample=False) +# self._check_histograms_result(tag_name, run_name, downsample=True) +# else: +# with six.assertRaisesRegex(self, ValueError, 'No histogram tag'): +# self.plugin.histograms_impl(self._HISTOGRAM_TAG, run_name) + +# def _check_histograms_result(self, tag_name, run_name, downsample): +# if downsample: +# downsample_to = 50 +# expected_length = 50 +# else: +# downsample_to = None +# expected_length = self._STEPS + +# (data, mime_type) = self.plugin.histograms_impl(tag_name, run_name, +# downsample_to=downsample_to) +# self.assertEqual('application/json', mime_type) +# self.assertEqual(expected_length, len(data), +# 'expected %r, got %r (downsample=%r)' +# % (expected_length, len(data), downsample)) +# last_step_seen = None +# for (i, datum) in enumerate(data): +# [_unused_wall_time, step, buckets] = datum +# if last_step_seen is not None: +# self.assertGreater(step, last_step_seen) +# last_step_seen = step +# if not downsample: +# self.assertEqual(i, step) +# self.assertEqual(1 + 
step, buckets[0][0]) # first left-edge +# self.assertEqual(3 + step, buckets[-1][1]) # last right-edge +# self.assertAlmostEqual( +# 3, # three items across all buckets +# sum(bucket[2] for bucket in buckets)) + +# def test_histograms_with_scalars(self): +# self._test_histograms(self._RUN_WITH_SCALARS, self._HISTOGRAM_TAG, +# should_work=False) + +# def test_histograms_with_legacy_histogram(self): +# self._test_histograms(self._RUN_WITH_LEGACY_HISTOGRAM, +# self._LEGACY_HISTOGRAM_TAG) + +# def test_histograms_with_histogram(self): +# self._test_histograms(self._RUN_WITH_HISTOGRAM, +# '%s/histogram_summary' % self._HISTOGRAM_TAG) + +# def test_active_with_legacy_histogram(self): +# self.set_up_with_runs([self._RUN_WITH_LEGACY_HISTOGRAM]) +# self.assertTrue(self.plugin.is_active()) + +# def test_active_with_histogram(self): +# self.set_up_with_runs([self._RUN_WITH_HISTOGRAM]) +# self.assertTrue(self.plugin.is_active()) + +# def test_active_with_scalars(self): +# self.set_up_with_runs([self._RUN_WITH_SCALARS]) +# self.assertFalse(self.plugin.is_active()) + +# def test_active_with_all(self): +# self.set_up_with_runs([self._RUN_WITH_SCALARS, +# self._RUN_WITH_LEGACY_HISTOGRAM, +# self._RUN_WITH_HISTOGRAM]) +# self.assertTrue(self.plugin.is_active()) + + +# if __name__ == '__main__': +# tf.test.main() From 72b7f1299a84679ae125da9ca1adf17cfda686d0 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Thu, 28 Mar 2019 21:48:35 +0800 Subject: [PATCH 06/32] remove S3, relative import --- tensorboard/writer/event_file_writer.py | 4 +- tensorboard/writer/record_writer.py | 67 ------------------------- 2 files changed, 2 insertions(+), 69 deletions(-) diff --git a/tensorboard/writer/event_file_writer.py b/tensorboard/writer/event_file_writer.py index 59097a9abb..615a61d99d 100644 --- a/tensorboard/writer/event_file_writer.py +++ b/tensorboard/writer/event_file_writer.py @@ -25,8 +25,8 @@ import six -from ..proto import event_pb2 -from .record_writer import RecordWriter, 
directory_check +from tensorboard.compat.proto import event_pb2 +from tensorboard.writer.record_writer import RecordWriter, directory_check class EventsWriter(object): diff --git a/tensorboard/writer/record_writer.py b/tensorboard/writer/record_writer.py index cca51f8317..f2a75f2958 100644 --- a/tensorboard/writer/record_writer.py +++ b/tensorboard/writer/record_writer.py @@ -23,11 +23,6 @@ import os.path import re import struct -try: - import boto3 - S3_ENABLED = True -except ImportError: - S3_ENABLED = False from .crc32c import crc32c @@ -35,18 +30,6 @@ _VALID_OP_NAME_START = re.compile('^[A-Za-z0-9.]') _VALID_OP_NAME_PART = re.compile('[A-Za-z0-9_.\\-/]+') -# Registry of writer factories by prefix backends. -# -# Currently supports "s3://" URLs for S3 based on boto and falls -# back to local filesystem. -REGISTERED_FACTORIES = {} - - -def register_writer_factory(prefix, factory): - if ':' in prefix: - raise ValueError('prefix cannot contain a :') - REGISTERED_FACTORIES[prefix] = factory - def directory_check(path): '''Initialize the directory for log files.''' @@ -69,56 +52,6 @@ def open_file(path): return open(path, 'wb') -class S3RecordWriter(object): - """Writes tensorboard protocol buffer files to S3.""" - - def __init__(self, path): - if not S3_ENABLED: - raise ImportError("boto3 must be installed for S3 support.") - self.path = path - self.buffer = io.BytesIO() - - def __del__(self): - self.close() - - def bucket_and_path(self): - path = self.path - if path.startswith("s3://"): - path = path[len("s3://"):] - bp = path.split("/") - bucket = bp[0] - path = path[1 + len(bucket):] - return bucket, path - - def write(self, val): - self.buffer.write(val) - - def flush(self): - s3 = boto3.client('s3') - bucket, path = self.bucket_and_path() - upload_buffer = copy.copy(self.buffer) - upload_buffer.seek(0) - s3.upload_fileobj(upload_buffer, bucket, path) - - def close(self): - self.flush() - - -class S3RecordWriterFactory(object): - """Factory for event protocol 
buffer files to S3.""" - - def open(self, path): - return S3RecordWriter(path) - - def directory_check(self, path): - # S3 doesn't need directories created before files are added - # so we can just skip this check - pass - - -register_writer_factory("s3", S3RecordWriterFactory()) - - class RecordWriter(object): def __init__(self, path): self._name_to_tf_name = {} From f60d55db20f9be7e35eb4ae2818689e4edfbdef8 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Thu, 28 Mar 2019 22:46:17 +0800 Subject: [PATCH 07/32] remove remaining S3 code --- tensorboard/writer/record_writer.py | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/tensorboard/writer/record_writer.py b/tensorboard/writer/record_writer.py index f2a75f2958..b8fdfbb103 100644 --- a/tensorboard/writer/record_writer.py +++ b/tensorboard/writer/record_writer.py @@ -31,34 +31,13 @@ _VALID_OP_NAME_PART = re.compile('[A-Za-z0-9_.\\-/]+') -def directory_check(path): - '''Initialize the directory for log files.''' - try: - prefix = path.split(':')[0] - factory = REGISTERED_FACTORIES[prefix] - return factory.directory_check(path) - except KeyError: - if not os.path.exists(path): - os.makedirs(path) - - -def open_file(path): - '''Open a writer for outputting event files.''' - try: - prefix = path.split(':')[0] - factory = REGISTERED_FACTORIES[prefix] - return factory.open(path) - except KeyError: - return open(path, 'wb') - - class RecordWriter(object): def __init__(self, path): self._name_to_tf_name = {} self._tf_names = set() self.path = path self._writer = None - self._writer = open_file(path) + self._writer = open(path) def write(self, event_str): w = self._writer.write From c29823e74d8eb69956fda1c0981bc35f10f9e03b Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Fri, 29 Mar 2019 02:47:14 +0800 Subject: [PATCH 08/32] use correct dependency --- tensorboard/BUILD | 1 + tensorboard/compat/BUILD | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tensorboard/BUILD b/tensorboard/BUILD index 7d45e61e88..4ec236ccf2 100644 --- a/tensorboard/BUILD +++ b/tensorboard/BUILD @@ -43,6 +43,7 @@ py_library( ":notebook", ":program", "//tensorboard/summary", + "//tensorboard/writer", ], ) diff --git a/tensorboard/compat/BUILD b/tensorboard/compat/BUILD index 04aa332df1..9a12c44bdb 100644 --- a/tensorboard/compat/BUILD +++ b/tensorboard/compat/BUILD @@ -44,7 +44,6 @@ py_library( deps = [ ":compat", "//tensorboard/compat/tensorflow_stub", - "//tensorboard/writer", ], ) From 58c32e7a3b129d440860e687c89eb0c82b5e2be3 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Fri, 29 Mar 2019 08:26:17 +0800 Subject: [PATCH 09/32] remove irrelevant proto for writer --- tensorboard/compat/proto/layout.proto | 1 - tensorboard/compat/proto/plugin_pr_curve.proto | 1 - tensorboard/compat/proto/plugin_text.proto | 1 - 3 files changed, 3 deletions(-) delete mode 120000 tensorboard/compat/proto/layout.proto delete mode 120000 tensorboard/compat/proto/plugin_pr_curve.proto delete mode 120000 tensorboard/compat/proto/plugin_text.proto diff --git a/tensorboard/compat/proto/layout.proto b/tensorboard/compat/proto/layout.proto deleted file mode 120000 index 14e484a998..0000000000 --- a/tensorboard/compat/proto/layout.proto +++ /dev/null @@ -1 +0,0 @@ -../../plugins/custom_scalar/layout.proto \ No newline at end of file diff --git a/tensorboard/compat/proto/plugin_pr_curve.proto b/tensorboard/compat/proto/plugin_pr_curve.proto deleted file mode 120000 index 320b7e557f..0000000000 --- a/tensorboard/compat/proto/plugin_pr_curve.proto +++ /dev/null @@ -1 +0,0 @@ -../../plugins/pr_curve/plugin_data.proto \ No newline at end of file diff --git a/tensorboard/compat/proto/plugin_text.proto b/tensorboard/compat/proto/plugin_text.proto deleted file mode 120000 index 2ccbacb3d2..0000000000 --- a/tensorboard/compat/proto/plugin_text.proto +++ /dev/null @@ -1 +0,0 @@ -../../plugins/text/plugin_data.proto \ No newline at end of file From 
0cab12e6ecd7ae5a0a66cd23713b8825f0c04ccb Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Fri, 29 Mar 2019 10:46:11 +0800 Subject: [PATCH 10/32] fix docstring --- tensorboard/writer/event_file_writer.py | 33 +++++++++++-------------- tensorboard/writer/record_writer.py | 3 +++ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/tensorboard/writer/event_file_writer.py b/tensorboard/writer/event_file_writer.py index 615a61d99d..cbf99b4921 100644 --- a/tensorboard/writer/event_file_writer.py +++ b/tensorboard/writer/event_file_writer.py @@ -26,7 +26,7 @@ import six from tensorboard.compat.proto import event_pb2 -from tensorboard.writer.record_writer import RecordWriter, directory_check +from tensorboard.writer.record_writer import RecordWriter class EventsWriter(object): @@ -35,26 +35,26 @@ class EventsWriter(object): def __init__(self, file_prefix, filename_suffix=''): ''' Events files have a name of the form - '/some/file/path/events.out.tfevents.[timestamp].[hostname]' + '/some/file/path/[file_prefix].out.tfevents.[timestamp].[hostname]' + + Args: + file_prefix: The string that will be prepended to + the filename of the event file. + filename_suffix: The string that will be appended to + the filename of the event file. ''' self._file_name = file_prefix + ".out.tfevents." + str(time.time())[:10] + "." +\ socket.gethostname() + filename_suffix - self._num_outstanding_events = 0 - self._py_recordio_writer = RecordWriter(self._file_name) - # Initialize an event instance. self._event = event_pb2.Event() - self._event.wall_time = time.time() - self._lock = threading.Lock() - self.write_event(self._event) def write_event(self, event): - '''Append "event" to the file.''' + '''Append "protobuf event" to the file.''' # Check if event is of type event_pb2.Event proto. if not isinstance(event, event_pb2.Event): @@ -84,26 +84,21 @@ def close(self): class EventFileWriter(object): """Writes `Event` protocol buffers to an event file. 
+ The `EventFileWriter` class creates an event file in the specified directory, and asynchronously writes Event protocol buffers to the file. The Event file is encoded using the tfrecord format, which is similar to RecordIO. - @@__init__ - @@add_event - @@flush - @@close """ def __init__(self, logdir, max_queue=10, flush_secs=120, filename_suffix=''): """Creates a `EventFileWriter` and an event file to write to. + On construction the summary writer creates a new event file in `logdir`. This event file will contain `Event` protocol buffers, which are written to disk via the add_event method. The other arguments to the constructor control the asynchronous writes to the event file: - * `flush_secs`: How often, in seconds, to flush the added summaries - and events to disk. - * `max_queue`: Maximum number of summaries or events pending to be - written to disk before one of the 'add' calls block. + Args: logdir: A string. Directory where event file will be written. max_queue: Integer. Size of the queue for pending events and summaries. @@ -111,7 +106,6 @@ def __init__(self, logdir, max_queue=10, flush_secs=120, filename_suffix=''): pending events and summaries to disk. """ self._logdir = logdir - directory_check(self._logdir) self._event_queue = six.moves.queue.Queue(max_queue) self._ev_writer = EventsWriter(os.path.join( self._logdir, "events"), filename_suffix) @@ -128,6 +122,7 @@ def get_logdir(self): def reopen(self): """Reopens the EventFileWriter. + Can be called after `close()` to add more events in the same directory. The events will go into a new events file and a new write/flush worker is created. Does nothing if the EventFileWriter was not closed. @@ -141,6 +136,7 @@ def reopen(self): def add_event(self, event): """Adds an event to the event file. + Args: event: An `Event` protocol buffer. """ @@ -149,6 +145,7 @@ def add_event(self, event): def flush(self): """Flushes the event file to disk. 
+ Call this method to make sure that all pending events have been written to disk. """ diff --git a/tensorboard/writer/record_writer.py b/tensorboard/writer/record_writer.py index b8fdfbb103..c4e0501cbf 100644 --- a/tensorboard/writer/record_writer.py +++ b/tensorboard/writer/record_writer.py @@ -32,6 +32,9 @@ class RecordWriter(object): + """write encoded protobuf along with its checksum. + + """ def __init__(self, path): self._name_to_tf_name = {} self._tf_names = set() From a6b3ffb248d4e7800320929b8b59f14288062c2f Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Fri, 29 Mar 2019 11:02:57 +0800 Subject: [PATCH 11/32] remove deps --- tensorboard/compat/proto/BUILD | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorboard/compat/proto/BUILD b/tensorboard/compat/proto/BUILD index f5504b24e9..647829f982 100644 --- a/tensorboard/compat/proto/BUILD +++ b/tensorboard/compat/proto/BUILD @@ -25,12 +25,9 @@ tb_proto_library( "event.proto", "function.proto", "graph.proto", - "layout.proto", "meta_graph.proto", "node_def.proto", "op_def.proto", - "plugin_pr_curve.proto", - "plugin_text.proto", "resource_handle.proto", "rewriter_config.proto", "saver.proto", From 944752f64d407b244a4490d64b6e676d8b74b6eb Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Mon, 1 Apr 2019 01:38:32 +0800 Subject: [PATCH 12/32] add unit test --- tensorboard/writer/event_file_writer_test.py | 215 +++---------------- tensorboard/writer/record_writer.py | 2 +- 2 files changed, 34 insertions(+), 183 deletions(-) diff --git a/tensorboard/writer/event_file_writer_test.py b/tensorboard/writer/event_file_writer_test.py index f10e978226..6c4884a813 100644 --- a/tensorboard/writer/event_file_writer_test.py +++ b/tensorboard/writer/event_file_writer_test.py @@ -13,192 +13,43 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -# if __name__ == '__main__': -# return # """Integration tests for the Writer.""" -# from __future__ import absolute_import -# from __future__ import division -# from __future__ import print_function - -# import collections -# import os.path - -# import six -# from six.moves import xrange # pylint: disable=redefined-builtin -# import tensorflow as tf - -# from tensorboard.backend.event_processing import plugin_event_accumulator as event_accumulator # pylint: disable=line-too-long -# from tensorboard.backend.event_processing import plugin_event_multiplexer as event_multiplexer # pylint: disable=line-too-long -# from tensorboard.plugins import base_plugin -# from tensorboard.plugins.histogram import histograms_plugin -# from tensorboard.plugins.histogram import summary -# from tensorboard.util import test_util +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function # tf.compat.v1.disable_v2_behavior() +import glob +import os import tensorflow as tf - -class HistogramsPluginTest(tf.test.TestCase): - def set_up_with_runs(self, run_names): - return - -# class HistogramsPluginTest(tf.test.TestCase): - -# _STEPS = 99 - -# _LEGACY_HISTOGRAM_TAG = 'my-ancient-histogram' -# _HISTOGRAM_TAG = 'my-favorite-histogram' -# _SCALAR_TAG = 'my-boring-scalars' - -# _DISPLAY_NAME = 'Important production statistics' -# _DESCRIPTION = 'quod *erat* scribendum' -# _HTML_DESCRIPTION = '

quod erat scribendum

' - -# _RUN_WITH_LEGACY_HISTOGRAM = '_RUN_WITH_LEGACY_HISTOGRAM' -# _RUN_WITH_HISTOGRAM = '_RUN_WITH_HISTOGRAM' -# _RUN_WITH_SCALARS = '_RUN_WITH_SCALARS' - -# def __init__(self, *args, **kwargs): -# super(HistogramsPluginTest, self).__init__(*args, **kwargs) -# self.logdir = None -# self.plugin = None - -# def set_up_with_runs(self, run_names): -# self.logdir = self.get_temp_dir() -# for run_name in run_names: -# self.generate_run(run_name) -# multiplexer = event_multiplexer.EventMultiplexer(size_guidance={ -# # don't truncate my test data, please -# event_accumulator.TENSORS: self._STEPS, -# }) -# multiplexer.AddRunsFromDirectory(self.logdir) -# multiplexer.Reload() -# context = base_plugin.TBContext(logdir=self.logdir, multiplexer=multiplexer) -# self.plugin = histograms_plugin.HistogramsPlugin(context) - -# def generate_run(self, run_name): -# tf.compat.v1.reset_default_graph() -# sess = tf.compat.v1.Session() -# placeholder = tf.compat.v1.placeholder(tf.float32, shape=[3]) - -# if run_name == self._RUN_WITH_LEGACY_HISTOGRAM: -# tf.compat.v1.summary.histogram(self._LEGACY_HISTOGRAM_TAG, placeholder) -# elif run_name == self._RUN_WITH_HISTOGRAM: -# summary.op(self._HISTOGRAM_TAG, placeholder, -# display_name=self._DISPLAY_NAME, -# description=self._DESCRIPTION) -# elif run_name == self._RUN_WITH_SCALARS: -# tf.compat.v1.summary.scalar(self._SCALAR_TAG, tf.reduce_mean(input_tensor=placeholder)) -# else: -# assert False, 'Invalid run name: %r' % run_name -# summ = tf.compat.v1.summary.merge_all() - -# subdir = os.path.join(self.logdir, run_name) -# with test_util.FileWriterCache.get(subdir) as writer: -# writer.add_graph(sess.graph) -# for step in xrange(self._STEPS): -# feed_dict = {placeholder: [1 + step, 2 + step, 3 + step]} -# s = sess.run(summ, feed_dict=feed_dict) -# writer.add_summary(s, global_step=step) - -# def test_routes_provided(self): -# """Tests that the plugin offers the correct routes.""" -# self.set_up_with_runs([self._RUN_WITH_SCALARS]) -# 
routes = self.plugin.get_plugin_apps() -# self.assertIsInstance(routes['/histograms'], collections.Callable) -# self.assertIsInstance(routes['/tags'], collections.Callable) - -# def test_index(self): -# self.set_up_with_runs([self._RUN_WITH_SCALARS, -# self._RUN_WITH_LEGACY_HISTOGRAM, -# self._RUN_WITH_HISTOGRAM]) -# self.assertEqual({ -# self._RUN_WITH_SCALARS: {}, -# self._RUN_WITH_LEGACY_HISTOGRAM: { -# self._LEGACY_HISTOGRAM_TAG: { -# 'displayName': self._LEGACY_HISTOGRAM_TAG, -# 'description': '', -# }, -# }, -# self._RUN_WITH_HISTOGRAM: { -# '%s/histogram_summary' % self._HISTOGRAM_TAG: { -# 'displayName': self._DISPLAY_NAME, -# 'description': self._HTML_DESCRIPTION, -# }, -# }, -# }, self.plugin.index_impl()) - -# def _test_histograms(self, run_name, tag_name, should_work=True): -# self.set_up_with_runs([self._RUN_WITH_SCALARS, -# self._RUN_WITH_LEGACY_HISTOGRAM, -# self._RUN_WITH_HISTOGRAM]) -# if should_work: -# self._check_histograms_result(tag_name, run_name, downsample=False) -# self._check_histograms_result(tag_name, run_name, downsample=True) -# else: -# with six.assertRaisesRegex(self, ValueError, 'No histogram tag'): -# self.plugin.histograms_impl(self._HISTOGRAM_TAG, run_name) - -# def _check_histograms_result(self, tag_name, run_name, downsample): -# if downsample: -# downsample_to = 50 -# expected_length = 50 -# else: -# downsample_to = None -# expected_length = self._STEPS - -# (data, mime_type) = self.plugin.histograms_impl(tag_name, run_name, -# downsample_to=downsample_to) -# self.assertEqual('application/json', mime_type) -# self.assertEqual(expected_length, len(data), -# 'expected %r, got %r (downsample=%r)' -# % (expected_length, len(data), downsample)) -# last_step_seen = None -# for (i, datum) in enumerate(data): -# [_unused_wall_time, step, buckets] = datum -# if last_step_seen is not None: -# self.assertGreater(step, last_step_seen) -# last_step_seen = step -# if not downsample: -# self.assertEqual(i, step) -# self.assertEqual(1 + 
step, buckets[0][0]) # first left-edge -# self.assertEqual(3 + step, buckets[-1][1]) # last right-edge -# self.assertAlmostEqual( -# 3, # three items across all buckets -# sum(bucket[2] for bucket in buckets)) - -# def test_histograms_with_scalars(self): -# self._test_histograms(self._RUN_WITH_SCALARS, self._HISTOGRAM_TAG, -# should_work=False) - -# def test_histograms_with_legacy_histogram(self): -# self._test_histograms(self._RUN_WITH_LEGACY_HISTOGRAM, -# self._LEGACY_HISTOGRAM_TAG) - -# def test_histograms_with_histogram(self): -# self._test_histograms(self._RUN_WITH_HISTOGRAM, -# '%s/histogram_summary' % self._HISTOGRAM_TAG) - -# def test_active_with_legacy_histogram(self): -# self.set_up_with_runs([self._RUN_WITH_LEGACY_HISTOGRAM]) -# self.assertTrue(self.plugin.is_active()) - -# def test_active_with_histogram(self): -# self.set_up_with_runs([self._RUN_WITH_HISTOGRAM]) -# self.assertTrue(self.plugin.is_active()) - -# def test_active_with_scalars(self): -# self.set_up_with_runs([self._RUN_WITH_SCALARS]) -# self.assertFalse(self.plugin.is_active()) - -# def test_active_with_all(self): -# self.set_up_with_runs([self._RUN_WITH_SCALARS, -# self._RUN_WITH_LEGACY_HISTOGRAM, -# self._RUN_WITH_HISTOGRAM]) -# self.assertTrue(self.plugin.is_active()) - - -# if __name__ == '__main__': -# tf.test.main() +from tensorboard.writer.event_file_writer import EventFileWriter +from tensorboard.compat.proto import event_pb2, summary_pb2 +from tensorboard.compat.proto.summary_pb2 import Summary +from google.protobuf import json_format + +class EventFileWriterTest(tf.test.TestCase): + def __init__(self, *args, **kwargs): + super(EventFileWriterTest, self).__init__(*args, **kwargs) + + def test_event_file_writer_roundtrip(self): + _TAGNAME = 'dummy' + _DUMMY_VALUE = 42 + logdir = self.get_temp_dir() + w = EventFileWriter(logdir) + summary = Summary(value=[Summary.Value(tag=_TAGNAME, simple_value=_DUMMY_VALUE)]) + fakeevent = event_pb2.Event(summary=summary) + w.add_event(fakeevent) + 
w.close() + event_files = sorted(glob.glob(os.path.join(self.get_temp_dir(), '*'))) + self.assertEqual(len(event_files), 1) + events = list(tf.compat.v1.train.summary_iterator(event_files[0])) + event_from_disk = events[1] + summary_from_disk = event_from_disk.summary + self.assertProtoEquals(summary.SerializeToString(), summary_from_disk.SerializeToString()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorboard/writer/record_writer.py b/tensorboard/writer/record_writer.py index c4e0501cbf..57e94fe99d 100644 --- a/tensorboard/writer/record_writer.py +++ b/tensorboard/writer/record_writer.py @@ -40,7 +40,7 @@ def __init__(self, path): self._tf_names = set() self.path = path self._writer = None - self._writer = open(path) + self._writer = open(path, 'wb') def write(self, event_str): w = self._writer.write From aec879a498133d6c6d127bb6e54240169adb4a60 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Mon, 1 Apr 2019 01:52:34 +0800 Subject: [PATCH 13/32] fix for remaining reviews --- tensorboard/writer/BUILD | 11 ++--------- tensorboard/writer/crc32c.py | 8 ++++---- tensorboard/writer/record_writer.py | 2 +- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/tensorboard/writer/BUILD b/tensorboard/writer/BUILD index 65c66bb254..842053a9ef 100644 --- a/tensorboard/writer/BUILD +++ b/tensorboard/writer/BUILD @@ -33,13 +33,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":writer", - # "//tensorboard:expect_tensorflow_installed", - # "//tensorboard/backend:application", - # "//tensorboard/backend/event_processing:event_accumulator", - # "//tensorboard/backend/event_processing:event_multiplexer", - # "//tensorboard/plugins:base_plugin", - # "//tensorboard/util:test_util", - # "@org_pocoo_werkzeug", - # "@org_pythonhosted_six", - ], # TODO: I am not sure what to put here + "//tensorboard:expect_tensorflow_installed", + ], ) diff --git a/tensorboard/writer/crc32c.py b/tensorboard/writer/crc32c.py index 461334cd15..eb955b81fb 100644 --- 
a/tensorboard/writer/crc32c.py +++ b/tensorboard/writer/crc32c.py @@ -16,7 +16,7 @@ import array -CRC_TABLE = ( +_CRC_TABLE = ( 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, @@ -84,7 +84,7 @@ ) -CRC_INIT = 0 +_CRC_INIT = 0 _MASK = 0xFFFFFFFF @@ -108,7 +108,7 @@ def crc_update(crc, data): crc ^= _MASK for b in buf: table_index = (crc ^ b) & 0xff - crc = (CRC_TABLE[table_index] ^ (crc >> 8)) & _MASK + crc = (_CRC_TABLE[table_index] ^ (crc >> 8)) & _MASK return crc ^ _MASK @@ -135,4 +135,4 @@ def crc32c(data): Returns: 32-bit CRC-32C checksum of data as long. """ - return crc_finalize(crc_update(CRC_INIT, data)) + return crc_finalize(crc_update(_CRC_INIT, data)) diff --git a/tensorboard/writer/record_writer.py b/tensorboard/writer/record_writer.py index 57e94fe99d..ea646886c4 100644 --- a/tensorboard/writer/record_writer.py +++ b/tensorboard/writer/record_writer.py @@ -38,7 +38,7 @@ class RecordWriter(object): def __init__(self, path): self._name_to_tf_name = {} self._tf_names = set() - self.path = path + self._path = path self._writer = None self._writer = open(path, 'wb') From 7bf21f599c93931de581f6035fcf1b4f53efbead Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Tue, 2 Apr 2019 01:11:08 +0800 Subject: [PATCH 14/32] rewrite record_writer --- tensorboard/writer/record_writer.py | 63 ++++++++++------------------- 1 file changed, 21 insertions(+), 42 deletions(-) diff --git a/tensorboard/writer/record_writer.py b/tensorboard/writer/record_writer.py index ea646886c4..ecb1962aac 100644 --- a/tensorboard/writer/record_writer.py +++ b/tensorboard/writer/record_writer.py @@ -13,48 +13,34 @@ # limitations under the License. # ============================================================================== -""" -To write tf_record into file. Here we use it for tensorboard's event writting. 
-The code was borrowed from https://github.com/TeamHG-Memex/tensorboard_logger -""" - -import copy -import io -import os.path -import re import struct - from .crc32c import crc32c - - -_VALID_OP_NAME_START = re.compile('^[A-Za-z0-9.]') -_VALID_OP_NAME_PART = re.compile('[A-Za-z0-9_.\\-/]+') - - class RecordWriter(object): - """write encoded protobuf along with its checksum. - - """ - def __init__(self, path): - self._name_to_tf_name = {} - self._tf_names = set() - self._path = path - self._writer = None - self._writer = open(path, 'wb') - - def write(self, event_str): - w = self._writer.write - header = struct.pack('Q', len(event_str)) - w(header) - w(struct.pack('I', masked_crc32c(header))) - w(event_str) - w(struct.pack('I', masked_crc32c(event_str))) + def __init__(self, logfile): + self._writer = open(logfile, 'wb') + + # Format of a single record: + # uint64 length + # uint32 masked crc of length + # byte data[length] + # uint32 masked crc of data + def write(self, data): + header_len = struct.pack('Q', len(data)) + header_len_crc = struct.pack('I', masked_crc32c(header_len)) + footer_crc = struct.pack('I', masked_crc32c(data)) + self._writer.write(header_len + header_len_crc + data + footer_crc) def flush(self): - self._writer.flush() + if self._writer is not None: + self._writer.flush() + else: + raise OSError('file writer is missing') def close(self): - self._writer.close() + if self._writer is not None: + self._writer.close() + else: + raise OSError('file writer is missing') def masked_crc32c(data): @@ -64,10 +50,3 @@ def masked_crc32c(data): def u32(x): return x & 0xffffffff - - -def make_valid_tf_name(name): - if not _VALID_OP_NAME_START.match(name): - # Must make it valid somehow, but don't want to remove stuff - name = '.' 
+ name - return '_'.join(_VALID_OP_NAME_PART.findall(name)) From d90406a2e1db9f83bbafee2c7621e6474902381c Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Tue, 2 Apr 2019 21:07:57 +0800 Subject: [PATCH 15/32] use crc in tensorflow_stub --- tensorboard/writer/BUILD | 2 +- tensorboard/writer/crc32c.py | 138 ---------------------------- tensorboard/writer/record_writer.py | 2 +- 3 files changed, 2 insertions(+), 140 deletions(-) delete mode 100644 tensorboard/writer/crc32c.py diff --git a/tensorboard/writer/BUILD b/tensorboard/writer/BUILD index 842053a9ef..2a5dfe6343 100644 --- a/tensorboard/writer/BUILD +++ b/tensorboard/writer/BUILD @@ -11,7 +11,6 @@ py_library( name = "writer", srcs = [ "__init__.py", - "crc32c.py", "event_file_writer.py", "record_writer.py", ], @@ -20,6 +19,7 @@ py_library( deps = [ "//tensorboard:expect_numpy_installed", "//tensorboard/compat/proto:protos_all_py_pb2", + "//tensorboard/compat/tensorflow_stub", "@org_pythonhosted_six", ], ) diff --git a/tensorboard/writer/crc32c.py b/tensorboard/writer/crc32c.py deleted file mode 100644 index eb955b81fb..0000000000 --- a/tensorboard/writer/crc32c.py +++ /dev/null @@ -1,138 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import array - - -_CRC_TABLE = ( - 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, - 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, - 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, - 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, - 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, - 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, - 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, - 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, - 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, - 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, - 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, - 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, - 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, - 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, - 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, - 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, - 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, - 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, - 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, - 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, - 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, - 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, - 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, - 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, - 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, - 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, - 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, - 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, - 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, - 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, - 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, - 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, - 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, - 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, - 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, - 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, - 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, - 0x55326b08, 0xa759e80b, 
0xb4091bff, 0x466298fc, - 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, - 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, - 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, - 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, - 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, - 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, - 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, - 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, - 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, - 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, - 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, - 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, - 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, - 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, - 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, - 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, - 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, - 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, - 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, - 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, - 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, - 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, - 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, - 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, - 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, - 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351, -) - - -_CRC_INIT = 0 - -_MASK = 0xFFFFFFFF - - -def crc_update(crc, data): - """Update CRC-32C checksum with data. - - Args: - crc: 32-bit checksum to update as long. - data: byte array, string or iterable over bytes. - - Returns: - 32-bit updated CRC-32C as long. - """ - - if type(data) != array.array or data.itemsize != 1: - buf = array.array("B", data) - else: - buf = data - - crc ^= _MASK - for b in buf: - table_index = (crc ^ b) & 0xff - crc = (_CRC_TABLE[table_index] ^ (crc >> 8)) & _MASK - return crc ^ _MASK - - -def crc_finalize(crc): - """Finalize CRC-32C checksum. - - This function should be called as last step of crc calculation. 
- - Args: - crc: 32-bit checksum as long. - - Returns: - finalized 32-bit checksum as long - """ - return crc & _MASK - - -def crc32c(data): - """Compute CRC-32C checksum of the data. - - Args: - data: byte array, string or iterable over bytes. - - Returns: - 32-bit CRC-32C checksum of data as long. - """ - return crc_finalize(crc_update(_CRC_INIT, data)) diff --git a/tensorboard/writer/record_writer.py b/tensorboard/writer/record_writer.py index ecb1962aac..54ccbb0b62 100644 --- a/tensorboard/writer/record_writer.py +++ b/tensorboard/writer/record_writer.py @@ -14,7 +14,7 @@ # ============================================================================== import struct -from .crc32c import crc32c +from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import crc32c class RecordWriter(object): def __init__(self, logfile): self._writer = open(logfile, 'wb') From f4129ca2cd8cb5ac3c63fcf9c98d48da2ad25222 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Thu, 4 Apr 2019 21:08:16 +0800 Subject: [PATCH 16/32] reduce duplicated code --- tensorboard/writer/record_writer.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tensorboard/writer/record_writer.py b/tensorboard/writer/record_writer.py index 54ccbb0b62..33409d86cc 100644 --- a/tensorboard/writer/record_writer.py +++ b/tensorboard/writer/record_writer.py @@ -14,7 +14,11 @@ # ============================================================================== import struct -from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import crc32c +from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import ( + crc32c, + masked_crc32c, + u32, + ) class RecordWriter(object): def __init__(self, logfile): self._writer = open(logfile, 'wb') @@ -41,12 +45,3 @@ def close(self): self._writer.close() else: raise OSError('file writer is missing') - - -def masked_crc32c(data): - x = u32(crc32c(data)) - return u32(((x >> 15) | u32(x << 17)) + 0xa282ead8) - - -def u32(x): - return x & 0xffffffff 
From 4a1940dda3f543a4bec3845fbf4884fe4f06d7d1 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Thu, 4 Apr 2019 22:41:20 +0800 Subject: [PATCH 17/32] remove useless import --- tensorboard/writer/record_writer.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tensorboard/writer/record_writer.py b/tensorboard/writer/record_writer.py index 33409d86cc..c0999aa544 100644 --- a/tensorboard/writer/record_writer.py +++ b/tensorboard/writer/record_writer.py @@ -14,11 +14,8 @@ # ============================================================================== import struct -from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import ( - crc32c, - masked_crc32c, - u32, - ) +from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import masked_crc32c + class RecordWriter(object): def __init__(self, logfile): self._writer = open(logfile, 'wb') From f8fdd6d7c90f6862204d7cdcdd1c5e6048575de8 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Sat, 6 Apr 2019 01:19:01 +0800 Subject: [PATCH 18/32] re-add code to make unexisting dir --- tensorboard/writer/event_file_writer.py | 3 ++- tensorboard/writer/record_writer.py | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tensorboard/writer/event_file_writer.py b/tensorboard/writer/event_file_writer.py index cbf99b4921..ade0f8e0d5 100644 --- a/tensorboard/writer/event_file_writer.py +++ b/tensorboard/writer/event_file_writer.py @@ -26,7 +26,7 @@ import six from tensorboard.compat.proto import event_pb2 -from tensorboard.writer.record_writer import RecordWriter +from tensorboard.writer.record_writer import RecordWriter, directory_check class EventsWriter(object): @@ -106,6 +106,7 @@ def __init__(self, logdir, max_queue=10, flush_secs=120, filename_suffix=''): pending events and summaries to disk. 
""" self._logdir = logdir + directory_check(self._logdir) self._event_queue = six.moves.queue.Queue(max_queue) self._ev_writer = EventsWriter(os.path.join( self._logdir, "events"), filename_suffix) diff --git a/tensorboard/writer/record_writer.py b/tensorboard/writer/record_writer.py index c0999aa544..4d999c9f97 100644 --- a/tensorboard/writer/record_writer.py +++ b/tensorboard/writer/record_writer.py @@ -13,9 +13,16 @@ # limitations under the License. # ============================================================================== +import os import struct from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import masked_crc32c + +def directory_check(path): + '''Initialize the directory for log files.''' + if not os.path.exists(path): + os.makedirs(path) + class RecordWriter(object): def __init__(self, logfile): self._writer = open(logfile, 'wb') From d95bfa5b02e65db711567128a6d97bd45d9035c1 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Sun, 7 Apr 2019 01:57:12 +0800 Subject: [PATCH 19/32] fix for simple review comments --- tensorboard/writer/__init__.py | 2 +- tensorboard/writer/event_file_writer.py | 2 +- tensorboard/writer/event_file_writer_test.py | 2 -- tensorboard/writer/record_writer.py | 14 ++++++++++---- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/tensorboard/writer/__init__.py b/tensorboard/writer/__init__.py index 88675df059..61b9129e83 100644 --- a/tensorboard/writer/__init__.py +++ b/tensorboard/writer/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tensorboard/writer/event_file_writer.py b/tensorboard/writer/event_file_writer.py index ade0f8e0d5..985b21c58c 100644 --- a/tensorboard/writer/event_file_writer.py +++ b/tensorboard/writer/event_file_writer.py @@ -207,7 +207,7 @@ def run(self): else: event = self._queue.get(False) - if event == self._shutdown_signal: + if event is self._shutdown_signal: return self._ev_writer.write_event(event) self._has_pending_events = True diff --git a/tensorboard/writer/event_file_writer_test.py b/tensorboard/writer/event_file_writer_test.py index 6c4884a813..e9045cd712 100644 --- a/tensorboard/writer/event_file_writer_test.py +++ b/tensorboard/writer/event_file_writer_test.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,7 +19,6 @@ from __future__ import division from __future__ import print_function -# tf.compat.v1.disable_v2_behavior() import glob import os diff --git a/tensorboard/writer/record_writer.py b/tensorboard/writer/record_writer.py index 4d999c9f97..94d3b5f64c 100644 --- a/tensorboard/writer/record_writer.py +++ b/tensorboard/writer/record_writer.py @@ -19,12 +19,18 @@ def directory_check(path): - '''Initialize the directory for log files.''' + """Initialize the directory for log files.""" if not os.path.exists(path): os.makedirs(path) class RecordWriter(object): + """Write encoded protobuf to a file with packing defined in tensorflow""" def __init__(self, logfile): + """Open a file to keep the tensorboard records. + + Args: + logfile: (string) The location where the file will be opened. 
+ """ self._writer = open(logfile, 'wb') # Format of a single record: @@ -33,10 +39,10 @@ def __init__(self, logfile): # byte data[length] # uint32 masked crc of data def write(self, data): - header_len = struct.pack('Q', len(data)) - header_len_crc = struct.pack('I', masked_crc32c(header_len)) + header = struct.pack('Q', len(data)) + header_crc = struct.pack('I', masked_crc32c(header)) footer_crc = struct.pack('I', masked_crc32c(data)) - self._writer.write(header_len + header_len_crc + data + footer_crc) + self._writer.write(header + header_crc + data + footer_crc) def flush(self): if self._writer is not None: From c9c152914b83bd860fe3dca8fbb5797eddb5a0b2 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Sun, 7 Apr 2019 02:26:36 +0800 Subject: [PATCH 20/32] more simple fix --- tensorboard/writer/BUILD | 4 +--- tensorboard/writer/record_writer.py | 18 ++++++------------ 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/tensorboard/writer/BUILD b/tensorboard/writer/BUILD index 2a5dfe6343..1d2d711261 100644 --- a/tensorboard/writer/BUILD +++ b/tensorboard/writer/BUILD @@ -1,6 +1,5 @@ # Description: -# TensorBoard, a dashboard for investigating TensorFlow - +# Writer interfaces for TensorBoard if tensorflow is not present package(default_visibility = ["//tensorboard:internal"]) licenses(["notice"]) # Apache 2.0 @@ -17,7 +16,6 @@ py_library( srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ - "//tensorboard:expect_numpy_installed", "//tensorboard/compat/proto:protos_all_py_pb2", "//tensorboard/compat/tensorflow_stub", "@org_pythonhosted_six", diff --git a/tensorboard/writer/record_writer.py b/tensorboard/writer/record_writer.py index 94d3b5f64c..5df68f77e6 100644 --- a/tensorboard/writer/record_writer.py +++ b/tensorboard/writer/record_writer.py @@ -33,25 +33,19 @@ def __init__(self, logfile): """ self._writer = open(logfile, 'wb') - # Format of a single record: + # Format of a single record: (little-endian) # uint64 length # uint32 
masked crc of length # byte data[length] # uint32 masked crc of data def write(self, data): - header = struct.pack('Q', len(data)) - header_crc = struct.pack('I', masked_crc32c(header)) - footer_crc = struct.pack('I', masked_crc32c(data)) + header = struct.pack(' Date: Sun, 7 Apr 2019 22:43:52 +0800 Subject: [PATCH 21/32] fix time format remove `directory_check` and `reopen` --- tensorboard/writer/event_file_writer.py | 21 ++++----------------- tensorboard/writer/record_writer.py | 6 ------ 2 files changed, 4 insertions(+), 23 deletions(-) diff --git a/tensorboard/writer/event_file_writer.py b/tensorboard/writer/event_file_writer.py index 985b21c58c..a4e294a8e0 100644 --- a/tensorboard/writer/event_file_writer.py +++ b/tensorboard/writer/event_file_writer.py @@ -26,7 +26,7 @@ import six from tensorboard.compat.proto import event_pb2 -from tensorboard.writer.record_writer import RecordWriter, directory_check +from tensorboard.writer.record_writer import RecordWriter class EventsWriter(object): @@ -43,7 +43,7 @@ def __init__(self, file_prefix, filename_suffix=''): filename_suffix: The string that will be appended to the filename of the event file. ''' - self._file_name = file_prefix + ".out.tfevents." + str(time.time())[:10] + "." +\ + self._file_name = file_prefix + ".out.tfevents." + "%010d" % time.time() + "." +\ socket.gethostname() + filename_suffix self._num_outstanding_events = 0 self._py_recordio_writer = RecordWriter(self._file_name) @@ -106,7 +106,8 @@ def __init__(self, logdir, max_queue=10, flush_secs=120, filename_suffix=''): pending events and summaries to disk. 
""" self._logdir = logdir - directory_check(self._logdir) + if not os.path.exists(logdir): + os.makedirs(logdir) self._event_queue = six.moves.queue.Queue(max_queue) self._ev_writer = EventsWriter(os.path.join( self._logdir, "events"), filename_suffix) @@ -121,20 +122,6 @@ def get_logdir(self): """Returns the directory where event file will be written.""" return self._logdir - def reopen(self): - """Reopens the EventFileWriter. - - Can be called after `close()` to add more events in the same directory. - The events will go into a new events file and a new write/flush worker - is created. Does nothing if the EventFileWriter was not closed. - """ - if self._closed: - self._closed = False - self._worker = _EventLoggerThread( - self._event_queue, self._ev_writer, self._flush_secs - ) - self._worker.start() - def add_event(self, event): """Adds an event to the event file. diff --git a/tensorboard/writer/record_writer.py b/tensorboard/writer/record_writer.py index 5df68f77e6..1d68f9b942 100644 --- a/tensorboard/writer/record_writer.py +++ b/tensorboard/writer/record_writer.py @@ -13,16 +13,10 @@ # limitations under the License. 
# ============================================================================== -import os import struct from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import masked_crc32c -def directory_check(path): - """Initialize the directory for log files.""" - if not os.path.exists(path): - os.makedirs(path) - class RecordWriter(object): """Write encoded protobuf to a file with packing defined in tensorflow""" def __init__(self, logfile): From 15ea771c8c2c8880ead23ec3eed25f9312a8beaf Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Tue, 9 Apr 2019 09:02:35 +0800 Subject: [PATCH 22/32] - unique filename - dummy tests --- tensorboard/writer/event_file_writer.py | 42 +++++++++++++++++- tensorboard/writer/event_file_writer_test.py | 33 +++++++++++++- tensorboard/writer/record_writer_test.py | 46 ++++++++++++++++++++ 3 files changed, 118 insertions(+), 3 deletions(-) create mode 100644 tensorboard/writer/record_writer_test.py diff --git a/tensorboard/writer/event_file_writer.py b/tensorboard/writer/event_file_writer.py index a4e294a8e0..47e4c16419 100644 --- a/tensorboard/writer/event_file_writer.py +++ b/tensorboard/writer/event_file_writer.py @@ -18,7 +18,7 @@ from __future__ import division from __future__ import print_function -import os.path +import os import socket import threading import time @@ -29,6 +29,21 @@ from tensorboard.writer.record_writer import RecordWriter +class AtomicCounter(object): + def __init__(self, initial_value): + self._value = initial_value + self._lock = threading.Lock() + + def get(self): + with self._lock: + try: + return self._value + finally: + self._value += 1 + +_global_uid = AtomicCounter(0) + + class EventsWriter(object): '''Writes `Event` protocol buffers to an event file.''' @@ -44,12 +59,13 @@ def __init__(self, file_prefix, filename_suffix=''): the filename of the event file. ''' self._file_name = file_prefix + ".out.tfevents." + "%010d" % time.time() + "." 
+\ - socket.gethostname() + filename_suffix + socket.gethostname()+ ".%s.%s" % (os.getpid(), _global_uid.get()) + filename_suffix self._num_outstanding_events = 0 self._py_recordio_writer = RecordWriter(self._file_name) # Initialize an event instance. self._event = event_pb2.Event() self._event.wall_time = time.time() + self._event.file_version='brain.Event:2' self._lock = threading.Lock() self.write_event(self._event) @@ -214,3 +230,25 @@ def run(self): self._has_pending_events = False # Do it again in flush_secs. self._next_event_flush_time = now + self._flush_secs + + +class _AsyncWriter(object): + def __init__(self, writer): + pass + + def write(self, data): + pass + + def flush(self): + pass + + def close(self): + pass + + +class _AsyncWriterThread(object): + def __init__(self): + pass + + def run(self): + pass \ No newline at end of file diff --git a/tensorboard/writer/event_file_writer_test.py b/tensorboard/writer/event_file_writer_test.py index e9045cd712..887451fb12 100644 --- a/tensorboard/writer/event_file_writer_test.py +++ b/tensorboard/writer/event_file_writer_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -24,6 +24,8 @@ import os import tensorflow as tf from tensorboard.writer.event_file_writer import EventFileWriter +from tensorboard.writer.event_file_writer import _AsyncWriter +from tensorboard.writer.event_file_writer import _AsyncWriterThread from tensorboard.compat.proto import event_pb2, summary_pb2 from tensorboard.compat.proto.summary_pb2 import Summary from google.protobuf import json_format @@ -48,6 +50,35 @@ def test_event_file_writer_roundtrip(self): summary_from_disk = event_from_disk.summary self.assertProtoEquals(summary.SerializeToString(), summary_from_disk.SerializeToString()) + def test_setting_filename_suffix_works(self): + pass + + def test_async_writer_without_write(self): + pass + + def test_async_writer_write_once(self): + pass + + def test_async_writer_write_queue_full(self): + # call writer multiple times + pass + + def test_async_writer_write_one_slot_queue(self): + # set max_queue = 1 + pass + + def test_async_writer_auto_flushing(self): + pass + + def test_async_writer_flush_before_flush_secs(self): + pass + + def test_async_writer_close_triggers_flush(self): + pass + + def test_write_after_async_writer_closed(self): + # expect nothing is written + pass if __name__ == '__main__': tf.test.main() diff --git a/tensorboard/writer/record_writer_test.py b/tensorboard/writer/record_writer_test.py new file mode 100644 index 0000000000..f5042beb09 --- /dev/null +++ b/tensorboard/writer/record_writer_test.py @@ -0,0 +1,46 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# """Integration tests for the Writer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +import glob +import os +import tensorflow as tf +from tensorboard.writer.record_writer import RecordWriter +from tensorboard.compat.proto import event_pb2, summary_pb2 +from tensorboard.compat.proto.summary_pb2 import Summary +from google.protobuf import json_format + +class RecordWriterTest(tf.test.TestCase): + def __init__(self, *args, **kwargs): + super(RecordWriterTest, self).__init__(*args, **kwargs) + + def test_expect_bytes_written(self): + pass + + def test_empty_record(self): + pass + + def test_record_writer_roundtrip(self): + pass + + +if __name__ == '__main__': + tf.test.main() From 69bd4b500b3e99de732010ba1dba5343123eeaa3 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Fri, 12 Apr 2019 22:23:04 +0800 Subject: [PATCH 23/32] add many tests and simplifies async --- tensorboard/writer/BUILD | 12 ++ tensorboard/writer/event_file_writer.py | 207 ++++++++----------- tensorboard/writer/event_file_writer_test.py | 125 +++++++++-- tensorboard/writer/record_writer.py | 4 + tensorboard/writer/record_writer_test.py | 37 +++- 5 files changed, 242 insertions(+), 143 deletions(-) diff --git a/tensorboard/writer/BUILD b/tensorboard/writer/BUILD index 1d2d711261..9de13f0cd7 100644 --- a/tensorboard/writer/BUILD +++ b/tensorboard/writer/BUILD @@ -34,3 +34,15 @@ py_test( "//tensorboard:expect_tensorflow_installed", ], ) + +py_test( + name = "record_writer_test", + size = "small", + srcs = ["record_writer_test.py"], + main = "record_writer_test.py", + srcs_version = "PY2AND3", + deps = [ + ":writer", + "//tensorboard:expect_tensorflow_installed", + ], +) \ No newline at end of file diff --git 
a/tensorboard/writer/event_file_writer.py b/tensorboard/writer/event_file_writer.py index 47e4c16419..b407138ad2 100644 --- a/tensorboard/writer/event_file_writer.py +++ b/tensorboard/writer/event_file_writer.py @@ -41,64 +41,11 @@ def get(self): finally: self._value += 1 -_global_uid = AtomicCounter(0) - - -class EventsWriter(object): - '''Writes `Event` protocol buffers to an event file.''' - - def __init__(self, file_prefix, filename_suffix=''): - ''' - Events files have a name of the form - '/some/file/path/[file_prefix].out.tfevents.[timestamp].[hostname]' - - Args: - file_prefix: The string that will be prepended to - the filename of the event file. - filename_suffix: The string that will be appended to - the filename of the event file. - ''' - self._file_name = file_prefix + ".out.tfevents." + "%010d" % time.time() + "." +\ - socket.gethostname()+ ".%s.%s" % (os.getpid(), _global_uid.get()) + filename_suffix - self._num_outstanding_events = 0 - self._py_recordio_writer = RecordWriter(self._file_name) - # Initialize an event instance. - self._event = event_pb2.Event() - self._event.wall_time = time.time() - self._event.file_version='brain.Event:2' - self._lock = threading.Lock() - self.write_event(self._event) - - def write_event(self, event): - '''Append "protobuf event" to the file.''' - - # Check if event is of type event_pb2.Event proto. 
- if not isinstance(event, event_pb2.Event): - raise TypeError("Expected an event_pb2.Event proto, " - " but got %s" % type(event)) - return self._write_serialized_event(event.SerializeToString()) - def _write_serialized_event(self, event_str): - with self._lock: - self._num_outstanding_events += 1 - self._py_recordio_writer.write(event_str) - - def flush(self): - '''Flushes the event file to disk.''' - with self._lock: - self._num_outstanding_events = 0 - self._py_recordio_writer.flush() - return True - - def close(self): - '''Call self.flush().''' - return_value = self.flush() - with self._lock: - self._py_recordio_writer.close() - return return_value +_global_uid = AtomicCounter(0) -class EventFileWriter(object): +class EventFileWriter(object): # Owned by FileWriter """Writes `Event` protocol buffers to an event file. The `EventFileWriter` class creates an event file in the specified directory, @@ -106,7 +53,7 @@ class EventFileWriter(object): is encoded using the tfrecord format, which is similar to RecordIO. """ - def __init__(self, logdir, max_queue=10, flush_secs=120, filename_suffix=''): + def __init__(self, logdir, max_queue_size=10, flush_secs=120, filename_suffix=''): """Creates a `EventFileWriter` and an event file to write to. On construction the summary writer creates a new event file in `logdir`. @@ -117,22 +64,21 @@ def __init__(self, logdir, max_queue=10, flush_secs=120, filename_suffix=''): Args: logdir: A string. Directory where event file will be written. - max_queue: Integer. Size of the queue for pending events and summaries. + max_queue_size: Integer. Size of the queue for pending events and summaries. flush_secs: Number. How often, in seconds, to flush the pending events and summaries to disk. 
""" self._logdir = logdir if not os.path.exists(logdir): os.makedirs(logdir) - self._event_queue = six.moves.queue.Queue(max_queue) - self._ev_writer = EventsWriter(os.path.join( - self._logdir, "events"), filename_suffix) - self._flush_secs = flush_secs - self._closed = False - self._worker = _EventLoggerThread(self._event_queue, self._ev_writer, - flush_secs) + self._file_name = logdir + "/events.out.tfevents.%010d.%s.%s.%s" %\ + (time.time(), socket.gethostname(), os.getpid(), _global_uid.get()) +\ + filename_suffix + self._async_writer = _AsyncWriter(RecordWriter(self._file_name), max_queue_size, flush_secs) - self._worker.start() + # Initialize an event instance. + _event = event_pb2.Event(wall_time=time.time(), file_version='brain.Event:2') + self.add_event(_event) def get_logdir(self): """Returns the directory where event file will be written.""" @@ -144,8 +90,10 @@ def add_event(self, event): Args: event: An `Event` protocol buffer. """ - if not self._closed: - self._event_queue.put(event) + if not isinstance(event, event_pb2.Event): + raise TypeError("Expected an event_pb2.Event proto, " + " but got %s" % type(event)) + self._async_writer.write(event.SerializeToString()) def flush(self): """Flushes the event file to disk. @@ -153,102 +101,115 @@ def flush(self): Call this method to make sure that all pending events have been written to disk. """ - if not self._closed: - self._event_queue.join() - self._ev_writer.flush() + self._async_writer.flush() def close(self): """Performs a final flush of the event file to disk, stops the write/flush worker and closes the file. Call this method when you do not need the summary writer anymore. 
""" - if not self._closed: - self.flush() - self._worker.stop() - self._ev_writer.close() - self._closed = True + self._async_writer.flush() + self._async_writer.close() + +class _AsyncWriter(object): # _AsyncWriter + '''Writes bytes to an event file.''' + + def __init__(self, writerinstance, max_queue_size=20, flush_secs=120, dummy_delay=False): + """Writes bytes to an event file. + Args: + writerinstance: A RecordWriter instance + max_queue_size: Integer. Size of the queue for pending events and summaries. + flush_secs: Number. How often, in seconds, to flush the + pending events and summaries to disk. + """ + self._writer = writerinstance + self._byte_queue = six.moves.queue.Queue(max_queue_size) + self._worker = _AsyncWriterThread(self._byte_queue, self._writer, flush_secs, dummy_delay) + self._lock = threading.Lock() + self._worker.start() -class _EventLoggerThread(threading.Thread): + def write(self, bytestream): + '''Append bytes to the queue.''' + with self._lock: + self._byte_queue.put(bytestream) + + def flush(self): + '''Write all the enqueued bytestream before this flush call to disk. + Block until all the above bytestream are written. + ''' + with self._lock: + self._writer.flush() + + def close(self): + '''Call self.flush().''' + self._worker.stop() + self.flush() + with self._lock: + self._writer.close() + assert self._writer.closed + + +class _AsyncWriterThread(threading.Thread): """Thread that logs events.""" - def __init__(self, queue, ev_writer, flush_secs): - """Creates an _EventLoggerThread. + def __init__(self, queue, record_writer, flush_secs, dummy_delay): + """Creates an _AsyncWriterThread. Args: - queue: A Queue from which to dequeue events. - ev_writer: An event writer. Used to log brain events for - the visualizer. + queue: A Queue from which to dequeue data. + record_writer: An protobuf record_writer writer. flush_secs: How often, in seconds, to flush the pending file to disk. 
""" threading.Thread.__init__(self) self.daemon = True self._queue = queue - self._ev_writer = ev_writer + self._record_writer = record_writer self._flush_secs = flush_secs - # The first event will be flushed immediately. - self._next_event_flush_time = 0 - self._has_pending_events = False + # The first data will be flushed immediately. + self._next_flush_time = 0 + self._has_pending_data = False self._shutdown_signal = object() + self._dummy_delay = dummy_delay def stop(self): self._queue.put(self._shutdown_signal) self.join() def run(self): - # Here wait on the queue until an event appears, or till the next + # Here wait on the queue until an data appears, or till the next # time to flush the writer, whichever is earlier. If we have an - # event, write it. If not, an empty queue exception will be raised + # data, write it. If not, an empty queue exception will be raised # and we can proceed to flush the writer. while True: now = time.time() - queue_wait_duration = self._next_event_flush_time - now - event = None + queue_wait_duration = self._next_flush_time - now + data = None try: if queue_wait_duration > 0: - event = self._queue.get(True, queue_wait_duration) + data = self._queue.get(True, queue_wait_duration) else: - event = self._queue.get(False) + data = self._queue.get(False) - if event is self._shutdown_signal: + if data is self._shutdown_signal: return - self._ev_writer.write_event(event) - self._has_pending_events = True + self._record_writer.write(data) + if self._dummy_delay: + time.sleep(0.1) + self._has_pending_data = True except six.moves.queue.Empty: pass finally: - if event: + if data: self._queue.task_done() now = time.time() - if now > self._next_event_flush_time: - if self._has_pending_events: - # Small optimization - if there are no pending events, + if now > self._next_flush_time: + if self._has_pending_data: + # Small optimization - if there are no pending data, # there's no need to flush, since each flush can be # expensive (e.g. 
uploading a new file to a server). - self._ev_writer.flush() - self._has_pending_events = False + self._record_writer.flush() + self._has_pending_data = False # Do it again in flush_secs. - self._next_event_flush_time = now + self._flush_secs - - -class _AsyncWriter(object): - def __init__(self, writer): - pass - - def write(self, data): - pass - - def flush(self): - pass - - def close(self): - pass - - -class _AsyncWriterThread(object): - def __init__(self): - pass - - def run(self): - pass \ No newline at end of file + self._next_flush_time = now + self._flush_secs diff --git a/tensorboard/writer/event_file_writer_test.py b/tensorboard/writer/event_file_writer_test.py index 887451fb12..ed982adf30 100644 --- a/tensorboard/writer/event_file_writer_test.py +++ b/tensorboard/writer/event_file_writer_test.py @@ -22,7 +22,10 @@ import glob import os +import shutil import tensorflow as tf +import tempfile +import time from tensorboard.writer.event_file_writer import EventFileWriter from tensorboard.writer.event_file_writer import _AsyncWriter from tensorboard.writer.event_file_writer import _AsyncWriterThread @@ -30,6 +33,7 @@ from tensorboard.compat.proto.summary_pb2 import Summary from google.protobuf import json_format + class EventFileWriterTest(tf.test.TestCase): def __init__(self, *args, **kwargs): super(EventFileWriterTest, self).__init__(*args, **kwargs) @@ -37,13 +41,14 @@ def __init__(self, *args, **kwargs): def test_event_file_writer_roundtrip(self): _TAGNAME = 'dummy' _DUMMY_VALUE = 42 - logdir = self.get_temp_dir() + logdir = tempfile.mkdtemp() w = EventFileWriter(logdir) summary = Summary(value=[Summary.Value(tag=_TAGNAME, simple_value=_DUMMY_VALUE)]) fakeevent = event_pb2.Event(summary=summary) w.add_event(fakeevent) w.close() - event_files = sorted(glob.glob(os.path.join(self.get_temp_dir(), '*'))) + event_files = sorted(glob.glob(os.path.join(logdir, '*'))) + print(event_files) self.assertEqual(len(event_files), 1) events = 
list(tf.compat.v1.train.summary_iterator(event_files[0])) event_from_disk = events[1] @@ -51,34 +56,124 @@ def test_event_file_writer_roundtrip(self): self.assertProtoEquals(summary.SerializeToString(), summary_from_disk.SerializeToString()) def test_setting_filename_suffix_works(self): - pass + logdir = tempfile.mkdtemp() + + w = EventFileWriter(logdir, filename_suffix='.event_horizon') + w.close() + event_files = sorted(glob.glob(os.path.join(logdir, '*'))) + assert event_files[0].split('.')[-1] == 'event_horizon' def test_async_writer_without_write(self): - pass - + for i in range(100): + logdir = tempfile.mkdtemp() + w = EventFileWriter(logdir) + w.close() + event_files = sorted(glob.glob(os.path.join(logdir, '*'))) + events = list(tf.compat.v1.train.summary_iterator(event_files[0])) + assert len(events) == 1 + assert events[0].file_version == "brain.Event:2" + def test_async_writer_write_once(self): - pass + logfile = tempfile.NamedTemporaryFile().name + w = _AsyncWriter(open(logfile, 'wb')) + random_bytes = bytearray(os.urandom(64)) + w.write(random_bytes) + w.close() + with open(logfile, 'rb') as f: + assert f.read() == random_bytes def test_async_writer_write_queue_full(self): - # call writer multiple times - pass + logfile = tempfile.NamedTemporaryFile().name + w = _AsyncWriter(open(logfile, 'wb'), dummy_delay=True) + random_bytes = bytearray(os.urandom(64)) + repeat = 100 + for i in range(repeat): + w.write(random_bytes) + w.close() + with open(logfile, 'rb') as f: + assert f.read() == random_bytes * repeat def test_async_writer_write_one_slot_queue(self): - # set max_queue = 1 - pass + logfile = tempfile.NamedTemporaryFile().name + w = _AsyncWriter(open(logfile, 'wb'), max_queue_size=1, dummy_delay=True) + random_bytes = bytearray(os.urandom(64)) + repeat = 10 # faster + for i in range(repeat): + w.write(random_bytes) + w.close() + with open(logfile, 'rb') as f: + assert f.read() == random_bytes * repeat + + # write ................................... 
+ # flush ---------^---------^---------^ + # numelinQ 12345678901234567890 (expected, because the IO overhead) + # Make strict comparion for the flushing result is possible, but it requires accessing + # the queue inside the async writer. So I write the test to simulate real write and flush. + # In my experiment, the tolerance can be set as high to roughly to 0.95. + # I set 0.9 here in case the CI is too slow. def test_async_writer_auto_flushing(self): - pass + logfile = tempfile.NamedTemporaryFile().name + flush_timer = 1 + tolerance = 0.9 # The undelying writer need time to complete. + w = _AsyncWriter(open(logfile, 'wb'), max_queue_size=500, flush_secs=flush_timer) + random_bytes = bytearray(os.urandom(64)) + repeat = 100 + for i in range(repeat): + w.write(random_bytes) + time.sleep(0.1) + if i % (flush_timer * 10) == 0: + with open(get_copy_by_OS(logfile), 'rb') as f: + nbytes = len(f.read()) + # print(i, nbytes, i * len(random_bytes) * tolerance, nbytes / (1+i * len(random_bytes))) + assert nbytes >= i * len(random_bytes) * tolerance + w.close() + + # make sure all data is written + with open(logfile, 'rb') as f: + assert f.read() == random_bytes * repeat def test_async_writer_flush_before_flush_secs(self): - pass + # This test equals test_async_writer_write_once, + # since flush() is implicitly called by close() and the default flush time is 120 secs. + logfile = tempfile.NamedTemporaryFile().name + w = _AsyncWriter(open(logfile, 'wb')) + random_bytes = bytearray(os.urandom(64)) + w.write(random_bytes) + w.flush() # flush() is implicitly called by close() + w.close() + with open(logfile, 'rb') as f: + assert f.read() == random_bytes def test_async_writer_close_triggers_flush(self): - pass + # This test equals test_async_writer_write_once, + # since flush() is implicitly called by close() and the default flush time is 120 secs. 
+ logfile = tempfile.NamedTemporaryFile().name + w = _AsyncWriter(open(logfile, 'wb')) + random_bytes = bytearray(os.urandom(64)) + w.write(random_bytes) + w.close() + with open(logfile, 'rb') as f: + assert f.read() == random_bytes def test_write_after_async_writer_closed(self): - # expect nothing is written - pass + logfile = tempfile.NamedTemporaryFile().name + w = _AsyncWriter(open(logfile, 'wb')) + random_bytes = bytearray(os.urandom(64)) + w.write(random_bytes) + w.close() + + w.write(random_bytes) + # nothing is written to the file after close + with open(logfile, 'rb') as f: + assert f.read() == random_bytes + + +def get_copy_by_OS(oldfilename): + newfilename = tempfile.NamedTemporaryFile().name + shutil.copy(oldfilename, newfilename) + return newfilename + if __name__ == '__main__': tf.test.main() diff --git a/tensorboard/writer/record_writer.py b/tensorboard/writer/record_writer.py index 1d68f9b942..e0f47aa955 100644 --- a/tensorboard/writer/record_writer.py +++ b/tensorboard/writer/record_writer.py @@ -43,3 +43,7 @@ def flush(self): def close(self): self._writer.close() + + @property + def closed(self): + return self._writer.closed diff --git a/tensorboard/writer/record_writer_test.py b/tensorboard/writer/record_writer_test.py index f5042beb09..96ba155634 100644 --- a/tensorboard/writer/record_writer_test.py +++ b/tensorboard/writer/record_writer_test.py @@ -19,28 +19,55 @@ from __future__ import division from __future__ import print_function - import glob import os +import tempfile import tensorflow as tf from tensorboard.writer.record_writer import RecordWriter from tensorboard.compat.proto import event_pb2, summary_pb2 from tensorboard.compat.proto.summary_pb2 import Summary +from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import PyRecordReader_New from google.protobuf import json_format + class RecordWriterTest(tf.test.TestCase): def __init__(self, *args, **kwargs): super(RecordWriterTest, self).__init__(*args, **kwargs) def 
test_expect_bytes_written(self): - pass + logfile = tempfile.NamedTemporaryFile().name + w = RecordWriter(logfile) + random_bytes = bytearray(os.urandom(64)) + w.write(random_bytes) + w.close() + with open(logfile, 'rb') as f: + assert len(f.read()) == (8 + 4 + 64 + 4) # uint64+uint32+data+uint32 + + # crc'ed file content of empty data + # b'\x00\x00\x00\x00\x00\x00\x00\x00)\x03\x98\x07\xd8\xea\x82\xa2' def test_empty_record(self): - pass + logfile = tempfile.NamedTemporaryFile().name + w = RecordWriter(logfile) + random_bytes = bytearray(os.urandom(0)) + w.write(random_bytes) + w.close() + r = PyRecordReader_New(logfile) + r.read() + assert r.event_strs[0] == random_bytes def test_record_writer_roundtrip(self): - pass + logfile = tempfile.NamedTemporaryFile().name + w = RecordWriter(logfile) + random_bytes = bytearray(os.urandom(64)) + w.write(random_bytes) + w.close() + with open(logfile, 'rb') as f: + print(f.read()) + r = PyRecordReader_New(logfile) + r.read() + assert r.event_strs[0] == random_bytes + - if __name__ == '__main__': tf.test.main() From fa4d7a71327c75254683daa0d286051eedebd20c Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Sat, 13 Apr 2019 00:39:29 +0800 Subject: [PATCH 24/32] remove tf dependency --- tensorboard/writer/event_file_writer_test.py | 23 ++++++++++++-------- tensorboard/writer/record_writer_test.py | 6 ++--- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/tensorboard/writer/event_file_writer_test.py b/tensorboard/writer/event_file_writer_test.py index ed982adf30..203d525812 100644 --- a/tensorboard/writer/event_file_writer_test.py +++ b/tensorboard/writer/event_file_writer_test.py @@ -23,7 +23,7 @@ import glob import os import shutil -import tensorflow as tf +import unittest import tempfile import time from tensorboard.writer.event_file_writer import EventFileWriter @@ -31,10 +31,11 @@ from tensorboard.writer.event_file_writer import _AsyncWriterThread from tensorboard.compat.proto import event_pb2, summary_pb2 
from tensorboard.compat.proto.summary_pb2 import Summary +from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import PyRecordReader_New from google.protobuf import json_format -class EventFileWriterTest(tf.test.TestCase): +class EventFileWriterTest(unittest.TestCase): def __init__(self, *args, **kwargs): super(EventFileWriterTest, self).__init__(*args, **kwargs) @@ -48,12 +49,12 @@ def test_event_file_writer_roundtrip(self): w.add_event(fakeevent) w.close() event_files = sorted(glob.glob(os.path.join(logdir, '*'))) - print(event_files) self.assertEqual(len(event_files), 1) - events = list(tf.compat.v1.train.summary_iterator(event_files[0])) + r = PyRecordReader_New(event_files[0]) + r.read() + events = r.event_strs event_from_disk = events[1] - summary_from_disk = event_from_disk.summary - self.assertProtoEquals(summary.SerializeToString(), summary_from_disk.SerializeToString()) + assert fakeevent.SerializeToString() == event_from_disk def test_setting_filename_suffix_works(self): logdir = tempfile.mkdtemp() @@ -69,9 +70,13 @@ def test_async_writer_without_write(self): w = EventFileWriter(logdir) w.close() event_files = sorted(glob.glob(os.path.join(logdir, '*'))) - events = list(tf.compat.v1.train.summary_iterator(event_files[0])) + r = PyRecordReader_New(event_files[0]) + r.read() + events = r.event_strs assert len(events) == 1 - assert events[0].file_version == "brain.Event:2" + s = event_pb2.Event() + s.ParseFromString(events[0]) + assert s.file_version == "brain.Event:2" def test_async_writer_write_once(self): logfile = tempfile.NamedTemporaryFile().name @@ -176,4 +181,4 @@ def get_copy_by_OS(oldfilename): if __name__ == '__main__': - tf.test.main() + unittest.main() diff --git a/tensorboard/writer/record_writer_test.py b/tensorboard/writer/record_writer_test.py index 96ba155634..f7baac9488 100644 --- a/tensorboard/writer/record_writer_test.py +++ b/tensorboard/writer/record_writer_test.py @@ -22,7 +22,7 @@ import glob import os import tempfile -import 
tensorflow as tf +import unittest from tensorboard.writer.record_writer import RecordWriter from tensorboard.compat.proto import event_pb2, summary_pb2 from tensorboard.compat.proto.summary_pb2 import Summary @@ -30,7 +30,7 @@ from google.protobuf import json_format -class RecordWriterTest(tf.test.TestCase): +class RecordWriterTest(unittest.TestCase): def __init__(self, *args, **kwargs): super(RecordWriterTest, self).__init__(*args, **kwargs) @@ -70,4 +70,4 @@ def test_record_writer_roundtrip(self): if __name__ == '__main__': - tf.test.main() + unittest.main() From bafa04dd90a34434717619fa661d3a7eabbac23a Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Sat, 13 Apr 2019 00:40:12 +0800 Subject: [PATCH 25/32] move to summary/writer --- tensorboard/BUILD | 2 +- tensorboard/{ => summary}/writer/BUILD | 2 -- tensorboard/{ => summary}/writer/__init__.py | 0 tensorboard/{ => summary}/writer/event_file_writer.py | 0 tensorboard/{ => summary}/writer/event_file_writer_test.py | 0 tensorboard/{ => summary}/writer/record_writer.py | 0 tensorboard/{ => summary}/writer/record_writer_test.py | 0 7 files changed, 1 insertion(+), 3 deletions(-) rename tensorboard/{ => summary}/writer/BUILD (90%) rename tensorboard/{ => summary}/writer/__init__.py (100%) rename tensorboard/{ => summary}/writer/event_file_writer.py (100%) rename tensorboard/{ => summary}/writer/event_file_writer_test.py (100%) rename tensorboard/{ => summary}/writer/record_writer.py (100%) rename tensorboard/{ => summary}/writer/record_writer_test.py (100%) diff --git a/tensorboard/BUILD b/tensorboard/BUILD index 4ec236ccf2..5979e2cac2 100644 --- a/tensorboard/BUILD +++ b/tensorboard/BUILD @@ -43,7 +43,7 @@ py_library( ":notebook", ":program", "//tensorboard/summary", - "//tensorboard/writer", + "//tensorboard/summary/writer", ], ) diff --git a/tensorboard/writer/BUILD b/tensorboard/summary/writer/BUILD similarity index 90% rename from tensorboard/writer/BUILD rename to tensorboard/summary/writer/BUILD index 
9de13f0cd7..579a9127e6 100644 --- a/tensorboard/writer/BUILD +++ b/tensorboard/summary/writer/BUILD @@ -31,7 +31,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":writer", - "//tensorboard:expect_tensorflow_installed", ], ) @@ -43,6 +42,5 @@ py_test( srcs_version = "PY2AND3", deps = [ ":writer", - "//tensorboard:expect_tensorflow_installed", ], ) \ No newline at end of file diff --git a/tensorboard/writer/__init__.py b/tensorboard/summary/writer/__init__.py similarity index 100% rename from tensorboard/writer/__init__.py rename to tensorboard/summary/writer/__init__.py diff --git a/tensorboard/writer/event_file_writer.py b/tensorboard/summary/writer/event_file_writer.py similarity index 100% rename from tensorboard/writer/event_file_writer.py rename to tensorboard/summary/writer/event_file_writer.py diff --git a/tensorboard/writer/event_file_writer_test.py b/tensorboard/summary/writer/event_file_writer_test.py similarity index 100% rename from tensorboard/writer/event_file_writer_test.py rename to tensorboard/summary/writer/event_file_writer_test.py diff --git a/tensorboard/writer/record_writer.py b/tensorboard/summary/writer/record_writer.py similarity index 100% rename from tensorboard/writer/record_writer.py rename to tensorboard/summary/writer/record_writer.py diff --git a/tensorboard/writer/record_writer_test.py b/tensorboard/summary/writer/record_writer_test.py similarity index 100% rename from tensorboard/writer/record_writer_test.py rename to tensorboard/summary/writer/record_writer_test.py From 2551cbca74f796b89b1ad74d4e36088fa66b0863 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Sat, 13 Apr 2019 01:43:51 +0800 Subject: [PATCH 26/32] fix test, prepare for tb_test --- tensorboard/summary/writer/BUILD | 2 ++ tensorboard/summary/writer/event_file_writer.py | 2 +- .../summary/writer/event_file_writer_test.py | 15 +++++++-------- tensorboard/summary/writer/record_writer_test.py | 11 +++++------ 4 files changed, 15 insertions(+), 15 deletions(-) diff --git 
a/tensorboard/summary/writer/BUILD b/tensorboard/summary/writer/BUILD index 579a9127e6..aa376a9df4 100644 --- a/tensorboard/summary/writer/BUILD +++ b/tensorboard/summary/writer/BUILD @@ -31,6 +31,7 @@ py_test( srcs_version = "PY2AND3", deps = [ ":writer", + # "//tensorboard:test", ], ) @@ -42,5 +43,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":writer", + # "//tensorboard:test", ], ) \ No newline at end of file diff --git a/tensorboard/summary/writer/event_file_writer.py b/tensorboard/summary/writer/event_file_writer.py index b407138ad2..b1372ff2ac 100644 --- a/tensorboard/summary/writer/event_file_writer.py +++ b/tensorboard/summary/writer/event_file_writer.py @@ -26,7 +26,7 @@ import six from tensorboard.compat.proto import event_pb2 -from tensorboard.writer.record_writer import RecordWriter +from tensorboard.summary.writer.record_writer import RecordWriter class AtomicCounter(object): diff --git a/tensorboard/summary/writer/event_file_writer_test.py b/tensorboard/summary/writer/event_file_writer_test.py index 203d525812..916c15364b 100644 --- a/tensorboard/summary/writer/event_file_writer_test.py +++ b/tensorboard/summary/writer/event_file_writer_test.py @@ -23,19 +23,18 @@ import glob import os import shutil -import unittest import tempfile import time -from tensorboard.writer.event_file_writer import EventFileWriter -from tensorboard.writer.event_file_writer import _AsyncWriter -from tensorboard.writer.event_file_writer import _AsyncWriterThread +from tensorboard.summary.writer.event_file_writer import EventFileWriter +from tensorboard.summary.writer.event_file_writer import _AsyncWriter +from tensorboard.summary.writer.event_file_writer import _AsyncWriterThread from tensorboard.compat.proto import event_pb2, summary_pb2 from tensorboard.compat.proto.summary_pb2 import Summary from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import PyRecordReader_New -from google.protobuf import json_format +# from tensorboard import test as tb_test +import 
unittest as tb_test - -class EventFileWriterTest(unittest.TestCase): +class EventFileWriterTest(tb_test.TestCase): def __init__(self, *args, **kwargs): super(EventFileWriterTest, self).__init__(*args, **kwargs) @@ -181,4 +180,4 @@ def get_copy_by_OS(oldfilename): if __name__ == '__main__': - unittest.main() + tb_test.main() diff --git a/tensorboard/summary/writer/record_writer_test.py b/tensorboard/summary/writer/record_writer_test.py index f7baac9488..16cbccba13 100644 --- a/tensorboard/summary/writer/record_writer_test.py +++ b/tensorboard/summary/writer/record_writer_test.py @@ -22,15 +22,14 @@ import glob import os import tempfile -import unittest -from tensorboard.writer.record_writer import RecordWriter +from tensorboard.summary.writer.record_writer import RecordWriter from tensorboard.compat.proto import event_pb2, summary_pb2 from tensorboard.compat.proto.summary_pb2 import Summary from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import PyRecordReader_New -from google.protobuf import json_format +# from tensorboard import test as tb_test +import unittest as tb_test - -class RecordWriterTest(unittest.TestCase): +class RecordWriterTest(tb_test.TestCase): def __init__(self, *args, **kwargs): super(RecordWriterTest, self).__init__(*args, **kwargs) @@ -70,4 +69,4 @@ def test_record_writer_roundtrip(self): if __name__ == '__main__': - unittest.main() + tb_test.main() From 72b6d7d6b6e3a0a1f1465c4d876b8275d3963557 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Sat, 13 Apr 2019 01:55:55 +0800 Subject: [PATCH 27/32] enable tb_test (expect failure on CI) --- tensorboard/summary/writer/BUILD | 4 ++-- tensorboard/summary/writer/event_file_writer_test.py | 3 +-- tensorboard/summary/writer/record_writer_test.py | 3 +-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tensorboard/summary/writer/BUILD b/tensorboard/summary/writer/BUILD index aa376a9df4..42e408901b 100644 --- a/tensorboard/summary/writer/BUILD +++ b/tensorboard/summary/writer/BUILD 
@@ -31,7 +31,7 @@ py_test( srcs_version = "PY2AND3", deps = [ ":writer", - # "//tensorboard:test", + "//tensorboard:test", ], ) @@ -43,6 +43,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":writer", - # "//tensorboard:test", + "//tensorboard:test", ], ) \ No newline at end of file diff --git a/tensorboard/summary/writer/event_file_writer_test.py b/tensorboard/summary/writer/event_file_writer_test.py index 916c15364b..631111e8da 100644 --- a/tensorboard/summary/writer/event_file_writer_test.py +++ b/tensorboard/summary/writer/event_file_writer_test.py @@ -31,8 +31,7 @@ from tensorboard.compat.proto import event_pb2, summary_pb2 from tensorboard.compat.proto.summary_pb2 import Summary from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import PyRecordReader_New -# from tensorboard import test as tb_test -import unittest as tb_test +from tensorboard import test as tb_test class EventFileWriterTest(tb_test.TestCase): def __init__(self, *args, **kwargs): diff --git a/tensorboard/summary/writer/record_writer_test.py b/tensorboard/summary/writer/record_writer_test.py index 16cbccba13..e11368fefd 100644 --- a/tensorboard/summary/writer/record_writer_test.py +++ b/tensorboard/summary/writer/record_writer_test.py @@ -26,8 +26,7 @@ from tensorboard.compat.proto import event_pb2, summary_pb2 from tensorboard.compat.proto.summary_pb2 import Summary from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import PyRecordReader_New -# from tensorboard import test as tb_test -import unittest as tb_test +from tensorboard import test as tb_test class RecordWriterTest(tb_test.TestCase): def __init__(self, *args, **kwargs): From 149535a53c4fe55ee3e51b1369b4c6f05e6eaf9f Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Sat, 20 Apr 2019 02:18:19 +0800 Subject: [PATCH 28/32] fix (1 of 3) --- .../summary/writer/event_file_writer.py | 65 +++++++++------ .../summary/writer/event_file_writer_test.py | 82 ++++++++++--------- tensorboard/summary/writer/record_writer.py | 6 +- 
.../summary/writer/record_writer_test.py | 50 ++++++----- 4 files changed, 109 insertions(+), 94 deletions(-) diff --git a/tensorboard/summary/writer/event_file_writer.py b/tensorboard/summary/writer/event_file_writer.py index b1372ff2ac..3326d7be88 100644 --- a/tensorboard/summary/writer/event_file_writer.py +++ b/tensorboard/summary/writer/event_file_writer.py @@ -45,12 +45,13 @@ def get(self): _global_uid = AtomicCounter(0) -class EventFileWriter(object): # Owned by FileWriter +class EventFileWriter(object): """Writes `Event` protocol buffers to an event file. The `EventFileWriter` class creates an event file in the specified directory, and asynchronously writes Event protocol buffers to the file. The Event file - is encoded using the tfrecord format, which is similar to RecordIO. + is encoded using the tfrecord format, which is similar to RecordIO. The instance is + usually created and managed by `torch.utils.tensorboard.FileWriter` in pytorch. """ def __init__(self, logdir, max_queue_size=10, flush_secs=120, filename_suffix=''): @@ -71,14 +72,14 @@ def __init__(self, logdir, max_queue_size=10, flush_secs=120, filename_suffix='' self._logdir = logdir if not os.path.exists(logdir): os.makedirs(logdir) - self._file_name = logdir + "/events.out.tfevents.%010d.%s.%s.%s" %\ - (time.time(), socket.gethostname(), os.getpid(), _global_uid.get()) +\ - filename_suffix + self._file_name = os.path.join(logdir, "events.out.tfevents.%010d.%s.%s.%s" % + (time.time(), socket.gethostname(), os.getpid(), _global_uid.get())) + filename_suffix self._async_writer = _AsyncWriter(RecordWriter(self._file_name), max_queue_size, flush_secs) # Initialize an event instance. 
_event = event_pb2.Event(wall_time=time.time(), file_version='brain.Event:2') self.add_event(_event) + self.flush() def get_logdir(self): """Returns the directory where event file will be written.""" @@ -112,42 +113,54 @@ def close(self): self._async_writer.close() -class _AsyncWriter(object): # _AsyncWriter - '''Writes bytes to an event file.''' +class _AsyncWriter(object): + '''Writes bytes to an file.''' + + def __init__(self, record_writer, max_queue_size=20, flush_secs=120, dummy_delay=False): + """Writes bytes to an file asynchronously. + An instance of this class holds a queue to keep the incoming data temporarily. + Data passed to the `write` function will be put to the queue and the function + returns immediately. This class also maintains a thread to write data in the + queue to disk. The first initialization paremeter is an instance of + `tensorboard.summary.record_writer` which computes the CRC checksum and then write + the combined result to the disk. So we use an async approach to improve performance. - def __init__(self, writerinstance, max_queue_size=20, flush_secs=120, dummy_delay=False): - """Writes bytes to an event file. Args: - writerinstance: A RecordWriter instance - max_queue_size: Integer. Size of the queue for pending events and summaries. + record_writer: A RecordWriter instance + max_queue_size: Integer. Size of the queue for pending bytestrings. flush_secs: Number. How often, in seconds, to flush the - pending events and summaries to disk. + pending bytestrings to disk. 
""" - self._writer = writerinstance + self._writer = record_writer + self._closed = False self._byte_queue = six.moves.queue.Queue(max_queue_size) self._worker = _AsyncWriterThread(self._byte_queue, self._writer, flush_secs, dummy_delay) self._lock = threading.Lock() self._worker.start() - def write(self, bytestream): - '''Append bytes to the queue.''' + def write(self, bytestring): + '''Enqueue the given bytes to be written asychronously''' + if self._closed: + raise IOError('Writer is closed') with self._lock: - self._byte_queue.put(bytestream) + self._byte_queue.put(bytestring) def flush(self): - '''Write all the enqueued bytestream before this flush call to disk. - Block until all the above bytestream are written. + '''Write all the enqueued bytestring before this flush call to disk. + Block until all the above bytestring are written. ''' with self._lock: - self._writer.flush() + self._byte_queue.join() def close(self): - '''Call self.flush().''' - self._worker.stop() - self.flush() - with self._lock: - self._writer.close() - assert self._writer.closed + '''Closes the underlying writer, flushing any pending writes first.''' + if not self._closed: + with self._lock: + if not self._closed: + self._closed = True + self._worker.stop() + self._writer.flush() + self._writer.close() class _AsyncWriterThread(threading.Thread): @@ -157,7 +170,7 @@ def __init__(self, queue, record_writer, flush_secs, dummy_delay): """Creates an _AsyncWriterThread. Args: queue: A Queue from which to dequeue data. - record_writer: An protobuf record_writer writer. + record_writer: An instance of record_writer writer. flush_secs: How often, in seconds, to flush the pending file to disk. 
""" diff --git a/tensorboard/summary/writer/event_file_writer_test.py b/tensorboard/summary/writer/event_file_writer_test.py index 631111e8da..2a3d225811 100644 --- a/tensorboard/summary/writer/event_file_writer_test.py +++ b/tensorboard/summary/writer/event_file_writer_test.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== -# """Integration tests for the Writer.""" +# """Tests for EventFileWriter and _AsyncWriter""" from __future__ import absolute_import from __future__ import division @@ -27,12 +27,12 @@ import time from tensorboard.summary.writer.event_file_writer import EventFileWriter from tensorboard.summary.writer.event_file_writer import _AsyncWriter -from tensorboard.summary.writer.event_file_writer import _AsyncWriterThread -from tensorboard.compat.proto import event_pb2, summary_pb2 +from tensorboard.compat.proto import event_pb2 from tensorboard.compat.proto.summary_pb2 import Summary from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import PyRecordReader_New from tensorboard import test as tb_test + class EventFileWriterTest(tb_test.TestCase): def __init__(self, *args, **kwargs): super(EventFileWriterTest, self).__init__(*args, **kwargs) @@ -52,7 +52,7 @@ def test_event_file_writer_roundtrip(self): r.read() events = r.event_strs event_from_disk = events[1] - assert fakeevent.SerializeToString() == event_from_disk + self.assertEqual(fakeevent.SerializeToString(), event_from_disk) def test_setting_filename_suffix_works(self): logdir = tempfile.mkdtemp() @@ -60,7 +60,12 @@ def test_setting_filename_suffix_works(self): w = EventFileWriter(logdir, filename_suffix='.event_horizon') w.close() event_files = sorted(glob.glob(os.path.join(logdir, '*'))) - assert event_files[0].split('.')[-1] == 'event_horizon' + self.assertEqual(event_files[0].split('.')[-1], 'event_horizon') + + +class AsyncWriterTest(tb_test.TestCase): + def __init__(self, *args, **kwargs): + 
super(AsyncWriterTest, self).__init__(*args, **kwargs) def test_async_writer_without_write(self): for i in range(100): @@ -71,41 +76,41 @@ def test_async_writer_without_write(self): r = PyRecordReader_New(event_files[0]) r.read() events = r.event_strs - assert len(events) == 1 + self.assertEqual(len(events), 1) s = event_pb2.Event() s.ParseFromString(events[0]) - assert s.file_version == "brain.Event:2" + self.assertEqual(s.file_version, "brain.Event:2") def test_async_writer_write_once(self): - logfile = tempfile.NamedTemporaryFile().name - w = _AsyncWriter(open(logfile, 'wb')) + filename = tempfile.NamedTemporaryFile().name + w = _AsyncWriter(open(filename, 'wb')) random_bytes = bytearray(os.urandom(64)) w.write(random_bytes) w.close() - with open(logfile, 'rb') as f: - assert f.read() == random_bytes + with open(filename, 'rb') as f: + self.assertEqual(f.read(), random_bytes) def test_async_writer_write_queue_full(self): - logfile = tempfile.NamedTemporaryFile().name - w = _AsyncWriter(open(logfile, 'wb'), dummy_delay=True) + filename = tempfile.NamedTemporaryFile().name + w = _AsyncWriter(open(filename, 'wb'), dummy_delay=True) random_bytes = bytearray(os.urandom(64)) repeat = 100 for i in range(repeat): w.write(random_bytes) w.close() - with open(logfile, 'rb') as f: - assert f.read() == random_bytes * repeat + with open(filename, 'rb') as f: + self.assertEqual(f.read(), random_bytes * repeat) def test_async_writer_write_one_slot_queue(self): - logfile = tempfile.NamedTemporaryFile().name - w = _AsyncWriter(open(logfile, 'wb'), max_queue_size=1, dummy_delay=True) + filename = tempfile.NamedTemporaryFile().name + w = _AsyncWriter(open(filename, 'wb'), max_queue_size=1, dummy_delay=True) random_bytes = bytearray(os.urandom(64)) repeat = 10 # faster for i in range(repeat): w.write(random_bytes) w.close() - with open(logfile, 'rb') as f: - assert f.read() == random_bytes * repeat + with open(filename, 'rb') as f: + self.assertEqual(f.read(), random_bytes * repeat) 
# write ................................... # flush ---------^---------^---------^ @@ -116,60 +121,61 @@ def test_async_writer_write_one_slot_queue(self): # I set 0.9 here in case the CI is too slow. def test_async_writer_auto_flushing(self): - logfile = tempfile.NamedTemporaryFile().name + filename = tempfile.NamedTemporaryFile().name flush_timer = 1 tolerance = 0.9 # The undelying writer need time to complete. - w = _AsyncWriter(open(logfile, 'wb'), max_queue_size=500, flush_secs=flush_timer) + w = _AsyncWriter(open(filename, 'wb'), max_queue_size=500, flush_secs=flush_timer) random_bytes = bytearray(os.urandom(64)) repeat = 100 for i in range(repeat): w.write(random_bytes) time.sleep(0.1) if i % (flush_timer * 10) == 0: - with open(get_copy_by_OS(logfile), 'rb') as f: + with open(get_copy_by_OS(filename), 'rb') as f: nbytes = len(f.read()) # print(i, nbytes, i * len(random_bytes) * tolerance, nbytes / (1+i * len(random_bytes))) - assert nbytes >= i * len(random_bytes) * tolerance + self.assertGreaterEqual(nbytes, i * len(random_bytes) * tolerance) w.close() # make sure all data is written - with open(logfile, 'rb') as f: - assert f.read() == random_bytes * repeat + with open(filename, 'rb') as f: + self.assertEqual(f.read(), random_bytes * repeat) def test_async_writer_flush_before_flush_secs(self): # This test equals test_async_writer_write_once, # since flush() is implicitly called by close() and the default flush time is 120 secs. 
- logfile = tempfile.NamedTemporaryFile().name - w = _AsyncWriter(open(logfile, 'wb')) + filename = tempfile.NamedTemporaryFile().name + w = _AsyncWriter(open(filename, 'wb')) random_bytes = bytearray(os.urandom(64)) w.write(random_bytes) w.flush() # flush() is implicitly called by close() w.close() - with open(logfile, 'rb') as f: - assert f.read() == random_bytes + with open(filename, 'rb') as f: + self.assertEqual(f.read(), random_bytes) def test_async_writer_close_triggers_flush(self): # This test equals test_async_writer_write_once, # since flush() is implicitly called by close() and the default flush time is 120 secs. - logfile = tempfile.NamedTemporaryFile().name - w = _AsyncWriter(open(logfile, 'wb')) + filename = tempfile.NamedTemporaryFile().name + w = _AsyncWriter(open(filename, 'wb')) random_bytes = bytearray(os.urandom(64)) w.write(random_bytes) w.close() - with open(logfile, 'rb') as f: - assert f.read() == random_bytes + with open(filename, 'rb') as f: + self.assertEqual(f.read(), random_bytes) def test_write_after_async_writer_closed(self): - logfile = tempfile.NamedTemporaryFile().name - w = _AsyncWriter(open(logfile, 'wb')) + filename = tempfile.NamedTemporaryFile().name + w = _AsyncWriter(open(filename, 'wb')) random_bytes = bytearray(os.urandom(64)) w.write(random_bytes) w.close() - w.write(random_bytes) + with self.assertRaises(IOError): + w.write(random_bytes) # nothing is written to the file after close - with open(logfile, 'rb') as f: - assert f.read() == random_bytes + with open(filename, 'rb') as f: + self.assertEqual(f.read(), random_bytes) def get_copy_by_OS(oldfilename): diff --git a/tensorboard/summary/writer/record_writer.py b/tensorboard/summary/writer/record_writer.py index e0f47aa955..3d78b3d721 100644 --- a/tensorboard/summary/writer/record_writer.py +++ b/tensorboard/summary/writer/record_writer.py @@ -19,13 +19,13 @@ class RecordWriter(object): """Write encoded protobuf to a file with packing defined in tensorflow""" - def 
__init__(self, logfile): + def __init__(self, filename): """Open a file to keep the tensorboard records. Args: - logfile: (string) The location where the file will be opened. + filename: (string) The location where the file will be opened. """ - self._writer = open(logfile, 'wb') + self._writer = open(filename, 'wb') # Format of a single record: (little-endian) # uint64 length diff --git a/tensorboard/summary/writer/record_writer_test.py b/tensorboard/summary/writer/record_writer_test.py index e11368fefd..2de2061845 100644 --- a/tensorboard/summary/writer/record_writer_test.py +++ b/tensorboard/summary/writer/record_writer_test.py @@ -13,58 +13,54 @@ # limitations under the License. # ============================================================================== -# """Integration tests for the Writer.""" +# """Tests for RecordWriter""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -import glob import os -import tempfile from tensorboard.summary.writer.record_writer import RecordWriter -from tensorboard.compat.proto import event_pb2, summary_pb2 -from tensorboard.compat.proto.summary_pb2 import Summary from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import PyRecordReader_New from tensorboard import test as tb_test + class RecordWriterTest(tb_test.TestCase): def __init__(self, *args, **kwargs): super(RecordWriterTest, self).__init__(*args, **kwargs) def test_expect_bytes_written(self): - logfile = tempfile.NamedTemporaryFile().name - w = RecordWriter(logfile) - random_bytes = bytearray(os.urandom(64)) + filename = os.path.join(self.get_temp_dir(), "recordtest") + byte_len = 64 + w = RecordWriter(filename) + random_bytes = bytearray(os.urandom(byte_len)) w.write(random_bytes) w.close() - with open(logfile, 'rb') as f: - assert len(f.read()) == (8 + 4 + 64 + 4) # uint64+uint32+data+uint32 - - # crc'ed file content of empty data - # b'\x00\x00\x00\x00\x00\x00\x00\x00)\x03\x98\x07\xd8\xea\x82\xa2' + 
with open(filename, 'rb') as f: + self.assertEqual(len(f.read()), (8 + 4 + byte_len + 4)) # uint64+uint32+data+uint32 def test_empty_record(self): - logfile = tempfile.NamedTemporaryFile().name - w = RecordWriter(logfile) - random_bytes = bytearray(os.urandom(0)) - w.write(random_bytes) + filename = os.path.join(self.get_temp_dir(), "recordtest") + w = RecordWriter(filename) + bytes_to_write = b"" + w.write(bytes_to_write) w.close() - r = PyRecordReader_New(logfile) + r = PyRecordReader_New(filename) r.read() - assert r.event_strs[0] == random_bytes + self.assertEqual(r.event_strs[0], bytes_to_write) def test_record_writer_roundtrip(self): - logfile = tempfile.NamedTemporaryFile().name - w = RecordWriter(logfile) - random_bytes = bytearray(os.urandom(64)) - w.write(random_bytes) + filename = os.path.join(self.get_temp_dir(), "recordtest") + w = RecordWriter(filename) + bytes_to_write = b"hello world" + for _ in range(50): + w.write(bytes_to_write) w.close() - with open(logfile, 'rb') as f: - print(f.read()) - r = PyRecordReader_New(logfile) + + r = PyRecordReader_New(filename) r.read() - assert r.event_strs[0] == random_bytes + for i in range(50): + self.assertEqual(r.event_strs[i], bytes_to_write) if __name__ == '__main__': From 6b3d377d51982a9131cd373c44036c05bc9c05c9 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Sat, 20 Apr 2019 03:09:36 +0800 Subject: [PATCH 29/32] fix (2 of 3) --- .../summary/writer/event_file_writer.py | 3 +- .../summary/writer/event_file_writer_test.py | 44 +++++++++---------- .../summary/writer/record_writer_test.py | 11 ++--- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/tensorboard/summary/writer/event_file_writer.py b/tensorboard/summary/writer/event_file_writer.py index 3326d7be88..d716603bf2 100644 --- a/tensorboard/summary/writer/event_file_writer.py +++ b/tensorboard/summary/writer/event_file_writer.py @@ -109,7 +109,6 @@ def close(self): write/flush worker and closes the file. 
Call this method when you do not need the summary writer anymore. """ - self._async_writer.flush() self._async_writer.close() @@ -164,7 +163,7 @@ def close(self): class _AsyncWriterThread(threading.Thread): - """Thread that logs events.""" + """Thread that processes asynchronous writes for _AsyncWriter.""" def __init__(self, queue, record_writer, flush_secs, dummy_delay): """Creates an _AsyncWriterThread. diff --git a/tensorboard/summary/writer/event_file_writer_test.py b/tensorboard/summary/writer/event_file_writer_test.py index 2a3d225811..eedc05cb90 100644 --- a/tensorboard/summary/writer/event_file_writer_test.py +++ b/tensorboard/summary/writer/event_file_writer_test.py @@ -40,7 +40,7 @@ def __init__(self, *args, **kwargs): def test_event_file_writer_roundtrip(self): _TAGNAME = 'dummy' _DUMMY_VALUE = 42 - logdir = tempfile.mkdtemp() + logdir = self.get_temp_dir() w = EventFileWriter(logdir) summary = Summary(value=[Summary.Value(tag=_TAGNAME, simple_value=_DUMMY_VALUE)]) fakeevent = event_pb2.Event(summary=summary) @@ -55,7 +55,7 @@ def test_event_file_writer_roundtrip(self): self.assertEqual(fakeevent.SerializeToString(), event_from_disk) def test_setting_filename_suffix_works(self): - logdir = tempfile.mkdtemp() + logdir = self.get_temp_dir() w = EventFileWriter(logdir, filename_suffix='.event_horizon') w.close() @@ -69,7 +69,7 @@ def __init__(self, *args, **kwargs): def test_async_writer_without_write(self): for i in range(100): - logdir = tempfile.mkdtemp() + logdir = self.get_temp_dir() w = EventFileWriter(logdir) w.close() event_files = sorted(glob.glob(os.path.join(logdir, '*'))) @@ -82,46 +82,46 @@ def test_async_writer_without_write(self): self.assertEqual(s.file_version, "brain.Event:2") def test_async_writer_write_once(self): - filename = tempfile.NamedTemporaryFile().name + filename = os.path.join(self.get_temp_dir(), "async_writer_write_once") w = _AsyncWriter(open(filename, 'wb')) - random_bytes = bytearray(os.urandom(64)) - w.write(random_bytes) 
+ bytes_to_write = b"hello world" + w.write(bytes_to_write) w.close() with open(filename, 'rb') as f: - self.assertEqual(f.read(), random_bytes) + self.assertEqual(f.read(), bytes_to_write) def test_async_writer_write_queue_full(self): - filename = tempfile.NamedTemporaryFile().name + filename = os.path.join(self.get_temp_dir(), "async_writer_write_queue_full") w = _AsyncWriter(open(filename, 'wb'), dummy_delay=True) - random_bytes = bytearray(os.urandom(64)) + bytes_to_write = b"hello world" repeat = 100 for i in range(repeat): - w.write(random_bytes) + w.write(bytes_to_write) w.close() with open(filename, 'rb') as f: - self.assertEqual(f.read(), random_bytes * repeat) + self.assertEqual(f.read(), bytes_to_write * repeat) def test_async_writer_write_one_slot_queue(self): - filename = tempfile.NamedTemporaryFile().name + filename = os.path.join(self.get_temp_dir(), "async_writer_write_one_slot_queue") w = _AsyncWriter(open(filename, 'wb'), max_queue_size=1, dummy_delay=True) - random_bytes = bytearray(os.urandom(64)) + bytes_to_write = b"hello world" repeat = 10 # faster for i in range(repeat): - w.write(random_bytes) + w.write(bytes_to_write) w.close() with open(filename, 'rb') as f: - self.assertEqual(f.read(), random_bytes * repeat) + self.assertEqual(f.read(), bytes_to_write * repeat) - # write ................................... - # flush ---------^---------^---------^ - # numelinQ 12345678901234567890 (expected, because the IO overhead) + # write ................................... + # flush ---------^---------^---------^ (^: flush -: idle) + # #obj in queue 12345678901234567890 (expected, because the IO overhead) # Make strict comparion for the flushing result is possible, but it requires accessing # the queue inside the async writer. So I write the test to simulate real write and flush. # In my experiment, the tolerance can be set as high to roughly to 0.95. # I set 0.9 here in case the CI is too slow. 
def test_async_writer_auto_flushing(self): - filename = tempfile.NamedTemporaryFile().name + filename = os.path.join(self.get_temp_dir(), "async_writer_auto_flushing") flush_timer = 1 tolerance = 0.9 # The undelying writer need time to complete. w = _AsyncWriter(open(filename, 'wb'), max_queue_size=500, flush_secs=flush_timer) @@ -144,7 +144,7 @@ def test_async_writer_auto_flushing(self): def test_async_writer_flush_before_flush_secs(self): # This test equals test_async_writer_write_once, # since flush() is implicitly called by close() and the default flush time is 120 secs. - filename = tempfile.NamedTemporaryFile().name + filename = os.path.join(self.get_temp_dir(), "async_writer_flush_before_flush_secs") w = _AsyncWriter(open(filename, 'wb')) random_bytes = bytearray(os.urandom(64)) w.write(random_bytes) @@ -156,7 +156,7 @@ def test_async_writer_flush_before_flush_secs(self): def test_async_writer_close_triggers_flush(self): # This test equals test_async_writer_write_once, # since flush() is implicitly called by close() and the default flush time is 120 secs. 
- filename = tempfile.NamedTemporaryFile().name + filename = os.path.join(self.get_temp_dir(), "async_writer_close_triggers_flush") w = _AsyncWriter(open(filename, 'wb')) random_bytes = bytearray(os.urandom(64)) w.write(random_bytes) @@ -165,7 +165,7 @@ def test_async_writer_close_triggers_flush(self): self.assertEqual(f.read(), random_bytes) def test_write_after_async_writer_closed(self): - filename = tempfile.NamedTemporaryFile().name + filename = os.path.join(self.get_temp_dir(), "write_after_async_writer_closed") w = _AsyncWriter(open(filename, 'wb')) random_bytes = bytearray(os.urandom(64)) w.write(random_bytes) diff --git a/tensorboard/summary/writer/record_writer_test.py b/tensorboard/summary/writer/record_writer_test.py index 2de2061845..671bbf8afc 100644 --- a/tensorboard/summary/writer/record_writer_test.py +++ b/tensorboard/summary/writer/record_writer_test.py @@ -30,7 +30,7 @@ def __init__(self, *args, **kwargs): super(RecordWriterTest, self).__init__(*args, **kwargs) def test_expect_bytes_written(self): - filename = os.path.join(self.get_temp_dir(), "recordtest") + filename = os.path.join(self.get_temp_dir(), "expect_bytes_written") byte_len = 64 w = RecordWriter(filename) random_bytes = bytearray(os.urandom(byte_len)) @@ -40,7 +40,7 @@ def test_expect_bytes_written(self): self.assertEqual(len(f.read()), (8 + 4 + byte_len + 4)) # uint64+uint32+data+uint32 def test_empty_record(self): - filename = os.path.join(self.get_temp_dir(), "recordtest") + filename = os.path.join(self.get_temp_dir(), "empty_record") w = RecordWriter(filename) bytes_to_write = b"" w.write(bytes_to_write) @@ -50,16 +50,17 @@ def test_empty_record(self): self.assertEqual(r.event_strs[0], bytes_to_write) def test_record_writer_roundtrip(self): - filename = os.path.join(self.get_temp_dir(), "recordtest") + filename = os.path.join(self.get_temp_dir(), "record_writer_roundtrip") w = RecordWriter(filename) bytes_to_write = b"hello world" - for _ in range(50): + times_to_test = 50 + for _ 
in range(times_to_test): w.write(bytes_to_write) w.close() r = PyRecordReader_New(filename) r.read() - for i in range(50): + for i in range(times_to_test): self.assertEqual(r.event_strs[i], bytes_to_write) From bcbb1ac59b23d3ee4a3ac5264ba5f344ae08c7f8 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Sat, 20 Apr 2019 19:19:42 +0800 Subject: [PATCH 30/32] fix (3 of 3) --- .../summary/writer/event_file_writer.py | 4 ++- .../summary/writer/event_file_writer_test.py | 35 +++++++++++-------- tensorboard/summary/writer/record_writer.py | 4 +-- .../summary/writer/record_writer_test.py | 23 ++++++++---- 4 files changed, 41 insertions(+), 25 deletions(-) diff --git a/tensorboard/summary/writer/event_file_writer.py b/tensorboard/summary/writer/event_file_writer.py index d716603bf2..7ba5b60542 100644 --- a/tensorboard/summary/writer/event_file_writer.py +++ b/tensorboard/summary/writer/event_file_writer.py @@ -74,7 +74,8 @@ def __init__(self, logdir, max_queue_size=10, flush_secs=120, filename_suffix='' os.makedirs(logdir) self._file_name = os.path.join(logdir, "events.out.tfevents.%010d.%s.%s.%s" % (time.time(), socket.gethostname(), os.getpid(), _global_uid.get())) + filename_suffix - self._async_writer = _AsyncWriter(RecordWriter(self._file_name), max_queue_size, flush_secs) + self._general_file_writer = open(self._file_name, 'wb') + self._async_writer = _AsyncWriter(RecordWriter(self._general_file_writer), max_queue_size, flush_secs) # Initialize an event instance. 
_event = event_pb2.Event(wall_time=time.time(), file_version='brain.Event:2') @@ -150,6 +151,7 @@ def flush(self): ''' with self._lock: self._byte_queue.join() + self._writer.flush() def close(self): '''Closes the underlying writer, flushing any pending writes first.''' diff --git a/tensorboard/summary/writer/event_file_writer_test.py b/tensorboard/summary/writer/event_file_writer_test.py index eedc05cb90..271ee664e3 100644 --- a/tensorboard/summary/writer/event_file_writer_test.py +++ b/tensorboard/summary/writer/event_file_writer_test.py @@ -49,10 +49,9 @@ def test_event_file_writer_roundtrip(self): event_files = sorted(glob.glob(os.path.join(logdir, '*'))) self.assertEqual(len(event_files), 1) r = PyRecordReader_New(event_files[0]) - r.read() - events = r.event_strs - event_from_disk = events[1] - self.assertEqual(fakeevent.SerializeToString(), event_from_disk) + r.GetNext() # meta data, so skip + r.GetNext() + self.assertEqual(fakeevent.SerializeToString(), r.record()) def test_setting_filename_suffix_works(self): logdir = self.get_temp_dir() @@ -74,11 +73,10 @@ def test_async_writer_without_write(self): w.close() event_files = sorted(glob.glob(os.path.join(logdir, '*'))) r = PyRecordReader_New(event_files[0]) - r.read() - events = r.event_strs - self.assertEqual(len(events), 1) + r.GetNext() + event = r.record() s = event_pb2.Event() - s.ParseFromString(events[0]) + s.ParseFromString(event) self.assertEqual(s.file_version, "brain.Event:2") def test_async_writer_write_once(self): @@ -92,7 +90,7 @@ def test_async_writer_write_once(self): def test_async_writer_write_queue_full(self): filename = os.path.join(self.get_temp_dir(), "async_writer_write_queue_full") - w = _AsyncWriter(open(filename, 'wb'), dummy_delay=True) + w = _AsyncWriter(open(filename, 'wb'), dummy_delay=False) bytes_to_write = b"hello world" repeat = 100 for i in range(repeat): @@ -103,7 +101,7 @@ def test_async_writer_write_queue_full(self): def test_async_writer_write_one_slot_queue(self): 
filename = os.path.join(self.get_temp_dir(), "async_writer_write_one_slot_queue") - w = _AsyncWriter(open(filename, 'wb'), max_queue_size=1, dummy_delay=True) + w = _AsyncWriter(open(filename, 'wb'), max_queue_size=1, dummy_delay=False) bytes_to_write = b"hello world" repeat = 10 # faster for i in range(repeat): @@ -142,20 +140,27 @@ def test_async_writer_auto_flushing(self): self.assertEqual(f.read(), random_bytes * repeat) def test_async_writer_flush_before_flush_secs(self): - # This test equals test_async_writer_write_once, - # since flush() is implicitly called by close() and the default flush time is 120 secs. filename = os.path.join(self.get_temp_dir(), "async_writer_flush_before_flush_secs") w = _AsyncWriter(open(filename, 'wb')) random_bytes = bytearray(os.urandom(64)) w.write(random_bytes) w.flush() # flush() is implicitly called by close() - w.close() with open(filename, 'rb') as f: self.assertEqual(f.read(), random_bytes) + w.write(random_bytes) + with open(filename, 'rb') as f: # without flush, the file content should be the same. + self.assertEqual(f.read(), random_bytes) + w.flush() + with open(filename, 'rb') as f: # after flush, the file content will be updated. + self.assertEqual(f.read(), random_bytes + random_bytes) + w.write(random_bytes) + with open(filename, 'rb') as f: # without flush, the file content should be the same. + self.assertEqual(f.read(), random_bytes + random_bytes) + w.close() + with open(filename, 'rb') as f: # after close, new contents flushes implicitly. + self.assertEqual(f.read(), random_bytes + random_bytes + random_bytes) def test_async_writer_close_triggers_flush(self): - # This test equals test_async_writer_write_once, - # since flush() is implicitly called by close() and the default flush time is 120 secs. 
filename = os.path.join(self.get_temp_dir(), "async_writer_close_triggers_flush") w = _AsyncWriter(open(filename, 'wb')) random_bytes = bytearray(os.urandom(64)) diff --git a/tensorboard/summary/writer/record_writer.py b/tensorboard/summary/writer/record_writer.py index 3d78b3d721..cd2c2a32e2 100644 --- a/tensorboard/summary/writer/record_writer.py +++ b/tensorboard/summary/writer/record_writer.py @@ -19,13 +19,13 @@ class RecordWriter(object): """Write encoded protobuf to a file with packing defined in tensorflow""" - def __init__(self, filename): + def __init__(self, general_writer): """Open a file to keep the tensorboard records. Args: filename: (string) The location where the file will be opened. """ - self._writer = open(filename, 'wb') + self._writer = general_writer # Format of a single record: (little-endian) # uint64 length diff --git a/tensorboard/summary/writer/record_writer_test.py b/tensorboard/summary/writer/record_writer_test.py index 671bbf8afc..5e388a16e3 100644 --- a/tensorboard/summary/writer/record_writer_test.py +++ b/tensorboard/summary/writer/record_writer_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function +import six import os from tensorboard.summary.writer.record_writer import RecordWriter from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import PyRecordReader_New @@ -32,7 +33,7 @@ def __init__(self, *args, **kwargs): def test_expect_bytes_written(self): filename = os.path.join(self.get_temp_dir(), "expect_bytes_written") byte_len = 64 - w = RecordWriter(filename) + w = RecordWriter(open(filename, 'wb')) random_bytes = bytearray(os.urandom(byte_len)) w.write(random_bytes) w.close() @@ -41,17 +42,17 @@ def test_expect_bytes_written(self): def test_empty_record(self): filename = os.path.join(self.get_temp_dir(), "empty_record") - w = RecordWriter(filename) + w = RecordWriter(open(filename, 'wb')) bytes_to_write = b"" w.write(bytes_to_write) w.close() r = PyRecordReader_New(filename) - 
r.read() - self.assertEqual(r.event_strs[0], bytes_to_write) + r.GetNext() + self.assertEqual(r.record(), bytes_to_write) def test_record_writer_roundtrip(self): filename = os.path.join(self.get_temp_dir(), "record_writer_roundtrip") - w = RecordWriter(filename) + w = RecordWriter(open(filename, 'wb')) bytes_to_write = b"hello world" times_to_test = 50 for _ in range(times_to_test): @@ -59,9 +60,17 @@ def test_record_writer_roundtrip(self): w.close() r = PyRecordReader_New(filename) - r.read() for i in range(times_to_test): - self.assertEqual(r.event_strs[i], bytes_to_write) + r.GetNext() + self.assertEqual(r.record(), bytes_to_write) + + def test_expect_bytes_written_bytes_IO(self): + byte_len = 64 + Bytes_io = six.BytesIO() + w = RecordWriter(Bytes_io) + random_bytes = bytearray(os.urandom(byte_len)) + w.write(random_bytes) + self.assertEqual(len(Bytes_io.getvalue()), (8 + 4 + byte_len + 4)) # uint64+uint32+data+uint32 if __name__ == '__main__': From 0c732fcc527825b520cf8f4112b2536c4082b834 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Sun, 21 Apr 2019 01:35:58 +0800 Subject: [PATCH 31/32] remove dummy_delay --- tensorboard/summary/writer/event_file_writer.py | 9 +++------ tensorboard/summary/writer/event_file_writer_test.py | 4 ++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/tensorboard/summary/writer/event_file_writer.py b/tensorboard/summary/writer/event_file_writer.py index 7ba5b60542..36d5503ae2 100644 --- a/tensorboard/summary/writer/event_file_writer.py +++ b/tensorboard/summary/writer/event_file_writer.py @@ -116,7 +116,7 @@ def close(self): class _AsyncWriter(object): '''Writes bytes to an file.''' - def __init__(self, record_writer, max_queue_size=20, flush_secs=120, dummy_delay=False): + def __init__(self, record_writer, max_queue_size=20, flush_secs=120): """Writes bytes to an file asynchronously. An instance of this class holds a queue to keep the incoming data temporarily. 
Data passed to the `write` function will be put to the queue and the function @@ -134,7 +134,7 @@ def __init__(self, record_writer, max_queue_size=20, flush_secs=120, dummy_delay self._writer = record_writer self._closed = False self._byte_queue = six.moves.queue.Queue(max_queue_size) - self._worker = _AsyncWriterThread(self._byte_queue, self._writer, flush_secs, dummy_delay) + self._worker = _AsyncWriterThread(self._byte_queue, self._writer, flush_secs) self._lock = threading.Lock() self._worker.start() @@ -167,7 +167,7 @@ def close(self): class _AsyncWriterThread(threading.Thread): """Thread that processes asynchronous writes for _AsyncWriter.""" - def __init__(self, queue, record_writer, flush_secs, dummy_delay): + def __init__(self, queue, record_writer, flush_secs): """Creates an _AsyncWriterThread. Args: queue: A Queue from which to dequeue data. @@ -184,7 +184,6 @@ def __init__(self, queue, record_writer, flush_secs, dummy_delay): self._next_flush_time = 0 self._has_pending_data = False self._shutdown_signal = object() - self._dummy_delay = dummy_delay def stop(self): self._queue.put(self._shutdown_signal) @@ -208,8 +207,6 @@ def run(self): if data is self._shutdown_signal: return self._record_writer.write(data) - if self._dummy_delay: - time.sleep(0.1) self._has_pending_data = True except six.moves.queue.Empty: pass diff --git a/tensorboard/summary/writer/event_file_writer_test.py b/tensorboard/summary/writer/event_file_writer_test.py index 271ee664e3..543dbdd271 100644 --- a/tensorboard/summary/writer/event_file_writer_test.py +++ b/tensorboard/summary/writer/event_file_writer_test.py @@ -90,7 +90,7 @@ def test_async_writer_write_once(self): def test_async_writer_write_queue_full(self): filename = os.path.join(self.get_temp_dir(), "async_writer_write_queue_full") - w = _AsyncWriter(open(filename, 'wb'), dummy_delay=False) + w = _AsyncWriter(open(filename, 'wb')) bytes_to_write = b"hello world" repeat = 100 for i in range(repeat): @@ -101,7 +101,7 @@ def 
test_async_writer_write_queue_full(self): def test_async_writer_write_one_slot_queue(self): filename = os.path.join(self.get_temp_dir(), "async_writer_write_one_slot_queue") - w = _AsyncWriter(open(filename, 'wb'), max_queue_size=1, dummy_delay=False) + w = _AsyncWriter(open(filename, 'wb'), max_queue_size=1) bytes_to_write = b"hello world" repeat = 10 # faster for i in range(repeat): From 6fde811d35f2388d7a6080d58d7c68d5924f94b8 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Huang Date: Tue, 23 Apr 2019 20:38:01 +0800 Subject: [PATCH 32/32] addressing comments on apr23 --- .../summary/writer/event_file_writer.py | 17 +-- .../summary/writer/event_file_writer_test.py | 100 +++--------------- tensorboard/summary/writer/record_writer.py | 6 +- .../summary/writer/record_writer_test.py | 10 +- 4 files changed, 33 insertions(+), 100 deletions(-) diff --git a/tensorboard/summary/writer/event_file_writer.py b/tensorboard/summary/writer/event_file_writer.py index 36d5503ae2..dd87a23ca0 100644 --- a/tensorboard/summary/writer/event_file_writer.py +++ b/tensorboard/summary/writer/event_file_writer.py @@ -50,8 +50,7 @@ class EventFileWriter(object): The `EventFileWriter` class creates an event file in the specified directory, and asynchronously writes Event protocol buffers to the file. The Event file - is encoded using the tfrecord format, which is similar to RecordIO. The instance is - usually created and managed by `torch.utils.tensorboard.FileWriter` in pytorch. + is encoded using the tfrecord format, which is similar to RecordIO. 
""" def __init__(self, logdir, max_queue_size=10, flush_secs=120, filename_suffix=''): @@ -73,7 +72,7 @@ def __init__(self, logdir, max_queue_size=10, flush_secs=120, filename_suffix='' if not os.path.exists(logdir): os.makedirs(logdir) self._file_name = os.path.join(logdir, "events.out.tfevents.%010d.%s.%s.%s" % - (time.time(), socket.gethostname(), os.getpid(), _global_uid.get())) + filename_suffix + (time.time(), socket.gethostname(), os.getpid(), _global_uid.get())) + filename_suffix # noqa E128 self._general_file_writer = open(self._file_name, 'wb') self._async_writer = _AsyncWriter(RecordWriter(self._general_file_writer), max_queue_size, flush_secs) @@ -114,14 +113,14 @@ def close(self): class _AsyncWriter(object): - '''Writes bytes to an file.''' + '''Writes bytes to a file.''' def __init__(self, record_writer, max_queue_size=20, flush_secs=120): - """Writes bytes to an file asynchronously. + """Writes bytes to a file asynchronously. An instance of this class holds a queue to keep the incoming data temporarily. Data passed to the `write` function will be put to the queue and the function returns immediately. This class also maintains a thread to write data in the - queue to disk. The first initialization paremeter is an instance of + queue to disk. The first initialization parameter is an instance of `tensorboard.summary.record_writer` which computes the CRC checksum and then write the combined result to the disk. So we use an async approach to improve performance. @@ -140,9 +139,9 @@ def __init__(self, record_writer, max_queue_size=20, flush_secs=120): def write(self, bytestring): '''Enqueue the given bytes to be written asychronously''' - if self._closed: - raise IOError('Writer is closed') with self._lock: + if self._closed: + raise IOError('Writer is closed') self._byte_queue.put(bytestring) def flush(self): @@ -150,6 +149,8 @@ def flush(self): Block until all the above bytestring are written. 
''' with self._lock: + if self._closed: + raise IOError('Writer is closed') self._byte_queue.join() self._writer.flush() diff --git a/tensorboard/summary/writer/event_file_writer_test.py b/tensorboard/summary/writer/event_file_writer_test.py index 543dbdd271..acbef412b8 100644 --- a/tensorboard/summary/writer/event_file_writer_test.py +++ b/tensorboard/summary/writer/event_file_writer_test.py @@ -22,9 +22,6 @@ import glob import os -import shutil -import tempfile -import time from tensorboard.summary.writer.event_file_writer import EventFileWriter from tensorboard.summary.writer.event_file_writer import _AsyncWriter from tensorboard.compat.proto import event_pb2 @@ -34,8 +31,6 @@ class EventFileWriterTest(tb_test.TestCase): - def __init__(self, *args, **kwargs): - super(EventFileWriterTest, self).__init__(*args, **kwargs) def test_event_file_writer_roundtrip(self): _TAGNAME = 'dummy' @@ -61,23 +56,18 @@ def test_setting_filename_suffix_works(self): event_files = sorted(glob.glob(os.path.join(logdir, '*'))) self.assertEqual(event_files[0].split('.')[-1], 'event_horizon') + def test_async_writer_without_write(self): + logdir = self.get_temp_dir() + w = EventFileWriter(logdir) + w.close() + event_files = sorted(glob.glob(os.path.join(logdir, '*'))) + r = PyRecordReader_New(event_files[0]) + r.GetNext() + s = event_pb2.Event.FromString(r.record()) + self.assertEqual(s.file_version, "brain.Event:2") -class AsyncWriterTest(tb_test.TestCase): - def __init__(self, *args, **kwargs): - super(AsyncWriterTest, self).__init__(*args, **kwargs) - def test_async_writer_without_write(self): - for i in range(100): - logdir = self.get_temp_dir() - w = EventFileWriter(logdir) - w.close() - event_files = sorted(glob.glob(os.path.join(logdir, '*'))) - r = PyRecordReader_New(event_files[0]) - r.GetNext() - event = r.record() - s = event_pb2.Event() - s.ParseFromString(event) - self.assertEqual(s.file_version, "brain.Event:2") +class AsyncWriterTest(tb_test.TestCase): def 
test_async_writer_write_once(self): filename = os.path.join(self.get_temp_dir(), "async_writer_write_once") @@ -110,83 +100,27 @@ def test_async_writer_write_one_slot_queue(self): with open(filename, 'rb') as f: self.assertEqual(f.read(), bytes_to_write * repeat) - # write ................................... - # flush ---------^---------^---------^ (^: flush -: idle) - # #obj in queue 12345678901234567890 (expected, because the IO overhead) - # Make strict comparion for the flushing result is possible, but it requires accessing - # the queue inside the async writer. So I write the test to simulate real write and flush. - # In my experiment, the tolerance can be set as high to roughly to 0.95. - # I set 0.9 here in case the CI is too slow. - - def test_async_writer_auto_flushing(self): - filename = os.path.join(self.get_temp_dir(), "async_writer_auto_flushing") - flush_timer = 1 - tolerance = 0.9 # The undelying writer need time to complete. - w = _AsyncWriter(open(filename, 'wb'), max_queue_size=500, flush_secs=flush_timer) - random_bytes = bytearray(os.urandom(64)) - repeat = 100 - for i in range(repeat): - w.write(random_bytes) - time.sleep(0.1) - if i % (flush_timer * 10) == 0: - with open(get_copy_by_OS(filename), 'rb') as f: - nbytes = len(f.read()) - # print(i, nbytes, i * len(random_bytes) * tolerance, nbytes / (1+i * len(random_bytes))) - self.assertGreaterEqual(nbytes, i * len(random_bytes) * tolerance) - w.close() - - # make sure all data is written - with open(filename, 'rb') as f: - self.assertEqual(f.read(), random_bytes * repeat) - - def test_async_writer_flush_before_flush_secs(self): - filename = os.path.join(self.get_temp_dir(), "async_writer_flush_before_flush_secs") - w = _AsyncWriter(open(filename, 'wb')) - random_bytes = bytearray(os.urandom(64)) - w.write(random_bytes) - w.flush() # flush() is implicitly called by close() - with open(filename, 'rb') as f: - self.assertEqual(f.read(), random_bytes) - w.write(random_bytes) - with open(filename, 
'rb') as f: # without flush, the file content should be the same. - self.assertEqual(f.read(), random_bytes) - w.flush() - with open(filename, 'rb') as f: # after flush, the file content will be updated. - self.assertEqual(f.read(), random_bytes + random_bytes) - w.write(random_bytes) - with open(filename, 'rb') as f: # without flush, the file content should be the same. - self.assertEqual(f.read(), random_bytes + random_bytes) - w.close() - with open(filename, 'rb') as f: # after close, new contents flushes implicitly. - self.assertEqual(f.read(), random_bytes + random_bytes + random_bytes) - def test_async_writer_close_triggers_flush(self): filename = os.path.join(self.get_temp_dir(), "async_writer_close_triggers_flush") w = _AsyncWriter(open(filename, 'wb')) - random_bytes = bytearray(os.urandom(64)) - w.write(random_bytes) + bytes_to_write = b"x" * 64 + w.write(bytes_to_write) w.close() with open(filename, 'rb') as f: - self.assertEqual(f.read(), random_bytes) + self.assertEqual(f.read(), bytes_to_write) def test_write_after_async_writer_closed(self): filename = os.path.join(self.get_temp_dir(), "write_after_async_writer_closed") w = _AsyncWriter(open(filename, 'wb')) - random_bytes = bytearray(os.urandom(64)) - w.write(random_bytes) + bytes_to_write = b"x" * 64 + w.write(bytes_to_write) w.close() with self.assertRaises(IOError): - w.write(random_bytes) + w.write(bytes_to_write) # nothing is written to the file after close with open(filename, 'rb') as f: - self.assertEqual(f.read(), random_bytes) - - -def get_copy_by_OS(oldfilename): - newfilename = tempfile.NamedTemporaryFile().name - shutil.copy(oldfilename, newfilename) - return newfilename + self.assertEqual(f.read(), bytes_to_write) if __name__ == '__main__': diff --git a/tensorboard/summary/writer/record_writer.py b/tensorboard/summary/writer/record_writer.py index cd2c2a32e2..a49a6b14d1 100644 --- a/tensorboard/summary/writer/record_writer.py +++ b/tensorboard/summary/writer/record_writer.py @@ -19,13 
+19,13 @@ class RecordWriter(object): """Write encoded protobuf to a file with packing defined in tensorflow""" - def __init__(self, general_writer): + def __init__(self, writer): """Open a file to keep the tensorboard records. Args: - filename: (string) The location where the file will be opened. + writer: A file-like object that implements `write`, `flush` and `close`. """ - self._writer = general_writer + self._writer = writer # Format of a single record: (little-endian) # uint64 length diff --git a/tensorboard/summary/writer/record_writer_test.py b/tensorboard/summary/writer/record_writer_test.py index 5e388a16e3..e0af94dc8e 100644 --- a/tensorboard/summary/writer/record_writer_test.py +++ b/tensorboard/summary/writer/record_writer_test.py @@ -27,15 +27,13 @@ class RecordWriterTest(tb_test.TestCase): - def __init__(self, *args, **kwargs): - super(RecordWriterTest, self).__init__(*args, **kwargs) def test_expect_bytes_written(self): filename = os.path.join(self.get_temp_dir(), "expect_bytes_written") byte_len = 64 w = RecordWriter(open(filename, 'wb')) - random_bytes = bytearray(os.urandom(byte_len)) - w.write(random_bytes) + bytes_to_write = b"x" * byte_len + w.write(bytes_to_write) w.close() with open(filename, 'rb') as f: self.assertEqual(len(f.read()), (8 + 4 + byte_len + 4)) # uint64+uint32+data+uint32 @@ -68,8 +66,8 @@ def test_expect_bytes_written_bytes_IO(self): byte_len = 64 Bytes_io = six.BytesIO() w = RecordWriter(Bytes_io) - random_bytes = bytearray(os.urandom(byte_len)) - w.write(random_bytes) + bytes_to_write = b"x" * byte_len + w.write(bytes_to_write) self.assertEqual(len(Bytes_io.getvalue()), (8 + 4 + byte_len + 4)) # uint64+uint32+data+uint32