From 9e573de709caff6ffdc514e76353852ff6ffe9f6 Mon Sep 17 00:00:00 2001
From: Matt Watson
Date: Wed, 4 May 2022 23:08:01 -0700
Subject: [PATCH] Simplified utility to preview a tfrecord

One liner usage:
`python -c "from keras_nlp.utils.tensor_utils import preview_tfrecord; preview_tfrecord('~/test.tfrecord')"`
---
 keras_nlp/utils/tensor_utils.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/keras_nlp/utils/tensor_utils.py b/keras_nlp/utils/tensor_utils.py
index 26fc815f11..4f00b2b14c 100644
--- a/keras_nlp/utils/tensor_utils.py
+++ b/keras_nlp/utils/tensor_utils.py
@@ -12,7 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
+
 import tensorflow as tf
+from google.protobuf import text_format
 
 
 def _decode_strings_to_utf8(inputs):
@@ -45,3 +48,28 @@ def tensor_to_string_list(inputs):
     if inputs.shape.rank != 0:
         list_outputs = list_outputs.tolist()
     return _decode_strings_to_utf8(list_outputs)
+
+
+def preview_tfrecord(filepath):
+    """Pretty prints a single record from a tfrecord file.
+
+    Reads the first record of the file, parses it as a `tf.train.Example`,
+    and prints it in protobuf text format.
+
+    Args:
+        filepath: Path to the tfrecord file. A leading `~` is expanded to
+            the user's home directory.
+
+    Raises:
+        ValueError: If the file contains no records.
+    """
+    dataset = tf.data.TFRecordDataset(os.path.expanduser(filepath))
+    record = next(iter(dataset), None)
+    if record is None:
+        raise ValueError(f"No records found in tfrecord file: {filepath}")
+    example = tf.train.Example()
+    example.ParseFromString(record.numpy())
+    formatted = text_format.MessageToString(
+        example, use_short_repeated_primitives=True
+    )
+    print(formatted)