In [1]:
import os
import tensorflow as tf

In [2]:
# Directory that holds every TFRecord file written by this notebook.
DATA_DIR = "./tfrecord_data"
# exist_ok=True keeps this cell safe to re-run when the directory already exists.
os.makedirs(name=DATA_DIR, exist_ok=True)

## Write / read a single TFRecord file

In [3]:
# Write two raw byte-string records into one TFRecord file under DATA_DIR.
file_path = os.path.join(DATA_DIR, "single.tfrecord")
records = (
    b"This is the first record",
    b"And this is the second record",
)
with tf.io.TFRecordWriter(file_path) as writer:
    for record in records:
        writer.write(record)

In [4]:
# Read the single file back; each element prints as a scalar string tensor
# carrying one record's bytes.
dataset = tf.data.TFRecordDataset(filenames=[file_path])
for record in dataset:
    print(record)

tf.Tensor(b'This is the first record', shape=(), dtype=string)
tf.Tensor(b'And this is the second record', shape=(), dtype=string)


## Write / read multiple TFRecord files

In [5]:
# Write five small TFRecord files, each holding three UTF-8 encoded records,
# so that reading from several files at once can be demonstrated.
file_paths = [os.path.join(DATA_DIR, f"multiple_{i}.tfrecord") for i in range(5)]
# The loop variable is deliberately NOT named `file_path`: that name already
# refers to the single-record file written earlier in this notebook, and
# overwriting it here would make a re-run of the single-file read cell load
# the last "multiple" file instead.
for i, multi_file_path in enumerate(file_paths):
    with tf.io.TFRecordWriter(multi_file_path) as f:
        for j in range(3):
            # TFRecordWriter.write is given bytes, so encode the text explicitly.
            f.write(f"File {i} record {j}".encode("utf-8"))

In [6]:
# Read all five files through one dataset. With num_parallel_reads=3 the
# printed records come interleaved from up to three files at a time rather
# than file by file (see the output below).
dataset = tf.data.TFRecordDataset(filenames=file_paths, num_parallel_reads=3)
for record in dataset:
    print(record)

tf.Tensor(b'File 0 record 0', shape=(), dtype=string)
tf.Tensor(b'File 1 record 0', shape=(), dtype=string)
tf.Tensor(b'File 2 record 0', shape=(), dtype=string)
tf.Tensor(b'File 0 record 1', shape=(), dtype=string)
tf.Tensor(b'File 1 record 1', shape=(), dtype=string)
tf.Tensor(b'File 2 record 1', shape=(), dtype=string)
tf.Tensor(b'File 0 record 2', shape=(), dtype=string)
tf.Tensor(b'File 1 record 2', shape=(), dtype=string)
tf.Tensor(b'File 2 record 2', shape=(), dtype=string)
tf.Tensor(b'File 3 record 0', shape=(), dtype=string)
tf.Tensor(b'File 4 record 0', shape=(), dtype=string)
tf.Tensor(b'File 3 record 1', shape=(), dtype=string)
tf.Tensor(b'File 4 record 1', shape=(), dtype=string)
tf.Tensor(b'File 3 record 2', shape=(), dtype=string)
tf.Tensor(b'File 4 record 2', shape=(), dtype=string)


## Compression

In [7]:
# Write the same two records as before, this time GZIP-compressed.
compressed_file_path = os.path.join(DATA_DIR, "compressed.tfrecord")
options = tf.io.TFRecordOptions(compression_type="GZIP")
with tf.io.TFRecordWriter(compressed_file_path, options=options) as writer:
    for record in (
        b"This is the first record",
        b"And this is the second record",
    ):
        writer.write(record)

In [8]:
# Read the compressed file back, passing the same compression type ("GZIP")
# that was used when writing it.
dataset = tf.data.TFRecordDataset(
    filenames=[compressed_file_path],
    compression_type="GZIP",
)
for record in dataset:
    print(record)

tf.Tensor(b'This is the first record', shape=(), dtype=string)
tf.Tensor(b'And this is the second record', shape=(), dtype=string)
