# Optimize using prefetch

In [1]:
import tensorflow as tf 
import time

In [2]:
# Display the installed TensorFlow version so the timings below can be tied to it.
tf.__version__

'2.19.0'

In [3]:
class FileDataset(tf.data.Dataset):
    """Synthetic dataset that mimics reading samples from a file.

    Instantiating this class does not produce a ``FileDataset`` instance:
    ``__new__`` returns the dataset built by
    ``tf.data.Dataset.from_generator``, so the result can be chained
    directly with transformations such as ``prefetch``.
    """

    @staticmethod
    def read_file_in_batches(num_samples):
        """Yield ``num_samples`` one-element tuples with simulated I/O latency.

        Declared as a static method because it uses no instance or class
        state; it is handed to ``from_generator`` as a plain callable.

        Args:
            num_samples: Number of samples to produce.

        Yields:
            ``(sample_idx,)`` for each ``sample_idx`` in ``range(num_samples)``.
        """
        # Simulate the one-off cost of opening the file (30 ms).
        time.sleep(0.03)

        for sample_idx in range(num_samples):
            # Simulate the cost of reading a single record (15 ms).
            time.sleep(0.015)
            yield (sample_idx,)

    def __new__(cls, num_samples=3):
        """Build the generator-backed dataset.

        Args:
            num_samples: Number of samples the generator yields.

        Returns:
            The dataset returned by ``tf.data.Dataset.from_generator``; each
            element matches ``tf.TensorSpec(shape=(1,), dtype=tf.int64)``.
        """
        return tf.data.Dataset.from_generator(
            cls.read_file_in_batches,
            output_signature=tf.TensorSpec(shape=(1,), dtype=tf.int64),
            args=(num_samples,),
        )

In [4]:
def benchmark(dataset, num_epochs=2):
    """Iterate over ``dataset`` ``num_epochs`` times, simulating a training loop.

    Args:
        dataset: Any iterable; it is iterated from the start once per epoch.
        num_epochs: Number of full passes over ``dataset``.

    Returns:
        None. The function exists only for its elapsed time, which the
        caller is expected to measure.
    """
    for _epoch in range(num_epochs):
        for _sample in dataset:
            # Stand-in for a training step that takes 10 ms per sample.
            time.sleep(0.01)

In [5]:
%%timeit 
# Baseline: time the benchmark loop on the dataset with no prefetch applied.
benchmark(FileDataset())

2025-06-24 13:39:24.296920: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2025-06-24 13:39:24.296967: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-06-24 13:39:24.296976: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
I0000 00:00:1750752564.297038   95659 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1750752564.297109   95659 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2025-06-24 13:39:24.460182: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2025-06-24 13:39:24.573222: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting

253 ms ± 7.68 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


2025-06-24 13:39:26.348292: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [6]:
%%timeit 
# Same benchmark with prefetch(1) chained onto the dataset (buffer size of one element).
benchmark(FileDataset().prefetch(1))

252 ms ± 3.43 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


2025-06-24 13:39:30.589049: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [7]:
%%timeit 
# Same benchmark with the prefetch buffer size set to tf.data.AUTOTUNE instead of a fixed value.
benchmark(FileDataset().prefetch(tf.data.AUTOTUNE))

257 ms ± 5.59 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
