In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
input_root = '/kaggle/input'
for dirname, _, filenames in os.walk(input_root):
    for filename in filenames:
        input_path = os.path.join(dirname, filename)
        print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Optimize TensorFlow pipeline performance with prefetch and caching

In [3]:
import tensorflow as tf
import time

import warnings
# Ignore every warning for the rest of the session: no category or module
# filter is given, so the 'ignore' action applies to all of them.
warnings.filterwarnings('ignore')

## Prefetch

In [5]:
class FileDataset(tf.data.Dataset):
    """Synthetic dataset that imitates reading records from a slow file.

    Instantiating the class does not produce a ``FileDataset`` object:
    ``__new__`` returns the dataset built by
    ``tf.data.Dataset.from_generator``, so the result can be chained with
    the usual ``tf.data`` transformations (``prefetch``, ``map``, ...).
    """

    @staticmethod
    def read_file_in_batches(num_samples):
        """Yield ``num_samples`` samples, sleeping to simulate file latency.

        Despite the name, samples are produced one at a time, not in batches.
        Declared as a static method because it takes neither ``self`` nor
        ``cls``; it is handed to ``from_generator`` as a plain callable.

        Args:
            num_samples: Number of samples to generate.

        Yields:
            A one-element tuple ``(sample_idx,)`` for each index in
            ``range(num_samples)``.
        """
        # Opening the file
        time.sleep(0.03)

        for sample_idx in range(num_samples):
            # Reading data (line, record) from the file
            time.sleep(0.015)

            yield (sample_idx,)

    def __new__(cls, num_samples=3):
        """Build the generator-backed dataset.

        Args:
            num_samples: Forwarded to ``read_file_in_batches`` through the
                ``args`` parameter of ``from_generator``.

        Returns:
            The dataset created by ``tf.data.Dataset.from_generator``, whose
            elements are ``int64`` tensors of shape ``(1,)``.
        """
        return tf.data.Dataset.from_generator(
            cls.read_file_in_batches,
            output_signature = tf.TensorSpec(shape = (1,), dtype = tf.int64),
            args=(num_samples,)
        )

In [6]:
def benchmark(dataset, num_epochs=2):
    """Simulate a training loop over ``dataset``.

    Iterates the whole dataset ``num_epochs`` times and sleeps for 10 ms per
    element in place of a real training step. Nothing is returned; the
    function is meant to be timed from outside (e.g. with ``%%timeit``).

    Args:
        dataset: Any iterable; it is iterated once per epoch.
        num_epochs: Number of full passes over ``dataset``.
    """
    step_seconds = 0.01  # stand-in for the cost of one training step
    for _ in range(num_epochs):
        for _ in dataset:
            time.sleep(step_seconds)

In [12]:
%%timeit
# Baseline: run the benchmark on the generator-backed dataset with no
# prefetching applied.
benchmark(FileDataset())

259 ms ± 1.06 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [13]:
%%timeit
# Same benchmark with `prefetch` applied, using a buffer size of 1.
benchmark(FileDataset().prefetch(1))

262 ms ± 2.89 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
%%timeit
# Same benchmark with `tf.data.AUTOTUNE` as the prefetch buffer size, which
# (per the tf.data docs) lets the runtime tune the buffer size dynamically.
benchmark(FileDataset().prefetch(tf.data.AUTOTUNE))

260 ms ± 1.38 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Cache

In [15]:
# Build the pipeline as one chained expression: the integers 0..4, squared,
# then cached to files based on the name "mycache.txt".
# NOTE: per the tf.data docs, a file-backed cache persists across runs, so
# cache files left over from an earlier run of this cell will be reused.
dataset = (
    tf.data.Dataset.range(5)
    .map(lambda value: value**2)
    .cache("mycache.txt")
)
# The first pass over the data produces the elements with `range` and `map`.
list(dataset.as_numpy_iterator())

[0, 1, 4, 9, 16]

In [16]:
# Subsequent iterations read from the cache instead of re-running `range`
# and `map`; the elements returned are the same.
list(dataset.as_numpy_iterator())

[0, 1, 4, 9, 16]

In [17]:
def mapped_function(s):
    """Return ``s`` unchanged after a simulated 30 ms pre-processing step.

    The delay is issued through ``tf.py_function`` with no inputs and no
    outputs, so the Python ``time.sleep`` call is wrapped as a TensorFlow op.
    """
    def _simulate_preprocessing():
        # Stand-in for some hard pre-processing work
        time.sleep(0.03)

    tf.py_function(_simulate_preprocessing, [], ())
    return s

In [18]:
%%timeit -r1 -n1
# Single run of a single loop (-r1 -n1): 5 epochs over the dataset with
# `mapped_function` applied through `map`; no `cache()` is used here.
benchmark(FileDataset().map(mapped_function), 5)

1.11 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


**Further Reading**: [TensorFlow Data Performance Guide: Caching](https://www.tensorflow.org/guide/data_performance#caching)
