In [None]:
import tensorflow as tf
from tensorflow_transform.tf_metadata import dataset_schema

# Display the installed TensorFlow version as the cell output.
tf.__version__

### High Performance Input Tensors

In [None]:
# Parsing spec for the serialized examples: every column is a fixed-length
# feature holding a single value, either float32 or int64.
feature_spec = {
    name: tf.io.FixedLenFeature([1], dtype)
    for name, dtype in (
        ('beta1', tf.float32),
        ('beta2', tf.float32),
        ('weekday', tf.int64),
        ('hour', tf.int64),
        ('humidity', tf.float32),
    )
}
# Dataset schema derived from the same feature spec.
schema = dataset_schema.from_feature_spec(feature_spec)

In [None]:
def make_tfr_input_fn(filename_pattern, batch_size, options):
    """Create a zero-argument input function that reads batched TFRecords.

    The returned closure captures the arguments given here, so it can be
    handed to a consumer that calls it without any parameters. The records
    are parsed with the module-level ``feature_spec``, and the 'humidity'
    feature is split off as the label.

    Args:
        filename_pattern: File pattern of the TFRecord files to read
            (forwarded as ``file_pattern``).
        batch_size: Number of records combined into one batch.
        options: Dict that must contain the keys 'shuffle_buffer_size',
            'prefetch_buffer_size', 'reader_num_threads',
            'parser_num_threads', 'sloppy_ordering' and 'num_epochs'
            (all forwarded unchanged to ``make_batched_features_dataset``)
            plus 'distribute', which selects the return type of the
            input function.

    Returns:
        A function without parameters. When ``options['distribute']`` is
        truthy it returns the dataset itself; otherwise it returns the
        ``get_next()`` result of a one-shot iterator over the dataset.

    Raises:
        KeyError: When the input function is called and one of the
            required keys is missing from ``options``.
    """

    def _input_fn():
        dataset = tf.data.experimental.make_batched_features_dataset(
            file_pattern=filename_pattern,
            batch_size=batch_size,
            features=feature_spec,
            shuffle_buffer_size=options['shuffle_buffer_size'],
            prefetch_buffer_size=options['prefetch_buffer_size'],
            reader_num_threads=options['reader_num_threads'],
            parser_num_threads=options['parser_num_threads'],
            sloppy_ordering=options['sloppy_ordering'],
            num_epochs=options['num_epochs'],
            label_key='humidity')

        if options['distribute']:
            return dataset

        # Dataset.make_one_shot_iterator() is deprecated and no longer a
        # Dataset method in TF 2.x; the compat.v1 function is the supported
        # equivalent and yields the same iterator.
        iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
        return iterator.get_next()

    return _input_fn

Take the pattern from ```Beam_Pipelines.ipynb```:

In [None]:
import os

# temp_dir.txt contains the directory that holds the training TFRecord files.
# Strip surrounding whitespace: a trailing newline in the file would otherwise
# end up in the middle of the joined path and the pattern would match nothing.
with open('temp_dir.txt') as file:
    temp_dir = file.read().strip()

# Pattern matching the "training.tfr-*" files inside that directory.
file_pattern = os.path.join(temp_dir, "training.tfr-*")
file_pattern

In [None]:
# Input function for training: small batches, shuffled, repeated forever,
# returning tensors rather than the dataset ('distribute' is off).
train_input_fn = make_tfr_input_fn(
    filename_pattern=file_pattern,
    batch_size=5,
    options=dict(
        num_epochs=None,  # repeat infinitely
        shuffle_buffer_size=1000,
        prefetch_buffer_size=1000,
        reader_num_threads=10,
        parser_num_threads=10,
        sloppy_ordering=True,
        distribute=False))

This design pattern allows us to supply parameters to a function that is itself not allowed to take any. We essentially have an outer function now that provides its parameters to an inner (*daughter*) function as constants.

Later, we will provide this ```train_input_fn``` to the so-called ```estimator```. It is then up to the ```estimator``` to call ```train_input_fn``` and thereby create the input-generating computational sub-graph within its own graph and session context.

For demonstration purposes, we call the function ourselves and see what it returns.

In [None]:
# With 'distribute' set to False the input function returns the iterator's
# get_next() result, which is unpacked here into features and labels.
samples, labels = train_input_fn()

In [None]:
# Inspect the (not yet evaluated) feature tensors.
samples

In [None]:
# Inspect the (not yet evaluated) label tensor for the 'humidity' label key.
labels

Now, each time we evaluate ```samples``` and ```labels```, we'll get a new batch of 5 samples (the ```batch_size``` chosen above) with the associated 'humidity' labels.

In [None]:
# Fetch both tensors in a single run() call so that the features and the
# labels come from the same batch.
with tf.Session() as sess:
    s, l = sess.run([samples, labels])

In [None]:
# Show the evaluated batch: feature values and their labels.
s, l