In [18]:
import tensorflow as tf
from tensorflow.feature_column import numeric_column
from tensorflow.feature_column import crossed_column
from tensorflow.feature_column import indicator_column
from tensorflow.feature_column import categorical_column_with_identity
from tensorflow_transform.tf_metadata import dataset_schema

tf.__version__

'1.13.1'

In [19]:
# Both betas enter the model as real-valued (numeric) feature columns.
beta1, beta2 = (numeric_column(key) for key in ('beta1', 'beta2'))

Remember: There were particular hours on particular days where the quality of our humidity prediction suddenly decreased significantly. We therefore encode the hour of the week, on the assumption that it has a substantial influence on the problem.
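We create a $24 \times 7 = 168$-dimensional feature cross for the one-hot-encoded *hour of the week*. Note that `crossed_column` assigns buckets by hashing the combined values, so distinct hours of the week are not guaranteed to receive distinct buckets.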

In [39]:
# Integer-coded categorical inputs: 7 weekday values and 24 hour values.
weekday = categorical_column_with_identity(key='weekday', num_buckets=7)
hour = categorical_column_with_identity(key='hour', num_buckets=24)

# Cross weekday with hour and expose the result as a dense indicator vector.
# NOTE(review): crossed_column hashes the combination into `hash_bucket_size`
# buckets, so distinct (weekday, hour) pairs may share a bucket -- confirm
# that this is acceptable for the model.
hour_of_week = indicator_column(
    crossed_column(keys=[weekday, hour], hash_bucket_size=24*7))

---
Below is code from `InputFunctions.ipynb`, repeated here for simplicity and readability.

In [34]:
def make_tfr_input_fn(filename_pattern, batch_size, options,
                      features=None, label_key='humidity'):
    """Build an input function that reads batched examples from TFRecord files.

    Args:
        filename_pattern: File pattern of the TFRecord files to read.
        batch_size: Number of records per batch.
        options: Dict that must contain the keys 'shuffle_buffer_size',
            'prefetch_buffer_size', 'reader_num_threads',
            'parser_num_threads', 'sloppy_ordering', 'num_epochs' and
            'distribute'. All but 'distribute' are forwarded unchanged to
            `tf.data.experimental.make_batched_features_dataset`.
        features: Optional feature spec used to parse the records. When left
            as None, the notebook-level `feature_spec` is used, exactly as
            before this parameter existed.
        label_key: Name of the feature that is split off as the label.
            Defaults to 'humidity'.

    Returns:
        A zero-argument function. If options['distribute'] is truthy it
        returns the dataset itself; otherwise it returns the next element of
        a one-shot iterator over that dataset.

    Raises:
        KeyError: When the returned function is called and one of the
            required keys is missing from `options`.
        NameError: When the returned function is called with `features` left
            as None and no global `feature_spec` has been defined.
    """

    def _input_fn():
        # Resolve the spec at call time so that the notebook may define the
        # global `feature_spec` in a cell that runs after this definition.
        parse_spec = feature_spec if features is None else features

        dataset = tf.data.experimental.make_batched_features_dataset(
            file_pattern=filename_pattern,
            batch_size=batch_size,
            features=parse_spec,
            shuffle_buffer_size=options['shuffle_buffer_size'],
            prefetch_buffer_size=options['prefetch_buffer_size'],
            reader_num_threads=options['reader_num_threads'],
            parser_num_threads=options['parser_num_threads'],
            sloppy_ordering=options['sloppy_ordering'],
            num_epochs=options['num_epochs'],
            label_key=label_key)

        if options['distribute']:
            # Hand back the dataset object itself in the 'distribute' case.
            return dataset
        return dataset.make_one_shot_iterator().get_next()

    return _input_fn

Take the pattern from `Beam_Pipelines.ipynb`:

In [35]:
# Glob pattern matching the training TFRecord shards.
# NOTE(review): this looks like a per-run temporary directory -- confirm the
# path still exists and update it before re-running the notebook.
file_pattern = "/tmp/tmpsrnhj_zd/training.tfr-*"

In [36]:
# Parsing spec for the stored examples: every entry is a fixed-length
# feature of shape [1]; only the dtype differs per feature.
feature_spec = {
    name: tf.io.FixedLenFeature([1], dtype)
    for name, dtype in (
        ('beta1', tf.float32),
        ('beta2', tf.float32),
        ('weekday', tf.int64),
        ('hour', tf.int64),
        ('humidity', tf.float32),
    )
}

# Derive the dataset schema from the same spec.
schema = dataset_schema.from_feature_spec(feature_spec)

In [37]:
# Input function over the training files, yielding batches of 2 records.
train_input_fn = make_tfr_input_fn(
    file_pattern,
    2,  # batch_size
    dict(
        distribute=False,
        sloppy_ordering=True,
        parser_num_threads=10,
        reader_num_threads=10,
        prefetch_buffer_size=1000,
        shuffle_buffer_size=1000,
        num_epochs=None,  # repeat infinitely
    ))

### Creating the input layer
We expect a $170$-dimensional layer: $168$ dimensions for the hour of the week and two more for $\beta_1$ and $\beta_2$.

In [42]:
# Call the input function once and keep only element 0 of its result,
# the dict that maps feature names to batched tensors.
features = train_input_fn()[0] # We omit the 'humidity' label
features

{'beta1': <tf.Tensor 'IteratorGetNext_6:0' shape=(2, 1) dtype=float32>,
 'beta2': <tf.Tensor 'IteratorGetNext_6:1' shape=(2, 1) dtype=float32>,
 'hour': <tf.Tensor 'IteratorGetNext_6:2' shape=(2, 1) dtype=int64>,
 'weekday': <tf.Tensor 'IteratorGetNext_6:3' shape=(2, 1) dtype=int64>}

In [46]:
# Every feature column that feeds the dense input layer.
all_feature_columns = [beta1, beta2, hour_of_week]

# Turn the batched feature tensors into a single dense tensor.
input_layer = tf.feature_column.input_layer(
    features=features,
    feature_columns=all_feature_columns)
input_layer

<tf.Tensor 'input_layer_4/concat:0' shape=(2, 170) dtype=float32>

In [47]:
# Evaluate the input layer once to obtain one concrete batch of values.
with tf.Session() as sess:
    inp170 = sess.run(fetches=input_layer)

Below you can see that we have 2 records (that's the batch size we chose), each consisting of two float features – the $\beta$s – and a single value of $1$ whose position indicates the hour of the week at which the $\beta$s were measured. What may appear to be a massive waste is actually a very efficient way of dealing with categorical values in the context of machine learning.

In [50]:
# Display the evaluated batch: one row per record, 170 columns per row.
inp170

array([[0.93222827, 0.6836412 , 0.        , 0.        , 1.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.  