# DNN Regression: Floor Price Optimisation

## Continuous Learning Experiment

- Author: Reshad Dernjani
- Tensorflow Transform documentation: https://www.tensorflow.org/tfx/transform/tutorials/TFT_census_example
- This notebook loads the latest checkpoint of a pre-trained model and continues training it on the reduced DSP bids data

## Usage
- Make sure transformed data was made available by running "Preprocessing with Tensorflow Transform-Continuous Learning.ipynb"
- Make sure base model was created by running "Custom DNN Regression Floor Price Optimisation.ipynb"
- Make sure CHECKPOINT is set correctly

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os
import pprint
import tempfile
import time

try:
    import tensorflow_transform as tft
    import apache_beam as beam
    import tensorflow_model_analysis as tfma
except ImportError:
    # This will take a minute, ignore the warnings.
    !pip install -q tensorflow-transform
    !pip install -q apache_beam
    !pip install -q tensorflow-model-analysis
    import tensorflow_transform as tft
    import apache_beam as beam
    import tensorflow_model_analysis as tfma

# Seed the random generators the same way on every run, in order
# to make results more deterministic. Neural networks use randomness by design
# for weight initialization, regularization, word embeddings or stochastic optimizers.
from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(2)    
    
import tensorflow as tf
import tensorflow_transform.beam as tft_beam
from tensorflow_transform.tf_metadata import dataset_metadata
from tensorflow_transform.tf_metadata import dataset_schema

from tensorflow_transform.saved import saved_transform_io
from tensorflow_transform.beam.tft_beam_io import transform_fn_io

## Setting hyperparameters and basic housekeeping

In [None]:
# Checkpoint of the pre-trained base model. It is handed to
# tf.estimator.WarmStartSettings in train_and_evaluate so that training
# continues from these weights.
CHECKPOINT = '/notebooks/tmp/47/logs/model.ckpt-511'

# Training schedule: train_and_evaluate derives the number of training steps
# from TRAIN_NUM_EPOCHS * <number of train instances> / TRAIN_BATCH_SIZE.
TRAIN_NUM_EPOCHS = 1
TRAIN_BATCH_SIZE = 1
# One entry per hidden layer; each value is the number of units of that layer.
HIDDEN_UNITS = [1024]
# Learning rate passed to the optimizer of the model function.
LEARNING_RATE = 0.001
# Asymmetry of the loss: positive values penalize overestimation
# (the model function documents the valid range as -1 < shift_loss < 1).
SHIFT_LOSS = 0.7

# When True, train_and_evaluate additionally evaluates the trained model
# on the train and test data.
IS_TESTING = False
# Default number of evaluation steps (evaluation runs with batch size 1).
NUM_TEST_INSTANCES = 1000

# Working directories
# TEMP: base directory of the per-DSP working dirs (logs, exported model,
# hyperparameters.txt). TRANSFORMED_DIR: base directory of the per-DSP
# tf.Transform output.
TEMP = '/notebooks/tmp/'
TRANSFORMED_DIR = '/notebooks/transformed/'

# Names of temp files
# The two *_FILEBASE values are file-name prefixes; a '*' is appended to them
# to build the glob patterns of the transformed TFRecord files.
TRANSFORMED_TRAIN_DATA_FILEBASE = 'train_transformed'
TRANSFORMED_TEST_DATA_FILEBASE = 'test_transformed'
# Sub-directory of the working dir that receives the exported SavedModel.
EXPORTED_MODEL_DIR = 'exported_model_dir'
# NOTE(review): presumably the target of an eval-model export; it is not
# referenced by the cells shown here - confirm whether it is still needed.
EXPORTED_EVAL_MODEL_DIR = 'eval_dir'

## Name our columns

In [None]:
# Columns parsed as scalar strings and wrapped as vocabulary-file based
# categorical feature columns.
CATEGORICAL_FEATURE_KEYS = [
    'inventory_id',
    'request_type',
    'state_code',
    'country_code',
    'city_code',
    'device_os',
    'device_os_version',
    'hour_of_day',
]

# Columns parsed as scalar float32 values and wrapped as numeric feature columns.
NUMERIC_FEATURE_KEYS = [
    'ex_floor_price',
]

# Columns parsed as variable-length float32 features (currently none).
OPTIONAL_NUMERIC_FEATURE_KEYS = [ 
    # Actually we handled optionals on the data query (at least for research).
]

# Name of the regression target column; it is popped from the features to
# form the labels and removed from the serving feature spec.
LABEL_KEY = 'ex_bid_price'

## Define our features and schema

In [None]:
# Parsing spec of the raw (untransformed) examples, keyed by column name.
# It is filled group by group, in the order: categorical, numeric, optional
# numeric, label.
RAW_DATA_FEATURE_SPEC = {}

# Categorical columns are scalar strings.
RAW_DATA_FEATURE_SPEC.update(
    {key: tf.FixedLenFeature([], tf.string) for key in CATEGORICAL_FEATURE_KEYS}
)
# Mandatory numeric columns are scalar float32 values.
RAW_DATA_FEATURE_SPEC.update(
    {key: tf.FixedLenFeature([], tf.float32) for key in NUMERIC_FEATURE_KEYS}
)
# Optional numeric columns are variable-length float32 features.
RAW_DATA_FEATURE_SPEC.update(
    {key: tf.VarLenFeature(tf.float32) for key in OPTIONAL_NUMERIC_FEATURE_KEYS}
)
# The label is a scalar float32 value as well.
RAW_DATA_FEATURE_SPEC[LABEL_KEY] = tf.FixedLenFeature([], tf.float32)

# Dataset metadata wrapping the schema derived from the feature spec above.
_RAW_DATA_SCHEMA = dataset_schema.from_feature_spec(RAW_DATA_FEATURE_SPEC)
RAW_DATA_METADATA = dataset_metadata.DatasetMetadata(_RAW_DATA_SCHEMA)

## Create an input function for training

In [None]:
def _make_training_input_fn(tf_transform_output, transformed_examples, batch_size):
    """Creates an input function reading from transformed data.
    
    Args:
        tf_transform_output: Wrapper around output of tf.Transform.
        transformed_examples: Base filename of examples.
        batch_size: Batch size.
        
    Returns:
        The input function for training or eval.
    """
    def input_fn():
        """Input function for training and eval."""
        dataset = tf.data.experimental.make_batched_features_dataset(
            file_pattern=transformed_examples,
            batch_size=batch_size,
            features=tf_transform_output.transformed_feature_spec(),
            reader=tf.data.TFRecordDataset,
            shuffle=True
        )

        transformed_features = dataset.make_one_shot_iterator().get_next()
        # Extract features and label(s) from the transformed tensors.
        transformed_labels = transformed_features.pop(LABEL_KEY)

        return transformed_features, transformed_labels

    return input_fn

## Create an input function for serving

In [None]:
def _make_serving_input_fn(tf_transform_output):
    """Creates a serving input function that accepts raw data.

    The returned function parses incoming raw examples, applies the
    tf.Transform graph to them and hands the *transformed* features to the
    model, i.e. the same representation the model was trained on.

    Args:
        tf_transform_output: Wrapper around output of tf.Transform.

    Returns:
        The serving input function.
    """
    raw_feature_spec = RAW_DATA_METADATA.schema.as_feature_spec()
    # Remove label since it is not available during serving.
    raw_feature_spec.pop(LABEL_KEY)

    def serving_input_fn():
        """Input function for serving."""
        # Build the basic parsing receiver and call it. It expects serialized
        # tf.Example protos that are parsed with the raw feature spec.
        # See tf.estimator.export.build_raw_serving_input_receiver_fn for a
        # receiver that takes already parsed tensors instead.
        raw_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
            raw_feature_spec,
            default_batch_size=None
        )
        serving_input_receiver = raw_input_fn()

        # Apply the transform function that was used to generate the materialized data.
        raw_features = serving_input_receiver.features
        transformed_features = tf_transform_output.transform_raw_features(raw_features)

        # The model consumes transformed features, so these (not the raw
        # ones) have to be wired into the receiver.
        return tf.estimator.export.ServingInputReceiver(
            transformed_features,
            serving_input_receiver.receiver_tensors
        )

    return serving_input_fn

## Wrap our input data in FeatureColumns

In [None]:
def get_feature_columns(tf_transform_output):
    """Returns the FeatureColumns for the model.

    Numeric features become scalar numeric columns. Every categorical feature
    is backed by the vocabulary file that tf.Transform wrote for it. The
    high-dimensional ones (city_code, state_code, inventory_id) are embedded
    with a dimension of about vocab_size ** 0.25; all other categorical
    features are one-hot encoded through indicator columns.

    Args:
        tf_transform_output: A `TFTransformOutput` object.

    Returns:
        A list of FeatureColumns ordered as: numeric columns, indicator
        columns, embedding columns.
    """
    # Categorical features that get an embedding instead of a one-hot encoding.
    embedded_keys = ('city_code', 'state_code', 'inventory_id')

    # Wrap scalars as real value columns.
    real_value_columns = [
        tf.feature_column.numeric_column(key, shape=())
        for key in NUMERIC_FEATURE_KEYS
    ]

    # Wrap categorical columns as either indicator or embedding columns.
    indicator_columns = []
    embedding_columns = []
    for key in CATEGORICAL_FEATURE_KEYS:
        categorical_column = tf.feature_column.categorical_column_with_vocabulary_file(
            key=key,
            vocabulary_file=tf_transform_output.vocabulary_file_by_name(vocab_filename=key)
        )
        if key in embedded_keys:
            vocab_size = tf_transform_output.vocabulary_size_by_name(key)
            # embedding_column expects an integer dimension >= 1; round()
            # alone yields a float here.
            dimension = max(1, int(round(vocab_size ** 0.25)))
            embedding_columns.append(
                tf.feature_column.embedding_column(categorical_column, dimension)
            )
        else:
            # Any categorical key without an embedding is one-hot encoded,
            # so a newly added key is not dropped silently.
            indicator_columns.append(
                tf.feature_column.indicator_column(categorical_column)
            )

    return real_value_columns + indicator_columns + embedding_columns

## Create Estimator

In [None]:
def floorPriceEstimator(features, labels, mode, params):
    """Custom neural network regressor (model_fn) with an asymmetric loss.

    The network consists of the feature-column input layer, one ReLU dense
    layer per entry of ``params['hidden_units']`` and a single linear output
    unit whose result is squeezed to a 1-dim tensor of predictions.

    Args:
        features: Feature tensors, consumed through ``params['feature_columns']``.
        labels: Label tensor. Not used in PREDICT mode.
        mode: predict, train or eval mode (a ``tf.estimator.ModeKeys`` value).
        params: Configuration dict containing the keys:
            feature_columns: FeatureColumns applied to ``features``.
            hidden_units: Iterable with the number of units per hidden layer.
            shift_loss: Asymmetry of the loss. Positive values penalize
                overestimation. -1 < shift_loss < 1
            optimizer: Optional optimizer class, defaults to
                ``tf.train.AdamOptimizer``.
            learning_rate: Optional learning rate passed to the optimizer.

    Returns:
        EstimatorSpec: Objects of EstimatorSpec define the model to be run.
        In EVAL mode the spec reports ``mean_squared_error``,
        ``root_mean_squared_error`` and ``asymmetric_loss``; the latter is
        the streaming mean of the per-batch summed asymmetric loss.
    """
    # Using 'input_layer' to apply the feature columns.
    net = tf.feature_column.input_layer(features, params['feature_columns'])

    # Build the hidden layers, sized according to the 'hidden_units' param.
    for units in params['hidden_units']:
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)

    # Output layer takes as input the results of the latest hidden layer
    net = tf.layers.dense(net, units=1, activation=None)

    # Reshape the output layer to a 1-dim Tensor to return predictions
    net = tf.squeeze(net, 1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={"predictions": net}
        )

    # Get batch size
    batch_size = tf.shape(labels)[0]

    # Tweaked squared loss: the squared error is weighted by
    # (sign(error) + shift_loss)^2, so over- and underestimation are
    # penalized differently.
    error = net - labels
    per_example_loss = tf.pow(error, 2) * tf.pow(tf.sign(error) + params['shift_loss'], 2)

    # Sum of the asymmetric loss over the batch; this is what gets minimized.
    asymmetric_loss = tf.reduce_sum(per_example_loss)
    # Push metric to logs
    tf.summary.scalar("asymmetric_loss", asymmetric_loss)

    # Calculate mean squared error. tf.metrics.* return a
    # (value_op, update_op) pair; index 1 below is the update op.
    mse = tf.metrics.mean_squared_error(tf.cast(labels, tf.float32), net)
    total_mse = tf.to_float(batch_size) * mse[1]
    # Push metric to logs
    tf.summary.scalar('total_mean_squared_error', total_mse)

    # NOTE(review): the loss reported to the Estimator is the summed loss
    # scaled by the batch size again, while the optimizer minimizes the
    # unscaled sum. Kept as is to leave the logged values comparable.
    total_loss = tf.to_float(batch_size) * asymmetric_loss

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer_cls = params.get("optimizer", tf.train.AdamOptimizer)
        optimizer = optimizer_cls(params.get("learning_rate", None))
        train_op = optimizer.minimize(loss=asymmetric_loss, global_step=tf.train.get_global_step())

        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op
        )

    assert mode == tf.estimator.ModeKeys.EVAL, "Unsupported mode: {}".format(mode)

    # Calculate root mean squared error
    rmse = tf.metrics.root_mean_squared_error(tf.cast(labels, tf.float32), net)

    eval_metrics = {
        "mean_squared_error": mse,
        "root_mean_squared_error": rmse,
        # A real streaming metric: the value op and the update op belong
        # together and average the loss over all evaluated batches.
        "asymmetric_loss": tf.metrics.mean(asymmetric_loss, name="asymmetric_loss_mean"),
    }

    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metric_ops=eval_metrics)

## Train, Evaluate, and Export our model

In [None]:
def train_and_evaluate(working_dir, transformed_dir, num_train_instances,
                       num_test_instances=NUM_TEST_INSTANCES):
    """Warm-start, train and export the model; optionally evaluate it.

    Args:
        working_dir: Directory that receives the 'logs' model directory and
            the exported model
        transformed_dir: Directory to read transformed data and metadata from
        num_train_instances: Number of instances in train set
        num_test_instances: Number of evaluation steps (each with batch
            size 1) used when IS_TESTING is set

    Returns:
        The trained estimator
    """
    import math

    # Get transformed data
    tf_transform_output = tft.TFTransformOutput(transformed_dir)

    # This will load the model trained with 24 hours,
    # in order to warm start the training with the reduced bids data of the next day hour 09
    ws = tf.estimator.WarmStartSettings(ckpt_to_initialize_from=CHECKPOINT)

    # Init estimator.
    estimator = tf.estimator.Estimator(
        model_fn=floorPriceEstimator,
        model_dir=os.path.join(working_dir, 'logs'),
        warm_start_from=ws,
        params={
            'feature_columns': get_feature_columns(tf_transform_output),
            'hidden_units': HIDDEN_UNITS,
            'learning_rate': LEARNING_RATE,
            'optimizer': tf.train.AdamOptimizer,
            'shift_loss': SHIFT_LOSS
        }
    )

    # Init TRAIN input function
    train_input_fn = _make_training_input_fn(
        tf_transform_output,
        os.path.join(transformed_dir, TRANSFORMED_TRAIN_DATA_FILEBASE + '*'),
        batch_size=TRAIN_BATCH_SIZE
    )

    # The step count has to be a whole number. Round up so that a trailing
    # partial batch is still covered.
    train_steps = int(math.ceil(
        TRAIN_NUM_EPOCHS * num_train_instances / float(TRAIN_BATCH_SIZE)
    ))

    # Fit the model
    estimator.train(input_fn=train_input_fn, steps=train_steps)

    # Export the trained model.
    serving_input_fn = _make_serving_input_fn(tf_transform_output)
    exported_model_dir = os.path.join(working_dir, EXPORTED_MODEL_DIR)
    estimator.export_savedmodel(exported_model_dir, serving_input_fn)

    if IS_TESTING:
        # Init TRAIN input function
        eval_train_input_fn = _make_training_input_fn(
            tf_transform_output,
            os.path.join(transformed_dir, TRANSFORMED_TRAIN_DATA_FILEBASE + '*'),
            batch_size=1
        )

        # Eval the model on TRAIN.
        eval_metrics = estimator.evaluate(input_fn=eval_train_input_fn, steps=num_test_instances)
        print('\n\nEval metrics on TRAIN')
        pprint.pprint(eval_metrics)

        # Init TEST input function
        eval_test_input_fn = _make_training_input_fn(
            tf_transform_output,
            os.path.join(transformed_dir, TRANSFORMED_TEST_DATA_FILEBASE + '*'),
            batch_size=1
        )

        # Eval the model on TEST .
        eval_metrics = estimator.evaluate(input_fn=eval_test_input_fn, steps=num_test_instances)
        print('\n\nEval metrics on TEST')
        pprint.pprint(eval_metrics)

        # Run predictions on the model.
        # NOTE(review): Estimator.predict yields its results lazily and the
        # returned object is not iterated here, so this likely performs no
        # inference - confirm the intent.
        estimator.predict(input_fn=eval_test_input_fn)

    return estimator
    

In [None]:
def count_instances(tf_records_filenames):
    """Return the total number of records stored in the given TFRecord files.

    Args:
        tf_records_filenames: Iterable of TFRecord file names.

    Returns:
        The number of records summed over all files; 0 for no files.
    """
    return sum(
        1
        for filename in tf_records_filenames
        for _ in tf.python_io.tf_record_iterator(filename)
    )

In [None]:
def save_hyperparameters(dsp, num_train_instances):
    """Append the current hyperparameters to the DSP's hyperparameters.txt.

    The file lives in the directory TEMP + dsp, which is created when it
    does not exist yet. Entries are appended, so earlier runs are kept.

    Args:
        dsp: Name of the DSP run; used as the sub-directory below TEMP.
        num_train_instances: Number of training instances to record.
    """
    output_dir = TEMP + dsp
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    with open(output_dir + "/hyperparameters.txt", "a+") as text_file:
        print("\nTRAIN_NUM_EPOCHS: {}".format(TRAIN_NUM_EPOCHS), file=text_file)
        print("TRAIN_BATCH_SIZE: {}".format(TRAIN_BATCH_SIZE), file=text_file)
        print("LEARNING_RATE: {}".format(LEARNING_RATE), file=text_file)
        print("SHIFT_LOSS: {}".format(SHIFT_LOSS), file=text_file)
        # enumerate numbers the layers by position; list.index would return
        # the first match and mislabel layers that share the same width.
        for layer_index, unit in enumerate(HIDDEN_UNITS):
            print("HIDDEN LAYER{}: {}".format(layer_index, unit), file=text_file)
        print("NUMBER_OF_TRAIN_INSTANCES: {}".format(num_train_instances), file=text_file)

In [None]:
def print_hyperparameters(dsp, num_train_instances):
    """Print the current hyperparameters to stdout.

    Args:
        dsp: Name of the DSP run. Not used by the output; kept so that the
            signature matches save_hyperparameters.
        num_train_instances: Number of training instances to report.
    """
    print("\nTRAIN_NUM_EPOCHS: {}".format(TRAIN_NUM_EPOCHS))
    print("TRAIN_BATCH_SIZE: {}".format(TRAIN_BATCH_SIZE))
    print("LEARNING_RATE: {}".format(LEARNING_RATE))
    print("SHIFT_LOSS: {}".format(SHIFT_LOSS))
    # enumerate numbers the layers by position; list.index would return
    # the first match and mislabel layers that share the same width.
    for layer_index, unit in enumerate(HIDDEN_UNITS):
        print("HIDDEN LAYER{}: {}".format(layer_index, unit))
    print("NUMBER_OF_TRAIN_INSTANCES: {}".format(num_train_instances))

## Put it all together

In [None]:
# Uncomment the following line to reduce TensorFlow's log output to errors only.
#tf.logging.set_verbosity(tf.logging.ERROR)

# Names of the DSP runs to train; each has its own sub-directory below
# TRANSFORMED_DIR (input) and TEMP (output).
dsp_list = ['47_warmstarting']

for dsp in dsp_list:
    start = time.time()

    dsp_working_dir = TEMP + dsp
    dsp_transformed_dir = TRANSFORMED_DIR + dsp
    train_file_pattern = '{}/{}*'.format(dsp_transformed_dir, TRANSFORMED_TRAIN_DATA_FILEBASE)

    # The number of training records determines the number of training steps.
    tf_records_filenames = tf.gfile.Glob(train_file_pattern)
    num_train_instances = count_instances(tf_records_filenames)
    save_hyperparameters(dsp, num_train_instances)

    estimator = train_and_evaluate(dsp_working_dir, dsp_transformed_dir, num_train_instances)

    print_hyperparameters(dsp, num_train_instances)
    elapsed_seconds = time.time() - start
    print('\n\nTraining for dsp {} took {:.2f} seconds'.format(dsp, elapsed_seconds))