In [1]:
import numpy as np
import argparse
import json
import os
import tensorflow as tf
import tensorflow_transform as tft

In [2]:
import features

In [3]:
def get_feature_columns(use_all_columns=False, force_subset=None):
    """Build the wide (linear) and deep (DNN) feature-column lists.

    Column names are classified with the lookup tables of the ``features``
    module:

    * names in ``HASH_BUCKET_SIZES`` / ``IDENTITY_NUM_BUCKETS`` become
      categorical columns; the categorical column itself goes to the wide
      list, and an embedding (when the name is in ``EMBEDDING_DIMENSIONS``)
      or an indicator wrapper of it goes to the deep list;
    * names in ``FLOAT_COLUMNS_SIMPLE_BIN_TRANSFORM``,
      ``FLOAT_COLUMNS_LOG_BIN_TRANSFORM`` or ``INT_COLUMNS`` become a single
      numeric column that is added to both lists;
    * any other name contributes nothing.

    Args:
        use_all_columns: when true, use ``features.ALL_TRAINING_COLUMNS``.
        force_subset: optional explicit collection of column names, a way to
            pass column changes in directly for testing/profiling. With
            neither argument given, ``features.NV_TRAINING_COLUMNS`` is used.

    Returns:
        ``(wide_columns, deep_columns)`` - two lists of feature columns.

    Raises:
        AssertionError: if ``use_all_columns`` is true and ``force_subset``
            is also given.
    """
    assert not use_all_columns or force_subset is None, \
          'Cannot both use all columns and use only a subset; give only one argument'

    if use_all_columns:
        selected_names = features.ALL_TRAINING_COLUMNS
    elif force_subset is not None:
        selected_names = force_subset
    else:
        selected_names = features.NV_TRAINING_COLUMNS

    log_warn = tf.compat.v1.logging.warn
    log_warn('number of features: {}'.format(len(selected_names)))

    wide, deep = [], []
    for name in selected_names:
        if name in features.HASH_BUCKET_SIZES:
            sparse_column = tf.feature_column.categorical_column_with_hash_bucket(
                name,
                hash_bucket_size=features.HASH_BUCKET_SIZES[name],
                dtype=tf.int32)
        elif name in features.IDENTITY_NUM_BUCKETS:
            sparse_column = tf.feature_column.categorical_column_with_identity(
                name, num_buckets=features.IDENTITY_NUM_BUCKETS[name])
        else:
            # Numeric feature: pick the input key to read. Only the scaled
            # numeric value is used; no binned categorical companion column
            # (e.g. name + "_log_binned" / name + "_log_int") is created.
            if name in features.FLOAT_COLUMNS_SIMPLE_BIN_TRANSFORM:
                numeric_key = name
            elif (name in features.FLOAT_COLUMNS_LOG_BIN_TRANSFORM
                  or name in features.INT_COLUMNS):
                numeric_key = name + "_log_01scaled"
            else:
                numeric_key = None

            if numeric_key is not None:
                dense_column = tf.feature_column.numeric_column(
                    numeric_key, shape=(1,))
                # The very same column object feeds both model halves.
                wide.append(dense_column)
                deep.append(dense_column)
            continue

        # Categorical feature: raw column for the linear part, a dense
        # representation of it for the DNN part.
        wide.append(sparse_column)
        if name in features.EMBEDDING_DIMENSIONS:
            deep.append(tf.feature_column.embedding_column(
                sparse_column,
                dimension=features.EMBEDDING_DIMENSIONS[name],
                combiner='mean'))
        else:
            deep.append(tf.feature_column.indicator_column(sparse_column))

    log_warn('deep columns: {}'.format(len(deep)))
    log_warn('wide columns: {}'.format(len(wide)))
    log_warn('wide&deep intersection: {}'.format(len(set(wide) & set(deep))))
    return wide, deep

In [4]:
# Checkpoint directory handed to the estimator's RunConfig (relative path).
model_dir = '../checkpoints'

In [5]:
# Write a checkpoint into model_dir every 600 seconds and retain at most one
# checkpoint file set on disk.
run_config = tf.estimator.RunConfig(
    model_dir=model_dir,
    save_checkpoints_secs=600,
    keep_checkpoint_max=1,
)

In [6]:
# Optimizer for the linear ("wide") part of the model: FTRL with learning
# rate 0.2 and both L1 and L2 regularization strengths set to 0.
wide_optimizer = tf.keras.optimizers.Ftrl(
    learning_rate=0.2,
    l1_regularization_strength=0,
    l2_regularization_strength=0)

In [7]:
# Optimizer for the DNN ("deep") part of the model: Adagrad, learning rate 1.
deep_optimizer = tf.keras.optimizers.Adagrad(learning_rate=1)

In [8]:
# Build feature columns from features.ALL_TRAINING_COLUMNS (use_all_columns=True).
wide_columns, deep_columns = get_feature_columns(use_all_columns=True)



In [9]:
# Wide & Deep classifier: the linear model consumes wide_columns, the DNN
# (five hidden layers of 1024 units, dropout 0) consumes deep_columns, and
# each half is trained with its own optimizer.
estimator = tf.estimator.DNNLinearCombinedClassifier(
    config=run_config,
    linear_feature_columns=wide_columns,
    linear_optimizer=wide_optimizer,
    dnn_feature_columns=deep_columns,
    dnn_optimizer=deep_optimizer,
    dnn_hidden_units=[1024, 1024, 1024, 1024, 1024],
    dnn_dropout=0,
    linear_sparse_combiner='sum',
    loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE)

INFO:tensorflow:Using config: {'_model_dir': '../checkpoints', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 1, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [10]:
# Captured at module level: inside the metric function the `features`
# parameter shadows the imported `features` module, so the column name
# cannot be looked up there.
DISPLAY_ID_COLUMN = features.DISPLAY_ID_COLUMN


def map_custom_metric(features, labels, predictions):
    """Streaming average precision at k=12, grouped by display id.

    Args:
        features: mapping of input tensors; only
            ``features[DISPLAY_ID_COLUMN]`` is read.
        labels: label tensor; column 0 is used (assumes a rank-2 tensor -
            TODO confirm against the input pipeline).
        predictions: estimator prediction dict; column 1 of
            ``predictions['probabilities']`` is used as the score.

    Returns:
        ``{'map': ...}`` holding the result of
        ``tf.compat.v1.metrics.average_precision_at_k``.
    """
    flat_ids = tf.reshape(features[DISPLAY_ID_COLUMN], [-1])
    scores = predictions['probabilities'][:, 1]
    targets = labels[:, 0]

    # Sort everything by display id so rows sharing an id are contiguous,
    # even when the same id occurs in two different places of the batch.
    order = tf.argsort(flat_ids)
    flat_ids = tf.gather(flat_ids, indices=order)
    scores = tf.gather(scores, indices=order)
    targets = tf.gather(targets, indices=order)

    # group_index maps each element to its display-id group; group_sizes is
    # the number of elements per group.
    _, group_index, group_sizes = tf.unique_with_counts(
        flat_ids, out_idx=tf.int64)
    # Amount of right-padding that brings the widest group up to 30 columns.
    right_padding = 30 - tf.reduce_max(group_sizes)

    # One row per display id; to_tensor() fills shorter rows with its default.
    score_matrix = tf.RaggedTensor.from_value_rowids(
        scores, group_index).to_tensor()
    target_matrix = tf.RaggedTensor.from_value_rowids(
        targets, group_index).to_tensor()

    # Column index of the largest label value within each group.
    target_position = tf.argmax(target_matrix, axis=1)

    padded_scores = tf.pad(score_matrix, [(0, 0), (0, right_padding)])
    return {
        'map': tf.compat.v1.metrics.average_precision_at_k(
            predictions=padded_scores,
            labels=target_position,
            k=12,
            name="streaming_map")}

In [11]:
# Captured at module level: inside the metric function the `features`
# parameter shadows the imported `features` module.
IS_LEAK_COLUMN = features.IS_LEAK_COLUMN


def map_custom_metric_with_leak(features, labels, predictions):
    """Streaming average precision at k=12 with the leak flag added to scores.

    Computed like the plain per-display-id MAP metric, except that the value
    of ``features[IS_LEAK_COLUMN]`` (cast to float32) is added to each
    predicted probability before grouping.

    Args:
        features: mapping of input tensors; ``features[DISPLAY_ID_COLUMN]``
            and ``features[IS_LEAK_COLUMN]`` are read.
        labels: label tensor; column 0 is used (assumes a rank-2 tensor -
            TODO confirm against the input pipeline).
        predictions: estimator prediction dict; column 1 of
            ``predictions['probabilities']`` is used as the base score.

    Returns:
        ``{'map_with_leak': ...}`` holding the result of
        ``tf.compat.v1.metrics.average_precision_at_k``.
    """
    flat_ids = tf.reshape(features[DISPLAY_ID_COLUMN], [-1])
    leak_flags = tf.reshape(features[IS_LEAK_COLUMN], [-1])

    scores = predictions['probabilities'][:, 1]
    scores = scores + tf.cast(leak_flags, tf.float32)
    targets = labels[:, 0]

    # Sort everything by display id so rows sharing an id are contiguous,
    # even when the same id occurs in two different places of the batch.
    order = tf.argsort(flat_ids)
    flat_ids = tf.gather(flat_ids, indices=order)
    scores = tf.gather(scores, indices=order)
    targets = tf.gather(targets, indices=order)

    # group_index maps each element to its display-id group; group_sizes is
    # the number of elements per group.
    _, group_index, group_sizes = tf.unique_with_counts(
        flat_ids, out_idx=tf.int64)
    # Amount of right-padding that brings the widest group up to 30 columns.
    right_padding = 30 - tf.reduce_max(group_sizes)

    # One row per display id; to_tensor() fills shorter rows with its default.
    score_matrix = tf.RaggedTensor.from_value_rowids(
        scores, group_index).to_tensor()
    target_matrix = tf.RaggedTensor.from_value_rowids(
        targets, group_index).to_tensor()

    # Column index of the largest label value within each group.
    target_position = tf.argmax(target_matrix, axis=1)

    padded_scores = tf.pad(score_matrix, [(0, 0), (0, right_padding)])
    return {
        'map_with_leak': tf.compat.v1.metrics.average_precision_at_k(
            predictions=padded_scores,
            labels=target_position,
            k=12,
            name="streaming_map_with_leak")}


In [12]:
# Attach the two custom MAP metrics; each add_metrics call wraps the
# previous estimator, so the name is rebound twice.
estimator = tf.estimator.add_metrics(estimator, map_custom_metric)
estimator = tf.estimator.add_metrics(estimator, map_custom_metric_with_leak)

INFO:tensorflow:Using config: {'_model_dir': '../checkpoints', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 1, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Using config: {'_model_dir': '../checkpoints', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_c

In [13]:
import dataset_utils

In [14]:
# tf.Transform output handle that is passed to dataset_utils.separate_input_fn
# by both input functions.
# NOTE(review): this directory is spelled 'tfreccords' while the train/eval
# file patterns use 'tfrecords' - confirm the directory really has this name.
tf_transform_output = tft.TFTransformOutput('../../data/outbrain/tfreccords')


In [15]:
# 131072 split 8 ways (= 16384).
# NOTE(review): presumably global batch size / number of workers - confirm.
local_batch_size = 131072 // 8
# Number of 4096-sized units per local batch (= 4); handed to the training
# input function.
# NOTE(review): 4096 presumably is the number of examples stored per
# record - confirm against the preprocessing step.
create_batches = local_batch_size // 4096

In [16]:
def train_input_fn():
    """Zero-argument input function for training.

    Delegates to ``dataset_utils.separate_input_fn`` over the training
    TFRecord shards in TRAIN mode, passing ``create_batches`` as the batch
    argument.
    """
    # NOTE(review): int(0.001*create_batches) truncates toward zero; with
    # create_batches == 4 this evaluates to 0 - confirm that a shuffle
    # buffer size of 0 is what separate_input_fn is expected to receive.
    return dataset_utils.separate_input_fn(
        tf_transform_output,
        '../../data/outbrain/tfrecords/train/part*',
        create_batches,
        tf.estimator.ModeKeys.TRAIN,
        reader_num_threads=12,
        parser_num_threads=3,
        shuffle_buffer_size=int(0.001*create_batches),
        prefetch_buffer_size=1,
        print_display_ids=False)

In [17]:
def eval_input_fn():
    """Zero-argument input function for evaluation.

    Delegates to ``dataset_utils.separate_input_fn`` over the evaluation
    TFRecord shards in EVAL mode, passing ``32768 // 4096`` (= 8) as the
    batch argument and using single-threaded reading and parsing.
    """
    # The shuffle buffer size is derived from the *training* create_batches
    # value, exactly as in the training input function.
    return dataset_utils.separate_input_fn(
        tf_transform_output,
        '../../data/outbrain/tfrecords/eval/part*',
        (32768 // 4096),
        tf.estimator.ModeKeys.EVAL,
        reader_num_threads=1,
        parser_num_threads=1,
        shuffle_buffer_size=int(0.001*create_batches),
        prefetch_buffer_size=1,
        print_display_ids=False)

In [18]:
# True division on purpose: the fractional step count is kept.
# NOTE(review): presumably number of training examples / global batch size
# (131072 is the figure local_batch_size is derived from) - confirm.
steps_per_epoch = 59761827 / 131072
# Training budget: 120 times the per-epoch step count, truncated to an int.
max_steps = int(steps_per_epoch * 120)

print('Steps per epoch: {}'.format(steps_per_epoch))

Steps per epoch: 455.9465560913086


In [19]:
# Train until the global step reaches max_steps.
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=max_steps)
# Each evaluation runs 8 steps of eval_input_fn; throttle_secs=600 matches
# the 600-second checkpoint interval set in run_config.
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn,
                                  throttle_secs=600,
                                  steps=8)

In [20]:
# Run the combined train/evaluate loop (long-running); checkpoints are
# written to model_dir and an evaluation follows each new checkpoint.
result = tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

INFO:tensorflow:Not using Distribute Coordinator.
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps None or save_checkpoints_secs 600.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflo

INFO:tensorflow:global_step/sec: 1.36979
INFO:tensorflow:loss = 0.42579734, step = 37036 (73.002 sec)
INFO:tensorflow:global_step/sec: 1.36815
INFO:tensorflow:loss = 0.41834304, step = 37136 (73.093 sec)
INFO:tensorflow:global_step/sec: 1.36746
INFO:tensorflow:loss = 0.42385328, step = 37236 (73.127 sec)
INFO:tensorflow:global_step/sec: 1.36976
INFO:tensorflow:loss = 0.42167988, step = 37336 (73.007 sec)
INFO:tensorflow:global_step/sec: 1.37327
INFO:tensorflow:loss = 0.42192394, step = 37436 (72.819 sec)
INFO:tensorflow:global_step/sec: 1.36931
INFO:tensorflow:loss = 0.42008746, step = 37536 (73.028 sec)
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 37547...
INFO:tensorflow:Saving checkpoints for 37547 into ../checkpoints/model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 37547...
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorfl

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-07-13T13:46:30Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../checkpoints/model.ckpt-39933
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [1/8]
INFO:tensorflow:Evaluation [2/8]
INFO:tensorflow:Evaluation [3/8]
INFO:tensorflow:Evaluation [4/8]
INFO:tensorflow:Evaluation [5/8]
INFO:tensorflow:Evaluation [6/8]
INFO:tensorflow:Evaluation [7/8]
INFO:tensorflow:Evaluation [8/8]
INFO:tensorflow:Inference Time : 8.19377s
INFO:tensorflow:Finished evaluation at 2020-07-13-13:46:38
INFO:tensorflow:Saving dict for global step 39933: accuracy = 0.8128853, accuracy_baseline = 0.80654526, auc = 0.7260292, auc_precision_recall = 0.40245485, average_loss = 0.4411129, global_step = 39933, label/mean = 0.19345474, loss = 0.4411129, map = 0.647375861191909, map_with_leak = 0.6628175485135255, precision = 0.588011, prediction/m

INFO:tensorflow:Saving 'checkpoint_path' summary for global step 42316: ../checkpoints/model.ckpt-42316
INFO:tensorflow:global_step/sec: 1.09894
INFO:tensorflow:loss = 0.4194502, step = 42336 (90.997 sec)
INFO:tensorflow:global_step/sec: 1.36674
INFO:tensorflow:loss = 0.4169984, step = 42436 (73.168 sec)
INFO:tensorflow:global_step/sec: 1.36057
INFO:tensorflow:loss = 0.4176073, step = 42536 (73.498 sec)
INFO:tensorflow:global_step/sec: 1.3673
INFO:tensorflow:loss = 0.42151076, step = 42636 (73.138 sec)
INFO:tensorflow:global_step/sec: 1.35666
INFO:tensorflow:loss = 0.4124699, step = 42736 (73.710 sec)
INFO:tensorflow:global_step/sec: 1.36579
INFO:tensorflow:loss = 0.42160004, step = 42836 (73.219 sec)
INFO:tensorflow:global_step/sec: 1.3638
INFO:tensorflow:loss = 0.42282242, step = 42936 (73.323 sec)
INFO:tensorflow:global_step/sec: 1.36498
INFO:tensorflow:loss = 0.4229591, step = 43036 (73.262 sec)
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 43111...
INFO:ten

INFO:tensorflow:Saving checkpoints for 45495 into ../checkpoints/model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 45495...
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-07-13T14:56:33Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../checkpoints/model.ckpt-45495
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [1/8]
INFO:tensorflow:Evaluation [2/8]
INFO:tensorflow:Evaluation [3/8]
INFO:tensorflow:Evaluation [4/8]
INFO:tensorflow:Evaluation [5/8]
INFO:tensorflow:Evaluation [6/8]
INFO:tensorflow:Evaluation [7/8]
INFO:tensorflow:Evaluation [8/8]
INFO:tensorflow:Inference Time : 8.20473s
INFO:tensorflow:Finished evaluation at 2020-07-13-14:56:41
INFO:te

INFO:tensorflow:Finished evaluation at 2020-07-13-15:26:42
INFO:tensorflow:Saving dict for global step 47876: accuracy = 0.81287384, accuracy_baseline = 0.80654526, auc = 0.7267096, auc_precision_recall = 0.40333706, average_loss = 0.44063222, global_step = 47876, label/mean = 0.19345474, loss = 0.44063222, map = 0.64767427044783, map_with_leak = 0.663129101828332, precision = 0.58543617, prediction/mean = 0.1810887, recall = 0.112081714
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 47876: ../checkpoints/model.ckpt-47876
INFO:tensorflow:global_step/sec: 1.10073
INFO:tensorflow:loss = 0.4176535, step = 47936 (90.849 sec)
INFO:tensorflow:global_step/sec: 1.36975
INFO:tensorflow:loss = 0.4236697, step = 48036 (73.008 sec)
INFO:tensorflow:global_step/sec: 1.36483
INFO:tensorflow:loss = 0.42129263, step = 48136 (73.268 sec)
INFO:tensorflow:global_step/sec: 1.36768
INFO:tensorflow:loss = 0.41396642, step = 48236 (73.116 sec)
INFO:tensorflow:global_step/sec: 1.36554
INFO:te

INFO:tensorflow:global_step/sec: 1.35887
INFO:tensorflow:loss = 0.4200132, step = 50736 (73.592 sec)
INFO:tensorflow:global_step/sec: 1.36247
INFO:tensorflow:loss = 0.41838372, step = 50836 (73.396 sec)
INFO:tensorflow:global_step/sec: 1.35983
INFO:tensorflow:loss = 0.4213186, step = 50936 (73.538 sec)
INFO:tensorflow:global_step/sec: 1.35956
INFO:tensorflow:loss = 0.42649475, step = 51036 (73.554 sec)
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 51051...
INFO:tensorflow:Saving checkpoints for 51051 into ../checkpoints/model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 51051...
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-07-13T16:06:35Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ..

INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [1/8]
INFO:tensorflow:Evaluation [2/8]
INFO:tensorflow:Evaluation [3/8]
INFO:tensorflow:Evaluation [4/8]
INFO:tensorflow:Evaluation [5/8]
INFO:tensorflow:Evaluation [6/8]
INFO:tensorflow:Evaluation [7/8]
INFO:tensorflow:Evaluation [8/8]
INFO:tensorflow:Inference Time : 8.39342s
INFO:tensorflow:Finished evaluation at 2020-07-13-16:36:45
INFO:tensorflow:Saving dict for global step 53433: accuracy = 0.81298065, accuracy_baseline = 0.80654526, auc = 0.7271172, auc_precision_recall = 0.40384835, average_loss = 0.44039547, global_step = 53433, label/mean = 0.19345474, loss = 0.44039547, map = 0.6479584794258482, map_with_leak = 0.6634446687754052, precision = 0.5874909, prediction/mean = 0.18058228, recall = 0.11168734
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 53433: ../checkpoints/model.ckpt-53433
INFO:tensorflow:global_step/sec: 1.09508
INFO:tensorflow:loss =

In [None]:
# Display what train_and_evaluate returned. Per the TensorFlow docs this is a
# tuple (evaluation metrics, export results) for local runs and undefined for
# distributed training. The previous `result.count` only referenced the
# tuple's bound `count` method (and would raise AttributeError on None)
# instead of showing the metrics.
result