In [74]:
# Standard library
import shutil

# Third-party
import tensorflow as tf

# Report which TensorFlow release this kernel is running.
print(tf.__version__)

1.15.0


In [64]:
# Key under which the stacked feature vector is stored in the features dict.
TIMESERIES_COL = 'x'

# Per-column CSV record defaults: 30 float columns followed by one integer
# column (the last column is popped off as the label by read_dataset).
DEFAULTS = [[0.0] for _ in range(30)] + [[0]]

# Class names in index order, and the number of classes.
LIST_OF_LABELS = ["non_fraud", "fraud"]
NCLASSES = len(LIST_OF_LABELS)

In [65]:
def init(hparams):
    """Reset the module-level ``DEFAULTS`` CSV record defaults.

    ``DEFAULTS`` is rebound to 30 float defaults followed by one integer
    default. ``hparams`` is accepted but not read.
    """
    global DEFAULTS
    float_columns = [[0.0] for _ in range(30)]
    integer_column = [[0]]
    DEFAULTS = float_columns + integer_column

In [66]:
def linear_model(features, mode, params):
    """Single dense layer mapping the feature vector to class logits.

    Args:
        features: dict holding the input tensor under ``TIMESERIES_COL``.
        mode: unused here; kept so all model functions share one signature.
        params: unused here; kept so all model functions share one signature.

    Returns:
        Tuple ``(logits, NCLASSES)``; the logits come from a dense layer with
        ``NCLASSES`` units and no activation.
    """
    inputs = features[TIMESERIES_COL]
    logits = tf.layers.dense(inputs=inputs, units=NCLASSES, activation=None)
    return logits, NCLASSES

In [1]:
def read_dataset(filename, mode, batch_size=512, skip_header_lines=0):
    """Build an Estimator input function that streams CSV rows from files.

    Args:
        filename: file pattern handed to ``tf.data.Dataset.list_files``
            (e.g. ``data_file_*.csv``).
        mode: a ``tf.estimator.ModeKeys`` value. In TRAIN mode rows are
            shuffled and repeated indefinitely; in any other mode the data is
            read exactly once.
        batch_size: number of rows per batch.
        skip_header_lines: number of leading lines to skip in every file.

    Returns:
        A zero-argument callable. When called it returns the ``get_next()``
        tensors of a one-shot iterator: ``({TIMESERIES_COL: features}, label)``.
    """
    def _input_fn():
        def decode_csv(row):
            # row is a scalar string tensor holding one CSV line. DEFAULTS
            # fixes both the column count and the dtypes: one rank-0 tensor
            # per entry (30 floats followed by one integer).
            columns = tf.decode_csv(row, record_defaults=DEFAULTS)
            label = columns.pop()  # last column is the label
            features = tf.stack(columns)  # remaining rank-0 tensors -> one rank-1 tensor
            return {TIMESERIES_COL: features}, label

        # Expand the pattern into the matching file names.
        dataset = tf.data.Dataset.list_files(filename)
        # Read every file line by line; the lambda argument is deliberately
        # not called `filename` so it does not shadow the outer pattern.
        dataset = dataset.flat_map(
            lambda path: tf.data.TextLineDataset(path).skip(skip_header_lines)
        )
        # Parse each text line as comma-separated values.
        dataset = dataset.map(decode_csv)

        if mode == tf.estimator.ModeKeys.TRAIN:
            num_epochs = None  # loop indefinitely
            dataset = dataset.shuffle(buffer_size=10 * batch_size)
        else:
            num_epochs = 1  # end-of-input after one pass

        dataset = dataset.repeat(num_epochs).batch(batch_size)
        # Dataset.make_one_shot_iterator() is deprecated in TF 1.15; the
        # compat.v1 function is the supported equivalent.
        return tf.compat.v1.data.make_one_shot_iterator(dataset).get_next()

    return _input_fn

In [68]:
def serving_input_fn():
    """Describe the tensors a served model receives at prediction time.

    A single float32 placeholder of shape ``[None, num_features]`` is exposed
    under ``TIMESERIES_COL`` and handed to the model unchanged.
    ``num_features`` is derived from ``DEFAULTS`` (every CSV column except the
    trailing label column) so the serving signature follows the training
    schema instead of repeating the literal 30.

    Returns:
        ``tf.estimator.export.ServingInputReceiver`` whose features and
        receiver tensors are the same placeholder.
    """
    num_features = len(DEFAULTS) - 1  # all columns except the label
    feature_placeholders = {
        TIMESERIES_COL: tf.placeholder(tf.float32, [None, num_features])
    }

    # The previous expand_dims(-1) followed by squeeze(axis=[2]) was a no-op
    # round trip, so the placeholders are passed through directly.
    features = dict(feature_placeholders)

    return tf.estimator.export.ServingInputReceiver(features, feature_placeholders)

In [69]:
def image_classifier(features, labels, mode, params):
    """Estimator ``model_fn``: build predictions, loss, train op and metrics.

    Args:
        features: dict of input tensors, forwarded to the selected model
            function.
        labels: integer class ids (fed to ``tf.one_hot``); only used in TRAIN
            and EVAL modes.
        mode: a ``tf.estimator.ModeKeys`` value.
        params: dict with ``"model"`` (key into the model-function table) and,
            for training, ``"learning_rate"``.

    Returns:
        ``tf.estimator.EstimatorSpec`` with predictions and export outputs in
        every mode, a loss in TRAIN/EVAL, a train op in TRAIN and an accuracy
        metric in EVAL.

    Raises:
        KeyError: if ``params["model"]`` is not a known model name.
    """
    model_functions = {
        "linear": linear_model,
    }
    model_function = model_functions[params["model"]]
    ylogits, nclasses = model_function(features, mode, params)

    probabilities = tf.nn.softmax(logits=ylogits)
    class_int = tf.cast(x=tf.argmax(input=ylogits, axis=1), dtype=tf.uint8)
    class_str = tf.gather(params=LIST_OF_LABELS, indices=tf.cast(x=class_int, dtype=tf.int32))

    # Same content is reported as predictions and as the export signature.
    predictions = {
        "probabilities": probabilities,
        "classid": class_int,
        "class": class_str,
    }

    # Defaults for PREDICT mode; overwritten below where applicable.
    loss = None
    train_op = None
    eval_metric_ops = None

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        # The one-hot depth uses the class count reported by the model
        # function, so the loss always matches the width of its logits.
        one_hot_labels = tf.one_hot(indices=labels, depth=nclasses)
        loss = tf.reduce_mean(
            input_tensor=tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=ylogits, labels=one_hot_labels))

        if mode == tf.estimator.ModeKeys.TRAIN:
            # This is needed for batch normalization, but has no effect otherwise
            update_ops = tf.get_collection(key=tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(control_inputs=update_ops):
                train_op = tf.contrib.layers.optimize_loss(
                    loss=loss,
                    global_step=tf.train.get_global_step(),
                    learning_rate=params["learning_rate"],
                    optimizer="Adam")
        else:
            eval_metric_ops = {
                "accuracy": tf.metrics.accuracy(labels=labels, predictions=class_int)
            }

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs={"classes": tf.estimator.export.PredictOutput(predictions)}
    )

In [70]:
def train_and_evaluate(output_dir, hparams):
    """Train the estimator, evaluating and exporting a SavedModel along the way.

    Args:
        output_dir: model directory for checkpoints, event files and exports.
        hparams: dict read for ``train_data_path``, ``eval_data_path``,
            ``batch_size`` and ``train_steps``; it is also passed to the
            model function as ``params``.
    """
    # Make sure no cached summary writer holds on to an old TensorBoard events file.
    tf.summary.FileWriterCache.clear()

    eval_interval_secs = 300  # every 5 minutes

    # Custom-model-function estimator; checkpoints are saved on the same
    # cadence that evaluation is throttled to.
    run_config = tf.estimator.RunConfig(save_checkpoints_secs=eval_interval_secs)
    estimator = tf.estimator.Estimator(
        model_fn=image_classifier,
        params=hparams,
        config=run_config,
        model_dir=output_dir,
    )

    # Training: read the training files and stop after `train_steps` steps.
    train_input_fn = read_dataset(
        hparams['train_data_path'],
        tf.estimator.ModeKeys.TRAIN,
        hparams['batch_size'],
    )
    train_spec = tf.estimator.TrainSpec(
        input_fn=train_input_fn,
        max_steps=hparams["train_steps"],
    )

    # Exporter that writes a SavedModel using serving_input_fn.
    exporter = tf.estimator.LatestExporter(
        name="exporter",
        serving_input_receiver_fn=serving_input_fn,
    )

    # Evaluation: one full pass over the eval files (steps=None), then export.
    eval_input_fn = read_dataset(
        hparams['eval_data_path'],
        tf.estimator.ModeKeys.EVAL,
        hparams['batch_size'],
    )
    eval_spec = tf.estimator.EvalSpec(
        input_fn=eval_input_fn,
        steps=None,
        exporters=exporter,
        start_delay_secs=eval_interval_secs,
        throttle_secs=eval_interval_secs,
    )

    # Run the train-and-evaluate loop.
    tf.estimator.train_and_evaluate(
        estimator=estimator,
        train_spec=train_spec,
        eval_spec=eval_spec,
    )

In [71]:
# Hyperparameters and data locations for the training run.
hparams = dict(
    output_dir='creditcard_trained',
    train_steps=5,
    learning_rate=0.01,
    batch_size=2,
    model='linear',
    train_data_path='data/creditcard_train.csv',
    eval_data_path='data/creditcard_test.csv',
)

In [75]:
# Start each run from an empty model directory; a missing directory is not an error.
OUTDIR = './creditcard_trained'
shutil.rmtree(path=OUTDIR, ignore_errors=True)

In [77]:
# Kick off training and evaluation with the hyperparameters defined above.
train_and_evaluate(output_dir=OUTDIR, hparams=hparams)

INFO:tensorflow:Using config: {'_global_id_in_cluster': 0, '_train_distribute': None, '_save_checkpoints_secs': 300, '_model_dir': './creditcard_trained', '_keep_checkpoint_max': 5, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_evaluation_master': '', '_service': None, '_log_step_count_steps': 100, '_experimental_distribute': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f1a60144470>, '_keep_checkpoint_every_n_hours': 10000, '_session_creation_timeout_secs': 7200, '_num_worker_replicas': 1, '_protocol': None, '_task_type': 'worker', '_save_checkpoints_steps': None, '_num_ps_replicas': 0, '_experimental_max_worker_delay_secs': None, '_master': '', '_save_summary_steps': 100, '_device_fn': None, '_is_chief': True, '_eval_distribute': None, '_task_id': 0, '_tf_random_seed': None}
INFO:tensorflow:Not using Distribute Coordinator.
INFO:tensorflow:Running training and eva