In [12]:
from google.cloud import bigquery
import tensorflow as tf
import numpy as np
import shutil
print(tf.__version__)

1.15.0


In [11]:
# Names assigned, in order, to the values parsed from each CSV line.
CSV_COLUMNS = 'Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class'.split(',')
# Column that holds the label; it is split off from the features when parsing.
LABEL_COLUMN = 'Class'
# Every column except the label is a model input (independent of where the
# label sits in CSV_COLUMNS).
FEATURES_COLUMNS = [name for name in CSV_COLUMNS if name != LABEL_COLUMN]
# One float default per CSV column; derived from CSV_COLUMNS so the two lists
# cannot drift apart if the schema changes.
DEFAULTS = [[0.0] for _ in CSV_COLUMNS]

In [5]:
def read_dataset(filename, mode, batch_size = 512):
    """Return an input function that yields batched (features, label) pairs from CSV.

    Args:
        filename: file name or glob pattern handed to ``tf.data.Dataset.list_files``.
        mode: a ``tf.estimator.ModeKeys`` value. TRAIN shuffles and repeats
            indefinitely; any other mode makes a single pass over the data.
        batch_size: number of rows per batch.

    Returns:
        A zero-argument callable that builds and returns the ``tf.data.Dataset``.
    """
    def _input_fn():
        def _parse_line(line):
            # One parsed value per CSV column, keyed by its column name.
            parsed = dict(zip(CSV_COLUMNS, tf.decode_csv(line, record_defaults = DEFAULTS)))
            # Split the label off; whatever remains is the feature dict.
            target = parsed.pop(LABEL_COLUMN)
            return parsed, target

        # Pipeline: file pattern -> file names -> text lines -> (features, label).
        # flat_map is the one-to-many step (file -> lines), map the one-to-one
        # step (line -> parsed example).
        files = tf.data.Dataset.list_files(filename)
        lines = files.flat_map(tf.data.TextLineDataset)
        examples = lines.map(_parse_line)

        is_training = mode == tf.estimator.ModeKeys.TRAIN
        if is_training:
            examples = examples.shuffle(buffer_size = 10 * batch_size)

        # None repeats indefinitely; 1 signals end-of-input after a single pass.
        epochs = None if is_training else 1
        repeated = examples.repeat(epochs)
        return repeated.batch(batch_size)
    return _input_fn

In [6]:
# One numeric feature column per name in FEATURES_COLUMNS.
INPUT_COLUMNS = list(map(tf.feature_column.numeric_column, FEATURES_COLUMNS))

def add_more_features(feats):
    """Hook for engineered feature columns; currently returns ``feats`` unchanged."""
    return feats

# Feature columns given to the estimator: INPUT_COLUMNS passed through add_more_features.
feature_cols = add_more_features(INPUT_COLUMNS)

In [7]:
def serving_input_fn():
    """Build the input receiver used when the model is exported for serving.

    Creates one ``tf.float32`` placeholder of shape ``[None]`` per name in
    ``FEATURES_COLUMNS``. The same dict is used both as the model features and
    as the receiver tensors, so inputs reach the model without preprocessing.
    """
    placeholders = {}
    for name in FEATURES_COLUMNS:
        placeholders[name] = tf.placeholder(tf.float32, [None])
    return tf.estimator.export.ServingInputReceiver(placeholders, placeholders)

In [8]:
def train_and_evaluate(output_dir, num_train_steps,
                       train_data_path = 'data/creditcard_train.csv',
                       eval_data_path = 'data/creditcard_test.csv'):
    """Train a LinearClassifier on the training CSV and evaluate/export it.

    Args:
        output_dir: model directory passed to the estimator as ``model_dir``.
        num_train_steps: ``max_steps`` for training.
        train_data_path: file name or glob pattern of the training CSV file(s).
            Defaults to the path this notebook originally hard-coded.
        eval_data_path: file name or glob pattern of the evaluation CSV file(s).
            Defaults to the path this notebook originally hard-coded.
    """
    estimator = tf.estimator.LinearClassifier(
                       model_dir = output_dir,
                       feature_columns = feature_cols)

    train_spec = tf.estimator.TrainSpec(
                       input_fn = read_dataset(train_data_path, mode = tf.estimator.ModeKeys.TRAIN),
                       max_steps = num_train_steps)

    # Exporter attached to the eval spec; it uses serving_input_fn for the
    # exported model's inputs.
    exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)

    eval_spec = tf.estimator.EvalSpec(
                       input_fn = read_dataset(eval_data_path, mode = tf.estimator.ModeKeys.EVAL),
                       steps = None,  # no step limit; the EVAL input_fn makes a single pass
                       start_delay_secs = 1, # start evaluating after N seconds
                       throttle_secs = 10,  # evaluate every N seconds
                       exporters = exporter)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

In [9]:
# Directory passed to train_and_evaluate as the model output directory; it is deleted before each run.
OUTDIR = './creditcard_trained'

In [10]:
# Run training: remove the previous run's output directory, then train for 100 steps.
shutil.rmtree(OUTDIR, ignore_errors = True) # start fresh each time
tf.summary.FileWriterCache.clear() # ensure filewriter cache is clear for TensorBoard events file
train_and_evaluate(OUTDIR, num_train_steps = 100)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_tf_random_seed': None, '_train_distribute': None, '_log_step_count_steps': 100, '_model_dir': './creditcard_trained', '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fefd926df60>, '_session_creation_timeout_secs': 7200, '_master': '', '_global_id_in_cluster': 0, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_evaluation_master': '', '_keep_checkpoint_max': 5, '_save_checkpoints_steps': None, '_protocol': None, '_task_type': 'worker', '_keep_checkpoint_every_n_hours': 10000, '_num_ps_replicas': 0, '_experimental_max_worker_delay_secs': None, '_experimental_distribute': None, '_num_worker_replicas': 1, '_save_summary_steps': 100, '_eval_distribute': None, '_task_id': 0, '_is_chief': True, '_device_fn': None}
INFO:tensorflow:Not using Distribute Coordinator.
IN