## 1. Python check, imports, and globals

In [1]:
import sys

# Confirm that we're using Python 3.
# Compare with `==` rather than `is`: `is` checks object identity, which only
# happens to work for small ints in CPython and triggers a SyntaxWarning on
# Python 3.8+.
assert sys.version_info.major == 3, 'Oops, not running Python 3. Use Runtime > Change runtime type'

In [2]:
import os
import pprint
import tempfile

import tensorflow as tf
print('TF: {}'.format(tf.__version__))

print('Installing Apache Beam')
!pip install -Uq apache_beam==2.17.0
import apache_beam as beam
print('Beam: {}'.format(beam.__version__))

print('Installing TensorFlow Transform')
!pip install -q tensorflow-transform==0.21
import tensorflow_transform as tft
print('Transform: {}'.format(tft.__version__))

import tensorflow_transform.beam as tft_beam

!wget https://storage.googleapis.com/artifacts.tfx-oss-public.appspot.com/datasets/census/adult.data
!wget https://storage.googleapis.com/artifacts.tfx-oss-public.appspot.com/datasets/census/adult.test

# Relative paths of the census files fetched by the wget commands above;
# these are later passed to transform_data as the train/test inputs.
train = './adult.data'
test = './adult.test'

TF: 2.1.0
Installing Apache Beam
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Beam: 2.17.0
Installing TensorFlow Transform
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Transform: 0.21.0
--2020-05-08 21:52:18--  https://storage.googleapis.com/artifacts.tfx-oss-public.appspot.com/datasets/census/adult.data
Resolving storage.googleapis.com (storage.googleapis.com)... 172.217.204.128, 2607:f8b0:400c:c13::80
Connecting to storage.googleapis.com (storage.googleapis.com)|172.217.204.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3974305 (3.8M) [application/octet-stream]
Saving to: ‘adult.data’


2020-05-08 21:52:18 (178 MB/s) - ‘adult.data’ saved [3974305/3974305]

--2020-05-08 21:52:18--  https://storage.googleapis.com/artifacts.tfx-oss-public.appspot.com/datasets/census/adult.test
Resolving storage.googleapis.com (storage.googleapis.com)... 173.194.216.1

### 1.1 Name our columns

In [3]:
# Census column names, grouped by how the rest of the notebook treats them.

# Columns handled as strings.
CATEGORICAL_FEATURE_KEYS = [
    'workclass', 'education', 'marital-status', 'occupation',
    'relationship', 'race', 'sex', 'native-country',
]

# Columns handled as floats.
NUMERIC_FEATURE_KEYS = ['age', 'capital-gain', 'capital-loss', 'hours-per-week']

# Float columns that are treated as optional.
OPTIONAL_NUMERIC_FEATURE_KEYS = ['education-num']

# Name of the column holding the prediction target.
LABEL_KEY = 'label'

### 1.2 Define our features and schema

In [4]:
# Parsing spec for the raw census columns, filled in one group at a time.
# Insertion order is: categorical, numeric, optional numeric, then the label.
RAW_DATA_FEATURE_SPEC = {}
# Categorical columns: one string per example.
RAW_DATA_FEATURE_SPEC.update(
    (name, tf.io.FixedLenFeature([], tf.string))
    for name in CATEGORICAL_FEATURE_KEYS)
# Numeric columns: one float per example.
RAW_DATA_FEATURE_SPEC.update(
    (name, tf.io.FixedLenFeature([], tf.float32))
    for name in NUMERIC_FEATURE_KEYS)
# Optional numeric columns: variable-length float features.
RAW_DATA_FEATURE_SPEC.update(
    (name, tf.io.VarLenFeature(tf.float32))
    for name in OPTIONAL_NUMERIC_FEATURE_KEYS)
# The label is read as a string.
RAW_DATA_FEATURE_SPEC[LABEL_KEY] = tf.io.FixedLenFeature([], tf.string)

# Dataset metadata wrapping the schema derived from the feature spec above.
RAW_DATA_METADATA = tft.tf_metadata.dataset_metadata.DatasetMetadata(
    tft.tf_metadata.dataset_schema.schema_utils.schema_from_feature_spec(
        RAW_DATA_FEATURE_SPEC
    )
)

### 1.3 Setting hyperparameters and basic housekeeping

In [5]:
# A non-empty WEB_TEST_BROWSER environment variable selects a minimal
# configuration (every size set to 1); otherwise the full-size settings apply.
testing = os.getenv('WEB_TEST_BROWSER', False)
(TRAIN_NUM_EPOCHS,
 NUM_TRAIN_INSTANCES,
 TRAIN_BATCH_SIZE,
 NUM_TEST_INSTANCES) = (1, 1, 1, 1) if testing else (16, 32561, 128, 16281)

# Base names of the transformed-data files and of the exported-model directory,
# each joined onto the working directory by the functions that use them.
EXPORTED_MODEL_DIR = 'exported_model_dir'
TRANSFORMED_TEST_DATA_FILEBASE = 'test_transformed'
TRANSFORMED_TRAIN_DATA_FILEBASE = 'train_transformed'

## 2. Cleaning

### 2.1 Create a Beam Transform for cleaning our input data

In [6]:
class MapAndFilterErrors(beam.PTransform):
    """Map a function over a PCollection, dropping elements on which it raises.

    Behaves like beam.Map, except that an element for which the wrapped
    function raises an Exception yields no output and is tallied in a Beam
    counter metric.
    """

    class _MapAndFilterErrorsDoFn(beam.DoFn):
        """DoFn that emits fn(element), or counts the element as bad if fn raises."""

        def __init__(self, fn):
            # Counter 'bad_elements' (namespace 'census_example') records how
            # many elements the wrapped function failed on.
            self._bad_elements_counter = beam.metrics.Metrics.counter(
                namespace='census_example', name='bad_elements')
            self._fn = fn

        def process(self, element):
            """Yield the mapped element; on any Exception count it and emit nothing."""
            try:
                yield self._fn(element)
            except Exception:
                # Any Exception raised above marks the element as bad.
                self._bad_elements_counter.inc(1)

    def __init__(self, fn):
        # The callable is only stored here; the DoFn is built in expand().
        self._fn = fn

    def expand(self, pcoll):
        """Apply the error-filtering DoFn to `pcoll` and return the result."""
        error_filtering_dofn = self._MapAndFilterErrorsDoFn(self._fn)
        return pcoll | beam.ParDo(error_filtering_dofn)

## 3. Preprocessing with tf.Transform

The preprocessing function is the most important concept of tf.Transform. A preprocessing function is where the transformation of the dataset really happens. It accepts and returns a dictionary of tensors, where a tensor means a Tensor or SparseTensor. There are two main groups of API calls that typically form the heart of a preprocessing function:

- TensorFlow Ops: Any function that accepts and returns tensors, which usually means TensorFlow ops. These add TensorFlow operations to the graph that transforms raw data into transformed data one feature vector at a time. These will run for every example, during both training and serving.
- TensorFlow Transform Analyzers: Any of the analyzers provided by tf.Transform. Analyzers also accept and return tensors, but unlike TensorFlow ops they only run once, during training, and typically make a full pass over the entire training dataset. They create tensor constants, which are added to your graph. For example, tft.min computes the minimum of a tensor over the training dataset. tf.Transform provides a fixed set of analyzers, but this will be extended in future versions.

### 3.1 Create a tf.Transform preprocessing_fn

In [7]:
def preprocessing_fn(inputs):
    """Map the raw input columns to their transformed counterparts.

    Args:
        inputs: dict keyed by column name holding the raw feature tensors.

    Returns:
        A dict with the same keys in which the numeric and optional numeric
        columns are scaled to [0, 1], the label is replaced by an integer id,
        and the categorical columns are passed through unchanged.
    """
    # Work on a copy so that entries we do not touch pass through as-is.
    outputs = inputs.copy()

    # Numeric columns: rescale into the range [0, 1].
    for numeric_key in NUMERIC_FEATURE_KEYS:
        outputs[numeric_key] = tft.scale_to_0_1(outputs[numeric_key])

    # Optional numeric columns arrive as SparseTensors; fill in 0. where the
    # value is missing before scaling.
    for optional_key in OPTIONAL_NUMERIC_FEATURE_KEYS:
        raw_sparse = outputs[optional_key]
        # Rebuild the sparse tensor with its second dimension fixed to 1.
        one_column = tf.sparse.SparseTensor(
            raw_sparse.indices,
            raw_sparse.values,
            [raw_sparse.dense_shape[0], 1],
        )
        filled = tf.sparse.to_dense(sp_input=one_column, default_value=0.)
        # Drop the size-1 axis: a batch of length-1 vectors becomes a batch of
        # scalars.
        outputs[optional_key] = tft.scale_to_0_1(tf.squeeze(filled, axis=1))

    # Categorical columns (the label is handled separately below): compute a
    # vocabulary per column but leave the feature itself untouched. The
    # trainer uses the vocabulary, via a feature column, to turn each string
    # into an integer id.
    for categorical_key in CATEGORICAL_FEATURE_KEYS:
        tft.vocabulary(inputs[categorical_key], vocab_filename=categorical_key)

    # Label: map each class string to its position in `label_classes`;
    # strings not in the list map to -1.
    label_classes = ['>50K', '<=50K']
    class_ids = tf.cast(tf.range(len(label_classes)), tf.int64)
    label_table = tf.lookup.StaticHashTable(
        tf.lookup.KeyValueTensorInitializer(
            keys=label_classes,
            values=class_ids,
            key_dtype=tf.string,
            value_dtype=tf.int64,
        ),
        default_value=-1,
    )
    outputs[LABEL_KEY] = label_table.lookup(outputs[LABEL_KEY])

    return outputs

### 3.2 Transform the data

In [8]:
def transform_data(train_data_file, test_data_file, working_dir):
    """Transform the data and write out as a TFRecord of Example protos.

    Reads both CSV files with Beam, analyzes and transforms the training data
    with `preprocessing_fn`, applies the resulting transform function to the
    test data, and writes both transformed datasets as well as the transform
    function itself under `working_dir`.

    Args:
        train_data_file: File containing training data
        test_data_file: File containing test data
        working_dir: Directory to write transformed data and metadata to
    """

    # The 'with' block will create a pipeline, and run that pipeline at the exit
    # of the block.
    with beam.Pipeline() as pipeline:
        # NOTE(review): the directory created by mkdtemp() is not removed
        # anywhere in this function.
        with tft_beam.Context(temp_dir=tempfile.mkdtemp()):
            # Create a coder to read the census data with the schema.  To do this we
            # need to list all columns in order since the schema doesn't specify the
            # order of columns in the csv.
            # NOTE(review): 'fnlwgt' is listed here but has no entry in
            # RAW_DATA_FEATURE_SPEC; presumably the coder skips columns that are
            # absent from the schema -- confirm.
            ordered_columns = [
              'age', 'workclass', 'fnlwgt', 'education', 'education-num',
              'marital-status', 'occupation', 'relationship', 'race', 'sex',
              'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
              'label'
            ]
            converter = tft.coders.CsvCoder(ordered_columns, RAW_DATA_METADATA.schema)

            ##
            ## Train data part
            ##
            # Read in raw data and convert using CSV converter.  Note that we apply
            # some Beam transformations here, which will not be encoded in the TF
            # graph since we don't do them from within tf.Transform's methods
            # (AnalyzeDataset, TransformDataset etc.).  These transformations are just
            # to get data into a format that the CSV converter can read, in particular
            # removing spaces after commas.
            #
            # We use MapAndFilterErrors instead of Map to filter out decode errors in
            # converter.decode which should only occur for the trailing blank line.
            raw_data = (
                pipeline \
                | 'ReadTrainData' >> beam.io.ReadFromText(train_data_file) \
                | 'FixCommasTrainData' >> beam.Map(
                    lambda line: line.replace(', ', ',')) \
                | 'DecodeTrainData' >> MapAndFilterErrors(converter.decode)
            ) # `raw_data` looks like [{'feature1': value1, 'feature2': value2, ...}]

            # Combine data and schema into a dataset tuple.  Note that we already used
            # the schema to read the CSV data, but we also need it to interpret
            # raw_data.
            raw_dataset = (raw_data, RAW_DATA_METADATA)
            # Analyze the training data and transform it in one step; `transform_fn`
            # is reused below for the test data and written out at the end.
            transformed_dataset, transform_fn = (
                raw_dataset | tft_beam.AnalyzeAndTransformDataset(preprocessing_fn))
            transformed_data, transformed_metadata = transformed_dataset
            # This coder encodes both the train and the test records below.
            transformed_data_coder = tft.coders.ExampleProtoCoder(transformed_metadata.schema)

            _ = (
                transformed_data \
                | 'EncodeTrainData' >> beam.Map(transformed_data_coder.encode) \
                | 'WriteTrainData' >> beam.io.WriteToTFRecord(
                    os.path.join(working_dir, TRANSFORMED_TRAIN_DATA_FILEBASE)
                )
            )

            ##
            ## Test data part
            ##
            # Now apply transform function to test data.  In this case we remove the
            # trailing period at the end of each line, and also ignore the header line
            # that is present in the test data file.
            # Note that `line[:-1]` drops the last character of every line
            # unconditionally, whatever that character is.
            raw_test_data = (
                pipeline \
                | 'ReadTestData' >> beam.io.ReadFromText(test_data_file, \
                                                         skip_header_lines=1) \
                | 'FixCommaTestData' >> beam.Map(lambda line: line.replace(', ', ',')) \
                | 'RemoveTrailingPeriodsTestData' >> beam.Map(lambda line: line[:-1]) \
                | 'DecodeTestData' >> MapAndFilterErrors(converter.decode)
            )

            raw_test_dataset = (raw_test_data, RAW_DATA_METADATA)

            # Reuse the transform function computed from the training data; the
            # test data is not analyzed again.
            transformed_test_dataset = (
                (raw_test_dataset, transform_fn) | tft_beam.TransformDataset()
            )
            # Don't need transformed data schema, it's the same as before.
            transformed_test_data, _ = transformed_test_dataset

            _ = (
                transformed_test_data \
                | 'EncodeTestData' >> beam.Map(transformed_data_coder.encode) \
                | 'WriteTestData' >> beam.io.WriteToTFRecord(
                    os.path.join(working_dir, TRANSFORMED_TEST_DATA_FILEBASE)
                )
            )

            # Will write a SavedModel and metadata to working_dir, which can then
            # be read by the tft.TFTransformOutput class.
            _ = (
                transform_fn \
                | 'WriteTransformFn' >> tft_beam.WriteTransformFn(working_dir)
            )

## 4. Using our preprocessed data to train a model

### 4.1 Create an input function for training

In [9]:
def _make_training_input_fn(tf_transform_output, transformed_examples, batch_size):
    """Build an input function that reads batches of transformed examples.

    Args:
        tf_transform_output: Wrapper around output of tf.Transform.
        transformed_examples: File pattern of the transformed example files.
        batch_size: Number of examples per batch.

    Returns:
        A zero-argument function returning a (features, labels) pair; it is
        used for both training and evaluation.
    """
    def input_fn():
        """Return one batch as (features dict without the label, labels)."""
        feature_spec = tf_transform_output.transformed_feature_spec()
        batched_dataset = tf.data.experimental.make_batched_features_dataset(
            file_pattern=transformed_examples,
            batch_size=batch_size,
            features=feature_spec,
            reader=tf.data.TFRecordDataset,
            shuffle=True,
        )
        iterator = tf.compat.v1.data.make_one_shot_iterator(batched_dataset)
        features = iterator.get_next()

        # Split the label off; everything left in the dict is a model feature.
        labels = features.pop(LABEL_KEY)

        return features, labels

    return input_fn

### 4.2 Create an input function for serving

In [10]:
def _make_serving_input_fn(tf_transform_output):
    """Build a serving input function that accepts raw (untransformed) data.

    Args:
        tf_transform_output: Wrapper around output of tf.Transform.

    Returns:
        A zero-argument function returning a ServingInputReceiver whose
        features are the transformed versions of the raw inputs.
    """
    # The label is not available at serving time, so drop it from a copy of
    # the raw feature spec.
    serving_feature_spec = dict(RAW_DATA_FEATURE_SPEC)
    del serving_feature_spec[LABEL_KEY]

    def serving_input_fn():
        """Parse raw examples, then apply the saved transform to them."""
        # Build the basic parsing receiver from the label-free raw spec and
        # call it to obtain the raw feature tensors. See also
        # tf.estimator.export.build_raw_serving_input_receiver_fn.
        parsing_receiver_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
            serving_feature_spec, default_batch_size=None)
        receiver = parsing_receiver_fn()

        # Run the raw features through the same transform function that was
        # used to generate the materialized training data.
        transformed = tf_transform_output.transform_raw_features(receiver.features)

        return tf.estimator.export.ServingInputReceiver(
            transformed, receiver.receiver_tensors)

    return serving_input_fn

### 4.3 Wrap our input data in FeatureColumns

Our model will expect our data in TensorFlow FeatureColumns.

In [11]:
def get_feature_columns(tf_transform_output):
    """Build the FeatureColumns consumed by the model.

    Only NUMERIC_FEATURE_KEYS and CATEGORICAL_FEATURE_KEYS receive a column;
    OPTIONAL_NUMERIC_FEATURE_KEYS are not included here.

    Args:
        tf_transform_output: A `TFTransformOutput` object.

    Returns:
        A list of FeatureColumns: the numeric columns followed by the
        categorical columns.
    """
    # Each numeric feature is a scalar, hence shape=().
    numeric_columns = []
    for key in NUMERIC_FEATURE_KEYS:
        numeric_columns.append(tf.feature_column.numeric_column(key, shape=()))

    # Each categorical feature is backed by the vocabulary file that
    # tf.Transform wrote under the feature's own name.
    vocabulary_columns = []
    for key in CATEGORICAL_FEATURE_KEYS:
        vocabulary_path = tf_transform_output.vocabulary_file_by_name(vocab_filename=key)
        vocabulary_columns.append(
            tf.feature_column.categorical_column_with_vocabulary_file(
                key=key, vocabulary_file=vocabulary_path))

    return numeric_columns + vocabulary_columns

## 5. Train, Evaluate, and Export our model

In [12]:
def train_and_evaluate(working_dir, \
                       num_train_instances=NUM_TRAIN_INSTANCES, \
                       num_test_instances=NUM_TEST_INSTANCES):
    """Train the model on training data and evaluate on test data.

    Trains a LinearClassifier on the transformed training examples, exports
    it as a SavedModel under `working_dir`, and then evaluates it on the
    transformed test examples.

    Args:
        working_dir: Directory to read transformed data and metadata from and to write exported model to.
        num_train_instances: Number of instances in train set
        num_test_instances: Number of instances in test set

    Returns:
        The results from the estimator's 'evaluate' method
    """
    tf_transform_output = tft.TFTransformOutput(working_dir)

    run_config = tf.estimator.RunConfig()

    estimator = tf.estimator.LinearClassifier(
        feature_columns=get_feature_columns(tf_transform_output),
        config=run_config,
        loss_reduction=tf.losses.Reduction.SUM
    )

    # Fit the model using the default optimizer.
    train_input_fn = _make_training_input_fn(
        tf_transform_output=tf_transform_output,
        transformed_examples=os.path.join(working_dir, TRANSFORMED_TRAIN_DATA_FILEBASE + '*'),
        batch_size=TRAIN_BATCH_SIZE
    )
    # `max_steps` is a step count, so pass an int rather than the float that
    # true division (`/`) produces. Integer ceiling division rounds a partial
    # final batch up to a whole step.
    total_train_examples = TRAIN_NUM_EPOCHS * num_train_instances
    max_train_steps = (total_train_examples + TRAIN_BATCH_SIZE - 1) // TRAIN_BATCH_SIZE
    estimator.train(input_fn=train_input_fn, max_steps=max_train_steps)

    # Input function over the test set; with batch_size=1 each evaluation
    # step consumes exactly one example.
    eval_input_fn = _make_training_input_fn(
        tf_transform_output=tf_transform_output,
        transformed_examples=os.path.join(working_dir, TRANSFORMED_TEST_DATA_FILEBASE + '*'),
        batch_size=1
    )

    # Export the model.
    serving_input_fn = _make_serving_input_fn(tf_transform_output)
    exported_model_dir = os.path.join(working_dir, EXPORTED_MODEL_DIR)
    estimator.export_saved_model(exported_model_dir, serving_input_fn)

    # Evaluate model on test dataset: one step per test instance.
    return estimator.evaluate(input_fn=eval_input_fn, steps=num_test_instances)

## 6. Put it together

In [13]:
# `tempfile` is already imported in the setup cell; repeating it is harmless.
import tempfile
# Use the system temporary directory as the working directory for this run.
temp = tempfile.gettempdir()

# Run the Beam pipeline: writes the transformed train/test records and the
# transform function under `temp`.
transform_data(train, test, temp)









Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.


Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.


INFO:tensorflow:Assets added to graph.


INFO:tensorflow:Assets added to graph.


INFO:tensorflow:No assets to write.


INFO:tensorflow:No assets to write.


'Counter' object has no attribute 'name'


'Counter' object has no attribute 'name'


'Counter' object has no attribute 'name'


'Counter' object has no attribute 'name'


INFO:tensorflow:SavedModel written to: /tmp/tmpuab7d393/tftransform_tmp/92db7790ceee4996bbe4c97f8b94a5e9/saved_model.pb


INFO:tensorflow:SavedModel written to: /tmp/tmpuab7d393/tftransform_tmp/92db7790ceee4996bbe4c97f8b94a5e9/saved_model.pb


INFO:tensorflow:Assets added to graph.


INFO:tensorflow:Assets added to graph.


INFO:tensorflow:No assets to write.


INFO:tensorflow:No assets to write.


'Counter' object has no attribute 'name'


'Counter' object has no attribute 'name'


'Counter' object has no attribute 'name'


'Counter' object has no attribute 'name'


INFO:tensorflow:SavedModel written to: /tmp/tmpuab7d393/tftransform_tmp/c27516028afd48acaca188a84c01aad4/saved_model.pb


INFO:tensorflow:SavedModel written to: /tmp/tmpuab7d393/tftransform_tmp/c27516028afd48acaca188a84c01aad4/saved_model.pb










INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Assets added to graph.


INFO:tensorflow:Assets added to graph.


INFO:tensorflow:Assets written to: /tmp/tmpuab7d393/tftransform_tmp/4efcf067ad1841728762eae652cf5a56/assets


INFO:tensorflow:Assets written to: /tmp/tmpuab7d393/tftransform_tmp/4efcf067ad1841728762eae652cf5a56/assets


INFO:tensorflow:SavedModel written to: /tmp/tmpuab7d393/tftransform_tmp/4efcf067ad1841728762eae652cf5a56/saved_model.pb


INFO:tensorflow:SavedModel written to: /tmp/tmpuab7d393/tftransform_tmp/4efcf067ad1841728762eae652cf5a56/saved_model.pb


value: "\n\014\n\nConst_11:0\022\tworkclass"



value: "\n\014\n\nConst_11:0\022\tworkclass"



value: "\n\014\n\nConst_13:0\022\teducation"



value: "\n\014\n\nConst_13:0\022\teducation"



value: "\n\014\n\nConst_15:0\022\016marital-status"



value: "\n\014\n\nConst_15:0\022\016marital-status"



value: "\n\014\n\nConst_17:0\022\noccupation"



value: "\n\014\n\nConst_17:0\022\noccupation"



value: "\n\014\n\nConst_19:0\022\014relationship"



value: "\n\014\n\nConst_19:0\022\014relationship"



value: "\n\014\n\nConst_21:0\022\004race"



value: "\n\014\n\nConst_21:0\022\004race"



value: "\n\014\n\nConst_23:0\022\003sex"



value: "\n\014\n\nConst_23:0\022\003sex"



value: "\n\014\n\nConst_25:0\022\016native-country"



value: "\n\014\n\nConst_25:0\022\016native-country"



INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


value: "\n\014\n\nConst_11:0\022\tworkclass"



value: "\n\014\n\nConst_11:0\022\tworkclass"



value: "\n\014\n\nConst_13:0\022\teducation"



value: "\n\014\n\nConst_13:0\022\teducation"



value: "\n\014\n\nConst_15:0\022\016marital-status"



value: "\n\014\n\nConst_15:0\022\016marital-status"



value: "\n\014\n\nConst_17:0\022\noccupation"



value: "\n\014\n\nConst_17:0\022\noccupation"



value: "\n\014\n\nConst_19:0\022\014relationship"



value: "\n\014\n\nConst_19:0\022\014relationship"



value: "\n\014\n\nConst_21:0\022\004race"



value: "\n\014\n\nConst_21:0\022\004race"



value: "\n\014\n\nConst_23:0\022\003sex"



value: "\n\014\n\nConst_23:0\022\003sex"



value: "\n\014\n\nConst_25:0\022\016native-country"



value: "\n\014\n\nConst_25:0\022\016native-country"



INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


value: "\n\014\n\nConst_11:0\022\tworkclass"



value: "\n\014\n\nConst_11:0\022\tworkclass"



value: "\n\014\n\nConst_13:0\022\teducation"



value: "\n\014\n\nConst_13:0\022\teducation"



value: "\n\014\n\nConst_15:0\022\016marital-status"



value: "\n\014\n\nConst_15:0\022\016marital-status"



value: "\n\014\n\nConst_17:0\022\noccupation"



value: "\n\014\n\nConst_17:0\022\noccupation"



value: "\n\014\n\nConst_19:0\022\014relationship"



value: "\n\014\n\nConst_19:0\022\014relationship"



value: "\n\014\n\nConst_21:0\022\004race"



value: "\n\014\n\nConst_21:0\022\004race"



value: "\n\014\n\nConst_23:0\022\003sex"



value: "\n\014\n\nConst_23:0\022\003sex"



value: "\n\014\n\nConst_25:0\022\016native-country"



value: "\n\014\n\nConst_25:0\022\016native-country"



INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


In [14]:
# Train, export and evaluate using the artifacts that transform_data wrote
# to `temp`; `results` holds what the estimator's evaluate call returned.
results = train_and_evaluate(temp)

INFO:tensorflow:vocabulary_size = 9 in workclass is inferred from the number of elements in the vocabulary_file /tmp/transform_fn/assets/workclass.


INFO:tensorflow:vocabulary_size = 9 in workclass is inferred from the number of elements in the vocabulary_file /tmp/transform_fn/assets/workclass.


INFO:tensorflow:vocabulary_size = 16 in education is inferred from the number of elements in the vocabulary_file /tmp/transform_fn/assets/education.


INFO:tensorflow:vocabulary_size = 16 in education is inferred from the number of elements in the vocabulary_file /tmp/transform_fn/assets/education.


INFO:tensorflow:vocabulary_size = 7 in marital-status is inferred from the number of elements in the vocabulary_file /tmp/transform_fn/assets/marital-status.


INFO:tensorflow:vocabulary_size = 7 in marital-status is inferred from the number of elements in the vocabulary_file /tmp/transform_fn/assets/marital-status.


INFO:tensorflow:vocabulary_size = 15 in occupation is inferred from the number of elements in the vocabulary_file /tmp/transform_fn/assets/occupation.


INFO:tensorflow:vocabulary_size = 15 in occupation is inferred from the number of elements in the vocabulary_file /tmp/transform_fn/assets/occupation.


INFO:tensorflow:vocabulary_size = 6 in relationship is inferred from the number of elements in the vocabulary_file /tmp/transform_fn/assets/relationship.


INFO:tensorflow:vocabulary_size = 6 in relationship is inferred from the number of elements in the vocabulary_file /tmp/transform_fn/assets/relationship.


INFO:tensorflow:vocabulary_size = 5 in race is inferred from the number of elements in the vocabulary_file /tmp/transform_fn/assets/race.


INFO:tensorflow:vocabulary_size = 5 in race is inferred from the number of elements in the vocabulary_file /tmp/transform_fn/assets/race.


INFO:tensorflow:vocabulary_size = 2 in sex is inferred from the number of elements in the vocabulary_file /tmp/transform_fn/assets/sex.


INFO:tensorflow:vocabulary_size = 2 in sex is inferred from the number of elements in the vocabulary_file /tmp/transform_fn/assets/sex.


INFO:tensorflow:vocabulary_size = 42 in native-country is inferred from the number of elements in the vocabulary_file /tmp/transform_fn/assets/native-country.


INFO:tensorflow:vocabulary_size = 42 in native-country is inferred from the number of elements in the vocabulary_file /tmp/transform_fn/assets/native-country.






INFO:tensorflow:Using config: {'_num_ps_replicas': 0, '_save_checkpoints_secs': 600, '_train_distribute': None, '_log_step_count_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_experimental_max_worker_delay_secs': None, '_is_chief': True, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_session_creation_timeout_secs': 7200, '_global_id_in_cluster': 0, '_device_fn': None, '_experimental_distribute': None, '_tf_random_seed': None, '_num_worker_replicas': 1, '_model_dir': '/tmp/tmpqcnug2wp', '_eval_distribute': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_protocol': None, '_master': '', '_evaluation_master': '', '_task_type': 'worker', '_keep_checkpoint_max': 5, '_cluster_spec': ClusterSpec({}), '_task_id': 0}


INFO:tensorflow:Using config: {'_num_ps_replicas': 0, '_save_checkpoints_secs': 600, '_train_distribute': None, '_log_step_count_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_experimental_max_worker_delay_secs': None, '_is_chief': True, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_session_creation_timeout_secs': 7200, '_global_id_in_cluster': 0, '_device_fn': None, '_experimental_distribute': None, '_tf_random_seed': None, '_num_worker_replicas': 1, '_model_dir': '/tmp/tmpqcnug2wp', '_eval_distribute': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_protocol': None, '_master': '', '_evaluation_master': '', '_task_type': 'worker', '_keep_checkpoint_max': 5, '_cluster_spec': ClusterSpec({}), '_task_id': 0}


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


Instructions for updating:
Please use `layer.add_weight` method instead.


Instructions for updating:
Please use `layer.add_weight` method instead.


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpqcnug2wp/model.ckpt.


INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpqcnug2wp/model.ckpt.


INFO:tensorflow:loss = 88.72284, step = 0


INFO:tensorflow:loss = 88.72284, step = 0


INFO:tensorflow:global_step/sec: 87.322


INFO:tensorflow:global_step/sec: 87.322


INFO:tensorflow:loss = 38.656242, step = 100 (1.150 sec)


INFO:tensorflow:loss = 38.656242, step = 100 (1.150 sec)


INFO:tensorflow:global_step/sec: 127.261


INFO:tensorflow:global_step/sec: 127.261


INFO:tensorflow:loss = 45.32393, step = 200 (0.786 sec)


INFO:tensorflow:loss = 45.32393, step = 200 (0.786 sec)


INFO:tensorflow:global_step/sec: 125.32


INFO:tensorflow:global_step/sec: 125.32


INFO:tensorflow:loss = 38.318966, step = 300 (0.796 sec)


INFO:tensorflow:loss = 38.318966, step = 300 (0.796 sec)


INFO:tensorflow:global_step/sec: 125.33


INFO:tensorflow:global_step/sec: 125.33


INFO:tensorflow:loss = 44.863556, step = 400 (0.800 sec)


INFO:tensorflow:loss = 44.863556, step = 400 (0.800 sec)


INFO:tensorflow:global_step/sec: 126.121


INFO:tensorflow:global_step/sec: 126.121


INFO:tensorflow:loss = 45.530323, step = 500 (0.790 sec)


INFO:tensorflow:loss = 45.530323, step = 500 (0.790 sec)


INFO:tensorflow:global_step/sec: 125.155


INFO:tensorflow:global_step/sec: 125.155


INFO:tensorflow:loss = 47.035667, step = 600 (0.799 sec)


INFO:tensorflow:loss = 47.035667, step = 600 (0.799 sec)


INFO:tensorflow:global_step/sec: 128.005


INFO:tensorflow:global_step/sec: 128.005


INFO:tensorflow:loss = 48.154106, step = 700 (0.781 sec)


INFO:tensorflow:loss = 48.154106, step = 700 (0.781 sec)


INFO:tensorflow:global_step/sec: 122.33


INFO:tensorflow:global_step/sec: 122.33


INFO:tensorflow:loss = 50.074112, step = 800 (0.818 sec)


INFO:tensorflow:loss = 50.074112, step = 800 (0.818 sec)


INFO:tensorflow:global_step/sec: 126.055


INFO:tensorflow:global_step/sec: 126.055


INFO:tensorflow:loss = 54.9235, step = 900 (0.795 sec)


INFO:tensorflow:loss = 54.9235, step = 900 (0.795 sec)


INFO:tensorflow:global_step/sec: 125.9


INFO:tensorflow:global_step/sec: 125.9


INFO:tensorflow:loss = 39.088783, step = 1000 (0.793 sec)


INFO:tensorflow:loss = 39.088783, step = 1000 (0.793 sec)


INFO:tensorflow:global_step/sec: 126.024


INFO:tensorflow:global_step/sec: 126.024


INFO:tensorflow:loss = 28.14651, step = 1100 (0.793 sec)


INFO:tensorflow:loss = 28.14651, step = 1100 (0.793 sec)


INFO:tensorflow:global_step/sec: 124.681


INFO:tensorflow:global_step/sec: 124.681


INFO:tensorflow:loss = 41.9496, step = 1200 (0.802 sec)


INFO:tensorflow:loss = 41.9496, step = 1200 (0.802 sec)


INFO:tensorflow:global_step/sec: 124.346


INFO:tensorflow:global_step/sec: 124.346


INFO:tensorflow:loss = 45.35068, step = 1300 (0.804 sec)


INFO:tensorflow:loss = 45.35068, step = 1300 (0.804 sec)


INFO:tensorflow:global_step/sec: 127.217


INFO:tensorflow:global_step/sec: 127.217


INFO:tensorflow:loss = 37.159218, step = 1400 (0.786 sec)


INFO:tensorflow:loss = 37.159218, step = 1400 (0.786 sec)


INFO:tensorflow:global_step/sec: 126.632


INFO:tensorflow:global_step/sec: 126.632


INFO:tensorflow:loss = 32.8702, step = 1500 (0.789 sec)


INFO:tensorflow:loss = 32.8702, step = 1500 (0.789 sec)


INFO:tensorflow:global_step/sec: 125.711


INFO:tensorflow:global_step/sec: 125.711


INFO:tensorflow:loss = 36.118576, step = 1600 (0.796 sec)


INFO:tensorflow:loss = 36.118576, step = 1600 (0.796 sec)


INFO:tensorflow:global_step/sec: 127.773


INFO:tensorflow:global_step/sec: 127.773


INFO:tensorflow:loss = 36.046135, step = 1700 (0.785 sec)


INFO:tensorflow:loss = 36.046135, step = 1700 (0.785 sec)


INFO:tensorflow:global_step/sec: 123.636


INFO:tensorflow:global_step/sec: 123.636


INFO:tensorflow:loss = 45.813995, step = 1800 (0.807 sec)


INFO:tensorflow:loss = 45.813995, step = 1800 (0.807 sec)


INFO:tensorflow:global_step/sec: 127.886


INFO:tensorflow:global_step/sec: 127.886


INFO:tensorflow:loss = 45.326862, step = 1900 (0.782 sec)


INFO:tensorflow:loss = 45.326862, step = 1900 (0.782 sec)


INFO:tensorflow:global_step/sec: 128.242


INFO:tensorflow:global_step/sec: 128.242


INFO:tensorflow:loss = 40.309376, step = 2000 (0.780 sec)


INFO:tensorflow:loss = 40.309376, step = 2000 (0.780 sec)


INFO:tensorflow:global_step/sec: 125.617


INFO:tensorflow:global_step/sec: 125.617


INFO:tensorflow:loss = 39.72683, step = 2100 (0.796 sec)


INFO:tensorflow:loss = 39.72683, step = 2100 (0.796 sec)


INFO:tensorflow:global_step/sec: 127.951


INFO:tensorflow:global_step/sec: 127.951


INFO:tensorflow:loss = 33.298424, step = 2200 (0.782 sec)


INFO:tensorflow:loss = 33.298424, step = 2200 (0.782 sec)


INFO:tensorflow:global_step/sec: 126.711


INFO:tensorflow:global_step/sec: 126.711


INFO:tensorflow:loss = 41.40884, step = 2300 (0.789 sec)


INFO:tensorflow:loss = 41.40884, step = 2300 (0.789 sec)


INFO:tensorflow:global_step/sec: 126.959


INFO:tensorflow:global_step/sec: 126.959


INFO:tensorflow:loss = 49.03764, step = 2400 (0.788 sec)


INFO:tensorflow:loss = 49.03764, step = 2400 (0.788 sec)


INFO:tensorflow:global_step/sec: 127.57


INFO:tensorflow:global_step/sec: 127.57


INFO:tensorflow:loss = 43.981377, step = 2500 (0.785 sec)


INFO:tensorflow:loss = 43.981377, step = 2500 (0.785 sec)


INFO:tensorflow:global_step/sec: 127.548


INFO:tensorflow:global_step/sec: 127.548


INFO:tensorflow:loss = 38.759964, step = 2600 (0.782 sec)


INFO:tensorflow:loss = 38.759964, step = 2600 (0.782 sec)


INFO:tensorflow:global_step/sec: 126.924


INFO:tensorflow:global_step/sec: 126.924


INFO:tensorflow:loss = 46.141518, step = 2700 (0.788 sec)


INFO:tensorflow:loss = 46.141518, step = 2700 (0.788 sec)


INFO:tensorflow:global_step/sec: 126.505


INFO:tensorflow:global_step/sec: 126.505


INFO:tensorflow:loss = 39.496117, step = 2800 (0.790 sec)


INFO:tensorflow:loss = 39.496117, step = 2800 (0.790 sec)


INFO:tensorflow:global_step/sec: 122.706


INFO:tensorflow:global_step/sec: 122.706


INFO:tensorflow:loss = 37.894825, step = 2900 (0.815 sec)


INFO:tensorflow:loss = 37.894825, step = 2900 (0.815 sec)


INFO:tensorflow:global_step/sec: 124.449


INFO:tensorflow:global_step/sec: 124.449


INFO:tensorflow:loss = 56.28445, step = 3000 (0.803 sec)


INFO:tensorflow:loss = 56.28445, step = 3000 (0.803 sec)


INFO:tensorflow:global_step/sec: 126.959


INFO:tensorflow:global_step/sec: 126.959


INFO:tensorflow:loss = 47.050682, step = 3100 (0.788 sec)


INFO:tensorflow:loss = 47.050682, step = 3100 (0.788 sec)


INFO:tensorflow:global_step/sec: 126.705


INFO:tensorflow:global_step/sec: 126.705


INFO:tensorflow:loss = 38.82512, step = 3200 (0.790 sec)


INFO:tensorflow:loss = 38.82512, step = 3200 (0.790 sec)


INFO:tensorflow:global_step/sec: 126.852


INFO:tensorflow:global_step/sec: 126.852


INFO:tensorflow:loss = 45.804558, step = 3300 (0.788 sec)


INFO:tensorflow:loss = 45.804558, step = 3300 (0.788 sec)


INFO:tensorflow:global_step/sec: 128.779


INFO:tensorflow:global_step/sec: 128.779


INFO:tensorflow:loss = 42.49257, step = 3400 (0.777 sec)


INFO:tensorflow:loss = 42.49257, step = 3400 (0.777 sec)


INFO:tensorflow:global_step/sec: 126


INFO:tensorflow:global_step/sec: 126


INFO:tensorflow:loss = 42.593792, step = 3500 (0.795 sec)


INFO:tensorflow:loss = 42.593792, step = 3500 (0.795 sec)


INFO:tensorflow:global_step/sec: 123.329


INFO:tensorflow:global_step/sec: 123.329


INFO:tensorflow:loss = 42.194607, step = 3600 (0.811 sec)


INFO:tensorflow:loss = 42.194607, step = 3600 (0.811 sec)


INFO:tensorflow:global_step/sec: 125.945


INFO:tensorflow:global_step/sec: 125.945


INFO:tensorflow:loss = 34.91106, step = 3700 (0.792 sec)


INFO:tensorflow:loss = 34.91106, step = 3700 (0.792 sec)


INFO:tensorflow:global_step/sec: 126.185


INFO:tensorflow:global_step/sec: 126.185


INFO:tensorflow:loss = 43.67619, step = 3800 (0.791 sec)


INFO:tensorflow:loss = 43.67619, step = 3800 (0.791 sec)


INFO:tensorflow:global_step/sec: 126.255


INFO:tensorflow:global_step/sec: 126.255


INFO:tensorflow:loss = 39.97071, step = 3900 (0.792 sec)


INFO:tensorflow:loss = 39.97071, step = 3900 (0.792 sec)


INFO:tensorflow:global_step/sec: 123.559


INFO:tensorflow:global_step/sec: 123.559


INFO:tensorflow:loss = 48.692913, step = 4000 (0.809 sec)


INFO:tensorflow:loss = 48.692913, step = 4000 (0.809 sec)


INFO:tensorflow:Saving checkpoints for 4071 into /tmp/tmpqcnug2wp/model.ckpt.


INFO:tensorflow:Saving checkpoints for 4071 into /tmp/tmpqcnug2wp/model.ckpt.


INFO:tensorflow:Loss for final step: 41.01284.


INFO:tensorflow:Loss for final step: 41.01284.


value: "\n\014\n\nConst_11:0\022\tworkclass"



value: "\n\014\n\nConst_11:0\022\tworkclass"



value: "\n\014\n\nConst_13:0\022\teducation"



value: "\n\014\n\nConst_13:0\022\teducation"



value: "\n\014\n\nConst_15:0\022\016marital-status"



value: "\n\014\n\nConst_15:0\022\016marital-status"



value: "\n\014\n\nConst_17:0\022\noccupation"



value: "\n\014\n\nConst_17:0\022\noccupation"



value: "\n\014\n\nConst_19:0\022\014relationship"



value: "\n\014\n\nConst_19:0\022\014relationship"



value: "\n\014\n\nConst_21:0\022\004race"



value: "\n\014\n\nConst_21:0\022\004race"



value: "\n\014\n\nConst_23:0\022\003sex"



value: "\n\014\n\nConst_23:0\022\003sex"



value: "\n\014\n\nConst_25:0\022\016native-country"



value: "\n\014\n\nConst_25:0\022\016native-country"



INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Signatures INCLUDED in export for Classify: ['serving_default', 'classification']


INFO:tensorflow:Signatures INCLUDED in export for Classify: ['serving_default', 'classification']


INFO:tensorflow:Signatures INCLUDED in export for Regress: ['regression']


INFO:tensorflow:Signatures INCLUDED in export for Regress: ['regression']


INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predict']


INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predict']


INFO:tensorflow:Signatures INCLUDED in export for Train: None


INFO:tensorflow:Signatures INCLUDED in export for Train: None


INFO:tensorflow:Signatures INCLUDED in export for Eval: None


INFO:tensorflow:Signatures INCLUDED in export for Eval: None


INFO:tensorflow:Restoring parameters from /tmp/tmpqcnug2wp/model.ckpt-4071


INFO:tensorflow:Restoring parameters from /tmp/tmpqcnug2wp/model.ckpt-4071


INFO:tensorflow:Assets added to graph.


INFO:tensorflow:Assets added to graph.


INFO:tensorflow:Assets written to: /tmp/exported_model_dir/temp-b'1588974794'/assets


INFO:tensorflow:Assets written to: /tmp/exported_model_dir/temp-b'1588974794'/assets


INFO:tensorflow:SavedModel written to: /tmp/exported_model_dir/temp-b'1588974794'/saved_model.pb


INFO:tensorflow:SavedModel written to: /tmp/exported_model_dir/temp-b'1588974794'/saved_model.pb


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Starting evaluation at 2020-05-08T21:53:16Z


INFO:tensorflow:Starting evaluation at 2020-05-08T21:53:16Z


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Restoring parameters from /tmp/tmpqcnug2wp/model.ckpt-4071


INFO:tensorflow:Restoring parameters from /tmp/tmpqcnug2wp/model.ckpt-4071


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Evaluation [1628/16281]


INFO:tensorflow:Evaluation [1628/16281]


INFO:tensorflow:Evaluation [3256/16281]


INFO:tensorflow:Evaluation [3256/16281]


INFO:tensorflow:Evaluation [4884/16281]


INFO:tensorflow:Evaluation [4884/16281]


INFO:tensorflow:Evaluation [6512/16281]


INFO:tensorflow:Evaluation [6512/16281]


INFO:tensorflow:Evaluation [8140/16281]


INFO:tensorflow:Evaluation [8140/16281]


INFO:tensorflow:Evaluation [9768/16281]


INFO:tensorflow:Evaluation [9768/16281]


INFO:tensorflow:Evaluation [11396/16281]


INFO:tensorflow:Evaluation [11396/16281]


INFO:tensorflow:Evaluation [13024/16281]


INFO:tensorflow:Evaluation [13024/16281]


INFO:tensorflow:Evaluation [14652/16281]


INFO:tensorflow:Evaluation [14652/16281]


INFO:tensorflow:Evaluation [16280/16281]


INFO:tensorflow:Evaluation [16280/16281]


INFO:tensorflow:Evaluation [16281/16281]


INFO:tensorflow:Evaluation [16281/16281]


INFO:tensorflow:Inference Time : 123.59763s


INFO:tensorflow:Inference Time : 123.59763s


INFO:tensorflow:Finished evaluation at 2020-05-08-21:55:20


INFO:tensorflow:Finished evaluation at 2020-05-08-21:55:20


INFO:tensorflow:Saving dict for global step 4071: accuracy = 0.8511762, accuracy_baseline = 0.76377374, auc = 0.9017907, auc_precision_recall = 0.96716326, average_loss = 0.32410476, global_step = 4071, label/mean = 0.76377374, loss = 0.32410476, precision = 0.8810901, prediction/mean = 0.7600534, recall = 0.93075997


INFO:tensorflow:Saving dict for global step 4071: accuracy = 0.8511762, accuracy_baseline = 0.76377374, auc = 0.9017907, auc_precision_recall = 0.96716326, average_loss = 0.32410476, global_step = 4071, label/mean = 0.76377374, loss = 0.32410476, precision = 0.8810901, prediction/mean = 0.7600534, recall = 0.93075997


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 4071: /tmp/tmpqcnug2wp/model.ckpt-4071


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 4071: /tmp/tmpqcnug2wp/model.ckpt-4071


In [15]:
# Show the metrics dict held in `results`, formatted one key per line.
print(pprint.pformat(results))

{'accuracy': 0.8511762,
 'accuracy_baseline': 0.76377374,
 'auc': 0.9017907,
 'auc_precision_recall': 0.96716326,
 'average_loss': 0.32410476,
 'global_step': 4071,
 'label/mean': 0.76377374,
 'loss': 0.32410476,
 'precision': 0.8810901,
 'prediction/mean': 0.7600534,
 'recall': 0.93075997}
