<!-- TODO(ccy): split this notebook into (1) a beginner friendly notebook that
     avoids directly calling into TFX libraries and (2) an advanced notebook
     or notebooks that delve more deeply into each component, along with their
     underlying libraries. -->
# TFX Iterative Development Example
This notebook demonstrates how to use Jupyter notebooks for TFX iterative development.  Here, we walk through the Chicago Taxi example in an interactive Jupyter notebook.

Note: this notebook, along with its associated APIs, is **experimental** and
in active development.  Major changes in functionality, behavior and
presentation are expected.

## Setup
First, download data, import modules and set up paths.

### Import packages
We import necessary packages, including standard TFX component classes.

In [2]:
import os
import tempfile
import urllib

import tfx
from tfx.components.evaluator.component import Evaluator
from tfx.components.example_gen.csv_example_gen.component import CsvExampleGen
from tfx.components.example_validator.component import ExampleValidator
from tfx.components.model_validator.component import ModelValidator
from tfx.components.pusher.component import Pusher
from tfx.components.schema_gen.component import SchemaGen
from tfx.components.statistics_gen.component import StatisticsGen
from tfx.components.trainer.component import Trainer
from tfx.components.transform.component import Transform
from tfx.orchestration.interactive.interactive_context import InteractiveContext
from tfx.proto import evaluator_pb2
from tfx.proto import pusher_pb2
from tfx.proto import trainer_pb2
from tfx.proto.evaluator_pb2 import SingleSlicingSpec
from tfx.utils.dsl_utils import csv_input

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  'Some syntactic constructs of Python 3 are not yet fully supported by '



Instructions for updating:
from_feature_spec is a deprecated, use schema_utils.schema_from_feature_spec


## Create the InteractiveContext
We now create the interactive context.

In [21]:
import ml_metadata

In [22]:
# Display the installed ML Metadata version string for reference.
ml_metadata.__version__

'0.15.0dev'

In [26]:
# Here, we create an InteractiveContext that is backed by a MySQL ML Metadata
# database and a `gs://` pipeline root, instead of the defaults (when the
# optional `pipeline_root` and `metadata_connection_config` arguments are
# omitted, InteractiveContext uses a temporary directory with an ephemeral
# ML Metadata database instance).
#
# Every connection setting can be overridden through an environment variable
# so that hosts and credentials do not have to be edited into the notebook.
# The fallback values are the ones this notebook was originally run with.
# NOTE(review): the fallback connects as `root` with an empty password; set
# TFX_METADATA_MYSQL_USER / TFX_METADATA_MYSQL_PASSWORD for anything shared.

import os

import ml_metadata
from ml_metadata.metadata_store import metadata_store
from ml_metadata.proto import metadata_store_pb2

_connection_config = metadata_store_pb2.ConnectionConfig()
_connection_config.mysql.host = os.environ.get(
    'TFX_METADATA_MYSQL_HOST', '35.192.177.172')
# The proto field is an integer, so the environment value is converted.
_connection_config.mysql.port = int(
    os.environ.get('TFX_METADATA_MYSQL_PORT', '3306'))
_connection_config.mysql.database = os.environ.get(
    'TFX_METADATA_MYSQL_DATABASE', 'metadb')
_connection_config.mysql.user = os.environ.get(
    'TFX_METADATA_MYSQL_USER', 'root')
_connection_config.mysql.password = os.environ.get(
    'TFX_METADATA_MYSQL_PASSWORD', '')

_pipeline_name = 'chicago_taxi'
# Component outputs are written underneath this root, one directory per
# pipeline name.
_pipeline_root = os.environ.get(
    'TFX_PIPELINE_ROOT_BASE', 'gs://caip-tfx/interactive/') + _pipeline_name

context = InteractiveContext(
    _pipeline_name,
    pipeline_root=_pipeline_root,
    metadata_connection_config=_connection_config)

## Run TFX components interactively
Next, we construct TFX components and run each one within the interactive session to obtain `ExecutionResult` objects.

### ExampleGen
`ExampleGen` brings data into the TFX pipeline.

In [27]:
# Point ExampleGen at a directory containing the CSV input data.
# NOTE(review): this is a hard-coded, user-specific local path rather than data
# packaged with the notebook; change it to the directory holding your CSV files.
_data_root = '/home/jarekk/datasets/'
# Wrap the directory path so it can be passed as the `input_base` of an
# ExampleGen component.
examples = csv_input(_data_root)


In [28]:
# Brings data into the pipeline or otherwise joins/converts training data.
# CsvExampleGen reads the CSV files under `examples` and converts them to
# TFExample records.
example_gen = CsvExampleGen(input_base=examples)
# Execute the component inside the interactive session; the logged output
# below shows the driver, executor and publisher stages and the resulting
# `train` and `eval` example artifacts.
context.run(example_gen)

INFO:tensorflow:Run driver for CsvExampleGen


INFO:tensorflow:Run driver for CsvExampleGen


INFO:tensorflow:Created run context chicago_taxi.2019-10-07T17:38:27.202905.


INFO:tensorflow:Created run context chicago_taxi.2019-10-07T17:38:27.202905.


INFO:tensorflow:ID of run context chicago_taxi.2019-10-07T17:38:27.202905 is 56.


INFO:tensorflow:ID of run context chicago_taxi.2019-10-07T17:38:27.202905 is 56.


INFO:tensorflow:Processing input /home/jarekk/datasets/.


INFO:tensorflow:Processing input /home/jarekk/datasets/.


INFO:tensorflow:single_input Artifact(type_name: ExternalPath, uri: /home/jarekk/datasets/, split: , id: 0).


INFO:tensorflow:single_input Artifact(type_name: ExternalPath, uri: /home/jarekk/datasets/, split: , id: 0).


INFO:tensorflow:single_input.artifact uri: "/home/jarekk/datasets/"
properties {
  key: "split"
  value {
    string_value: ""
  }
}
properties {
  key: "type_name"
  value {
    string_value: "ExternalPath"
  }
}
.


INFO:tensorflow:single_input.artifact uri: "/home/jarekk/datasets/"
properties {
  key: "split"
  value {
    string_value: ""
  }
}
properties {
  key: "type_name"
  value {
    string_value: "ExternalPath"
  }
}
.


INFO:tensorflow:latest_artifact id: 59
type_id: 20
uri: "/home/jarekk/datasets/"
properties {
  key: "split"
  value {
    string_value: ""
  }
}
properties {
  key: "state"
  value {
    string_value: "published"
  }
}
properties {
  key: "type_name"
  value {
    string_value: "ExternalPath"
  }
}
custom_properties {
  key: "input_fingerprint"
  value {
    string_value: "split:single_split,num_files:1,total_bytes:1922668,xor_checksum:1570466499,sum_checksum:1570466499"
  }
}
.


INFO:tensorflow:latest_artifact id: 59
type_id: 20
uri: "/home/jarekk/datasets/"
properties {
  key: "split"
  value {
    string_value: ""
  }
}
properties {
  key: "state"
  value {
    string_value: "published"
  }
}
properties {
  key: "type_name"
  value {
    string_value: "ExternalPath"
  }
}
custom_properties {
  key: "input_fingerprint"
  value {
    string_value: "split:single_split,num_files:1,total_bytes:1922668,xor_checksum:1570466499,sum_checksum:1570466499"
  }
}
.


INFO:tensorflow:type(latest_artifact) <class 'ml_metadata.proto.metadata_store_pb2.Artifact'>.


INFO:tensorflow:type(latest_artifact) <class 'ml_metadata.proto.metadata_store_pb2.Artifact'>.


INFO:tensorflow:Resolved input artifacts are: {'input_base': [Artifact(type_name: ExternalPath, uri: /home/jarekk/datasets/, split: , id: 59)]}


INFO:tensorflow:Resolved input artifacts are: {'input_base': [Artifact(type_name: ExternalPath, uri: /home/jarekk/datasets/, split: , id: 59)]}


INFO:tensorflow:Prepared EXECUTION:
 type_id: 21
properties {
  key: "component_id"
  value {
    string_value: "CsvExampleGen"
  }
}
properties {
  key: "custom_config"
  value {
    string_value: "None"
  }
}
properties {
  key: "input_config"
  value {
    string_value: "{\n  \"splits\": [\n    {\n      \"name\": \"single_split\",\n      \"pattern\": \"*\"\n    }\n  ]\n}"
  }
}
properties {
  key: "output_config"
  value {
    string_value: "{\n  \"splitConfig\": {\n    \"splits\": [\n      {\n        \"hashBuckets\": 2,\n        \"name\": \"train\"\n      },\n      {\n        \"hashBuckets\": 1,\n        \"name\": \"eval\"\n      }\n    ]\n  }\n}"
  }
}
properties {
  key: "pipeline_name"
  value {
    string_value: "chicago_taxi"
  }
}
properties {
  key: "pipeline_root"
  value {
    string_value: "gs://caip-tfx/interactive/chicago_taxi"
  }
}
properties {
  key: "run_id"
  value {
    string_value: "2019-10-07T17:38:27.202905"
  }
}
properties {
  key: "state"
  value {
    stri

INFO:tensorflow:Prepared EXECUTION:
 type_id: 21
properties {
  key: "component_id"
  value {
    string_value: "CsvExampleGen"
  }
}
properties {
  key: "custom_config"
  value {
    string_value: "None"
  }
}
properties {
  key: "input_config"
  value {
    string_value: "{\n  \"splits\": [\n    {\n      \"name\": \"single_split\",\n      \"pattern\": \"*\"\n    }\n  ]\n}"
  }
}
properties {
  key: "output_config"
  value {
    string_value: "{\n  \"splitConfig\": {\n    \"splits\": [\n      {\n        \"hashBuckets\": 2,\n        \"name\": \"train\"\n      },\n      {\n        \"hashBuckets\": 1,\n        \"name\": \"eval\"\n      }\n    ]\n  }\n}"
  }
}
properties {
  key: "pipeline_name"
  value {
    string_value: "chicago_taxi"
  }
}
properties {
  key: "pipeline_root"
  value {
    string_value: "gs://caip-tfx/interactive/chicago_taxi"
  }
}
properties {
  key: "run_id"
  value {
    string_value: "2019-10-07T17:38:27.202905"
  }
}
properties {
  key: "state"
  value {
    stri

INFO:tensorflow:Execution id of the upcoming component execution is 55


INFO:tensorflow:Execution id of the upcoming component execution is 55


INFO:tensorflow:Checking previous run for execution_type_name tfx.components.example_gen.csv_example_gen.component.CsvExampleGen and input_artifacts {'input_base': [Artifact(type_name: ExternalPath, uri: /home/jarekk/datasets/, split: , id: 59)]}


INFO:tensorflow:Checking previous run for execution_type_name tfx.components.example_gen.csv_example_gen.component.CsvExampleGen and input_artifacts {'input_base': [Artifact(type_name: ExternalPath, uri: /home/jarekk/datasets/, split: , id: 59)]}


INFO:tensorflow:Prepared EXECUTION:
 type_id: 21
properties {
  key: "component_id"
  value {
    string_value: "CsvExampleGen"
  }
}
properties {
  key: "custom_config"
  value {
    string_value: "None"
  }
}
properties {
  key: "input_config"
  value {
    string_value: "{\n  \"splits\": [\n    {\n      \"name\": \"single_split\",\n      \"pattern\": \"*\"\n    }\n  ]\n}"
  }
}
properties {
  key: "output_config"
  value {
    string_value: "{\n  \"splitConfig\": {\n    \"splits\": [\n      {\n        \"hashBuckets\": 2,\n        \"name\": \"train\"\n      },\n      {\n        \"hashBuckets\": 1,\n        \"name\": \"eval\"\n      }\n    ]\n  }\n}"
  }
}
properties {
  key: "pipeline_name"
  value {
    string_value: "chicago_taxi"
  }
}
properties {
  key: "pipeline_root"
  value {
    string_value: "gs://caip-tfx/interactive/chicago_taxi"
  }
}
properties {
  key: "run_id"
  value {
    string_value: "2019-10-07T17:38:27.202905"
  }
}
properties {
  key: "state"
  value {
    stri

INFO:tensorflow:Prepared EXECUTION:
 type_id: 21
properties {
  key: "component_id"
  value {
    string_value: "CsvExampleGen"
  }
}
properties {
  key: "custom_config"
  value {
    string_value: "None"
  }
}
properties {
  key: "input_config"
  value {
    string_value: "{\n  \"splits\": [\n    {\n      \"name\": \"single_split\",\n      \"pattern\": \"*\"\n    }\n  ]\n}"
  }
}
properties {
  key: "output_config"
  value {
    string_value: "{\n  \"splitConfig\": {\n    \"splits\": [\n      {\n        \"hashBuckets\": 2,\n        \"name\": \"train\"\n      },\n      {\n        \"hashBuckets\": 1,\n        \"name\": \"eval\"\n      }\n    ]\n  }\n}"
  }
}
properties {
  key: "pipeline_name"
  value {
    string_value: "chicago_taxi"
  }
}
properties {
  key: "pipeline_root"
  value {
    string_value: "gs://caip-tfx/interactive/chicago_taxi"
  }
}
properties {
  key: "run_id"
  value {
    string_value: "2019-10-07T17:38:27.202905"
  }
}
properties {
  key: "state"
  value {
    stri

INFO:tensorflow:No execution matching type id and input artifacts found


INFO:tensorflow:No execution matching type id and input artifacts found


INFO:tensorflow:Cached results not found, move on to new execution


INFO:tensorflow:Cached results not found, move on to new execution


INFO:tensorflow:Creating output artifact uri gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/ as directory


INFO:tensorflow:Creating output artifact uri gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/ as directory


INFO:tensorflow:Creating output artifact uri gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/ as directory


INFO:tensorflow:Creating output artifact uri gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/ as directory


INFO:tensorflow:Output artifacts skeleton for the upcoming execution are: {'examples': [Artifact(type_name: ExamplesPath, uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/, split: train, id: 0), Artifact(type_name: ExamplesPath, uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/, split: eval, id: 0)]}


INFO:tensorflow:Output artifacts skeleton for the upcoming execution are: {'examples': [Artifact(type_name: ExamplesPath, uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/, split: train, id: 0), Artifact(type_name: ExamplesPath, uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/, split: eval, id: 0)]}


INFO:tensorflow:Execution properties for the upcoming execution are: {'input_config': '{\n  "splits": [\n    {\n      "name": "single_split",\n      "pattern": "*"\n    }\n  ]\n}', 'output_config': '{\n  "splitConfig": {\n    "splits": [\n      {\n        "hashBuckets": 2,\n        "name": "train"\n      },\n      {\n        "hashBuckets": 1,\n        "name": "eval"\n      }\n    ]\n  }\n}', 'custom_config': None}


INFO:tensorflow:Execution properties for the upcoming execution are: {'input_config': '{\n  "splits": [\n    {\n      "name": "single_split",\n      "pattern": "*"\n    }\n  ]\n}', 'output_config': '{\n  "splitConfig": {\n    "splits": [\n      {\n        "hashBuckets": 2,\n        "name": "train"\n      },\n      {\n        "hashBuckets": 1,\n        "name": "eval"\n      }\n    ]\n  }\n}', 'custom_config': None}


INFO:tensorflow:Run executor for CsvExampleGen


INFO:tensorflow:Run executor for CsvExampleGen


INFO:tensorflow:Starting Executor execution.


INFO:tensorflow:Starting Executor execution.


INFO:tensorflow:Inputs for Executor is: {"input_base": [{"artifact": {"id": "59", "typeId": "20", "uri": "/home/jarekk/datasets/", "properties": {"split": {"stringValue": ""}, "state": {"stringValue": "published"}, "type_name": {"stringValue": "ExternalPath"}}, "customProperties": {"input_fingerprint": {"stringValue": "split:single_split,num_files:1,total_bytes:1922668,xor_checksum:1570466499,sum_checksum:1570466499"}}}, "artifact_type": {"name": "ExternalPath", "properties": {"name": "STRING", "type_name": "STRING", "state": "STRING", "producer_component": "STRING", "span": "INT", "pipeline_name": "STRING", "split": "STRING"}}}]}


INFO:tensorflow:Inputs for Executor is: {"input_base": [{"artifact": {"id": "59", "typeId": "20", "uri": "/home/jarekk/datasets/", "properties": {"split": {"stringValue": ""}, "state": {"stringValue": "published"}, "type_name": {"stringValue": "ExternalPath"}}, "customProperties": {"input_fingerprint": {"stringValue": "split:single_split,num_files:1,total_bytes:1922668,xor_checksum:1570466499,sum_checksum:1570466499"}}}, "artifact_type": {"name": "ExternalPath", "properties": {"name": "STRING", "type_name": "STRING", "state": "STRING", "producer_component": "STRING", "span": "INT", "pipeline_name": "STRING", "split": "STRING"}}}]}


INFO:tensorflow:Outputs for Executor is: {"examples": [{"artifact": {"uri": "gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/", "properties": {"split": {"stringValue": "train"}, "producer_component": {"stringValue": "CsvExampleGen"}, "pipeline_name": {"stringValue": "chicago_taxi"}, "name": {"stringValue": "examples"}, "type_name": {"stringValue": "ExamplesPath"}}}, "artifact_type": {"name": "ExamplesPath", "properties": {"name": "STRING", "type_name": "STRING", "state": "STRING", "producer_component": "STRING", "span": "INT", "pipeline_name": "STRING", "split": "STRING"}}}, {"artifact": {"uri": "gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/", "properties": {"producer_component": {"stringValue": "CsvExampleGen"}, "pipeline_name": {"stringValue": "chicago_taxi"}, "name": {"stringValue": "examples"}, "type_name": {"stringValue": "ExamplesPath"}, "split": {"stringValue": "eval"}}}, "artifact_type": {"name": "ExamplesPath", "properties": {"na

INFO:tensorflow:Outputs for Executor is: {"examples": [{"artifact": {"uri": "gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/", "properties": {"split": {"stringValue": "train"}, "producer_component": {"stringValue": "CsvExampleGen"}, "pipeline_name": {"stringValue": "chicago_taxi"}, "name": {"stringValue": "examples"}, "type_name": {"stringValue": "ExamplesPath"}}}, "artifact_type": {"name": "ExamplesPath", "properties": {"name": "STRING", "type_name": "STRING", "state": "STRING", "producer_component": "STRING", "span": "INT", "pipeline_name": "STRING", "split": "STRING"}}}, {"artifact": {"uri": "gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/", "properties": {"producer_component": {"stringValue": "CsvExampleGen"}, "pipeline_name": {"stringValue": "chicago_taxi"}, "name": {"stringValue": "examples"}, "type_name": {"stringValue": "ExamplesPath"}, "split": {"stringValue": "eval"}}}, "artifact_type": {"name": "ExamplesPath", "properties": {"na

INFO:tensorflow:Execution properties for Executor is: {"input_config": "{\n  \"splits\": [\n    {\n      \"name\": \"single_split\",\n      \"pattern\": \"*\"\n    }\n  ]\n}", "output_config": "{\n  \"splitConfig\": {\n    \"splits\": [\n      {\n        \"hashBuckets\": 2,\n        \"name\": \"train\"\n      },\n      {\n        \"hashBuckets\": 1,\n        \"name\": \"eval\"\n      }\n    ]\n  }\n}", "custom_config": null}


INFO:tensorflow:Execution properties for Executor is: {"input_config": "{\n  \"splits\": [\n    {\n      \"name\": \"single_split\",\n      \"pattern\": \"*\"\n    }\n  ]\n}", "output_config": "{\n  \"splitConfig\": {\n    \"splits\": [\n      {\n        \"hashBuckets\": 2,\n        \"name\": \"train\"\n      },\n      {\n        \"hashBuckets\": 1,\n        \"name\": \"eval\"\n      }\n    ]\n  }\n}", "custom_config": null}


INFO:tensorflow:Generating examples.


INFO:tensorflow:Generating examples.


INFO:tensorflow:Processing input csv data /home/jarekk/datasets/* to TFExample.


INFO:tensorflow:Processing input csv data /home/jarekk/datasets/* to TFExample.


INFO:tensorflow:Examples generated.


INFO:tensorflow:Examples generated.


INFO:tensorflow:Run publisher for CsvExampleGen


INFO:tensorflow:Run publisher for CsvExampleGen


INFO:tensorflow:Whether cached results are used: False


INFO:tensorflow:Whether cached results are used: False


INFO:tensorflow:Execution id: 55


INFO:tensorflow:Execution id: 55


INFO:tensorflow:Inputs: {'input_base': [Artifact(type_name: ExternalPath, uri: /home/jarekk/datasets/, split: , id: 59)]}


INFO:tensorflow:Inputs: {'input_base': [Artifact(type_name: ExternalPath, uri: /home/jarekk/datasets/, split: , id: 59)]}


INFO:tensorflow:Outputs: {'examples': [Artifact(type_name: ExamplesPath, uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/, split: train, id: 0), Artifact(type_name: ExamplesPath, uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/, split: eval, id: 0)]}


INFO:tensorflow:Outputs: {'examples': [Artifact(type_name: ExamplesPath, uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/, split: train, id: 0), Artifact(type_name: ExamplesPath, uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/, split: eval, id: 0)]}


INFO:tensorflow:Publishing execution id: 55
type_id: 21
properties {
  key: "component_id"
  value {
    string_value: "CsvExampleGen"
  }
}
properties {
  key: "custom_config"
  value {
    string_value: "None"
  }
}
properties {
  key: "input_config"
  value {
    string_value: "{\n  \"splits\": [\n    {\n      \"name\": \"single_split\",\n      \"pattern\": \"*\"\n    }\n  ]\n}"
  }
}
properties {
  key: "output_config"
  value {
    string_value: "{\n  \"splitConfig\": {\n    \"splits\": [\n      {\n        \"hashBuckets\": 2,\n        \"name\": \"train\"\n      },\n      {\n        \"hashBuckets\": 1,\n        \"name\": \"eval\"\n      }\n    ]\n  }\n}"
  }
}
properties {
  key: "pipeline_name"
  value {
    string_value: "chicago_taxi"
  }
}
properties {
  key: "pipeline_root"
  value {
    string_value: "gs://caip-tfx/interactive/chicago_taxi"
  }
}
properties {
  key: "run_id"
  value {
    string_value: "2019-10-07T17:38:27.202905"
  }
}
properties {
  key: "state"
  value {
 

INFO:tensorflow:Publishing execution id: 55
type_id: 21
properties {
  key: "component_id"
  value {
    string_value: "CsvExampleGen"
  }
}
properties {
  key: "custom_config"
  value {
    string_value: "None"
  }
}
properties {
  key: "input_config"
  value {
    string_value: "{\n  \"splits\": [\n    {\n      \"name\": \"single_split\",\n      \"pattern\": \"*\"\n    }\n  ]\n}"
  }
}
properties {
  key: "output_config"
  value {
    string_value: "{\n  \"splitConfig\": {\n    \"splits\": [\n      {\n        \"hashBuckets\": 2,\n        \"name\": \"train\"\n      },\n      {\n        \"hashBuckets\": 1,\n        \"name\": \"eval\"\n      }\n    ]\n  }\n}"
  }
}
properties {
  key: "pipeline_name"
  value {
    string_value: "chicago_taxi"
  }
}
properties {
  key: "pipeline_root"
  value {
    string_value: "gs://caip-tfx/interactive/chicago_taxi"
  }
}
properties {
  key: "run_id"
  value {
    string_value: "2019-10-07T17:38:27.202905"
  }
}
properties {
  key: "state"
  value {
 

INFO:tensorflow:Published execution with final outputs {'examples': [Artifact(type_name: ExamplesPath, uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/, split: train, id: 60), Artifact(type_name: ExamplesPath, uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/, split: eval, id: 61)]}


INFO:tensorflow:Published execution with final outputs {'examples': [Artifact(type_name: ExamplesPath, uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/, split: train, id: 60), Artifact(type_name: ExamplesPath, uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/, split: eval, id: 61)]}


0,1
.execution_id,55
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } CsvExampleGen at 0x7f366dd04588.inputs['input_base'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExternalPath' (1 artifact) at 0x7f366dd044a8.type_nameExternalPath._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExternalPath' (uri: /home/jarekk/datasets/) at 0x7f366dc1d080.type_nameExternalPath.uri/home/jarekk/datasets/.span0.split.outputs['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExamplesPath' (2 artifacts) at 0x7f366dd042b0.type_nameExamplesPath._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExamplesPath' (uri: 
gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/) at 0x7f366dd04240.type_nameExamplesPath.urigs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/.span0.splittrain[1] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExamplesPath' (uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/) at 0x7f366dd044e0.type_nameExamplesPath.urigs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/.span0.spliteval.exec_properties['input_config']{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }['output_config']{  ""splitConfig"": {  ""splits"": [  {  ""hashBuckets"": 2,  ""name"": ""train""  },  {  ""hashBuckets"": 1,  ""name"": ""eval""  }  ]  } }['custom_config']None"
.component.inputs,['input_base'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExternalPath' (1 artifact) at 0x7f366dd044a8.type_nameExternalPath._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExternalPath' (uri: /home/jarekk/datasets/) at 0x7f366dc1d080.type_nameExternalPath.uri/home/jarekk/datasets/.span0.split
.component.outputs,['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExamplesPath' (2 artifacts) at 0x7f366dd042b0.type_nameExamplesPath._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExamplesPath' (uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/) at 0x7f366dd04240.type_nameExamplesPath.urigs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/.span0.splittrain[1] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExamplesPath' (uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/) at 0x7f366dd044e0.type_nameExamplesPath.urigs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/.span0.spliteval

0,1
.inputs,['input_base'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExternalPath' (1 artifact) at 0x7f366dd044a8.type_nameExternalPath._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExternalPath' (uri: /home/jarekk/datasets/) at 0x7f366dc1d080.type_nameExternalPath.uri/home/jarekk/datasets/.span0.split
.outputs,['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExamplesPath' (2 artifacts) at 0x7f366dd042b0.type_nameExamplesPath._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExamplesPath' (uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/) at 0x7f366dd04240.type_nameExamplesPath.urigs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/.span0.splittrain[1] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExamplesPath' (uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/) at 0x7f366dd044e0.type_nameExamplesPath.urigs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/.span0.spliteval
.exec_properties,"['input_config']{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }['output_config']{  ""splitConfig"": {  ""splits"": [  {  ""hashBuckets"": 2,  ""name"": ""train""  },  {  ""hashBuckets"": 1,  ""name"": ""eval""  }  ]  } }['custom_config']None"

0,1
['input_base'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExternalPath' (1 artifact) at 0x7f366dd044a8.type_nameExternalPath._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExternalPath' (uri: /home/jarekk/datasets/) at 0x7f366dc1d080.type_nameExternalPath.uri/home/jarekk/datasets/.span0.split

0,1
.type_name,ExternalPath
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExternalPath' (uri: /home/jarekk/datasets/) at 0x7f366dc1d080.type_nameExternalPath.uri/home/jarekk/datasets/.span0.split

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExternalPath' (uri: /home/jarekk/datasets/) at 0x7f366dc1d080.type_nameExternalPath.uri/home/jarekk/datasets/.span0.split

0,1
.type_name,ExternalPath
.uri,/home/jarekk/datasets/
.span,0
.split,

0,1
['examples'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExamplesPath' (2 artifacts) at 0x7f366dd042b0.type_nameExamplesPath._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExamplesPath' (uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/) at 0x7f366dd04240.type_nameExamplesPath.urigs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/.span0.splittrain[1] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExamplesPath' (uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/) at 0x7f366dd044e0.type_nameExamplesPath.urigs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/.span0.spliteval

0,1
.type_name,ExamplesPath
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExamplesPath' (uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/) at 0x7f366dd04240.type_nameExamplesPath.urigs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/.span0.splittrain[1] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExamplesPath' (uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/) at 0x7f366dd044e0.type_nameExamplesPath.urigs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/.span0.spliteval

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExamplesPath' (uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/) at 0x7f366dd04240.type_nameExamplesPath.urigs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/.span0.splittrain
[1],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExamplesPath' (uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/) at 0x7f366dd044e0.type_nameExamplesPath.urigs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/.span0.spliteval

0,1
.type_name,ExamplesPath
.uri,gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/
.span,0
.split,train

0,1
.type_name,ExamplesPath
.uri,gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/
.span,0
.split,eval

0,1
['input_config'],"{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }"
['output_config'],"{  ""splitConfig"": {  ""splits"": [  {  ""hashBuckets"": 2,  ""name"": ""train""  },  {  ""hashBuckets"": 1,  ""name"": ""eval""  }  ]  } }"
['custom_config'],

0,1
['input_base'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExternalPath' (1 artifact) at 0x7f366dd044a8.type_nameExternalPath._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExternalPath' (uri: /home/jarekk/datasets/) at 0x7f366dc1d080.type_nameExternalPath.uri/home/jarekk/datasets/.span0.split

0,1
.type_name,ExternalPath
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExternalPath' (uri: /home/jarekk/datasets/) at 0x7f366dc1d080.type_nameExternalPath.uri/home/jarekk/datasets/.span0.split

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExternalPath' (uri: /home/jarekk/datasets/) at 0x7f366dc1d080.type_nameExternalPath.uri/home/jarekk/datasets/.span0.split

0,1
.type_name,ExternalPath
.uri,/home/jarekk/datasets/
.span,0
.split,

0,1
['examples'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExamplesPath' (2 artifacts) at 0x7f366dd042b0.type_nameExamplesPath._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExamplesPath' (uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/) at 0x7f366dd04240.type_nameExamplesPath.urigs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/.span0.splittrain[1] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExamplesPath' (uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/) at 0x7f366dd044e0.type_nameExamplesPath.urigs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/.span0.spliteval

0,1
.type_name,ExamplesPath
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExamplesPath' (uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/) at 0x7f366dd04240.type_nameExamplesPath.urigs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/.span0.splittrain[1] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExamplesPath' (uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/) at 0x7f366dd044e0.type_nameExamplesPath.urigs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/.span0.spliteval

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExamplesPath' (uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/) at 0x7f366dd04240.type_nameExamplesPath.urigs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/.span0.splittrain
[1],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExamplesPath' (uri: gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/) at 0x7f366dd044e0.type_nameExamplesPath.urigs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/.span0.spliteval

0,1
.type_name,ExamplesPath
.uri,gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/train/
.span,0
.split,train

0,1
.type_name,ExamplesPath
.uri,gs://caip-tfx/interactive/chicago_taxi/CsvExampleGen/examples/55/eval/
.span,0
.split,eval


### StatisticsGen (using TensorFlow Data Validation)
`StatisticsGen` computes statistics for visualization and example validation. This uses the [TensorFlow Data Validation](https://www.tensorflow.org/tfx/data_validation/get_started) library.

#### Run TFDV statistics computation using the StatisticsGen component

In [None]:
# Build statistics over the examples produced by example_gen; the results feed
# the visualization, schema inference and validation cells that follow.
statistics_gen = StatisticsGen(input_data=example_gen.outputs['examples'])
context.run(statistics_gen)

#### Import TFDV and visualize the statistics result

In [None]:
# Import TFDV and get the train statistics path.
import tensorflow_data_validation as tfdv
from tfx.types.artifact_utils import get_split_uri
# Artifacts available on the StatisticsGen 'output' channel.
artifact_list = statistics_gen.outputs['output'].get()
# URI of the artifact that belongs to the 'train' split.
train_artifact_uri = get_split_uri(artifact_list, 'train')
# The statistics file is looked up under the name 'stats_tfrecord' inside
# that URI.
train_stats_path = os.path.join(train_artifact_uri, 'stats_tfrecord')

In [None]:
# Load statistics and visualize training data stats.
# `train_stats_path` and the `tfdv` import come from the previous cell, so
# that cell has to be executed first.
train_stats = tfdv.load_statistics(train_stats_path)
tfdv.visualize_statistics(train_stats)

### SchemaGen (using TensorFlow Data Validation)
`SchemaGen` generates a schema for your data based on computed statistics. This component also uses the [TensorFlow Data Validation](https://www.tensorflow.org/tfx/data_validation/get_started) library.

#### Run TFDV schema inference using the SchemaGen component

In [None]:
# Infer a data schema from the statistics emitted by statistics_gen.
# Feature-shape inference is explicitly switched off.
infer_schema = SchemaGen(
    stats=statistics_gen.outputs['output'], infer_feature_shape=False
)
context.run(infer_schema)

#### Visualize the inferred schema

In [None]:
# Get the schema path.
# Uses the URI of the first artifact on the SchemaGen 'output' channel and
# looks for a file named 'schema.pbtxt' inside it.
schema_dir = infer_schema.outputs['output'].get()[0].uri
schema_path = os.path.join(schema_dir, 'schema.pbtxt')

In [None]:
# Load and visualize the generated schema.
# `schema_path` is set by the previous cell; `tfdv` is imported in the
# statistics visualization cell earlier in the notebook.
schema = tfdv.load_schema_text(schema_path)
tfdv.display_schema(schema)

### ExampleValidator
`ExampleValidator` performs anomaly detection based on computed statistics and your data schema.

#### Run TFDV data validation using the ExampleValidator component

In [None]:
# Check the computed statistics against the inferred schema to detect
# anomalies in the data.
validate_stats = ExampleValidator(
    stats=statistics_gen.outputs['output'],
    schema=infer_schema.outputs['output'],
)
context.run(validate_stats)

#### Visualize the detected anomalies

In [None]:
# Get the validation path.
# Uses the URI of the first artifact on the ExampleValidator 'output' channel
# and looks for a file named 'anomalies.pbtxt' inside it.
validation_dir = validate_stats.outputs['output'].get()[0].uri
anomalies_path = os.path.join(validation_dir, 'anomalies.pbtxt')

In [None]:
# Load and visualize the anomalies.
# `anomalies_path` is set by the previous cell; `tfdv` is imported in the
# statistics visualization cell earlier in the notebook.
anomalies = tfdv.load_anomalies_text(anomalies_path)
tfdv.display_anomalies(anomalies)

### Transform
`Transform` performs data transformations and feature engineering which are kept in sync for training and serving.

#### Run the Transform component

In [None]:
# Run the user-defined transformations from the taxi module file over the
# generated examples, using the inferred schema.
transform = Transform(
    input_data=example_gen.outputs['examples'],
    schema=infer_schema.outputs['output'],
    module_file=_taxi_module_file,
)
context.run(transform)

### Trainer
`Trainer` trains your custom model using TF-Learn.

In [None]:
# Train a model with the user-provided code in the taxi module file, consuming
# the outputs of the Transform component and the inferred schema.
# Training runs for 10000 steps and evaluation for 5000 steps.
trainer = Trainer(
    module_file=_taxi_module_file,
    transformed_examples=transform.outputs['transformed_examples'],
    schema=infer_schema.outputs['output'],
    transform_output=transform.outputs['transform_output'],
    train_args=trainer_pb2.TrainArgs(num_steps=10000),
    eval_args=trainer_pb2.EvalArgs(num_steps=5000),
)
context.run(trainer)

### Evaluator (using TensorFlow Model Analysis)
The `Evaluator` computes evaluation statistics over features of your model using [TensorFlow Model Analysis](https://www.tensorflow.org/tfx/model_analysis/get_started). In this section, we run TFMA in our TFX pipeline and then visualize the results to analyze the performance of our model.

#### Run TFMA using the Evaluator component

Here, we first define slicing specs for analyzing our data. Next, we run TFMA using these specs to generate results.

In [None]:
# Slicing specs handed to the Evaluator in the next cell.

# No slicing columns: the overall slice, i.e. the whole dataset.
OVERALL_SLICE_SPEC = evaluator_pb2.SingleSlicingSpec()

# A single slicing column: data is sliced along trip_start_hour.
FEATURE_COLUMN_SLICE_SPEC = evaluator_pb2.SingleSlicingSpec(
    column_for_slicing=['trip_start_hour'],
)

# Two slicing columns: slices are computed for the cross
# trip_start_day x trip_start_month.
FEATURE_COLUMN_CROSS_SPEC = evaluator_pb2.SingleSlicingSpec(
    column_for_slicing=['trip_start_day', 'trip_start_month'],
)

# Every spec above, in the order they are passed to the Evaluator.
ALL_SPECS = [
    OVERALL_SLICE_SPEC,
    FEATURE_COLUMN_SLICE_SPEC,
    FEATURE_COLUMN_CROSS_SPEC,
]

In [None]:
# Use TFMA to compute evaluation statistics for the trained model over the
# generated examples, once per slicing spec in ALL_SPECS.
model_analyzer = Evaluator(
    examples=example_gen.outputs['examples'],
    model_exports=trainer.outputs['output'],
    feature_slicing_spec=evaluator_pb2.FeatureSlicingSpec(specs=ALL_SPECS),
)
context.run(model_analyzer)

#### Get the TFMA output result path

In [None]:
# URI of the first artifact on the Evaluator 'output' channel; this is the
# location TFMA loads the evaluation result from in the next cell.
PATH_TO_RESULT = model_analyzer.outputs['output'].get()[0].uri

#### Import TFMA and load the result

In [None]:
import tensorflow_model_analysis as tfma
# Load the evaluation result from PATH_TO_RESULT (set in the previous cell);
# the rendering cells below all read `tfma_result`.
tfma_result = tfma.load_eval_result(PATH_TO_RESULT)

#### Visualization: Slicing Metrics

To see the slices, either use the name of the column (by setting `slicing_column`) or provide a `tfma.slicer.SingleSliceSpec` (by setting `slicing_spec`). If neither is provided, an overall visualization will be displayed.

The default visualization is the **slice overview** when the number of slices is small. It shows the value of a metric for each slice, sorted by another metric. It is also possible to set a threshold to filter out slices with smaller weights.

This view also supports the **metrics histogram** as an alternative visualization. It is also the default view when the number of slices is large. The results will be divided into buckets and the number of slices / total weights / both can be visualized. Slices with small weights can be filtered out by setting the threshold. Further filtering can be applied by dragging the grey band. To reset the range, double click the band. Filtering can be used to remove outliers in the visualization and the metrics table below.

In [None]:
# Render the metrics with one slice per value of trip_start_hour, selecting
# the slices by column name.
tfma.view.render_slicing_metrics(
    tfma_result,
    slicing_column='trip_start_hour',
)

In [None]:
# Render the metrics for the cross 'trip_start_day' x 'trip_start_month',
# selecting the slices with an explicit SingleSliceSpec.
tfma.view.render_slicing_metrics(
    tfma_result,
    slicing_spec=tfma.slicer.SingleSliceSpec(
        columns=['trip_start_day', 'trip_start_month'],
    ),
)

In [None]:
# Show overall metrics.
# Neither slicing_column nor slicing_spec is passed, so no slicing is
# requested for this view.
tfma.view.render_slicing_metrics(tfma_result)

### ModelValidator
`ModelValidator` performs validation of your candidate model compared to a baseline.

In [None]:
# Validate the quality of the candidate model produced by the trainer
# (compared to a baseline), using the generated examples.
model_validator = ModelValidator(
    examples=example_gen.outputs['examples'],
    model=trainer.outputs['output'],
)
context.run(model_validator)

### Pusher
`Pusher` checks whether a model has passed validation, and if so, pushes the model to a file destination.

In [None]:
# Checks whether the model passed the validation steps and pushes the model
# to a file destination if check passed.
pusher = Pusher(
    model_export=trainer.outputs['output'],
    model_blessing=model_validator.outputs['blessing'],
    push_destination=pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=_serving_model_dir)))
context.run(pusher)

In [None]:
# Paths used by the pipeline components.
# NOTE(review): the Transform, Trainer and Pusher cells earlier in this
# notebook read _taxi_module_file / _serving_model_dir, so this cell has to be
# executed before them -- confirm the intended cell order.

# Chicago Taxi example directory inside the installed tfx package.
_taxi_root = os.path.join(tfx.__path__[0], 'examples/chicago_taxi_pipeline')

# Python module that injects customized logic into the TFX components; both
# Transform and Trainer need its user-defined functions to run.
_taxi_module_file = os.path.join(_taxi_root, 'taxi_utils.py')

# Pusher outputs the trained model here, below a freshly created temporary
# directory; a model server can listen to this path.
_serving_model_dir = os.path.join(
    tempfile.mkdtemp(), 'serving_model/taxi_simple')