In [1]:
import os
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'service_worker.json'

In [2]:

import pprint
import re
import tempfile
import urllib

import absl
import tensorflow as tf
import tensorflow_model_analysis as tfma
tf.get_logger().propagate = False
pp = pprint.PrettyPrinter()

import tfx
from tfx.components import CsvExampleGen, ImportExampleGen
# from tfx.extensions.google_cloud_big_query.example_gen.component import BigQueryExampleGen
from tfx.components import Evaluator
from tfx.components import ExampleValidator
from tfx.components import Pusher
from tfx.components import ResolverNode
from tfx.components import SchemaGen
from tfx.components import StatisticsGen
from tfx.components import Trainer
from tfx.components import Transform
from tfx.components.base import executor_spec
from tfx.components.trainer.executor import GenericExecutor
from tfx.dsl.experimental import latest_blessed_model_resolver
from tfx.extensions.google_cloud_ai_platform.trainer import executor as ai_platform_trainer_executor
from tfx.orchestration import metadata
from tfx.orchestration import pipeline
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
from tfx.proto import pusher_pb2
from tfx.proto import trainer_pb2
from tfx.types import Channel
from tfx.types.standard_artifacts import Model
from tfx.types.standard_artifacts import ModelBlessing
from tfx.utils.dsl_utils import external_input

from tfx_ca.bigquery_example_gen.component import BigQueryExampleGen
from tfx.orchestration import data_types
from tfx.utils import json_utils



In [3]:

# This is the root directory for your TFX pip package installation.
_tfx_root = '/tmp/tfxroot'

# This is the directory containing the TFX Chicago Taxi Pipeline example.
# _taxi_root = os.path.join(_tfx_root, 'examples/chicago_taxi_pipeline')

# This is the path where your model will be pushed for serving.
_serving_model_dir = os.path.join(
    tempfile.mkdtemp(), 'serving_model/taxi_simple')

# Set up logging.
absl.logging.set_verbosity(absl.logging.INFO)

_data_root = tempfile.mkdtemp(prefix='tfx-data')
# DATA_PATH = 'https://raw.githubusercontent.com/tensorflow/tfx/master/tfx/examples/chicago_taxi_pipeline/data/simple/data.csv'
# _data_filepath = os.path.join(_data_root, "data.csv")
# urllib.request.urlretrieve(DATA_PATH, _data_filepath)

In [4]:
context = InteractiveContext(beam_pipeline_args=['--temp_location','gs://pjm_interactive_tmp','--project','dst-mlpipes'])




In [5]:
qry = """
with labeled as (
    select deviceid, 
    devicetype, 
    case when content like '%meni%' or content like '%avw3%' then 1 else 0 end as label,
    content
  from tfx_ca.visitdata
  limit 100000
), 
positives as (
  select deviceid,
    devicetype,
    label,
    content    
  from labeled
  where label = 1
  limit 100000
),
negatives as (
  select deviceid,
    devicetype,
    label,
    content
  from labeled
  where label = 0
  limit 900000
),

positives_ranked as (
  select label,
  content,
  (rank() over(order by rand()))*9+.5 as rnk
  from positives
),

negatives_ranked as (
  select label,
  content,
  (rank() over(order by rand())) as rnk
  from negatives
)

select label, content, rnk from 
(select label, content, rnk 
from positives_ranked) 

union all 

(select label, content, rnk 
from negatives_ranked)
order by rnk
"""

In [29]:
seed_runtime = data_types.RuntimeParameter(
    name='seed_pattern',
    default="'%meni%' or content like '%avw3%'",
    ptype=str
)



In [30]:
json_utils.dumps(seed_runtime)

'{"__class__": "RuntimeParameter", "__module__": "tfx.orchestration.data_types", "__tfx_object_type__": "jsonable", "default": "\'%meni%\' or content like \'%avw3%\'", "description": null, "name": "seed_pattern", "ptype": {"__class__": "str", "__module__": "builtins", "__tfx_object_type__": "class"}}'

In [None]:
components

In [8]:
from tfx_ca.bigquery_example_gen.proto import bigquery_seed_config_pb2

In [20]:
from tfx.proto import example_gen_pb2
from google.protobuf import any_pb2
from tfx_ca.bigquery_example_gen.proto import bigquery_example_gen_pb2

In [31]:
big_query_seed = bigquery_example_gen_pb2.BigQuerySeed()
big_query_seed.seed = json_utils.dumps(seed_runtime)

In [32]:
anymsg = any_pb2.Any()
anymsg.Pack(big_query_seed, 'bigqueryseed.dstillery.com')


In [33]:
foo = example_gen_pb2.CustomConfig(custom_config=anymsg)

In [34]:
foo

custom_config {
  type_url: "bigqueryseed.dstillery.com/tfx_ca.bigquery_example_gen.BigQuerySeed"
  value: "\n\250\002{\"__class__\": \"RuntimeParameter\", \"__module__\": \"tfx.orchestration.data_types\", \"__tfx_object_type__\": \"jsonable\", \"default\": \"\'%meni%\' or content like \'%avw3%\'\", \"description\": null, \"name\": \"seed_pattern\", \"ptype\": {\"__class__\": \"str\", \"__module__\": \"builtins\", \"__tfx_object_type__\": \"class\"}}"
}

In [35]:
json_utils.dumps(foo)

'{"__class__": "CustomConfig", "__module__": "tfx.proto.example_gen_pb2", "__proto_value__": "{\\n  \\"custom_config\\": {\\n    \\"@type\\": \\"bigqueryseed.dstillery.com/tfx_ca.bigquery_example_gen.BigQuerySeed\\",\\n    \\"seed\\": \\"{\\\\\\"__class__\\\\\\": \\\\\\"RuntimeParameter\\\\\\", \\\\\\"__module__\\\\\\": \\\\\\"tfx.orchestration.data_types\\\\\\", \\\\\\"__tfx_object_type__\\\\\\": \\\\\\"jsonable\\\\\\", \\\\\\"default\\\\\\": \\\\\\"\'%meni%\' or content like \'%avw3%\'\\\\\\", \\\\\\"description\\\\\\": null, \\\\\\"name\\\\\\": \\\\\\"seed_pattern\\\\\\", \\\\\\"ptype\\\\\\": {\\\\\\"__class__\\\\\\": \\\\\\"str\\\\\\", \\\\\\"__module__\\\\\\": \\\\\\"builtins\\\\\\", \\\\\\"__tfx_object_type__\\\\\\": \\\\\\"class\\\\\\"}}\\"\\n  }\\n}", "__tfx_object_type__": "proto"}'

In [36]:
# seed = bigquery_seed_config_pb2.BigquerySeedConfig(seed='foo')

example_gen = BigQueryExampleGen(query=qry, custom_config=foo)

In [37]:
context.run(example_gen)

INFO:absl:Running driver for BigQueryExampleGen
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Running executor for BigQueryExampleGen
INFO:absl:Attempting to infer TFX Python dependency for beam
INFO:absl:Copying all content from install dir /opt/conda/envs/tfxca/lib/python3.7/site-packages/tfx to temp dir /tmp/tmpox8kg4qt/build/tfx
INFO:absl:Generating a temp setup file at /tmp/tmpox8kg4qt/build/tfx/setup.py
INFO:absl:Creating temporary sdist package, logs available at /tmp/tmpox8kg4qt/build/tfx/setup.log
INFO:absl:Added --extra_package=/tmp/tmpox8kg4qt/build/tfx/dist/tfx_ephemeral-0.25.0.tar.gz to beam args
INFO:absl:Generating examples.


KeyError: 'SEEDS'

In [7]:
example_gen??

In [11]:
ser_com = json_utils.dumps(example_gen)
ser_com

'{"__class__": "BigQueryExampleGen", "__module__": "tfx_ca.bigquery_example_gen.component", "__tfx_object_type__": "jsonable", "_id": null, "_instance_name": null, "driver_class": {"__class__": "BaseDriver", "__module__": "tfx.dsl.components.base.base_driver", "__tfx_object_type__": "class"}, "executor_spec": {"__class__": "ExecutorClassSpec", "__module__": "tfx.dsl.components.base.executor_spec", "__tfx_object_type__": "jsonable", "executor_class": {"__class__": "Executor", "__module__": "tfx_ca.bigquery_example_gen.executor", "__tfx_object_type__": "class"}, "extra_flags": []}, "platform_config": null, "spec": {"__class__": "QueryBasedExampleGenSpec", "__module__": "tfx.types.standard_component_specs", "__tfx_object_type__": "jsonable", "exec_properties": {"custom_config": "{\\n  \\"custom_config\\": {\\n    \\"@type\\": \\"type.googleapis.com/tfx_ca.BigquerySeedConfig\\",\\n    \\"seed\\": \\"foo\\"\\n  }\\n}", "input_config": "{\\n  \\"splits\\": [\\n    {\\n      \\"name\\": \\"si

In [39]:
re.findall(data_types.RUNTIME_PARAMETER_PATTERN, ser_com)

['{\\\\\\"__class__\\\\\\": \\\\\\"RuntimeParameter\\\\\\", \\\\\\"__module__\\\\\\": \\\\\\"tfx.orchestration.data_types\\\\\\", \\\\\\"__tfx_object_type__\\\\\\": \\\\\\"jsonable\\\\\\", \\\\\\"default\\\\\\": \\\\\\"\'%meni%\' or content like \'%avw3%\'\\\\\\", \\\\\\"description\\\\\\": null, \\\\\\"name\\\\\\": \\\\\\"seed_pattern\\\\\\", \\\\\\"ptype\\\\\\": {\\\\\\"__class__\\\\\\": \\\\\\"str\\\\\\", \\\\\\"__module__\\\\\\": \\\\\\"builtins\\\\\\", \\\\\\"__tfx_object_type__\\\\\\": \\\\\\"class\\\\\\"}']

In [7]:
dat = tf.data.TFRecordDataset('/tmp/tfx-interactive-2020-11-30T20_42_38.873175-cdu8axzq/BigQueryExampleGen/examples/1/train/data_tfrecord-00000-of-00001.gz',compression_type='GZIP')

In [8]:
for i in dat.take(3):
    print(i.numpy())

b'\n6\n\x0f\n\x03rnk\x12\x08\x12\x06\n\x04\x00\x00\x00@\n\x0e\n\x05label\x12\x05\x1a\x03\n\x01\x00\n\x13\n\x07content\x12\x08\n\x06\n\x04zk2q'
b'\nG\n\x0e\n\x05label\x12\x05\x1a\x03\n\x01\x00\n$\n\x07content\x12\x19\n\x17\n\x15eoh 1k2 52s c06h 59w8\n\x0f\n\x03rnk\x12\x08\x12\x06\n\x04\x00\x00\x80@'
b'\nG\n\x0e\n\x05label\x12\x05\x1a\x03\n\x01\x00\n$\n\x07content\x12\x19\n\x17\n\x151n5bd 1mvri 1fdt8 c9z\n\x0f\n\x03rnk\x12\x08\x12\x06\n\x04\x00\x00\xa0@'


In [8]:
statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
context.run(statistics_gen)

INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Running driver for StatisticsGen
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Running executor for StatisticsGen
INFO:absl:Attempting to infer TFX Python dependency for beam
INFO:absl:Copying all content from install dir /opt/conda/envs/tfxca/lib/python3.7/site-packages/tfx to temp dir /tmp/tmpecks1o2n/build/tfx
INFO:absl:Generating a temp setup file at /tmp/tmpecks1o2n/build/tfx/setup.py
INFO:absl:Creating temporary sdist package, logs available at /tmp/tmpecks1o2n/build/tfx/setup.log
INFO:absl:Added --extra_package=/tmp/tmpecks1o2n/build/tfx/dist/tfx_ephemeral-0.25.0.tar.gz to beam args
INFO:absl:Generating statistics for split train.
INFO:absl:Statistics for split train written to /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2/train.
INFO:absl:Generating statistics for split eval.
INFO:absl:Statistics for split eval written to /tmp/tfx-interactive-2020-

0,1
.execution_id,2
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } StatisticsGen at 0x7ff87b2d6690.inputs['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7ff918303790.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1) at 0x7ff89a464bd0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0.outputs['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7ff88b6b7410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""].exec_properties['stats_options_json']None['exclude_splits'][]"
.component.inputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7ff918303790.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1) at 0x7ff89a464bd0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"
.component.outputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7ff88b6b7410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.inputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7ff918303790.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1) at 0x7ff89a464bd0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"
.outputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7ff88b6b7410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"
.exec_properties,['stats_options_json']None['exclude_splits'][]

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7ff918303790.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1) at 0x7ff89a464bd0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1) at 0x7ff89a464bd0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1) at 0x7ff89a464bd0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1
.span,0
.split_names,"[""train"", ""eval""]"
.version,0

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7ff88b6b7410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2
.span,0
.split_names,"[""train"", ""eval""]"

0,1
['stats_options_json'],
['exclude_splits'],[]

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7ff918303790.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1) at 0x7ff89a464bd0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1) at 0x7ff89a464bd0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1) at 0x7ff89a464bd0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/BigQueryExampleGen/examples/1
.span,0
.split_names,"[""train"", ""eval""]"
.version,0

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7ff88b6b7410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2
.span,0
.split_names,"[""train"", ""eval""]"


In [10]:
dat = tf.data.TFRecordDataset('/tmp/tfx-interactive-2020-11-30T20_42_38.873175-cdu8axzq/StatisticsGen/statistics/2/train/stats_tfrecord')
for i in dat:
    print(i.numpy())

10\x8c\x06"\x032yw)\x00\x00\x00\x00\x00\x004@\n\x1c\x08\x8d\x06\x10\x8d\x06"\x0b2pa 14l 5e6)\x00\x00\x00\x00\x00\x004@\n\x16\x08\x8e\x06\x10\x8e\x06"\x051qgi6)\x00\x00\x00\x00\x00\x004@\n\x16\x08\x8f\x06\x10\x8f\x06"\x051q8i2)\x00\x00\x00\x00\x00\x004@\n\x16\x08\x90\x06\x10\x90\x06"\x051oneq)\x00\x00\x00\x00\x00\x004@\n\x16\x08\x91\x06\x10\x91\x06"\x051nlmc)\x00\x00\x00\x00\x00\x004@\n\x16\x08\x92\x06\x10\x92\x06"\x051n6n8)\x00\x00\x00\x00\x00\x004@\n \x08\x93\x06\x10\x93\x06"\x0f1m8jc d7x 1ortx)\x00\x00\x00\x00\x00\x004@\n\x16\x08\x94\x06\x10\x94\x06"\x051lwjy)\x00\x00\x00\x00\x00\x004@\n\x16\x08\x95\x06\x10\x95\x06"\x051livo)\x00\x00\x00\x00\x00\x004@\n\x16\x08\x96\x06\x10\x96\x06"\x051kt2c)\x00\x00\x00\x00\x00\x004@\n\x1c\x08\x97\x06\x10\x97\x06"\x0b1iycv 1nu4c)\x00\x00\x00\x00\x00\x004@\n\x16\x08\x98\x06\x10\x98\x06"\x051iez2)\x00\x00\x00\x00\x00\x004@\n\x16\x08\x99\x06\x10\x99\x06"\x051iaye)\x00\x00\x00\x00\x00\x004@\n\x16\x08\x9a\x06\x10\x9a\x06"\x051g714)\x00\x00\x00\x00\x00\x00

In [9]:
# Generates schema based on statistics files.
schema_gen = SchemaGen(
    statistics=statistics_gen.outputs['statistics'], infer_feature_shape=True)
context.run(schema_gen)


INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Running driver for SchemaGen
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Running executor for SchemaGen
INFO:absl:Attempting to infer TFX Python dependency for beam
INFO:absl:Copying all content from install dir /opt/conda/envs/tfxca/lib/python3.7/site-packages/tfx to temp dir /tmp/tmpk1a_2meb/build/tfx
INFO:absl:Generating a temp setup file at /tmp/tmpk1a_2meb/build/tfx/setup.py
INFO:absl:Creating temporary sdist package, logs available at /tmp/tmpk1a_2meb/build/tfx/setup.log
INFO:absl:Added --extra_package=/tmp/tmpk1a_2meb/build/tfx/dist/tfx_ephemeral-0.25.0.tar.gz to beam args
INFO:absl:Processing schema from statistics for split train.


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


INFO:absl:Processing schema from statistics for split eval.
INFO:absl:Schema written to /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3/schema.pbtxt.
INFO:absl:Running publisher for SchemaGen
INFO:absl:MetadataStore with DB connection initialized


0,1
.execution_id,3
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } SchemaGen at 0x7ff918303dd0.inputs['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7ff88b6b7410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""].outputs['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7ff8b7b10990.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3) at 0x7ff899fea610.type<class 'tfx.types.standard_artifacts.Schema'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3.exec_properties['infer_feature_shape']1['exclude_splits'][]"
.component.inputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7ff88b6b7410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"
.component.outputs,['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7ff8b7b10990.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3) at 0x7ff899fea610.type<class 'tfx.types.standard_artifacts.Schema'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3

0,1
.inputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7ff88b6b7410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"
.outputs,['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7ff8b7b10990.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3) at 0x7ff899fea610.type<class 'tfx.types.standard_artifacts.Schema'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3
.exec_properties,['infer_feature_shape']1['exclude_splits'][]

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7ff88b6b7410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2
.span,0
.split_names,"[""train"", ""eval""]"

0,1
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7ff8b7b10990.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3) at 0x7ff899fea610.type<class 'tfx.types.standard_artifacts.Schema'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3) at 0x7ff899fea610.type<class 'tfx.types.standard_artifacts.Schema'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3) at 0x7ff899fea610.type<class 'tfx.types.standard_artifacts.Schema'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3

0,1
['infer_feature_shape'],1
['exclude_splits'],[]

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7ff88b6b7410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2
.span,0
.split_names,"[""train"", ""eval""]"

0,1
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7ff8b7b10990.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3) at 0x7ff899fea610.type<class 'tfx.types.standard_artifacts.Schema'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3) at 0x7ff899fea610.type<class 'tfx.types.standard_artifacts.Schema'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3) at 0x7ff899fea610.type<class 'tfx.types.standard_artifacts.Schema'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3


In [10]:

  # Performs anomaly detection based on statistics and data schema.
example_validator = ExampleValidator(
    statistics=statistics_gen.outputs['statistics'],
    schema=schema_gen.outputs['schema'])
context.run(example_validator)

INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Running driver for ExampleValidator
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Running executor for ExampleValidator
INFO:absl:Attempting to infer TFX Python dependency for beam
INFO:absl:Copying all content from install dir /opt/conda/envs/tfxca/lib/python3.7/site-packages/tfx to temp dir /tmp/tmp1zmdmblz/build/tfx
INFO:absl:Generating a temp setup file at /tmp/tmp1zmdmblz/build/tfx/setup.py
INFO:absl:Creating temporary sdist package, logs available at /tmp/tmp1zmdmblz/build/tfx/setup.log
INFO:absl:Added --extra_package=/tmp/tmp1zmdmblz/build/tfx/dist/tfx_ephemeral-0.25.0.tar.gz to beam args
INFO:absl:Validating schema against the computed statistics for split train.
INFO:absl:Validation complete for split train. Anomalies written to /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4/train.
INFO:absl:Validating schema against the computed statistics for s

0,1
.execution_id,4
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } ExampleValidator at 0x7ff899fbad10.inputs['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7ff88b6b7410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7ff8b7b10990.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3) at 0x7ff899fea610.type<class 'tfx.types.standard_artifacts.Schema'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3.outputs['anomalies'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7ff899fba110.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4) at 0x7ff899584190.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4.span0.split_names[""train"", ""eval""].exec_properties['exclude_splits'][]"
.component.inputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7ff88b6b7410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7ff8b7b10990.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3) at 0x7ff899fea610.type<class 'tfx.types.standard_artifacts.Schema'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3"
.component.outputs,"['anomalies'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7ff899fba110.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4) at 0x7ff899584190.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4.span0.split_names[""train"", ""eval""]"

0,1
.inputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7ff88b6b7410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7ff8b7b10990.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3) at 0x7ff899fea610.type<class 'tfx.types.standard_artifacts.Schema'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3"
.outputs,"['anomalies'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7ff899fba110.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4) at 0x7ff899584190.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4.span0.split_names[""train"", ""eval""]"
.exec_properties,['exclude_splits'][]

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7ff88b6b7410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7ff8b7b10990.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3) at 0x7ff899fea610.type<class 'tfx.types.standard_artifacts.Schema'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2
.span,0
.split_names,"[""train"", ""eval""]"

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3) at 0x7ff899fea610.type<class 'tfx.types.standard_artifacts.Schema'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3) at 0x7ff899fea610.type<class 'tfx.types.standard_artifacts.Schema'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3

0,1
['anomalies'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7ff899fba110.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4) at 0x7ff899584190.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleAnomalies
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4) at 0x7ff899584190.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4) at 0x7ff899584190.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleAnomalies'>
.uri,/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4
.span,0
.split_names,"[""train"", ""eval""]"

0,1
['exclude_splits'],[]

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7ff88b6b7410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7ff8b7b10990.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3) at 0x7ff899fea610.type<class 'tfx.types.standard_artifacts.Schema'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2) at 0x7ff87d324390.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/StatisticsGen/statistics/2
.span,0
.split_names,"[""train"", ""eval""]"

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3) at 0x7ff899fea610.type<class 'tfx.types.standard_artifacts.Schema'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3) at 0x7ff899fea610.type<class 'tfx.types.standard_artifacts.Schema'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/SchemaGen/schema/3

0,1
['anomalies'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7ff899fba110.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4) at 0x7ff899584190.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleAnomalies
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4) at 0x7ff899584190.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4) at 0x7ff899584190.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleAnomalies'>
.uri,/tmp/tfx-interactive-2020-12-01T13_54_53.080833-chlt_nzy/ExampleValidator/anomalies/4
.span,0
.split_names,"[""train"", ""eval""]"


In [16]:

# module_file = 'tfxca_trainer.py'

# _ai_platform_training_args = {
#     'project': _project_id,
#     'region': _gcp_region,
#     # Override the default TFX image used for training with one with the correct
#     # scikit-learn version.
#     'masterConfig': {
#         'imageUri': _tfx_image,
#     },
# }

# trainer = Trainer(
#     module_file=module_file,
#     custom_executor_spec=executor_spec.ExecutorClassSpec(
#         ai_platform_trainer_executor.GenericExecutor),
#     examples=example_gen.outputs['examples'],
#     schema=schema_gen.outputs['schema'],
#     train_args=trainer_pb2.TrainArgs(num_steps=2000),
#     eval_args=trainer_pb2.EvalArgs(),
#     custom_config={
#         ai_platform_trainer_executor.TRAINING_ARGS_KEY:
#         ai_platform_training_args,
#     })


NameError: name 'ai_platform_training_args' is not defined

In [89]:
  module_file = 'tfxca_trainer.py'
  
  trainer = Trainer(
      module_file=module_file,
      custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
      examples=example_gen.outputs['examples'],
      schema=schema_gen.outputs['schema'],
      train_args=trainer_pb2.TrainArgs(num_steps=2000),
      eval_args=trainer_pb2.EvalArgs(),
      custom_config={'foo':seed})

In [90]:
json_utils.dumps(trainer)

'{"__class__": "Trainer", "__module__": "tfx.components.trainer.component", "__tfx_object_type__": "jsonable", "_id": null, "_instance_name": null, "driver_class": {"__class__": "BaseDriver", "__module__": "tfx.dsl.components.base.base_driver", "__tfx_object_type__": "class"}, "executor_spec": {"__class__": "_NewClass", "__module__": "/opt/conda/envs/tfxca/lib/python3.7/site-packages/tfx/components/base/executor_spec.py:34", "__tfx_object_type__": "jsonable", "executor_class": {"__class__": "GenericExecutor", "__module__": "tfx.components.trainer.executor", "__tfx_object_type__": "class"}, "extra_flags": []}, "platform_config": null, "spec": {"__class__": "TrainerSpec", "__module__": "tfx.types.standard_component_specs", "__tfx_object_type__": "jsonable", "exec_properties": {"custom_config": "{\\"foo\\": \\"{\\\\\\"__class__\\\\\\": \\\\\\"RuntimeParameter\\\\\\", \\\\\\"__module__\\\\\\": \\\\\\"tfx.orchestration.data_types\\\\\\", \\\\\\"__tfx_object_type__\\\\\\": \\\\\\"jsonable\\\

In [16]:
context.run(trainer)

INFO:absl:Running driver for Trainer
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Running executor for Trainer
INFO:absl:Attempting to infer TFX Python dependency for beam
INFO:absl:Copying all content from install dir /opt/conda/envs/tfxca/lib/python3.7/site-packages/tfx to temp dir /tmp/tmpggq25inc/build/tfx
INFO:absl:Generating a temp setup file at /tmp/tmpggq25inc/build/tfx/setup.py
INFO:absl:Creating temporary sdist package, logs available at /tmp/tmpggq25inc/build/tfx/setup.log
INFO:absl:Added --extra_package=/tmp/tmpggq25inc/build/tfx/dist/tfx_ephemeral-0.25.0.tar.gz to beam args
INFO:absl:Train on the 'train' split when train_args.splits is not set.
INFO:absl:Evaluate on the 'eval' split when eval_args.splits is not set.
INFO:absl:Training model.


(666118, 1)
[[b'14l 1ht6b']
 [b'1qskd 1rivf']]
-- Epoch 1
Norm: 28.98, NNZs: 335, Bias: -12.886878, T: 666118, Avg. loss: 0.116812
Total training time: 0.62 seconds.
-- Epoch 2
Norm: 11.23, NNZs: 45, Bias: -7.262772, T: 1332236, Avg. loss: 0.001804
Total training time: 1.23 seconds.
-- Epoch 3
Norm: 11.46, NNZs: 41, Bias: -7.268809, T: 1998354, Avg. loss: 0.002055
Total training time: 1.82 seconds.
-- Epoch 4
Norm: 11.90, NNZs: 41, Bias: -7.272602, T: 2664472, Avg. loss: 0.002040
Total training time: 2.41 seconds.
-- Epoch 5
Norm: 11.32, NNZs: 41, Bias: -7.264590, T: 3330590, Avg. loss: 0.002035
Total training time: 2.98 seconds.
-- Epoch 6
Norm: 12.40, NNZs: 35, Bias: -7.260812, T: 3996708, Avg. loss: 0.002039
Total training time: 3.55 seconds.
-- Epoch 7
Norm: 36.70, NNZs: 26, Bias: -7.286533, T: 4662826, Avg. loss: 0.002057
Total training time: 4.12 seconds.
-- Epoch 8
Norm: 10.98, NNZs: 49, Bias: -7.249947, T: 5328944, Avg. loss: 0.001980
Total training time: 4.71 seconds.
-- Epoch

AttributeError: 'numpy.ndarray' object has no attribute 'lower'

In [17]:
pusher = Pusher(
  model=trainer.outputs['model'],
  push_destination=pusher_pb2.PushDestination(
      filesystem=pusher_pb2.PushDestination.Filesystem(
          base_directory=serving_model_dir)))

NameError: name 'serving_model_dir' is not defined

In [18]:
dir(tfmd.proto.v0)

['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'anomalies_pb2',
 'path_pb2',
 'schema_pb2',
 'statistics_pb2']

In [22]:
from tfx.utils import json_utils
from tfx.orchestration import data_types

In [23]:
data_types.RUNTIME_PARAMETER_PATTERN

'({\\\\*"__class__\\\\*": \\\\*"RuntimeParameter\\\\*", .*?})'

In [25]:
! pwd

/home/pmccarthy/projects/tfxca


In [26]:
from tfx_ca.components import RuntimeBigQueryExampleGen

In [43]:
from tfx.orchestration import data_types

In [45]:
seed = data_types.RuntimeParameter(
    name='seed_pattern',
    default="'%meni%' or content like '%avw3%'",
    ptype=str
)



In [63]:
print(seed)

{"__class__": "RuntimeParameter", "__module__": "tfx.orchestration.data_types", "__tfx_object_type__": "jsonable", "default": "'%meni%' or content like '%avw3%'", "description": null, "name": "seed_pattern", "ptype": {"__class__": "str", "__module__": "builtins", "__tfx_object_type__": "class"}}


In [50]:
from tfx_ca import components

In [75]:
importlib.reload(components)

<module 'tfx_ca.components' from '/home/pmccarthy/projects/tfxca/tfx_ca/components.py'>

In [76]:
r_example_gen = components.RuntimeBigQueryExampleGen(input_config={'seed':seed,'query':qry})

KeyError: 'splits'

In [91]:
ser_com = json_utils.dumps(trainer)
ser_com

'{"__class__": "Trainer", "__module__": "tfx.components.trainer.component", "__tfx_object_type__": "jsonable", "_id": null, "_instance_name": null, "driver_class": {"__class__": "BaseDriver", "__module__": "tfx.dsl.components.base.base_driver", "__tfx_object_type__": "class"}, "executor_spec": {"__class__": "_NewClass", "__module__": "/opt/conda/envs/tfxca/lib/python3.7/site-packages/tfx/components/base/executor_spec.py:34", "__tfx_object_type__": "jsonable", "executor_class": {"__class__": "GenericExecutor", "__module__": "tfx.components.trainer.executor", "__tfx_object_type__": "class"}, "extra_flags": []}, "platform_config": null, "spec": {"__class__": "TrainerSpec", "__module__": "tfx.types.standard_component_specs", "__tfx_object_type__": "jsonable", "exec_properties": {"custom_config": "{\\"foo\\": \\"{\\\\\\"__class__\\\\\\": \\\\\\"RuntimeParameter\\\\\\", \\\\\\"__module__\\\\\\": \\\\\\"tfx.orchestration.data_types\\\\\\", \\\\\\"__tfx_object_type__\\\\\\": \\\\\\"jsonable\\\

In [30]:
import re

In [92]:
re.findall(data_types.RUNTIME_PARAMETER_PATTERN, ser_com)

['{\\\\\\"__class__\\\\\\": \\\\\\"RuntimeParameter\\\\\\", \\\\\\"__module__\\\\\\": \\\\\\"tfx.orchestration.data_types\\\\\\", \\\\\\"__tfx_object_type__\\\\\\": \\\\\\"jsonable\\\\\\", \\\\\\"default\\\\\\": \\\\\\"\'%meni%\' or content like \'%avw3%\'\\\\\\", \\\\\\"description\\\\\\": null, \\\\\\"name\\\\\\": \\\\\\"seed_pattern\\\\\\", \\\\\\"ptype\\\\\\": {\\\\\\"__class__\\\\\\": \\\\\\"str\\\\\\", \\\\\\"__module__\\\\\\": \\\\\\"builtins\\\\\\", \\\\\\"__tfx_object_type__\\\\\\": \\\\\\"class\\\\\\"}']

In [42]:
data_types.RUNTIME_PARAMETER_PATTERN

'({\\\\*"__class__\\\\*": \\\\*"RuntimeParameter\\\\*", .*?})'

In [32]:
import importlib

In [78]:
bqeg = BigQueryExampleGen(query=qry)

In [82]:
dir(bqeg)

['DRIVER_CLASS',
 'EXECUTOR_SPEC',
 'SPEC_CLASS',
 '__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_downstream_nodes',
 '_id',
 '_instance_name',
 '_upstream_nodes',
 '_validate_component_class',
 '_validate_spec',
 'add_downstream_node',
 'add_upstream_node',
 'component_id',
 'component_type',
 'downstream_nodes',
 'driver_class',
 'exec_properties',
 'executor_spec',
 'from_json_dict',
 'get_id',
 'id',
 'inputs',
 'outputs',
 'platform_config',
 'spec',
 'to_json_dict',
 'type',
 'upstream_nodes',
 'with_id',
 'with_platform_config']

In [85]:
bqeg.exec_properties.keys()

dict_keys(['input_config', 'output_config', 'custom_config'])

In [81]:
print(json_utils.dumps(bqeg).replace(',',',\n'))

{"__class__": "BigQueryExampleGen",
 "__module__": "tfx.extensions.google_cloud_big_query.example_gen.component",
 "__tfx_object_type__": "jsonable",
 "_id": null,
 "_instance_name": null,
 "driver_class": {"__class__": "BaseDriver",
 "__module__": "tfx.dsl.components.base.base_driver",
 "__tfx_object_type__": "class"},
 "executor_spec": {"__class__": "ExecutorClassSpec",
 "__module__": "tfx.dsl.components.base.executor_spec",
 "__tfx_object_type__": "jsonable",
 "executor_class": {"__class__": "Executor",
 "__module__": "tfx.extensions.google_cloud_big_query.example_gen.executor",
 "__tfx_object_type__": "class"},
 "extra_flags": []},
 "platform_config": null,
 "spec": {"__class__": "QueryBasedExampleGenSpec",
 "__module__": "tfx.types.standard_component_specs",
 "__tfx_object_type__": "jsonable",
 "exec_properties": {"custom_config": null,
 "input_config": "{\n  \"splits\": [\n    {\n      \"name\": \"single_split\",
\n      \"pattern\": \"\\nwith labeled as (\\n    select deviceid,


In [86]:
json_utils.dumps(trainer)

'{"__class__": "Trainer", "__module__": "tfx.components.trainer.component", "__tfx_object_type__": "jsonable", "_id": null, "_instance_name": null, "driver_class": {"__class__": "BaseDriver", "__module__": "tfx.dsl.components.base.base_driver", "__tfx_object_type__": "class"}, "executor_spec": {"__class__": "_NewClass", "__module__": "/opt/conda/envs/tfxca/lib/python3.7/site-packages/tfx/components/base/executor_spec.py:34", "__tfx_object_type__": "jsonable", "executor_class": {"__class__": "GenericExecutor", "__module__": "tfx.components.trainer.executor", "__tfx_object_type__": "class"}, "extra_flags": []}, "platform_config": null, "spec": {"__class__": "TrainerSpec", "__module__": "tfx.types.standard_component_specs", "__tfx_object_type__": "jsonable", "exec_properties": {"custom_config": "null", "eval_args": "{}", "module_file": "tfxca_trainer.py", "run_fn": null, "train_args": "{\\n  \\"num_steps\\": 2000\\n}", "trainer_fn": null}, "inputs": {"__class__": "_PropertyDictWrapper", "

In [68]:
print(ser_com.replace(',','\n'))

{"__class__": "RuntimeBigQueryExampleGen"
 "__module__": "tfx_ca.components"
 "__tfx_object_type__": "jsonable"
 "_id": null
 "_instance_name": null
 "driver_class": {"__class__": "BaseDriver"
 "__module__": "tfx.dsl.components.base.base_driver"
 "__tfx_object_type__": "class"}
 "executor_spec": {"__class__": "ExecutorClassSpec"
 "__module__": "tfx.dsl.components.base.executor_spec"
 "__tfx_object_type__": "jsonable"
 "executor_class": {"__class__": "Executor"
 "__module__": "tfx.extensions.google_cloud_big_query.example_gen.executor"
 "__tfx_object_type__": "class"}
 "extra_flags": []}
 "platform_config": null
 "spec": {"__class__": "QueryBasedExampleGenSpec"
 "__module__": "tfx.types.standard_component_specs"
 "__tfx_object_type__": "jsonable"
 "exec_properties": {"custom_config": null
 "input_config": "{\n  \"splits\": [\n    {\n      \"name\": \"single_split\"
\n      \"pattern\": \"\\nwith labeled as (\\n    select deviceid
 \\n    devicetype
 \\n    case when content like '%meni%