In [1]:
!pip install tfx



In [2]:
!pip install keras-tuner==1.0.4

Collecting keras-tuner==1.0.4
  Using cached keras_tuner-1.0.4-py3-none-any.whl (97 kB)
Installing collected packages: keras-tuner
  Attempting uninstall: keras-tuner
    Found existing installation: keras-tuner 1.4.4
    Uninstalling keras-tuner-1.4.4:
      Successfully uninstalled keras-tuner-1.4.4
Successfully installed keras-tuner-1.0.4


# Feature Engineering  Pipeline
## Exploring with Tensorflow Transform

In [3]:
import tensorflow as tf
from tfx import v1 as tfx
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
from google.protobuf.json_format import MessageToDict
import os
import pprint
pp = pprint.PrettyPrinter()

In [4]:
# Location of the pipeline metadata store
_pipeline_root = "/content/pipeline/"

# Directory of the raw data files
_data_root = '/content/data/cenus_data/'

# Path to the raw training data
_data_filepath = os.path.join(_data_root,'adult.data')

In [5]:
# first few rows of the csv file
!head {_data_filepath}

39, State-gov, 77516, Bachelors, 13, Never-married, Adm-clerical, Not-in-family, White, Male, 2174, 0, 40, United-States, <=50K
50, Self-emp-not-inc, 83311, Bachelors, 13, Married-civ-spouse, Exec-managerial, Husband, White, Male, 0, 0, 13, United-States, <=50K
38, Private, 215646, HS-grad, 9, Divorced, Handlers-cleaners, Not-in-family, White, Male, 0, 0, 40, United-States, <=50K
53, Private, 234721, 11th, 7, Married-civ-spouse, Handlers-cleaners, Husband, Black, Male, 0, 0, 40, United-States, <=50K
28, Private, 338409, Bachelors, 13, Married-civ-spouse, Prof-specialty, Wife, Black, Female, 0, 0, 40, Cuba, <=50K
37, Private, 284582, Masters, 14, Married-civ-spouse, Exec-managerial, Wife, White, Female, 0, 0, 40, United-States, <=50K
49, Private, 160187, 9th, 5, Married-spouse-absent, Other-service, Not-in-family, Black, Female, 0, 0, 16, Jamaica, <=50K
52, Self-emp-not-inc, 209642, HS-grad, 9, Married-civ-spouse, Exec-managerial, Husband, White, Male, 0, 0, 45, United-States, >50K
31, 

# Create The Interactive Context
### Initialize InteractiveContext this will create a database or get the state of the component executions.

In [6]:
# Initialize the InteractiveContext with a local sqlite file
# If didn't create any specify folder it will automatically create a temporary directory

context = InteractiveContext(pipeline_root= _pipeline_root)



# Run TFX Components interactively
#ExampleGen
### It fetches the data from different sources prepares the data for training the model.
### Example like : DataIngestion , DataValidation , DataSpilting ,Data Conversion , versioning etc..

In [7]:
# Instantiate ExampleGen with the input csv dataset

example_gen = tfx.components.CsvExampleGen(input_base = _data_root)

# Execute the component
context.run(example_gen)



ValueError: ignored

### The output of the components are called artifacts.
### There will be a number in the uri because it is the exceution id associated with the dataset
### Split_names : Splitting of dataset into 'train','eval' and 'test.
### uri : Uniform Resouces Identification

In [16]:
# Get the artifact oject
artifact = example_gen.outputs['examples'].get()[0]

# Split the names and uri
print(f"split names : {artifact.split_names}")
print(f"artifact uri : {artifact.uri}")

split names : ["train", "eval"]
artifact uri : /content/pipeline/CsvExampleGen/examples/3


### The ingested data is stored in the directory shown in the uri field. It is also compressed using gzip.

In [17]:
# Get the URI of the output artifact representing the training examples
train_uri = os.path.join(artifact.uri,"Split-train")

# See the contents of the train folder
!ls {train_uri}

beam-temp-data_tfrecord-e6c60a1e61ce11ee87fc0242ac1c000c


### The data Collection is saved in TFRecord Format,you will need to use methods that work with that data type. We need to unpack the individuals examples from the TFRecord file and format it for printing

In [18]:
# Get the list of the files in the directory (all compressed TFRecord)

tfrecord_filenames = [os.path.join(train_uri,name) for name in os.listdir(train_uri)]
dataset = tf.data.TFRecordDataset(tfrecord_filenames,compression_type = "GZIP")

### Get the individual examples from the extracted dataset

In [19]:
# Define a helper function to get individual examples
def get_records(dataset,num_records):
  # Initialize an empty list
  records = []
  # Use the take() method to specify how many records to get
  for tfrecord in dataset.take(num_records):
    # Get the numpy property of the tensor
    serialized_example = tfrecord.numpy()
    # Initialize a tf.train.Example() to serialized data
    example = tf.train.Example()
    # Read the example data (output is a protocol buffer message )
    example.ParseFromString(serialized_example)
    # Convert the protocol buffer message to a python Dictionary
    example_dict = (MessageToDict(example))
    # Append to the records list
    records.append(example_dict)
  return records


In [20]:
# Get 3 records from the dataset
sample_records = get_records(dataset,3)
# Print the output
pp.pprint(sample_records)

FailedPreconditionError: ignored

# StatisticGen
### computes statistics over your dataset for data analysis as well as for use in downstream components

### Takes as input the dataset we just ingested using CsvExampleGen
### This file include statistics like : feature statistics,feature type,example count,missing value , unique value,value counts


In [35]:
# Initiate StatisticsGen with the ExampleGen ingested dataset

statistics_gen = tfx.components.StatisticsGen(
    examples = example_gen.outputs['examples']
)

# Execute the component
context.run(statistics_gen)

0,1
.execution_id,6
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } StatisticsGen at 0x7a1a0cd73fd0.inputs['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7a1a0df5d570.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/pipeline/CsvExampleGen/examples/5) at 0x7a1a0df5f460.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/pipeline/CsvExampleGen/examples/5.span0.split_names[""train"", ""eval""].version0.outputs['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7a1a0cd72e00.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""].exec_properties['stats_options_json']None['exclude_splits'][]"
.component.inputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7a1a0df5d570.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/pipeline/CsvExampleGen/examples/5) at 0x7a1a0df5f460.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/pipeline/CsvExampleGen/examples/5.span0.split_names[""train"", ""eval""].version0"
.component.outputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7a1a0cd72e00.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"

0,1
.inputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7a1a0df5d570.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/pipeline/CsvExampleGen/examples/5) at 0x7a1a0df5f460.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/pipeline/CsvExampleGen/examples/5.span0.split_names[""train"", ""eval""].version0"
.outputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7a1a0cd72e00.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"
.exec_properties,['stats_options_json']None['exclude_splits'][]

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7a1a0df5d570.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/pipeline/CsvExampleGen/examples/5) at 0x7a1a0df5f460.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/pipeline/CsvExampleGen/examples/5.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/pipeline/CsvExampleGen/examples/5) at 0x7a1a0df5f460.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/pipeline/CsvExampleGen/examples/5.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/pipeline/CsvExampleGen/examples/5) at 0x7a1a0df5f460.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/pipeline/CsvExampleGen/examples/5.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/content/pipeline/CsvExampleGen/examples/5
.span,0
.split_names,"[""train"", ""eval""]"
.version,0

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7a1a0cd72e00.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/content/pipeline/StatisticsGen/statistics/6
.span,0
.split_names,"[""train"", ""eval""]"

0,1
['stats_options_json'],
['exclude_splits'],[]

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7a1a0df5d570.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/pipeline/CsvExampleGen/examples/5) at 0x7a1a0df5f460.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/pipeline/CsvExampleGen/examples/5.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/pipeline/CsvExampleGen/examples/5) at 0x7a1a0df5f460.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/pipeline/CsvExampleGen/examples/5.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /content/pipeline/CsvExampleGen/examples/5) at 0x7a1a0df5f460.type<class 'tfx.types.standard_artifacts.Examples'>.uri/content/pipeline/CsvExampleGen/examples/5.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/content/pipeline/CsvExampleGen/examples/5
.span,0
.split_names,"[""train"", ""eval""]"
.version,0

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7a1a0cd72e00.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/content/pipeline/StatisticsGen/statistics/6
.span,0
.split_names,"[""train"", ""eval""]"


In [39]:
# Show the output statistics
context.show(statistics_gen.outputs['statistics'])

# SchemaGen
### uses TFDV to generate a schema based on your data statistics.
### take as input the statistics that we generated with StatisticsGen

In [40]:
# Initiate the schemagen with statistics ingested dataset

schema_gen = tfx.components.SchemaGen(
    statistics = statistics_gen.outputs['statistics']
)

# Run the component
context.run(schema_gen)

0,1
.execution_id,8
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } SchemaGen at 0x7a1a0cd72dd0.inputs['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7a1a0cd72e00.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""].outputs['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7a1a07239810.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/pipeline/SchemaGen/schema/7) at 0x7a1a07239ff0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/pipeline/SchemaGen/schema/7.exec_properties['infer_feature_shape']1['exclude_splits'][]"
.component.inputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7a1a0cd72e00.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"
.component.outputs,['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7a1a07239810.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/pipeline/SchemaGen/schema/7) at 0x7a1a07239ff0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/pipeline/SchemaGen/schema/7

0,1
.inputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7a1a0cd72e00.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"
.outputs,['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7a1a07239810.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/pipeline/SchemaGen/schema/7) at 0x7a1a07239ff0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/pipeline/SchemaGen/schema/7
.exec_properties,['infer_feature_shape']1['exclude_splits'][]

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7a1a0cd72e00.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/content/pipeline/StatisticsGen/statistics/6
.span,0
.split_names,"[""train"", ""eval""]"

0,1
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7a1a07239810.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/pipeline/SchemaGen/schema/7) at 0x7a1a07239ff0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/pipeline/SchemaGen/schema/7

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/pipeline/SchemaGen/schema/7) at 0x7a1a07239ff0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/pipeline/SchemaGen/schema/7

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/pipeline/SchemaGen/schema/7) at 0x7a1a07239ff0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/pipeline/SchemaGen/schema/7

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/content/pipeline/SchemaGen/schema/7

0,1
['infer_feature_shape'],1
['exclude_splits'],[]

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7a1a0cd72e00.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/content/pipeline/StatisticsGen/statistics/6
.span,0
.split_names,"[""train"", ""eval""]"

0,1
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7a1a07239810.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/pipeline/SchemaGen/schema/7) at 0x7a1a07239ff0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/pipeline/SchemaGen/schema/7

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/pipeline/SchemaGen/schema/7) at 0x7a1a07239ff0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/pipeline/SchemaGen/schema/7

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/pipeline/SchemaGen/schema/7) at 0x7a1a07239ff0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/pipeline/SchemaGen/schema/7

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/content/pipeline/SchemaGen/schema/7


In [41]:
# Visualize the schema
context.show(schema_gen.outputs['schema'])

Unnamed: 0_level_0,Type,Presence,Valency,Domain
Feature name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
' 0',INT,required,,-
' 13',INT,required,,-
' 2174',INT,required,,-
' 40',INT,required,,-
' 77516',INT,required,,-
' <=50K',STRING,required,,' <=50K'
' Adm-clerical',STRING,required,,' Adm-clerical'
' Bachelors',STRING,required,,' Bachelors'
' Male',STRING,required,,' Male'
' Never-married',STRING,required,,' Never-married'


Unnamed: 0_level_0,Values
Domain,Unnamed: 1_level_1
' <=50K',"' <=50K', ' >50K'"
' Adm-clerical',"' ?', ' Adm-clerical', ' Armed-Forces', ' Craft-repair', ' Exec-managerial', ' Farming-fishing', ' Handlers-cleaners', ' Machine-op-inspct', ' Other-service', ' Priv-house-serv', ' Prof-specialty', ' Protective-serv', ' Sales', ' Tech-support', ' Transport-moving'"
' Bachelors',"' 10th', ' 11th', ' 12th', ' 1st-4th', ' 5th-6th', ' 7th-8th', ' 9th', ' Assoc-acdm', ' Assoc-voc', ' Bachelors', ' Doctorate', ' HS-grad', ' Masters', ' Preschool', ' Prof-school', ' Some-college'"
' Male',"' Female', ' Male'"
' Never-married',"' Divorced', ' Married-AF-spouse', ' Married-civ-spouse', ' Married-spouse-absent', ' Never-married', ' Separated', ' Widowed'"
' Not-in-family',"' Husband', ' Not-in-family', ' Other-relative', ' Own-child', ' Unmarried', ' Wife'"
' State-gov',"' ?', ' Federal-gov', ' Local-gov', ' Never-worked', ' Private', ' Self-emp-inc', ' Self-emp-not-inc', ' State-gov', ' Without-pay'"
' United-States',"' ?', ' Cambodia', ' Canada', ' China', ' Columbia', ' Cuba', ' Dominican-Republic', ' Ecuador', ' El-Salvador', ' England', ' France', ' Germany', ' Greece', ' Guatemala', ' Haiti', ' Holand-Netherlands', ' Honduras', ' Hong', ' Hungary', ' India', ' Iran', ' Ireland', ' Italy', ' Jamaica', ' Japan', ' Laos', ' Mexico', ' Nicaragua', ' Outlying-US(Guam-USVI-etc)', ' Peru', ' Philippines', ' Poland', ' Portugal', ' Puerto-Rico', ' Scotland', ' South', ' Taiwan', ' Thailand', ' Trinadad&Tobago', ' United-States', ' Vietnam', ' Yugoslavia'"
' White',"' Amer-Indian-Eskimo', ' Asian-Pac-Islander', ' Black', ' Other', ' White'"


Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.
No charts were generated by quickchart


# ExampleValidator
### component detects anomalies in your data based on the generated schema from the previous step
### will take as input the statistics from StatisticsGen and the schema from SchemaGen.It compares the statistics from the evaluation split to the schema from the training split

In [42]:
# Initiate ExampleValidator with the statisticGen and SchemaGen ingested data

example_validator = tfx.components.ExampleValidator(
    statistics = statistics_gen.outputs['statistics'],
    schema = schema_gen.outputs['schema']
)

# Run the component
context.run(example_validator)

0,1
.execution_id,9
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } ExampleValidator at 0x7a1a0cd73850.inputs['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7a1a0cd72e00.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7a1a07239810.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/pipeline/SchemaGen/schema/7) at 0x7a1a07239ff0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/pipeline/SchemaGen/schema/7.outputs['anomalies'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7a1a07239fc0.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /content/pipeline/ExampleValidator/anomalies/9) at 0x7a1a0723ab30.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/content/pipeline/ExampleValidator/anomalies/9.span0.split_names[""train"", ""eval""].exec_properties['exclude_splits'][]['custom_validation_config']None"
.component.inputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7a1a0cd72e00.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7a1a07239810.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/pipeline/SchemaGen/schema/7) at 0x7a1a07239ff0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/pipeline/SchemaGen/schema/7"
.component.outputs,"['anomalies'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7a1a07239fc0.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /content/pipeline/ExampleValidator/anomalies/9) at 0x7a1a0723ab30.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/content/pipeline/ExampleValidator/anomalies/9.span0.split_names[""train"", ""eval""]"

0,1
.inputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7a1a0cd72e00.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7a1a07239810.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/pipeline/SchemaGen/schema/7) at 0x7a1a07239ff0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/pipeline/SchemaGen/schema/7"
.outputs,"['anomalies'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7a1a07239fc0.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /content/pipeline/ExampleValidator/anomalies/9) at 0x7a1a0723ab30.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/content/pipeline/ExampleValidator/anomalies/9.span0.split_names[""train"", ""eval""]"
.exec_properties,['exclude_splits'][]['custom_validation_config']None

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7a1a0cd72e00.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7a1a07239810.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/pipeline/SchemaGen/schema/7) at 0x7a1a07239ff0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/pipeline/SchemaGen/schema/7

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/content/pipeline/StatisticsGen/statistics/6
.span,0
.split_names,"[""train"", ""eval""]"

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/pipeline/SchemaGen/schema/7) at 0x7a1a07239ff0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/pipeline/SchemaGen/schema/7

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/pipeline/SchemaGen/schema/7) at 0x7a1a07239ff0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/pipeline/SchemaGen/schema/7

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/content/pipeline/SchemaGen/schema/7

0,1
['anomalies'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7a1a07239fc0.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /content/pipeline/ExampleValidator/anomalies/9) at 0x7a1a0723ab30.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/content/pipeline/ExampleValidator/anomalies/9.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleAnomalies
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /content/pipeline/ExampleValidator/anomalies/9) at 0x7a1a0723ab30.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/content/pipeline/ExampleValidator/anomalies/9.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /content/pipeline/ExampleValidator/anomalies/9) at 0x7a1a0723ab30.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/content/pipeline/ExampleValidator/anomalies/9.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleAnomalies'>
.uri,/content/pipeline/ExampleValidator/anomalies/9
.span,0
.split_names,"[""train"", ""eval""]"

0,1
['exclude_splits'],[]
['custom_validation_config'],

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7a1a0cd72e00.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7a1a07239810.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/pipeline/SchemaGen/schema/7) at 0x7a1a07239ff0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/pipeline/SchemaGen/schema/7

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /content/pipeline/StatisticsGen/statistics/6) at 0x7a1a0cd73fa0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/content/pipeline/StatisticsGen/statistics/6.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/content/pipeline/StatisticsGen/statistics/6
.span,0
.split_names,"[""train"", ""eval""]"

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/pipeline/SchemaGen/schema/7) at 0x7a1a07239ff0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/pipeline/SchemaGen/schema/7

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /content/pipeline/SchemaGen/schema/7) at 0x7a1a07239ff0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/content/pipeline/SchemaGen/schema/7

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/content/pipeline/SchemaGen/schema/7

0,1
['anomalies'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleAnomalies' (1 artifact) at 0x7a1a07239fc0.type_nameExampleAnomalies._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /content/pipeline/ExampleValidator/anomalies/9) at 0x7a1a0723ab30.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/content/pipeline/ExampleValidator/anomalies/9.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleAnomalies
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /content/pipeline/ExampleValidator/anomalies/9) at 0x7a1a0723ab30.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/content/pipeline/ExampleValidator/anomalies/9.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleAnomalies' (uri: /content/pipeline/ExampleValidator/anomalies/9) at 0x7a1a0723ab30.type<class 'tfx.types.standard_artifacts.ExampleAnomalies'>.uri/content/pipeline/ExampleValidator/anomalies/9.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleAnomalies'>
.uri,/content/pipeline/ExampleValidator/anomalies/9
.span,0
.split_names,"[""train"", ""eval""]"


In [43]:
# Visualize the results
context.show(example_validator.outputs['anomalies'])

# Transform
### components performs feature engineering for both training and serving dataset .
### It use the Tensorflow Transflow library.
### Take the input from the ExampleGen, the schema from SchemaGen as well as module containing the preprocessing function.

### will work on user-defined Transform code.The pipeline needs to load this as a module so you need to use this command %% writefile to save the file to disk.

In [54]:
# Set the constants module filename
_cenus_constants_module_file = 'census_constants.py'

In [55]:
%%writefile {_cenus_constants_module_file}

# Features with string data types that will be converted to indices
CATEGORICAL_FEATURE_KEYS = [
    'education', 'marital-status', 'occupation', 'race', 'relationship', 'workclass', 'sex', 'native-country'
]

# Numerical features that are marked as continuous
NUMERIC_FEATURE_KEYS = ['fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']

# Feature that can be grouped into buckets
BUCKET_FEATURE_KEYS = ['age']

# Number of buckets used by tf.transform for encoding each bucket feature.
FEATURE_BUCKET_COUNT = {'age': 4}

# Feature that the model will predict
LABEL_KEY = 'label'

# Utility function for renaming the feature
def transformed_name(key):
    return key + '_xf'

Overwriting census_constants.py


###  will use tft module functions to make these transformations

In [56]:
# Set the transform module filename
_cenus_transform_module_file = 'cenus_transform.py'

In [57]:
%%writefile {_cenus_transform_module_file}

import tensorflow as tf
import tensorflow_transform as tft

import census_constants

# Unpack the contents of the constants module
_NUMERIC_FEATURE_KEYS = census_constants.NUMERIC_FEATURE_KEYS
_CATEGORICAL_FEATURE_KEYS = census_constants.CATEGORICAL_FEATURE_KEYS
_BUCKET_FEATURE_KEYS = census_constants.BUCKET_FEATURE_KEYS
_FEATURE_BUCKET_COUNT = census_constants.FEATURE_BUCKET_COUNT
_LABEL_KEY = census_constants.LABEL_KEY
_transformed_name = census_constants.transformed_name


# Define the transformations
def preprocessing_fn(inputs):
    """tf.transform's callback function for preprocessing inputs.
    Args:
        inputs: map from feature keys to raw not-yet-transformed features.
    Returns:
        Map from string feature key to transformed feature operations.
    """
    outputs = {}

    # Scale these features to the range [0,1]
    for key in _NUMERIC_FEATURE_KEYS:
        outputs[_transformed_name(key)] = tft.scale_to_0_1(
            inputs[key])

    # Bucketize these features
    for key in _BUCKET_FEATURE_KEYS:
        outputs[_transformed_name(key)] = tft.bucketize(
            inputs[key], _FEATURE_BUCKET_COUNT[key])

    # Convert strings to indices in a vocabulary
    for key in _CATEGORICAL_FEATURE_KEYS:
        outputs[_transformed_name(key)] = tft.compute_and_apply_vocabulary(inputs[key])

    # Convert the label strings to an index
    outputs[_transformed_name(_LABEL_KEY)] = tft.compute_and_apply_vocabulary(inputs[_LABEL_KEY])

    return outputs

Overwriting cenus_transform.py


### now pass the training data,schema and transform module to the Transform component

In [58]:
# Ignore TF warning messages
tf.get_logger().setLevel('ERROR')

# Instantiate the Transform component
transform = tfx.components.Transform(
    examples=example_gen.outputs['examples'],
    schema=schema_gen.outputs['schema'],
    module_file=os.path.abspath(_cenus_transform_module_file))

# Run the component
context.run(transform)

KeyError: ignored