### SETUP

In [None]:
!pip install -U tfx


In [None]:
# Import tensorflow and TFX modules
import tensorflow as tf
print('TensorFlow version: {}'.format(tf.__version__))
from tfx import v1 as tfx
print('TFX version: {}'.format(tfx.__version__))


In [None]:
import os

PIPELINE_NAME = "penguin-pipeline2"

# Output directory to store artifacts generated from the pipeline.
PIPELINE_ROOT = os.path.join('pipelines', PIPELINE_NAME)
# Path to a SQLite DB file to use as an MLMD storage.
METADATA_PATH = os.path.join('metadata', PIPELINE_NAME, 'metadata.db')
# Output directory where created models from the pipeline will be exported.
SERVING_MODEL_DIR = os.path.join('serving_model', PIPELINE_NAME)

from absl import logging
logging.set_verbosity(logging.INFO)  # Set default logging level.


### PREP DATA

In [None]:
import urllib.request
import tempfile

# Create some path roots
DATA_ROOT = os.path.join('penguin-data')
FULL_DATA_ROOT = os.path.join(DATA_ROOT, 'full-set')



#INCOMPLETE_DATA_ROOT = os.path.join(DATA_ROOT, 'incomplete-set')
#os.makedirs(DATA_ROOT + '/full-set', exist_ok=True)
#os.makedirs(DATA_ROOT + '/incomplete-set', exist_ok=True)
#_data_path = 'https://storage.googleapis.com/download.tensorflow.org/data/palmer_penguins/penguins_size.csv'
#_data_filepath = os.path.join(DATA_ROOT,'full-set', 'data-full.csv')
#urllib.request.urlretrieve(_data_path, _data_filepath)

In [None]:
_data_filepath = os.path.join(DATA_ROOT,'full-set', 'data-full.csv')
!head {_data_filepath}


### DELETE ENTRIES WITH NA FIELDS

In [None]:
!sed -i '/\bNA\b/d' {_data_filepath}
!head {_data_filepath}


### Create multiple datasets

In [None]:
## Remove 50 entries of Chinstrap species
#%run remove_csv.py

### CREATING SCHEMA

In [None]:
import shutil

SCHEMA_PATH = 'schema'

#_schema_uri = 'https://raw.githubusercontent.com/tensorflow/tfx/master/tfx/examples/penguin/schema/raw/schema.pbtxt'
#_schema_filename = 'schema.pbtxt'
#_schema_filepath = os.path.join(SCHEMA_PATH, _schema_filename)

#os.makedirs(SCHEMA_PATH, exist_ok=True)
#urllib.request.urlretrieve(_schema_uri, _schema_filepath)


### CREATE FILES FOR COMPONENT FUNCTIONS

In [None]:
_module_file = 'penguin_utils.py'

In [None]:
%%writefile {_module_file}


from typing import List, Text
from absl import logging
import tensorflow as tf
from tensorflow import keras
from tensorflow_metadata.proto.v0 import schema_pb2
import tensorflow_transform as tft
from tensorflow_transform.tf_metadata import schema_utils

from tfx import v1 as tfx
from tfx_bsl.public import tfxio

# Specify features that we will use.
_FEATURE_KEYS = [
    'culmen_length_mm', 'culmen_depth_mm', 'flipper_length_mm', 'body_mass_g'
]
_LABEL_KEY = 'species'

_TRAIN_BATCH_SIZE = 20
_EVAL_BATCH_SIZE = 10


# NEW: TFX Transform will call this function.
def preprocessing_fn(inputs):
  """tf.transform's callback function for preprocessing inputs.

  Args:
    inputs: map from feature keys to raw not-yet-transformed features.

  Returns:
    Map from string feature key to transformed feature.
  """
  outputs = {}

  # Uses features defined in _FEATURE_KEYS only.
  for key in _FEATURE_KEYS:
    # tft.scale_to_z_score computes the mean and variance of the given feature
    # and scales the output based on the result.
    outputs[key] = tft.scale_to_z_score(inputs[key])

  # For the label column we provide the mapping from string to index.
  # We could instead use `tft.compute_and_apply_vocabulary()` in order to
  # compute the vocabulary dynamically and perform a lookup.
  # Since in this example there are only 3 possible values, we use a hard-coded
  # table for simplicity.
  table_keys = ['Adelie', 'Chinstrap', 'Gentoo']
  initializer = tf.lookup.KeyValueTensorInitializer(
      keys=table_keys,
      values=tf.cast(tf.range(len(table_keys)), tf.int64),
      key_dtype=tf.string,
      value_dtype=tf.int64)
  table = tf.lookup.StaticHashTable(initializer, default_value=-1)
  outputs[_LABEL_KEY] = table.lookup(inputs[_LABEL_KEY])

  return outputs


# NEW: This function will apply the same transform operation to training data
#      and serving requests.
def _apply_preprocessing(raw_features, tft_layer):
  transformed_features = tft_layer(raw_features)
  if _LABEL_KEY in raw_features:
    transformed_label = transformed_features.pop(_LABEL_KEY)
    return transformed_features, transformed_label
  else:
    return transformed_features, None


# NEW: This function will create a handler function which gets a serialized
#      tf.example, preprocess and run an inference with it.
def _get_serve_tf_examples_fn(model, tf_transform_output):
  # We must save the tft_layer to the model to ensure its assets are kept and
  # tracked.
  model.tft_layer = tf_transform_output.transform_features_layer()

  @tf.function(input_signature=[
      tf.TensorSpec(shape=[None], dtype=tf.string, name='examples')
  ])
  def serve_tf_examples_fn(serialized_tf_examples):
    # Expected input is a string which is serialized tf.Example format.
    feature_spec = tf_transform_output.raw_feature_spec()
    # Because input schema includes unnecessary fields like 'species' and
    # 'island', we filter feature_spec to include required keys only.
    required_feature_spec = {
        k: v for k, v in feature_spec.items() if k in _FEATURE_KEYS
    }
    parsed_features = tf.io.parse_example(serialized_tf_examples,
                                          required_feature_spec)

    # Preprocess parsed input with transform operation defined in
    # preprocessing_fn().
    transformed_features, _ = _apply_preprocessing(parsed_features,
                                                   model.tft_layer)
    # Run inference with ML model.
    return model(transformed_features)

  return serve_tf_examples_fn


def _input_fn(file_pattern: List[Text],
              data_accessor: tfx.components.DataAccessor,
              tf_transform_output: tft.TFTransformOutput,
              batch_size: int = 200) -> tf.data.Dataset:
  """Generates features and label for tuning/training.

  Args:
    file_pattern: List of paths or patterns of input tfrecord files.
    data_accessor: DataAccessor for converting input to RecordBatch.
    tf_transform_output: A TFTransformOutput.
    batch_size: representing the number of consecutive elements of returned
      dataset to combine in a single batch

  Returns:
    A dataset that contains (features, indices) tuple where features is a
      dictionary of Tensors, and indices is a single Tensor of label indices.
  """
  dataset = data_accessor.tf_dataset_factory(
      file_pattern,
      tfxio.TensorFlowDatasetOptions(batch_size=batch_size),
      schema=tf_transform_output.raw_metadata.schema)

  transform_layer = tf_transform_output.transform_features_layer()
  def apply_transform(raw_features):
    return _apply_preprocessing(raw_features, transform_layer)

  return dataset.map(apply_transform).repeat()


def _build_keras_model() -> tf.keras.Model:
  """Creates a DNN Keras model for classifying penguin data.

  Returns:
    A Keras Model.
  """
  # The model below is built with Functional API, please refer to
  # https://www.tensorflow.org/guide/keras/overview for all API options.
  inputs = [
      keras.layers.Input(shape=(1,), name=key)
      for key in _FEATURE_KEYS
  ]
  d = keras.layers.concatenate(inputs)
  for _ in range(2):
    d = keras.layers.Dense(8, activation='relu')(d)
  outputs = keras.layers.Dense(3)(d)

  model = keras.Model(inputs=inputs, outputs=outputs)
  model.compile(
      optimizer=keras.optimizers.Adam(1e-2),
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      metrics=[keras.metrics.SparseCategoricalAccuracy()])

  model.summary(print_fn=logging.info)
  return model


# TFX Trainer will call this function.
def run_fn(fn_args: tfx.components.FnArgs):
  """Train the model based on given args.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """
  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

  train_dataset = _input_fn(
      fn_args.train_files,
      fn_args.data_accessor,
      tf_transform_output,
      batch_size=_TRAIN_BATCH_SIZE)
  eval_dataset = _input_fn(
      fn_args.eval_files,
      fn_args.data_accessor,
      tf_transform_output,
      batch_size=_EVAL_BATCH_SIZE)

  model = _build_keras_model()
  model.fit(
      train_dataset,
      steps_per_epoch=fn_args.train_steps,
      validation_data=eval_dataset,
      validation_steps=fn_args.eval_steps)

  # NEW: Save a computation graph including transform layer.
  signatures = {
      'serving_default': _get_serve_tf_examples_fn(model, tf_transform_output),
  }
  model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)


### PIPELINE DEFINITION

In [None]:
from ml_metadata.proto import metadata_store_pb2
# Non-public APIs, just for showcase.
from tfx.orchestration.portable.mlmd import execution_lib

# TODO(b/171447278): Move these functions into the TFX library.

def get_latest_artifacts(metadata, pipeline_name, component_id):
  """Output artifacts of the latest run of the component."""
  context = metadata.store.get_context_by_type_and_name(
      'node', f'{pipeline_name}.{component_id}')
  executions = metadata.store.get_executions_by_context(context.id)
  latest_execution = max(executions,
                         key=lambda e:e.last_update_time_since_epoch)
  return execution_lib.get_output_artifacts(metadata, latest_execution.id)

# Non-public APIs, just for showcase.
from tfx.orchestration.experimental.interactive import visualizations

def visualize_artifacts(artifacts):
  """Visualizes artifacts using standard visualization modules."""
  for artifact in artifacts:
    visualization = visualizations.get_registry().get_visualization(
        artifact.type_name)
    if visualization:
      visualization.display(artifact)

from tfx.orchestration.experimental.interactive import standard_visualizations
standard_visualizations.register_standard_visualizations()


In [None]:
# Non-public APIs, just for showcase.
from tfx.orchestration.metadata import Metadata
from tfx.types import standard_component_specs

metadata_connection_config = tfx.orchestration.metadata.sqlite_metadata_connection_config(
    os.path.join('metadata', 'penguin-pipeline1', 'metadata.db'))

with Metadata(metadata_connection_config) as metadata_handler:
  # Find output artifacts from MLMD.
  stat_gen_output = get_latest_artifacts(metadata_handler, "penguin-pipeline1",
                                         'StatisticsGen')
  stats_artifacts = stat_gen_output[standard_component_specs.STATISTICS_KEY]

  model_output = get_latest_artifacts(metadata_handler, "penguin-pipeline1", "Trainer")
  model_artifacts = model_output[standard_component_specs.MODEL_KEY]


In [None]:
#print("Model output: ", model_output)
#print("\n")
print("Model artifact: ", model_artifacts[0].uri)

#visualize_artifacts(stats_artifacts)

In [None]:

from tfx.types.standard_artifacts import Model

def create_custom_model_artifact(model_path):
    model_artifact = Model()
    model_artifact.uri = model_path
    return model_artifact


In [52]:
import tensorflow_model_analysis as tfma
from tfx.types import Channel
def _create_pipeline(pipeline_name: str, pipeline_root: str, data_root: str,
                     schema_path: str, module_file: str, serving_model_dir: str,
                     metadata_path: str) -> tfx.dsl.Pipeline:
  """Implements the penguin pipeline with TFX."""
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = tfx.components.CsvExampleGen(input_base=data_root)

  # Computes statistics over data for visualization and example validation.
  statistics_gen = tfx.components.StatisticsGen(
      examples=example_gen.outputs['examples'])

  # Import the schema.
  schema_importer = tfx.dsl.Importer(
      source_uri=schema_path,
      artifact_type=tfx.types.standard_artifacts.Schema).with_id(
          'schema_importer')

  # Performs anomaly detection based on statistics and data schema.
  example_validator = tfx.components.ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=schema_importer.outputs['result'])

  # NEW: Transforms input data using preprocessing_fn in the 'module_file'.
  transform = tfx.components.Transform(
      examples=example_gen.outputs['examples'],
      schema=schema_importer.outputs['result'],
      materialize=False,
      module_file=module_file)

  # Uses user-provided Python function that trains a model.
  trainer = tfx.components.Trainer(
      module_file=module_file,
      examples=example_gen.outputs['examples'],

      # NEW: Pass transform_graph to the trainer.
      transform_graph=transform.outputs['transform_graph'],

      train_args=tfx.proto.TrainArgs(num_steps=100),
      eval_args=tfx.proto.EvalArgs(num_steps=5))


  # NEW: Uses TFMA to compute evaluation statistics over features of a model and
  #   perform quality validation of a candidate model (compared to a baseline).
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(label_key='species')],
      slicing_specs=[
          # An empty slice spec means the overall slice, i.e. the whole dataset.
          tfma.SlicingSpec(),
          # Calculate metrics for each penguin species.
          tfma.SlicingSpec(feature_keys=['species']),
          ],
      metrics_specs=[
          tfma.MetricsSpec(per_slice_thresholds={
              'sparse_categorical_accuracy':
                  tfma.PerSliceMetricThresholds(thresholds=[
                      tfma.PerSliceMetricThreshold(
                          slicing_specs=[tfma.SlicingSpec()],
                          threshold=tfma.MetricThreshold(
                              value_threshold=tfma.GenericValueThreshold(
                                   lower_bound={'value': 0.6}),
                              # Change threshold will be ignored if there is no
                              # baseline model resolved from MLMD (first run).
                              change_threshold=tfma.GenericChangeThreshold(
                                  direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                                  absolute={'value': -1e-10}))
                       )]),
          })],
      )
  
  # Create a custom Model artifact pointing to the saved_model.pb file
  custom_model_artifact = create_custom_model_artifact(model_artifacts[0].uri)

    # Create a Model channel pointing to the custom Model artifact
  model_channel = Channel(type=Model)
  model_channel.additional_properties = {'custom_model': custom_model_artifact}

  
  evaluator = tfx.components.Evaluator(
      examples=example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      baseline_model=model_channel,
      eval_config=eval_config)


  # Pushes the model to a filesystem destination.
  pusher = tfx.components.Pusher(
      model=trainer.outputs['model'],
      push_destination=tfx.proto.PushDestination(
          filesystem=tfx.proto.PushDestination.Filesystem(
              base_directory=serving_model_dir)))

  components = [
      example_gen,
      statistics_gen,
      schema_importer,
      example_validator,

      transform,  # NEW: Transform component was added to the pipeline.

      trainer,
        
      #model_resolver, # Following two components were added to the pipeline.
      evaluator,

      pusher,
  ]

  return tfx.dsl.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      metadata_connection_config=tfx.orchestration.metadata
      .sqlite_metadata_connection_config(metadata_path),
      components=components)


In [53]:

tfx.orchestration.LocalDagRunner().run(
_create_pipeline(
    pipeline_name=PIPELINE_NAME,
    pipeline_root=PIPELINE_ROOT,
    data_root=FULL_DATA_ROOT,
    schema_path=SCHEMA_PATH,
    module_file=_module_file,
    serving_model_dir=SERVING_MODEL_DIR,
    metadata_path=
    ))

INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Generating ephemeral wheel package for '/workspaces/tfx_test_case/penguin_utils.py' (including modules: ['penguin_utils', 'remove_csv']).
INFO:absl:User module package has hash fingerprint version 3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd.
INFO:absl:Executing: ['/usr/local/bin/python', '/tmp/tmp_h9pasz5/_tfx_generated_setup.py', 'bdist_wheel', '--bdist-dir', '/tmp/tmpiljqgc59', '--dist-dir', '/tmp/tmpyythvbl9']
INFO:absl:Successfully built user code wheel distribution at 'pipelines/penguin-pipeline2/_wheels/tfx_user_code_Transform-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd-py3-none-any.whl'; target user module is 'penguin_utils'.
INFO:absl:Full user module path is 'penguin_utils@pipelines/penguin-pipeline2/_wheels/tfx_user_code_Transform-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b

running bdist_wheel
running build
running build_py
creating build
creating build/lib
copying penguin_utils.py -> build/lib
copying remove_csv.py -> build/lib
installing to /tmp/tmpiljqgc59
running install
running install_lib
copying build/lib/penguin_utils.py -> /tmp/tmpiljqgc59
copying build/lib/remove_csv.py -> /tmp/tmpiljqgc59
running install_egg_info
running egg_info
creating tfx_user_code_Transform.egg-info
writing tfx_user_code_Transform.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_Transform.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_Transform.egg-info/top_level.txt
writing manifest file 'tfx_user_code_Transform.egg-info/SOURCES.txt'
reading manifest file 'tfx_user_code_Transform.egg-info/SOURCES.txt'
writing manifest file 'tfx_user_code_Transform.egg-info/SOURCES.txt'
Copying tfx_user_code_Transform.egg-info to /tmp/tmpiljqgc59/tfx_user_code_Transform-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd-py3.7.egg-info
ru

INFO:absl:Successfully built user code wheel distribution at 'pipelines/penguin-pipeline2/_wheels/tfx_user_code_Trainer-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd-py3-none-any.whl'; target user module is 'penguin_utils'.
INFO:absl:Full user module path is 'penguin_utils@pipelines/penguin-pipeline2/_wheels/tfx_user_code_Trainer-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd-py3-none-any.whl'


running bdist_wheel
running build
running build_py
creating build
creating build/lib
copying penguin_utils.py -> build/lib
copying remove_csv.py -> build/lib
installing to /tmp/tmprvkhezxy
running install
running install_lib
copying build/lib/penguin_utils.py -> /tmp/tmprvkhezxy
copying build/lib/remove_csv.py -> /tmp/tmprvkhezxy
running install_egg_info
running egg_info
creating tfx_user_code_Trainer.egg-info
writing tfx_user_code_Trainer.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_Trainer.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_Trainer.egg-info/top_level.txt
writing manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
reading manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
writing manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
Copying tfx_user_code_Trainer.egg-info to /tmp/tmprvkhezxy/tfx_user_code_Trainer-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd-py3.7.egg-info
running install_scri

INFO:absl:Using deployment config:
 executor_specs {
  key: "CsvExampleGen"
  value {
    beam_executable_spec {
      python_executor_spec {
        class_path: "tfx.components.example_gen.csv_example_gen.executor.Executor"
      }
    }
  }
}
executor_specs {
  key: "Evaluator"
  value {
    beam_executable_spec {
      python_executor_spec {
        class_path: "tfx.components.evaluator.executor.Executor"
      }
    }
  }
}
executor_specs {
  key: "ExampleValidator"
  value {
    python_class_executable_spec {
      class_path: "tfx.components.example_validator.executor.Executor"
    }
  }
}
executor_specs {
  key: "Pusher"
  value {
    python_class_executable_spec {
      class_path: "tfx.components.pusher.executor.Executor"
    }
  }
}
executor_specs {
  key: "StatisticsGen"
  value {
    beam_executable_spec {
      python_executor_spec {
        class_path: "tfx.components.statistics_gen.executor.Executor"
      }
    }
  }
}
executor_specs {
  key: "Trainer"
  value {
    pyt

Processing ./pipelines/penguin-pipeline2/_wheels/tfx_user_code_Transform-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd-py3-none-any.whl


You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.
INFO:absl:Successfully installed 'pipelines/penguin-pipeline2/_wheels/tfx_user_code_Transform-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd-py3-none-any.whl'.
INFO:absl:udf_utils.get_fn {'module_file': None, 'module_path': 'penguin_utils@pipelines/penguin-pipeline2/_wheels/tfx_user_code_Transform-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd-py3-none-any.whl', 'stats_options_updater_fn': None} 'stats_options_updater_fn'
INFO:absl:Installing 'pipelines/penguin-pipeline2/_wheels/tfx_user_code_Transform-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd-py3-none-any.whl' to a temporary directory.
INFO:absl:Executing: ['/usr/local/bin/python', '-m', 'pip', 'install', '--target', '/tmp/tmpelg7psb4', 'pipelines/penguin-pipeline2/_wheels/tfx_user_code_Transform-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd-py

Installing collected packages: tfx-user-code-Transform
Successfully installed tfx-user-code-Transform-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd
Processing ./pipelines/penguin-pipeline2/_wheels/tfx_user_code_Transform-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd-py3-none-any.whl


You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.
INFO:absl:Successfully installed 'pipelines/penguin-pipeline2/_wheels/tfx_user_code_Transform-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd-py3-none-any.whl'.
INFO:absl:Installing 'pipelines/penguin-pipeline2/_wheels/tfx_user_code_Transform-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd-py3-none-any.whl' to a temporary directory.
INFO:absl:Executing: ['/usr/local/bin/python', '-m', 'pip', 'install', '--target', '/tmp/tmpsf99j8pr', 'pipelines/penguin-pipeline2/_wheels/tfx_user_code_Transform-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd-py3-none-any.whl']


Installing collected packages: tfx-user-code-Transform
Successfully installed tfx-user-code-Transform-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd
Processing ./pipelines/penguin-pipeline2/_wheels/tfx_user_code_Transform-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd-py3-none-any.whl


You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.
INFO:absl:Successfully installed 'pipelines/penguin-pipeline2/_wheels/tfx_user_code_Transform-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd-py3-none-any.whl'.
INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_depth_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature flipper_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature island has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature sex has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature species has a shape dim {
  size: 1
}
. Setting to DenseTensor.


Installing collected packages: tfx-user-code-Transform
Successfully installed tfx-user-code-Transform-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd


INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_depth_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature flipper_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature island has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature sex has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature species has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_depth_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature flipper_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature island has a shape dim 

INFO:tensorflow:Assets written to: pipelines/penguin-pipeline2/Transform/transform_graph/19/.temp_path/tftransform_tmp/f4b979cf6e88433495b45a0b02e4c50f/assets


INFO:tensorflow:Assets written to: pipelines/penguin-pipeline2/Transform/transform_graph/19/.temp_path/tftransform_tmp/f4b979cf6e88433495b45a0b02e4c50f/assets


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:Assets written to: pipelines/penguin-pipeline2/Transform/transform_graph/19/.temp_path/tftransform_tmp/7470e309f32541acb470465748d643c1/assets


INFO:tensorflow:Assets written to: pipelines/penguin-pipeline2/Transform/transform_graph/19/.temp_path/tftransform_tmp/7470e309f32541acb470465748d643c1/assets
INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_depth_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature flipper_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature species has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_depth_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature flipper_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature species has a shape dim

INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.
INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_depth_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature flipper_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature species has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_depth_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature flipper_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature species has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
}
. Settin

INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.
INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_depth_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature flipper_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature species has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_depth_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature flipper_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature species has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
}
. Settin

Processing ./pipelines/penguin-pipeline2/_wheels/tfx_user_code_Trainer-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd-py3-none-any.whl


You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.
INFO:absl:Successfully installed 'pipelines/penguin-pipeline2/_wheels/tfx_user_code_Trainer-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd-py3-none-any.whl'.
INFO:absl:Training model.
INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_depth_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature flipper_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature island has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature sex has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature species has a shape dim {
  size: 1
}
. Setting to DenseTensor.


Installing collected packages: tfx-user-code-Trainer
Successfully installed tfx-user-code-Trainer-0.0+3b422019575624088b5efebea0459843365c43d5a8dfef8bb203414b59b050cd
INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.
INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_depth_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature flipper_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature island has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature sex has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature species has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Model: "model_2"
INFO:absl:__________________________________________________________________________________________________
INFO:absl: Layer (type)                   Output Shape         Param #     Connected to                     
INFO:absl: culmen_length_mm (InputLayer)  [(None, 1)]          0           []                            



INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_depth_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature flipper_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature island has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature sex has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature species has a shape dim {
  size: 1
}
. Setting to DenseTensor.


INFO:tensorflow:Assets written to: pipelines/penguin-pipeline2/Trainer/model/21/Format-Serving/assets


INFO:tensorflow:Assets written to: pipelines/penguin-pipeline2/Trainer/model/21/Format-Serving/assets
INFO:absl:Training complete. Model written to pipelines/penguin-pipeline2/Trainer/model/21/Format-Serving. ModelRun written to pipelines/penguin-pipeline2/Trainer/model_run/21
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 21 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'model_run': [Artifact(artifact: uri: "pipelines/penguin-pipeline2/Trainer/model_run/21"
, artifact_type: name: "ModelRun"
)], 'model': [Artifact(artifact: uri: "pipelines/penguin-pipeline2/Trainer/model/21"
, artifact_type: name: "Model"
base_type: MODEL
)]}) for execution 21
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Component Trainer is finished.
INFO:absl:Component Evaluator is running.
INFO:absl:Running launcher for node_info {
  type {
    name: "tfx.components.evaluator.component.Evaluator"
 

ValueError: There can be only one baseline model, there are 3.

In [None]:
# List files in created model directory.
!find {SERVING_MODEL_DIR}


In [None]:
!saved_model_cli show --dir {SERVING_MODEL_DIR}/$(ls -1 {SERVING_MODEL_DIR} | sort -nr | head -1) --tag_set serve --signature_def serving_default


In [None]:
# Find a model with the latest and oldest timestamp.
model_dirs = (item for item in os.scandir(SERVING_MODEL_DIR) if item.is_dir())
print('model_dirs ', (item for item in os.scandir(SERVING_MODEL_DIR) if item.is_dir()))
model_path_new = max(model_dirs, key=lambda i: int(i.name)).path

model_dirs = (item for item in os.scandir(SERVING_MODEL_DIR) if item.is_dir())
model_path_old = min(model_dirs, key=lambda i: int(i.name)).path
print('max value ', model_path_new, ' min value ', model_path_old)
loaded_model_new = tf.keras.models.load_model(model_path_new)
loaded_model_old = tf.keras.models.load_model(model_path_old)
inference_fn_new = loaded_model_new.signatures['serving_default']
inference_fn_old = loaded_model_old.signatures['serving_default']


In [None]:
# Prepare an example and run inference.

#Chinstrap test
features = {
  'culmen_length_mm': tf.train.Feature(float_list=tf.train.FloatList(value=[46.3])),
  'culmen_depth_mm': tf.train.Feature(float_list=tf.train.FloatList(value=[17.5])),
  'flipper_length_mm': tf.train.Feature(int64_list=tf.train.Int64List(value=[187])),
  'body_mass_g': tf.train.Feature(int64_list=tf.train.Int64List(value=[3200])),
}
example_proto = tf.train.Example(features=tf.train.Features(feature=features))
examples = example_proto.SerializeToString()

result_new = inference_fn_new(examples=tf.constant([examples]))
print('Chinstrap test: ')
print('Model with incomplete dataset result: ', result_new['output_0'].numpy())

result_old = inference_fn_old(examples=tf.constant([examples]))
print('Model with full dataset result: ', result_old['output_0'].numpy())
 
#Adelie test
features = {
  'culmen_length_mm': tf.train.Feature(float_list=tf.train.FloatList(value=[34.9])),
  'culmen_depth_mm': tf.train.Feature(float_list=tf.train.FloatList(value=[17.5])),
  'flipper_length_mm': tf.train.Feature(int64_list=tf.train.Int64List(value=[190])),
  'body_mass_g': tf.train.Feature(int64_list=tf.train.Int64List(value=[3723])),
}
example_proto = tf.train.Example(features=tf.train.Features(feature=features))
examples = example_proto.SerializeToString()
print('Adelie test: ')
result_new = inference_fn_new(examples=tf.constant([examples]))
print('Model with incomplete dataset result: ', result_new['output_0'].numpy())

result_old = inference_fn_old(examples=tf.constant([examples]))
print('Model with full dataset result: ', result_old['output_0'].numpy())

#Gentoo test 
features = {
  'culmen_length_mm': tf.train.Feature(float_list=tf.train.FloatList(value=[49.5])),
  'culmen_depth_mm': tf.train.Feature(float_list=tf.train.FloatList(value=[16.5])),
  'flipper_length_mm': tf.train.Feature(int64_list=tf.train.Int64List(value=[227])),
  'body_mass_g': tf.train.Feature(int64_list=tf.train.Int64List(value=[6100])),
}
example_proto = tf.train.Example(features=tf.train.Features(feature=features))
examples = example_proto.SerializeToString()
print('Gentoo test: ')
result_new = inference_fn_new(examples=tf.constant([examples]))
print('Model with incomplete dataset result: ', result_new['output_0'].numpy())

result_old = inference_fn_old(examples=tf.constant([examples]))
print('Model with full dataset result: ', result_old['output_0'].numpy())

In [None]:
!head{_data_filepath}

### MLMD DATABASE QUERY

In [None]:
import os
import tempfile
import urllib
import pandas as pd

import tensorflow_model_analysis as tfma
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext



from tfx import v1 as tfx
print('TFX version: {}'.format(tfx.__version__))
import ml_metadata as mlmd
print('MLMD version: {}'.format(mlmd.__version__))


In [None]:
connection_config = tfx.orchestration.metadata.sqlite_metadata_connection_config(METADATA_PATH)
store = mlmd.MetadataStore(connection_config)

# All TFX artifacts are stored in the base directory
base_dir = connection_config.sqlite.filename_uri.split('metadata.sqlite')[0]


In [None]:
def display_types(types):
  # Helper function to render dataframes for the artifact and execution types
  table = {'id': [], 'name': []}
  for a_type in types:
    table['id'].append(a_type.id)
    table['name'].append(a_type.name)
  return pd.DataFrame(data=table)


In [None]:
def display_artifacts(store, artifacts):
  # Helper function to render dataframes for the input artifacts
  table = {'artifact id': [], 'type': [], 'uri': []}
  for a in artifacts:
    table['artifact id'].append(a.id)
    artifact_type = store.get_artifact_types_by_id([a.type_id])[0]
    table['type'].append(artifact_type.name)
    table['uri'].append(a.uri.replace(base_dir, './'))
  return pd.DataFrame(data=table)


In [None]:
def display_properties(store, node):
  # Helper function to render dataframes for artifact and execution properties
  table = {'property': [], 'value': []}
  for k, v in node.properties.items():
    table['property'].append(k)
    table['value'].append(
        v.string_value if v.HasField('string_value') else v.int_value)
  for k, v in node.custom_properties.items():
    table['property'].append(k)
    table['value'].append(
        v.string_value if v.HasField('string_value') else v.int_value)
  return pd.DataFrame(data=table)


In [None]:
display_types(store.get_artifact_types())


In [None]:
pushed_models = store.get_artifacts_by_type("PushedModel")
display_artifacts(store, pushed_models)


In [None]:
pushed_model = pushed_models[-1]
display_properties(store, pushed_model)


In [None]:
def get_one_hop_parent_artifacts(store, artifacts):
  # Get a list of artifacts within a 1-hop of the artifacts of interest
  artifact_ids = [artifact.id for artifact in artifacts]
  executions_ids = set(
      event.execution_id
      for event in store.get_events_by_artifact_ids(artifact_ids)
      if event.type == mlmd.proto.Event.OUTPUT)
  artifacts_ids = set(
      event.artifact_id
      for event in store.get_events_by_execution_ids(executions_ids)
      if event.type == mlmd.proto.Event.INPUT)
  return [artifact for artifact in store.get_artifacts_by_id(artifacts_ids)]


In [None]:
parent_artifacts = get_one_hop_parent_artifacts(store, [pushed_model])
display_artifacts(store, parent_artifacts)


In [None]:
exported_model = parent_artifacts[0]
display_properties(store, exported_model)


In [None]:
model_parents = get_one_hop_parent_artifacts(store, [exported_model])
display_artifacts(store, model_parents)


In [None]:
used_data = model_parents[0]
display_properties(store, used_data)


In [None]:
display_types(store.get_execution_types())


In [None]:
def find_producer_execution(store, artifact):
  executions_ids = set(
      event.execution_id
      for event in store.get_events_by_artifact_ids([artifact.id])
      if event.type == mlmd.proto.Event.OUTPUT)
  return store.get_executions_by_id(executions_ids)[0]

trainer = find_producer_execution(store, exported_model)
display_properties(store, trainer)
