In [2]:
import tensorflow as tf
# Report the TensorFlow release this kernel is running.
print(f'TensorFlow version: {tf.__version__}')

from tfx import v1 as tfx
# Report the TFX release exposed through the v1 public API.
print(f'TFX version: {tfx.__version__}')

TensorFlow version: 2.7.0
TFX version: 1.6.0


In [3]:
import os

from absl import logging

# Name reused in every path derived below.
PIPELINE_NAME = "penguin-simple"

# Where the pipeline writes the artifacts it generates.
PIPELINE_ROOT = os.path.join('pipelines', PIPELINE_NAME)
# SQLite DB file that serves as the MLMD store.
METADATA_PATH = os.path.join('metadata', PIPELINE_NAME, 'metadata.db')
# Where models created by the pipeline get exported.
SERVING_MODEL_DIR = os.path.join('serving_model', PIPELINE_NAME)

# Default logging level for the notebook.
logging.set_verbosity(logging.INFO)

In [4]:
# Directory handed to the pipeline as `data_root` (CsvExampleGen's input_base).
DATA_ROOT="./data"
# NOTE(review): this path is not referenced by any other cell visible in this
# notebook; the pipeline receives DATA_ROOT instead, and the recorded run log
# reports processing "./data/*" with num_files:2 — confirm that is intended.
_data_filepath = os.path.join(DATA_ROOT, "penguins_processed.csv")

In [5]:
# Python module file passed to the Trainer component as `module_file`.
_trainer_module_file = 'penguin_trainer.py'

In [34]:
def _create_pipeline(pipeline_name: str, pipeline_root: str, data_root: str,
                     module_file: str, serving_model_dir: str,
                     metadata_path: str, train_steps: int = 100,
                     eval_steps: int = 5) -> tfx.dsl.Pipeline:
    """Creates a three component penguin pipeline with TFX.

    The pipeline chains CsvExampleGen -> Trainer -> Pusher.

    Args:
        pipeline_name: Name given to the resulting pipeline.
        pipeline_root: Passed to the pipeline as its `pipeline_root`.
        data_root: Directory handed to CsvExampleGen as `input_base`.
        module_file: Python file handed to the Trainer as `module_file`.
        serving_model_dir: Base directory of the Pusher's filesystem
            push destination.
        metadata_path: Path used to build the SQLite metadata connection
            config.
        train_steps: `num_steps` placed in the Trainer's `TrainArgs`.
            Defaults to 100.
        eval_steps: `num_steps` placed in the Trainer's `EvalArgs`.
            Defaults to 5.

    Returns:
        A `tfx.dsl.Pipeline` containing the three components.
    """
    # Brings data into the pipeline.
    example_gen = tfx.components.CsvExampleGen(input_base=data_root)

    # Uses user-provided Python function that trains a model.
    trainer = tfx.components.Trainer(
        module_file=module_file,
        examples=example_gen.outputs['examples'],
        train_args=tfx.proto.TrainArgs(num_steps=train_steps),
        eval_args=tfx.proto.EvalArgs(num_steps=eval_steps),
    )

    # Pushes the model to a filesystem destination.
    push_destination = tfx.proto.PushDestination(
        filesystem=tfx.proto.PushDestination.Filesystem(
            base_directory=serving_model_dir
        )
    )
    pusher = tfx.components.Pusher(
        model=trainer.outputs['model'],
        push_destination=push_destination,
    )

    # Following three components will be included in the pipeline.
    components = [
        example_gen,
        trainer,
        pusher,
    ]

    metadata_config = (
        tfx.orchestration.metadata.sqlite_metadata_connection_config(
            metadata_path
        )
    )

    return tfx.dsl.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        metadata_connection_config=metadata_config,
        components=components,
    )

In [35]:
# Build the pipeline object from the notebook-level configuration constants.
pipeline = _create_pipeline(
    pipeline_name=PIPELINE_NAME,
    pipeline_root=PIPELINE_ROOT,
    data_root=DATA_ROOT,
    module_file=_trainer_module_file,
    serving_model_dir=SERVING_MODEL_DIR,
    metadata_path=METADATA_PATH
)

In [36]:
# Run the pipeline with TFX's local DAG runner.
tfx.orchestration.LocalDagRunner().run(pipeline)

INFO:absl:Generating ephemeral wheel package for '/Users/lxh/private_workspace/demo-tfx-penguin/penguin_trainer.py' (including modules: ['penguin_trainer', 'demo_keras_iris_2', 'demo_keras_iris', 'demo_keras_penguins', 'sample', 'main']).
INFO:absl:User module package has hash fingerprint version 2c8d603f2e364ab2376fa07403cdac6910fa8d410038489579c0d727f82f81ed.
INFO:absl:Executing: ['/Users/lxh/.virtualenv/alisa/bin/python', '/var/folders/1l/gvk88th52gj0p_5n_97nkl680000gn/T/tmpvmnirg1r/_tfx_generated_setup.py', 'bdist_wheel', '--bdist-dir', '/var/folders/1l/gvk88th52gj0p_5n_97nkl680000gn/T/tmp9lpjyzqm', '--dist-dir', '/var/folders/1l/gvk88th52gj0p_5n_97nkl680000gn/T/tmpsevztmpb']
listing git files failed - pretending there aren't any
INFO:absl:Successfully built user code wheel distribution at 'pipelines/penguin-simple/_wheels/tfx_user_code_Trainer-0.0+2c8d603f2e364ab2376fa07403cdac6910fa8d410038489579c0d727f82f81ed-py3-none-any.whl'; target user module is 'penguin_trainer'.
INFO:absl:

INFO:absl:Generating examples.
INFO:absl:Processing input csv data ./data/* to TFExample.


running bdist_wheel
running build
running build_py
creating build
creating build/lib
copying penguin_trainer.py -> build/lib
copying demo_keras_iris_2.py -> build/lib
copying demo_keras_iris.py -> build/lib
copying demo_keras_penguins.py -> build/lib
copying sample.py -> build/lib
copying main.py -> build/lib
installing to /var/folders/1l/gvk88th52gj0p_5n_97nkl680000gn/T/tmp9lpjyzqm
running install
running install_lib
copying build/lib/penguin_trainer.py -> /var/folders/1l/gvk88th52gj0p_5n_97nkl680000gn/T/tmp9lpjyzqm
copying build/lib/demo_keras_iris_2.py -> /var/folders/1l/gvk88th52gj0p_5n_97nkl680000gn/T/tmp9lpjyzqm
copying build/lib/demo_keras_iris.py -> /var/folders/1l/gvk88th52gj0p_5n_97nkl680000gn/T/tmp9lpjyzqm
copying build/lib/demo_keras_penguins.py -> /var/folders/1l/gvk88th52gj0p_5n_97nkl680000gn/T/tmp9lpjyzqm
copying build/lib/sample.py -> /var/folders/1l/gvk88th52gj0p_5n_97nkl680000gn/T/tmp9lpjyzqm
copying build/lib/main.py -> /var/folders/1l/gvk88th52gj0p_5n_97nkl680000gn/

INFO:absl:Examples generated.
INFO:absl:Value type <class 'NoneType'> of key version in exec_properties is not supported, going to drop it
INFO:absl:Value type <class 'list'> of key _beam_pipeline_args in exec_properties is not supported, going to drop it
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 8 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'examples': [Artifact(artifact: uri: "pipelines/penguin-simple/CsvExampleGen/examples/8"
custom_properties {
  key: "input_fingerprint"
  value {
    string_value: "split:single_split,num_files:2,total_bytes:51294,xor_checksum:66513830,sum_checksum:3288796102"
  }
}
custom_properties {
  key: "name"
  value {
    string_value: "penguin-simple:2022-02-16T16:09:12.164929:CsvExampleGen:examples:0"
  }
}
custom_properties {
  key: "span"
  value {
    int_value: 0
  }
}
custom_properties {
  key: "tfx_version"
  value {
    string_value: "1.6.0"
 

INFO:absl:Train on the 'train' split when train_args.splits is not set.
INFO:absl:Evaluate on the 'eval' split when eval_args.splits is not set.
INFO:absl:udf_utils.get_fn {'eval_args': '{\n  "num_steps": 5\n}', 'train_args': '{\n  "num_steps": 100\n}', 'module_path': 'penguin_trainer@pipelines/penguin-simple/_wheels/tfx_user_code_Trainer-0.0+2c8d603f2e364ab2376fa07403cdac6910fa8d410038489579c0d727f82f81ed-py3-none-any.whl', 'custom_config': 'null'} 'run_fn'
INFO:absl:Installing 'pipelines/penguin-simple/_wheels/tfx_user_code_Trainer-0.0+2c8d603f2e364ab2376fa07403cdac6910fa8d410038489579c0d727f82f81ed-py3-none-any.whl' to a temporary directory.
INFO:absl:Executing: ['/Users/lxh/.virtualenv/alisa/bin/python', '-m', 'pip', 'install', '--target', '/var/folders/1l/gvk88th52gj0p_5n_97nkl680000gn/T/tmp4w2o60go', 'pipelines/penguin-simple/_wheels/tfx_user_code_Trainer-0.0+2c8d603f2e364ab2376fa07403cdac6910fa8d410038489579c0d727f82f81ed-py3-none-any.whl']


Processing ./pipelines/penguin-simple/_wheels/tfx_user_code_Trainer-0.0+2c8d603f2e364ab2376fa07403cdac6910fa8d410038489579c0d727f82f81ed-py3-none-any.whl
Installing collected packages: tfx-user-code-Trainer
Successfully installed tfx-user-code-Trainer-0.0+2c8d603f2e364ab2376fa07403cdac6910fa8d410038489579c0d727f82f81ed


You should consider upgrading via the '/Users/lxh/.virtualenv/alisa/bin/python -m pip install --upgrade pip' command.
INFO:absl:Successfully installed 'pipelines/penguin-simple/_wheels/tfx_user_code_Trainer-0.0+2c8d603f2e364ab2376fa07403cdac6910fa8d410038489579c0d727f82f81ed-py3-none-any.whl'.
INFO:absl:Training model.
INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_depth_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature culmen_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature flipper_length_mm has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature species has a shape dim {
  size: 1
}
. Setting to DenseTensor.
2022-02-16 16:09:16.465783: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations



2022-02-16 16:09:17.559959: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: pipelines/penguin-simple/Trainer/model/9/Format-Serving/assets


INFO:tensorflow:Assets written to: pipelines/penguin-simple/Trainer/model/9/Format-Serving/assets
INFO:absl:Training complete. Model written to pipelines/penguin-simple/Trainer/model/9/Format-Serving. ModelRun written to pipelines/penguin-simple/Trainer/model_run/9
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 9 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'model': [Artifact(artifact: uri: "pipelines/penguin-simple/Trainer/model/9"
custom_properties {
  key: "name"
  value {
    string_value: "penguin-simple:2022-02-16T16:09:12.164929:Trainer:model:0"
  }
}
custom_properties {
  key: "tfx_version"
  value {
    string_value: "1.6.0"
  }
}
, artifact_type: name: "Model"
)], 'model_run': [Artifact(artifact: uri: "pipelines/penguin-simple/Trainer/model_run/9"
custom_properties {
  key: "name"
  value {
    string_value: "penguin-simple:2022-02-16T16:09:12.164929:Trainer:model_run:0"
  }
}