# Imports

In [None]:
%pip install tensorflow-data-validation
%pip install -q tensorflow_data_validation[visualization]
%pip install tfx

In [None]:
import sys
import os
import numpy as np
import pandas as pd
import pickle
import tempfile
import tensorflow_data_validation as tfdv
import tensorflow as tf

np.set_printoptions(threshold=sys.maxsize)
print('TFDV version: {}'.format(tfdv.version.__version__))
print('TF version: {}'.format(tf.__version__))

# Data Analysis

#### Load and display data

In [None]:
DATA = './data'
TRAIN_DATA = os.path.join(DATA, 'train.csv')
TEST_DATA = os.path.join(DATA, 'test.csv')
OUTPUT = './output'

In [None]:
train_df = pd.read_csv(TRAIN_DATA, sep=";")    
test_df = pd.read_csv(TEST_DATA, sep=";")
display(train_df)
display(test_df)

#### Generate Statistics

In [None]:
%%capture
import tensorflow_data_validation as tfdv
print('TFDV version: {}'.format(tfdv.version.__version__))
train_stats = tfdv.generate_statistics_from_dataframe(train_df)
test_stats = tfdv.generate_statistics_from_dataframe(test_df)

In [None]:
tfdv.visualize_statistics(train_stats)
tfdv.visualize_statistics(lhs_statistics=train_stats,
                         rhs_statistics=test_stats)

#### Infer schema and detect anomalies

In [None]:
schema = tfdv.infer_schema(train_stats)
tfdv.display_schema(schema)

In [None]:
from tensorflow_metadata.proto.v0 import schema_pb2

# Create schema environments and remove the label from the testing environment so it is not detected as an anomaly in the test set
schema.default_environment.append('TRAINING')
schema.default_environment.append('TESTING')

tfdv.get_feature(schema, 'EXTRA_BAGGAGE').not_in_environment.append('TESTING')

# Generate new statistics based on schema
stats_options = tfdv.StatsOptions(schema=schema, infer_type_from_schema=True)
stats_options.label_feature = 'EXTRA_BAGGAGE'
train_stats = tfdv.generate_statistics_from_dataframe(
    train_df,
    stats_options=stats_options,
)

# Check for anomalies in the test statistics
anomalies = tfdv.validate_statistics(test_stats, schema, environment='TESTING')
tfdv.display_anomalies(anomalies)

#options = tfdv.StatsOptions(schema=schema)
#anomalous_example_stats = tfdv.validate_examples_in_csv(data_location=TRAIN_DATA, stats_options=options)



#### Checking data skew and drift

In [None]:
tfdv.get_feature(schema, 'WEBSITE').skew_comparator.infinity_norm.threshold = 0.01
skew_anomalies = tfdv.validate_statistics(statistics=train_stats, schema=schema, serving_statistics=test_stats)
tfdv.display_anomalies(skew_anomalies)

#### Generate statistics on data slices

In [None]:
from tensorflow_data_validation.utils import slicing_util
slice_fn =  slicing_util.get_feature_value_slicer(features={'DEVICE': 'COMPUTER'})
stats_options = tfdv.StatsOptions(slice_functions=[slice_fn])

train_stats = tfdv.generate_statistics_from_dataframe(
    train_df,
    stats_options=stats_options,
)

tfdv.visualize_statistics(train_stats)


# Data preprocessing

In [None]:
%pip install -U tensorflow-transform
%pip install pyarrow

In [208]:
import os
PIPELINE_NAME = "extra-baggage"
DATA_ROOT = "train-data"

# Output directory to store artifacts generated from the pipeline.
PIPELINE_ROOT = os.path.join('pipelines', PIPELINE_NAME)
# Path to a SQLite DB file to use as an MLMD storage.
METADATA_PATH = os.path.join('metadata', PIPELINE_NAME, 'metadata.db')
# Output directory where created models from the pipeline will be exported.
SERVING_MODEL_DIR = os.path.join('serving_model', PIPELINE_NAME)

from absl import logging

logging.set_verbosity(logging.FATAL)  # Set default logging level.

In [209]:
_transform_module_file = 'transform.py' 

In [210]:

%%writefile {_transform_module_file}
import tensorflow as tf
import tensorflow_transform as tft

def preprocessing_fn(inputs):
  """tf.transform's callback function for preprocessing inputs.

  Args:
    inputs: map from feature keys to raw not-yet-transformed features.

  Returns:
    Map from string feature key to transformed feature operations.
  """
  
  # Number of vocabulary terms used for encoding VOCAB_FEATURES by tf.transform
  _VOCAB_SIZE = 1000
  # Count of out-of-vocab buckets in which unrecognized VOCAB_FEATURES are hashed.
  _OOV_SIZE = 10
  # Number of buckets used by tf.transform for encoding each feature.
  _FEATURE_BUCKET_COUNT = 10

  _FEATURE_KEYS = ['DEPARTURE','ARRIVAL']

  _VOCAB_FEATURE_KEYS = ['DEPARTURE', 'ARRIVAL', 'EXTRA_BAGGAGE']

  _CATEGORICAL_FEATURE_KEYS = []

  _DENSE_FLOAT_FEATURE_KEYS = []

  _BUCKET_FEATURE_KEYS = []

  _LABEL_KEY = 'EXTRA_BAGGAGE'
  outputs = {}
  for key in _DENSE_FLOAT_FEATURE_KEYS:
    # If sparse make it dense, setting nan's to 0 or '', and apply zscore.
    outputs[key] = tft.scale_to_z_score(
        _fill_in_missing(inputs[key]))

  for key in _VOCAB_FEATURE_KEYS:
    # Build a vocabulary for this feature.
    outputs[key] = tft.compute_and_apply_vocabulary(
            inputs[key],
            top_k=_VOCAB_SIZE,
            num_oov_buckets=_OOV_SIZE)

  for key in _BUCKET_FEATURE_KEYS:
    outputs[key] = tft.bucketize(
              inputs[key], 
              _FEATURE_BUCKET_COUNT)

  for key in _CATEGORICAL_FEATURE_KEYS:
    outputs[key] = inputs[key]  

  return outputs  
    

def _fill_in_missing(x):
  """Replace missing values in a SparseTensor.
  Fills in missing values of `x` with '' or 0, and converts to a dense tensor.
  Args:
    x: A `SparseTensor` of rank 2.  Its dense shape should have size at most 1
      in the second dimension.
  Returns:
    A rank 1 tensor where missing values of `x` have been filled in.
  """
  if not isinstance(x, tf.sparse.SparseTensor):
    return x

  default_value = '' if x.dtype == tf.string else 0
  return tf.squeeze(
      tf.sparse.to_dense(
          tf.SparseTensor(x.indices, x.values, [x.dense_shape[0], 1]),
          default_value),
      axis=1)

Overwriting transform.py


In [211]:
_trainer_module_file = 'extra_baggage_trainer.py'

In [212]:
%%writefile {_trainer_module_file}

from typing import List
import tensorflow_transform as tft
from tensorflow import keras
from tensorflow_transform.tf_metadata import schema_utils

import tensorflow as tf
from tfx import v1 as tfx
from tfx_bsl.public import tfxio
from tensorflow_metadata.proto.v0 import schema_pb2

from tfx.components.trainer.fn_args_utils import DataAccessor
from tfx.components.trainer.fn_args_utils import FnArgs
from tfx_bsl.tfxio import dataset_options

_TRAIN_BATCH_SIZE = 20
_EVAL_BATCH_SIZE = 10

_FEATURE_KEYS = ['DEPARTURE', 'ARRIVAL']

_LABEL_KEY = 'EXTRA_BAGGAGE'


def _apply_preprocessing(raw_features, tft_layer):
  transformed_features = tft_layer(raw_features)
  if _LABEL_KEY in raw_features:
    transformed_label = transformed_features.pop(_LABEL_KEY)
    return transformed_features, transformed_label
  else:
    return transformed_features, None

def _get_serve_rest_fn(model, tf_transform_output):
  
  model.tft_layer = tf_transform_output.transform_features_layer()

  @tf.function(input_signature=[
      tf.TensorSpec(shape=(None,1), dtype=tf.string, name='departure'),
      tf.TensorSpec(shape=(None,1), dtype=tf.string, name='arrival'),
  ])
  def serve_rest_fn(x0, x1):
    # Run inference with ML model.    
    transformed_features, _ = _apply_preprocessing({
                                                  'DEPARTURE': x0,
                                                  'ARRIVAL': x1,
                                                  },
                                                   model.tft_layer)
    print(transformed_features)
    return model(transformed_features)

  return serve_rest_fn

def _get_serve_tf_examples_fn(model, tf_transform_output):
  
  model.tft_layer = tf_transform_output.transform_features_layer()

  @tf.function(input_signature=[
      tf.TensorSpec(shape=[None], dtype=tf.string, name='examples')
  ])
  def serve_tf_examples_fn(serialized_tf_examples):
    # Expected input is a string which is serialized tf.Example format.
    feature_spec = tf_transform_output.raw_feature_spec()
    
    # Because input schema includes unnecessary fields like 'species' and
    # 'island', we filter feature_spec to include required keys only.
    required_feature_spec = {
        k: v for k, v in feature_spec.items() if k in _FEATURE_KEYS
    }
    parsed_features = tf.io.parse_example(serialized_tf_examples,
                                          required_feature_spec)

    # Preprocess parsed input with transform operation defined in
    # preprocessing_fn().
    transformed_features, _ = _apply_preprocessing(parsed_features,
                                                   model.tft_layer)
    # Run inference with ML model.
    return model(transformed_features)

  return serve_tf_examples_fn

    
def _input_fn(file_pattern: List[str],
              data_accessor: DataAccessor,
              tf_transform_output: tft.TFTransformOutput,
              batch_size: int = 200) -> tf.data.Dataset:
  """Generates features and label for training.

  Args:
    file_pattern: List of paths or patterns of input tfrecord files.
    data_accessor: DataAccessor for converting input to RecordBatch.
    schema: schema of the input data.
    batch_size: representing the number of consecutive elements of returned
      dataset to combine in a single batch

  Returns:
    A dataset that contains (features, indices) tuple where features is a
      dictionary of Tensors, and indices is a single Tensor of label indices.
  """
  
  dataset = data_accessor.tf_dataset_factory(
      file_pattern,
      dataset_options.TensorFlowDatasetOptions(
          batch_size=batch_size),
      tf_transform_output.raw_metadata.schema).repeat()

  transform_layer = tf_transform_output.transform_features_layer()
  
  def apply_transform(raw_features):    
    return _apply_preprocessing(raw_features, transform_layer)

  return dataset.map(apply_transform).repeat()


def _build_keras_model() -> tf.keras.Model:
  """Creates a DNN Keras model for classifying booking data.

  Returns:
    A Keras Model.
  """
  # The model below is built with Functional API, please refer to
  # https://www.tensorflow.org/guide/keras/overview for all API options.
  inputs = [keras.layers.Input(shape=(1,1), name=f) for f in _FEATURE_KEYS]
  d = keras.layers.concatenate(inputs)
  for _ in range(2):
    d = keras.layers.Dense(8, activation='relu')(d)
  outputs = keras.layers.Dense(1, activation='sigmoid')(d)

  model = keras.Model(inputs=inputs, outputs=outputs)
  model.compile(
      optimizer=keras.optimizers.Adam(1e-3),
      loss=keras.losses.BinaryCrossentropy(),
      metrics=[keras.metrics.Accuracy()])

  model.summary(print_fn=logging.info)
  return model

# TFX Trainer will call this function.
def run_fn(fn_args: FnArgs):
  """Train the model based on given args.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """  
  tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="./logs")
  tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)
  
  train_dataset = _input_fn(
      fn_args.train_files,
      fn_args.data_accessor,
      tf_transform_output,
      batch_size=_TRAIN_BATCH_SIZE)  

  model = _build_keras_model()
  model.fit(
      train_dataset,
      steps_per_epoch=fn_args.train_steps,
      validation_steps=fn_args.eval_steps,
      callbacks=[tensorboard_callback])

  # The result of the training should be saved in `fn_args.serving_model_dir`
  # directory.
 
  signatures = {
      'serving_default': _get_serve_tf_examples_fn(model, tf_transform_output),
      'serving_rest': _get_serve_rest_fn(model, tf_transform_output),
  }
  model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)

Overwriting extra_baggage_trainer.py


In [213]:
from tfx import v1 as tfx
def _import_schema(schema_path=''):
  tfx.dsl.Importer(
      source_uri=schema_path,
      artifact_type=tfx.types.standard_artifacts.Schema).with_id(
          'schema_importer')

def _create_pipeline(pipeline_name: str, 
                     pipeline_root: str, 
                     data_root: str, 
                     schema_path: str,
                     module_file: str, 
                     transform_module_file: str, 
                     serving_model_dir: str,
                     metadata_path: str) -> tfx.dsl.Pipeline:

  """Creates a pipeline with TFX."""
  # Brings data into the pipeline.
  example_gen = tfx.components.CsvExampleGen(input_base=data_root)

  stats_gen = tfx.components.StatisticsGen(examples=example_gen.outputs['examples'])

  #schema_importer = tfx.dsl.Importer(
  #    source_uri=schema_path,
  #    artifact_type=tfx.types.standard_artifacts.Schema).with_id(
  #        'schema_importer')
  schema_importer= tfx.components.SchemaGen(
      statistics=stats_gen.outputs['statistics'], infer_feature_shape=True)

  example_validator = tfx.components.ExampleValidator(
    statistics=stats_gen.outputs['statistics'],
    schema=schema_importer.outputs['schema'])

  transform = tfx.components.Transform(
    examples=example_gen.outputs['examples'],
    schema=schema_importer.outputs['schema'],
    module_file=_transform_module_file)

  # Uses user-provided Python function that trains a model.
  trainer = tfx.components.Trainer(
      module_file=module_file,
      examples=example_gen.outputs['examples'],
      schema=schema_importer.outputs['schema'],
      transform_graph=transform.outputs['transform_graph'],
      train_args=tfx.proto.TrainArgs(num_steps=100),
      eval_args=tfx.proto.EvalArgs(num_steps=5))  

  # Pushes the model to a filesystem destination.
  pusher = tfx.components.Pusher(
      model=trainer.outputs['model'],
      push_destination=tfx.proto.PushDestination(
          filesystem=tfx.proto.PushDestination.Filesystem(
              base_directory=serving_model_dir)))

  # Following three components will be included in the pipeline.
  components = [
      example_gen,
      stats_gen,
      schema_importer,
      example_validator,

      transform,  # NEW: Transform component was added to the pipeline.

      trainer,
      pusher,
  ]

  return tfx.dsl.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      metadata_connection_config=tfx.orchestration.metadata
      .sqlite_metadata_connection_config(metadata_path),
      components=components)

In [214]:
tfx.orchestration.LocalDagRunner().run(
  _create_pipeline(
      pipeline_name=PIPELINE_NAME,
      pipeline_root=PIPELINE_ROOT,
      data_root=DATA_ROOT,
      schema_path='',
      module_file =_trainer_module_file,
      transform_module_file =_transform_module_file,
      serving_model_dir=SERVING_MODEL_DIR,
      metadata_path=METADATA_PATH))

running bdist_wheel
running build
running build_py
creating build
creating build/lib
copying extra_baggage_trainer.py -> build/lib
copying transform.py -> build/lib




installing to /var/folders/tz/gyhk2p3j6hx4bmdqw8bkql840000gn/T/tmpxez9e0st
running install
running install_lib
copying build/lib/extra_baggage_trainer.py -> /var/folders/tz/gyhk2p3j6hx4bmdqw8bkql840000gn/T/tmpxez9e0st
copying build/lib/transform.py -> /var/folders/tz/gyhk2p3j6hx4bmdqw8bkql840000gn/T/tmpxez9e0st
running install_egg_info
running egg_info
creating tfx_user_code_Transform.egg-info
writing tfx_user_code_Transform.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_Transform.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_Transform.egg-info/top_level.txt
writing manifest file 'tfx_user_code_Transform.egg-info/SOURCES.txt'
reading manifest file 'tfx_user_code_Transform.egg-info/SOURCES.txt'
writing manifest file 'tfx_user_code_Transform.egg-info/SOURCES.txt'
Copying tfx_user_code_Transform.egg-info to /var/folders/tz/gyhk2p3j6hx4bmdqw8bkql840000gn/T/tmpxez9e0st/tfx_user_code_Transform-0.0+1ff575ff9b7ee2eedd55ab03c1bcfd9b80ddc148453b0fd9f02084e89



installing to /var/folders/tz/gyhk2p3j6hx4bmdqw8bkql840000gn/T/tmpv6hh9vps
running install
running install_lib
copying build/lib/extra_baggage_trainer.py -> /var/folders/tz/gyhk2p3j6hx4bmdqw8bkql840000gn/T/tmpv6hh9vps
copying build/lib/transform.py -> /var/folders/tz/gyhk2p3j6hx4bmdqw8bkql840000gn/T/tmpv6hh9vps
running install_egg_info
running egg_info
creating tfx_user_code_Trainer.egg-info
writing tfx_user_code_Trainer.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_Trainer.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_Trainer.egg-info/top_level.txt
writing manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
reading manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
writing manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
Copying tfx_user_code_Trainer.egg-info to /var/folders/tz/gyhk2p3j6hx4bmdqw8bkql840000gn/T/tmpv6hh9vps/tfx_user_code_Trainer-0.0+1ff575ff9b7ee2eedd55ab03c1bcfd9b80ddc148453b0fd9f02084e89aa76986-py3.9.egg-

E0819 19:09:30.759780000 4701634048 fork_posix.cc:76]                  Other threads are currently calling into gRPC, skipping fork() handlers


Processing ./pipelines/extra-baggage/_wheels/tfx_user_code_Transform-0.0+1ff575ff9b7ee2eedd55ab03c1bcfd9b80ddc148453b0fd9f02084e89aa76986-py3-none-any.whl
Installing collected packages: tfx-user-code-Transform
Successfully installed tfx-user-code-Transform-0.0+1ff575ff9b7ee2eedd55ab03c1bcfd9b80ddc148453b0fd9f02084e89aa76986




Processing ./pipelines/extra-baggage/_wheels/tfx_user_code_Transform-0.0+1ff575ff9b7ee2eedd55ab03c1bcfd9b80ddc148453b0fd9f02084e89aa76986-py3-none-any.whl
Installing collected packages: tfx-user-code-Transform
Successfully installed tfx-user-code-Transform-0.0+1ff575ff9b7ee2eedd55ab03c1bcfd9b80ddc148453b0fd9f02084e89aa76986




Processing ./pipelines/extra-baggage/_wheels/tfx_user_code_Transform-0.0+1ff575ff9b7ee2eedd55ab03c1bcfd9b80ddc148453b0fd9f02084e89aa76986-py3-none-any.whl
Installing collected packages: tfx-user-code-Transform
Successfully installed tfx-user-code-Transform-0.0+1ff575ff9b7ee2eedd55ab03c1bcfd9b80ddc148453b0fd9f02084e89aa76986




INFO:tensorflow:Assets written to: pipelines/extra-baggage/Transform/transform_graph/969/.temp_path/tftransform_tmp/7a77702d8c4c457fb11aef2ccf49d529/assets


INFO:tensorflow:Assets written to: pipelines/extra-baggage/Transform/transform_graph/969/.temp_path/tftransform_tmp/7a77702d8c4c457fb11aef2ccf49d529/assets


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:Assets written to: pipelines/extra-baggage/Transform/transform_graph/969/.temp_path/tftransform_tmp/224b431bd69b4e38b58aa26219d5e556/assets


INFO:tensorflow:Assets written to: pipelines/extra-baggage/Transform/transform_graph/969/.temp_path/tftransform_tmp/224b431bd69b4e38b58aa26219d5e556/assets


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.
E0819 19:10:55.098000000 4701634048 fork_posix.cc:76]                  Other threads are currently calling into gRPC, skipping fork() handlers


Processing ./pipelines/extra-baggage/_wheels/tfx_user_code_Trainer-0.0+1ff575ff9b7ee2eedd55ab03c1bcfd9b80ddc148453b0fd9f02084e89aa76986-py3-none-any.whl
Installing collected packages: tfx-user-code-Trainer
Successfully installed tfx-user-code-Trainer-0.0+1ff575ff9b7ee2eedd55ab03c1bcfd9b80ddc148453b0fd9f02084e89aa76986




INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


{'DEPARTURE': <tf.Tensor 'transform_features_layer_26/StatefulPartitionedCall:1' shape=(None, 1) dtype=int64>, 'ARRIVAL': <tf.Tensor 'transform_features_layer_26/StatefulPartitionedCall:0' shape=(None, 1) dtype=int64>}
INFO:tensorflow:Assets written to: pipelines/extra-baggage/Trainer/model/970/Format-Serving/assets


INFO:tensorflow:Assets written to: pipelines/extra-baggage/Trainer/model/970/Format-Serving/assets


In [186]:
from ml_metadata.proto import metadata_store_pb2
# Non-public APIs, just for showcase.
from tfx.orchestration.portable.mlmd import execution_lib

# TODO(b/171447278): Move these functions into the TFX library.

def get_latest_artifacts(metadata, pipeline_name, component_id):
  """Output artifacts of the latest run of the component."""
  context = metadata.store.get_context_by_type_and_name(
      'node', f'{pipeline_name}.{component_id}')
  executions = metadata.store.get_executions_by_context(context.id)
  latest_execution = max(executions,
                         key=lambda e:e.last_update_time_since_epoch)
  return execution_lib.get_artifacts_dict(metadata, latest_execution.id,
                                          [metadata_store_pb2.Event.OUTPUT])

# Non-public APIs, just for showcase.
from tfx.orchestration.experimental.interactive import visualizations

def visualize_artifacts(artifacts):
  """Visualizes artifacts using standard visualization modules."""
  for artifact in artifacts:
    visualization = visualizations.get_registry().get_visualization(
        artifact.type_name)
    if visualization:
      visualization.display(artifact)

from tfx.orchestration.experimental.interactive import standard_visualizations
standard_visualizations.register_standard_visualizations()

In [192]:
# Non-public APIs, just for showcase.
from tfx.orchestration.metadata import Metadata
from tfx.types import standard_component_specs
SCHEMA_METADATA_PATH = METADATA_PATH
metadata_connection_config = tfx.orchestration.metadata.sqlite_metadata_connection_config(
    SCHEMA_METADATA_PATH)

with Metadata(metadata_connection_config) as metadata_handler:
  # Find output artifacts from MLMD.
  stat_gen_output = get_latest_artifacts(metadata_handler, PIPELINE_NAME,
                                         'StatisticsGen')
  stats_artifacts = stat_gen_output[standard_component_specs.STATISTICS_KEY]

  schema_gen_output = get_latest_artifacts(metadata_handler,
                                           PIPELINE_NAME, 'SchemaGen')
  schema_artifacts = schema_gen_output[standard_component_specs.SCHEMA_KEY]

  transform_gen_output = get_latest_artifacts(metadata_handler,
                                           PIPELINE_NAME, 'Transform')
  transform_artifacts = transform_gen_output[standard_component_specs.TRANSFORM_GRAPH_KEY]

  trainer_gen_output = get_latest_artifacts(metadata_handler,
                                           PIPELINE_NAME, 'Trainer')
  trainer_artifacts = trainer_gen_output[standard_component_specs.TRAIN_ARGS_KEY]

  #ev_output = get_latest_artifacts(metadata_handler, PIPELINE_NAME,
  #                                 'ExampleValidator')
  #anomalies_artifacts = ev_output[standard_component_specs.ANOMALIES_KEY]

In [193]:
visualize_artifacts(trainer_artifacts)
print(trainer_artifacts)

[]


In [None]:
import shutil

_schema_filename = 'schema.pbtxt'
SCHEMA_PATH = 'schema'

os.makedirs(SCHEMA_PATH, exist_ok=True)
_generated_path = os.path.join(schema_artifacts[0].uri, _schema_filename)

# Copy the 'schema.pbtxt' file from the artifact uri to a predefined path.
shutil.copy(_generated_path, SCHEMA_PATH)

In [None]:
import tensorflow as tf
import json

model_dirs = (item for item in os.scandir(SERVING_MODEL_DIR) if item.is_dir())

model_path = max(model_dirs, key=lambda i: int(i.name)).path
loaded_model = tf.saved_model.load(model_path)
inference_fn = loaded_model.signatures['serving_default']

features = {
      "ARRIVAL": tf.train.Feature(bytes_list=tf.train.BytesList(value=[b"22/July"])),
      "DEPARTURE": tf.train.Feature(bytes_list=tf.train.BytesList(value=[b"22/July"])),
      "DISTANCE": tf.train.Feature(float_list=tf.train.FloatList(value=[3206.92])),
      "ADULTS": tf.train.Feature(int64_list=tf.train.Int64List(value=[1])),
      "CHILDREN": tf.train.Feature(int64_list=tf.train.Int64List(value=[0])),
      "INFANTS": tf.train.Feature(int64_list=tf.train.Int64List(value=[0])),
      "TRAIN": tf.train.Feature(bytes_list=tf.train.BytesList(value=[b"FALSE"])),
      "GDS": tf.train.Feature(int64_list=tf.train.Int64List(value=[1])),
      "TRIP_TYPE": tf.train.Feature(bytes_list=tf.train.BytesList(value=[b"TRIP_TYPE"])),
      "HAUL_TYPE": tf.train.Feature(bytes_list=tf.train.BytesList(value=[b"DOMESTIC"])),
      "NO_GDS": tf.train.Feature(int64_list=tf.train.Int64List(value=[0])),
      "WEBSITE": tf.train.Feature(bytes_list=tf.train.BytesList(value=[b"EDES"])),
      "PRODUCT": tf.train.Feature(bytes_list=tf.train.BytesList(value=[b"TRIP"])),
      "SMS": tf.train.Feature(bytes_list=tf.train.BytesList(value=[b"FALSE"])),      
      "DEVICE": tf.train.Feature(bytes_list=tf.train.BytesList(value=[b"COMPUTER"])),
    }


example_proto = tf.train.Example(features=tf.train.Features(feature=features))
examples = example_proto.SerializeToString()
examples=tf.constant([examples])
result = inference_fn(examples=examples)
print(result['output_0'].numpy())

In [215]:
import tensorflow as tf
import json

model_dirs = (item for item in os.scandir(SERVING_MODEL_DIR) if item.is_dir())

model_path = max(model_dirs, key=lambda i: int(i.name)).path
loaded_model = tf.saved_model.load(model_path)
inference_fn = loaded_model.signatures['serving_rest']
departure_t = tf.constant("22/jul/22", dtype=tf.string, shape=(1,1))
arrival_t = tf.constant("22/jul/22", dtype=tf.string, shape=(1,1))

result = inference_fn(departure=departure_t, arrival=arrival_t)
print(result['output_0'].numpy())


[[[0.00018514]]]


In [None]:
import tensorflow as tf
try: # detect TPUs
  tpu = tf.distribute.cluster_resolver.TPUClusterResolver() # TPU detection
  tf.config.experimental_connect_to_cluster(tpu)
  tf.tpu.experimental.initialize_tpu_system(tpu)
  strategy = tf.distribute.experimental.TPUStrategy(tpu)
except ValueError: # detect GPUs
  strategy = tf.distribute.MirroredStrategy() # for GPU or multi-GPU machines
  #strategy = tf.distribute.get_strategy() # default strategy that works on CPU and single GPU
  #strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() # for clusters of multi-GPU machines

print("Number of accelerators: ", strategy.num_replicas_in_sync)

In [None]:
t = tf.Variable(['a'], dtype=tf.string, shape=(None,1))
print(t.shape)
fill_tensor = tf.fill(t.shape, '')
print(fill_tensor)

In [None]:
features = {
      "ARRIVAL": "22/July",
      "TRAIN": "FALSE",
      "CHILDREN": 0,
      "ADULTS": 1,
      "INFANTS": 0,
      "GDS": 1,
      "TRIP_TYPE": "TRIP",
      "DISTANCE": 3206.92,
      "DEVICE": "COMPUTER",
      "NO_GDS": 0,
      "HAUL_TYPE": "DOMESTIC",
      "WEBSITE": "EDES",
      "DEPARTURE": "22/July",
      "PRODUCT": "TRIP",
      "SMS": "FALSE",      
    }

    tf.TensorSpec(shape=[None], dtype=tf.string, name='ARRIVAL'),
      tf.TensorSpec(shape=[None], dtype=tf.string, name='TRAIN'),
      tf.TensorSpec(shape=[None], dtype=tf.int32, name='CHILDREN'),
      tf.TensorSpec(shape=[None], dtype=tf.int32, name='ADULTS'),
      tf.TensorSpec(shape=[None], dtype=tf.int32, name='INFANTS'),
      tf.TensorSpec(shape=[None], dtype=tf.int32, name='GDS'),
      tf.TensorSpec(shape=[None], dtype=tf.string, name='TRIP_TYPE'),
      tf.TensorSpec(shape=[None], dtype=tf.float32, name='DISTANCE'),
      tf.TensorSpec(shape=[None], dtype=tf.string, name='DEVICE'),
      tf.TensorSpec(shape=[None], dtype=tf.int32, name='NO_GDS'),
      tf.TensorSpec(shape=[None], dtype=tf.string, name='HAUL_TYPE'),
      tf.TensorSpec(shape=[None], dtype=tf.int32, name='WEBSITE'),
      tf.TensorSpec(shape=[None], dtype=tf.string, name='DEPARTURE'),
      tf.TensorSpec(shape=[None], dtype=tf.string, name='PRODUCT'),
      tf.TensorSpec(shape=[None], dtype=tf.string, name='SMS'),

In [None]:
f = tf.train.Feature(bytes_list=tf.train.BytesList(value=[b"22/July"]))
features = {
    'ARRIVAL': tf.train.Feature(bytes_list=tf.train.BytesList(value=[b"22/July"]))
}
features=tf.train.Features(feature=features)
example = tf.train.Example(features=features)

print(example)