# Step 8: Use model to perform inference

Use example data stored on disk to perform inference with your model by sending REST requests to TensorFlow Serving.

In [0]:
"""A client for serving the chicago_taxi workshop example locally."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import base64
import json
import os
import subprocess
import tempfile

import requests
import tensorflow as tf
import tfx_utils
from tfx.utils import io_utils
from tensorflow_metadata.proto.v0 import schema_pb2

from tensorflow_transform import coders as tft_coders
from tensorflow_transform.tf_metadata import dataset_metadata
from tensorflow_transform.tf_metadata import dataset_schema
from tensorflow_transform.tf_metadata import schema_utils

from google.protobuf import text_format

from tensorflow.python.lib.io import file_io  # pylint: disable=g-direct-tensorflow-import
from tfx.examples.chicago_taxi.trainer import taxi

# Timeout, in seconds, passed to `requests.post` for the prediction request.
_INFERENCE_TIMEOUT_SECONDS = 5.0
# Pipeline name; used to locate the metadata store and as the served model name.
_PIPELINE_NAME = 'taxi'
# Name of the schema feature that `do_inference` filters out before building
# its coders.
_LABEL_KEY = 'tips'

The data that we will use to send requests to our model is stored on disk in [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) format; we will convert these examples to [TensorFlow Example](https://www.tensorflow.org/api_docs/python/tf/train/Example) protos to send to our model being served by TensorFlow Serving.

Construct the following two utility functions:

* `_make_proto_coder`, which creates a coder that encodes a tf.transform-encoded dict into a serialized TF Example.
* `_make_csv_coder`, which creates a coder that decodes a single row of the CSV data file into a tf.transform-encoded dict.



In [0]:
def _get_raw_feature_spec(schema):
  """Extracts the raw feature spec described by `schema`.

  Args:
    schema: the schema handed to `schema_utils.schema_as_feature_spec`.

  Returns:
    The `feature_spec` attribute of the converted schema.
  """
  converted = schema_utils.schema_as_feature_spec(schema)
  return converted.feature_spec


def _make_proto_coder(schema):
  """Builds an `ExampleProtoCoder` for the features described by `schema`.

  Args:
    schema: the schema whose raw feature spec defines the coder's fields.

  Returns:
    A `tft_coders.ExampleProtoCoder` constructed from that feature spec.
  """
  example_schema = dataset_schema.from_feature_spec(
      _get_raw_feature_spec(schema))
  return tft_coders.ExampleProtoCoder(example_schema)


def _make_csv_coder(schema, column_names):
  """Builds a `CsvCoder` for the features described by `schema`.

  Args:
    schema: the schema whose raw feature spec defines the parsed fields.
    column_names: the CSV column names, passed through to `CsvCoder`.

  Returns:
    A `tft_coders.CsvCoder` constructed from the column names and the schema's
    feature spec.
  """
  feature_spec = _get_raw_feature_spec(schema)
  return tft_coders.CsvCoder(
      column_names, dataset_schema.from_feature_spec(feature_spec))

Implement a routine that reads examples from a CSV file and sends a single inference request containing the base64 encoding of each serialized TF Example.

In [0]:
def do_inference(server_addr, model_name, examples_file, num_examples, schema):
  """Sends one batched prediction request to the model and prints the result.

  Reads up to `num_examples` rows from `examples_file`, converts each row into
  a serialized TF Example, and posts all of them in a single request to the
  model server's REST `:predict` endpoint. The JSON response is pretty-printed
  to stdout.

  Args:
    server_addr: network address of model server in "host:port" format
    model_name: name of the model as understood by the model server
    examples_file: path to csv file containing examples, with the first line
      assumed to have the column headers
    num_examples: maximum number of examples to read from the file and include
      in the request
    schema: a Schema describing the input data; it is left unmodified

  Returns:
    None. The prediction is printed rather than returned.

  Raises:
    requests.HTTPError: if the model server responds with an error status.
  """
  # Drop the label feature on a private copy of the schema so the caller's
  # object is not altered as a side effect of running inference.
  serving_schema = schema_pb2.Schema()
  serving_schema.CopyFrom(schema)
  del serving_schema.feature[:]
  serving_schema.feature.extend([
      feature for feature in schema.feature if feature.name != _LABEL_KEY
  ])

  column_names = io_utils.load_csv_column_names(examples_file)
  csv_coder = _make_csv_coder(serving_schema, column_names)
  proto_coder = _make_proto_coder(serving_schema)

  instances = []
  # The context manager guarantees the file is closed, even if decoding fails.
  with open(examples_file, 'r') as input_file:
    input_file.readline()  # skip header line
    for _ in range(num_examples):
      one_line = input_file.readline()
      if not one_line:
        print('End of example file reached')
        break
      serialized_example = proto_coder.encode(csv_coder.decode(one_line))
      # Binary values are sent as {"b64": <base64 string>}, following:
      # https://www.tensorflow.org/tfx/serving/api_rest
      instances.append(
          {'b64': base64.b64encode(serialized_example).decode('utf-8')})

  # `server_addr` is already "host:port", so it is used verbatim in the URL.
  server_url = 'http://{}/v1/models/{}:predict'.format(server_addr, model_name)
  response = requests.post(
      server_url,
      data=json.dumps({'instances': instances}),
      timeout=_INFERENCE_TIMEOUT_SECONDS)
  response.raise_for_status()
  print(json.dumps(response.json(), indent=4))

Open the metadata store, obtain the URI of your model's schema as inferred by TFDV, then fetch the schema file and parse it into a `Schema` object.

In [0]:
def _make_schema(pipeline_name):
  """Reads and constructs schema object for provided pipeline.

  Looks up the schema artifact in the pipeline's SQLite metadata store under
  `$HOME/airflow/tfx/metadata/<pipeline_name>/metadata.db`, then parses the
  `schema.pbtxt` file found at the artifact's URI.

  Args:
    pipeline_name: The name of the pipeline for which TFX Metadata Store has
      Schema.

  Returns:
    The parsed `schema_pb2.Schema`.

  Raises:
    RuntimeError: if more or fewer than one schema artifact was found for the
      given pipeline.
  """
  db_path = os.path.join(
      os.environ['HOME'], 'airflow/tfx/metadata/', pipeline_name, 'metadata.db')
  store = tfx_utils.TFXReadonlyMetadataStore.from_sqlite_db(db_path)
  schemas = store.get_artifacts_of_type_df(tfx_utils.TFXArtifactTypes.SCHEMA)

  # Raise explicitly instead of asserting: asserts are stripped under `-O`.
  num_schemas = len(schemas.URI)
  if num_schemas != 1:
    raise RuntimeError(
        'Expected exactly one schema artifact for pipeline %r, found %d.' %
        (pipeline_name, num_schemas))

  # os.path.join works whether or not the artifact URI ends with a separator.
  schema_uri = os.path.join(schemas.URI.iloc[0], 'schema.pbtxt')
  schema = schema_pb2.Schema()
  text_format.Parse(file_io.read_file_to_string(schema_uri), schema)
  return schema

Use the utilities that we have defined to send an inference request containing several examples to the model being served by TensorFlow Serving listening on the host's network interface.

In [0]:
# Request predictions for up to three rows of the example CSV from the model
# server listening on the loopback address.
do_inference(
    server_addr='127.0.0.1:8501',
    model_name=_PIPELINE_NAME,
    examples_file='/root/airflow/data/taxi_data/data.csv',
    num_examples=3,
    schema=_make_schema(_PIPELINE_NAME))