# Consume native Keras model served by TF-Serving

This notebook shows the client code needed to consume a native Keras model served by TensorFlow Serving. The TensorFlow Serving model server must be started beforehand with the following command:

    bazel-bin/tensorflow_serving/model_servers/tensorflow_model_server \
        --port=9000 --model_name=keras-mnist-fcn \
        --model_base_path=/home/sujit/Projects/polydlot/data/tf-export/keras-mnist-fcn

In [21]:
from __future__ import division, print_function
from google.protobuf import json_format
from grpc.beta import implementations
from sklearn.preprocessing import OneHotEncoder
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2
import json
import os
import sys
import threading
import time
import numpy as np
import tensorflow as tf

In [2]:
# Maximum number of requests allowed in flight at once; passed to
# _ResultCounter, whose throttle() blocks when this many are active.
CONCURRENCY = 1
# NUM_TESTS = 10
# Number of Predict requests issued by the client loop.
NUM_TESTS = 1
# Address of the TF-Serving gRPC endpoint (must match --port in the
# server command shown at the top of the notebook).
SERVER_HOST = "localhost"
SERVER_PORT = 9000
# NOTE(review): WORK_DIR is not referenced in the visible cells -- confirm
# whether it is still needed.
WORK_DIR = "/tmp"

# Location of the MNIST test split in CSV form (label followed by pixels).
DATA_DIR = "../../data"
TEST_FILE = os.path.join(DATA_DIR, "mnist_test.csv")

# Images are IMG_SIZE x IMG_SIZE pixels, flattened by parse_file().
IMG_SIZE = 28
# NOTE(review): NUM_CLASSES and BATCH_SIZE are not referenced in the visible
# cells -- presumably kept in sync with the training notebook; confirm.
NUM_CLASSES = 10
BATCH_SIZE = 1

In [3]:
def parse_file(filename, img_size=None):
    """Load an MNIST-style CSV file into feature and label arrays.

    Every non-empty line must hold the integer class label followed by the
    pixel intensities, all comma separated. Pixel values are divided by 255
    so that 0-255 intensities end up in the [0, 1] range.

    Args:
        filename: path of the CSV file to read.
        img_size: side length of the square image; each row must contain
            exactly img_size * img_size pixel values. Defaults to the
            notebook-level IMG_SIZE constant.

    Returns:
        Tuple (X, y) where X is a float32 array of shape
        (num_rows, img_size * img_size) and y is an int32 array of shape
        (num_rows,).

    Raises:
        ValueError: if a label or pixel is not numeric, or a row does not
            contain exactly img_size * img_size pixels.
    """
    if img_size is None:
        img_size = IMG_SIZE
    num_pixels = img_size * img_size
    basename = os.path.basename(filename)
    xdata, ydata = [], []
    num_read = 0
    # Text mode keeps lines as str on both Python 2 and 3, and the with-block
    # closes the file even when a malformed row raises.
    with open(filename, "r") as fin:
        for line in fin:
            line = line.strip()
            if not line:
                # Tolerate blank / trailing empty lines instead of crashing
                # on int('').
                continue
            if num_read % 10000 == 0:
                print("{:s}: {:d} lines read".format(basename, num_read))
            cols = line.split(",")
            ydata.append(int(cols[0]))
            # The reshape doubles as a check that the row has the expected
            # number of pixels.
            xdata.append(np.reshape(
                np.array([float(x) / 255. for x in cols[1:]]),
                (num_pixels,)))
            num_read += 1
    print("{:s}: {:d} lines read".format(basename, num_read))
    y = np.array(ydata, dtype="int32")
    # reshape(-1, num_pixels) is a no-op for non-empty input and keeps X
    # two-dimensional when the file contains no rows.
    X = np.array(xdata, dtype="float32").reshape(-1, num_pixels)
    return X, y

# Load the MNIST test split once; Xtest holds the flattened, scaled images and
# ytest the integer labels. The client cell below reads from Xtest.
Xtest, ytest = parse_file(TEST_FILE)
print(Xtest.shape, ytest.shape)

mnist_test.csv: 0 lines read
mnist_test.csv: 10000 lines read
(10000, 784) (10000,)


In [4]:
class _ResultCounter(object):
    """ Counter for prediction results """
    def __init__(self, num_tests, concurrency):
        self._num_tests = num_tests
        self._concurrency = concurrency
        self._error = 0
        self._done = 0
        self._active = 0
        self._results = []
        self._condition = threading.Condition()

    def inc_error(self):
        with self._condition:
            self._error += 1
            
    def inc_done(self):
        with self._condition:
            self._done += 1
            self._condition.notify()

    def dec_active(self):
        with self._condition:
            self._active -= 1
            self._condition.notify()
    
    def add_result(self, result):
        with self._condition:
            self._results.append(result)
            self._condition.notify()
            
    def get_error_rate(self):
        with self._condition:
            while self._done != self._num_tests:
                self._condition.wait()
        return self._error / float(self._num_tests)

    def throttle(self):
        with self._condition:
            while self._active == self._concurrency:
                self._condition.wait()
            self._active += 1


def _create_rpc_callback(image, label, result_counter):
    def _callback(result_future):
        print("image", image.shape, "label", label.shape)
        exception = result_future.exception()
        if exception:
            result_counter.inc_error()
            print(exception)
        else:
            sys.stdout.write('.')
            sys.stdout.flush()
        response = numpy.array(result_future.result().outputs['scores'].float_val)
        prediction = numpy.argmax(response)
#         label_ = numpy.argmax(label)
        if label != prediction:
            result_counter.inc_error()
        result_counter.add_result((image, label, prediction))
        result_counter.inc_done()
        result_counter.dec_active()
    return _callback

In [5]:
# Open an insecure gRPC channel to the model server and build the
# PredictionService stub used to issue requests.
channel = implementations.insecure_channel(SERVER_HOST, SERVER_PORT)
stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
result_counter = _ResultCounter(NUM_TESTS, CONCURRENCY)
# NOTE(review): this sets the Keras learning phase in the client process;
# presumably carried over from the export notebook -- confirm it is needed.
tf.contrib.keras.backend.set_learning_phase(False)
for i in range(NUM_TESTS):
    # Send a different test image per request (wraps around if NUM_TESTS
    # exceeds the number of test rows).
    image = Xtest[i % len(Xtest)]

    request = predict_pb2.PredictRequest()
    request.model_spec.name = "keras-mnist-fcn"
    request.model_spec.signature_name = "predict"
    # The request carries a batch of one flattened image: shape [1, num_pixels].
    request.inputs["images"].CopyFrom(
        tf.contrib.util.make_tensor_proto(image, shape=[1, image.size]))

    result_counter.throttle()
    try:
        # Synchronous call; the second argument is the RPC timeout.
        result = stub.Predict(request, 10.0)
        print(result)
    finally:
        # throttle() claimed an in-flight slot. Since the call is synchronous
        # (no done-callback), release it here, otherwise the loop blocks
        # forever once NUM_TESTS exceeds CONCURRENCY.
        result_counter.inc_done()
        result_counter.dec_active()


outputs {
  key: "scores"
  value {
    dtype: DT_FLOAT
    tensor_shape {
      dim {
        size: 1
      }
      dim {
        size: 10
      }
    }
    float_val: 6.40141717412e-10
    float_val: 3.63637688849e-08
    float_val: 3.10336105258e-07
    float_val: 6.09695644016e-06
    float_val: 1.643126607e-10
    float_val: 1.38603717392e-09
    float_val: 2.09329970892e-12
    float_val: 0.999953389168
    float_val: 5.23174072953e-08
    float_val: 4.01516335842e-05
  }
}



In [28]:
# Convert the PredictResponse protobuf to a plain dict by going through its
# JSON form; in that form the float_val field appears under the key "floatVal".
message = json.loads(json_format.MessageToJson(result))
# print(message)
# Flat vector of the returned scores (one value per class for the single
# image that was sent).
y_ = np.array(message["outputs"]["scores"]["floatVal"], dtype="float32")
print(y_.shape, y_)
# Predicted class = index of the highest score.
y = np.argmax(y_)
print(y)

(10,) [  6.40141717e-10   3.63637689e-08   3.10336105e-07   6.09695644e-06
   1.64312661e-10   1.38603717e-09   2.09329971e-12   9.99953389e-01
   5.23174073e-08   4.01516336e-05]
7
