In [1]:
import os
import numpy
import subprocess
import sys
import time

import tritonclient.http as triton_http
import tritonclient.grpc as triton_grpc

In [20]:
http_port_cmd = "kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name==\"http2\")].port}'"
grpc_port_cmd = "kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name==\"tcp\")].port}'"
host_cmd = "kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.status.loadBalancer.ingress[0].ip}'"

http_port = subprocess.check_output(http_port_cmd.split()).decode('utf-8').replace("'", "")
grpc_port = subprocess.check_output(grpc_port_cmd.split()).decode('utf-8').replace("'", "")
host = subprocess.check_output(host_cmd.split()).decode('utf-8').replace("'", "")

print(host, http_port)
print(host, grpc_port)

35.232.0.166 80
35.232.0.166 31400


In [3]:
# Set up both HTTP and GRPC clients. Note that the GRPC client is generally
# somewhat faster.

# Generate dummy data to classify
features = 500
samples = 10_000
data = numpy.random.rand(samples, features).astype('float32')

### HTTP Client Example

In [13]:
http_client = triton_http.InferenceServerClient(
    url=f'{host}:{http_port}',
    verbose=False,
    concurrency=12
)

while (not (http_client.is_server_ready() or http_client.is_model_ready('fil'))):
    print("Waiting on server ready")
    time.sleep(5)
print(f"Is Server Ready: {http_client.is_server_ready()}")
print(f"Is FIL model ready: {http_client.is_model_ready('fil')}")

Is Server Ready: True
Is FIL model ready: True


In [14]:
# Set up Triton input and output objects for both HTTP and GRPC
triton_input_http = triton_http.InferInput(
    'input__0',
    (samples, features),
    'FP32'
)

triton_input_http.set_data_from_numpy(data, binary_data=True)
triton_output_http = triton_http.InferRequestedOutput(
    'output__0',
    binary_data=True
)

# Submit inference requests (both HTTP and GRPC)
request_http = http_client.infer(
    'fil',
    model_version='1',
    inputs=[triton_input_http],
    outputs=[triton_output_http]
)

In [15]:
result_http = request_http.as_numpy('output__0')
result_http

array([[0.67993677, 0.32006323],
       [0.64918005, 0.35081992],
       [0.58112514, 0.4188749 ],
       ...,
       [0.51727444, 0.48272556],
       [0.8467938 , 0.15320626],
       [0.93336314, 0.06663687]], dtype=float32)

### gRPC Client Example.

In [12]:
grpc_client = triton_grpc.InferenceServerClient(
    url=f'{host}:{grpc_port}',
    verbose = False
)

while (not (grpc_client.is_server_ready() or grpc_client.is_model_ready('fil'))):
    print("Waiting on server ready")
    time.sleep(5)
    
print(f"Is Server Ready: {grpc_client.is_server_ready()}")
print(f"Is FIL model ready: {grpc_client.is_model_ready('fil')}")

Is Server Ready: True
Is FIL model ready: True


In [16]:
data = numpy.random.rand(samples, features).astype('float32')
triton_input_grpc = triton_grpc.InferInput(
    'input__0',
    [samples, features],
    'FP32'
)

triton_input_grpc.set_data_from_numpy(data)
triton_output_grpc = triton_grpc.InferRequestedOutput('output__0')

request_grpc = grpc_client.infer(
    'fil',
    model_version='1',
    inputs=[triton_input_grpc],
    outputs=[triton_output_grpc]
)

In [17]:
result_grpc = request_grpc.as_numpy('output__0')
result_grpc

array([[0.360883  , 0.639117  ],
       [0.6957687 , 0.30423132],
       [0.70181334, 0.2981867 ],
       ...,
       [0.82927394, 0.17072603],
       [0.43887025, 0.56112975],
       [0.28556466, 0.71443534]], dtype=float32)

### Compare Results

In [18]:
# Check that we got the same result with both GRPC and HTTP
numpy.testing.assert_almost_equal(result_http, result_grpc)

AssertionError: 
Arrays are not almost equal to 7 decimals

Mismatched elements: 20000 / 20000 (100%)
Max absolute difference: 0.82936734
Max relative difference: 46.14017
 x: array([[0.6799368, 0.3200632],
       [0.6491801, 0.3508199],
       [0.5811251, 0.4188749],...
 y: array([[0.360883 , 0.639117 ],
       [0.6957687, 0.3042313],
       [0.7018133, 0.2981867],...