In [14]:
import os
import numpy
import subprocess
import sys

import tritonclient.http as triton_http
import tritonclient.grpc as triton_grpc

In [15]:
port_cmd = "kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name==\"http2\")].port}'"
host_cmd = "kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.status.loadBalancer.ingress[0].ip}'"

port = subprocess.check_output(port_cmd.split()).decode('utf-8').replace("'", "")
host = subprocess.check_output(host_cmd.split()).decode('utf-8').replace("'", "")

In [16]:
# Set up both HTTP and GRPC clients. Note that the GRPC client is generally
# somewhat faster.

# Generate dummy data to classify
features = 500
samples = 10_000
data = numpy.random.rand(samples, features).astype('float32')

In [None]:
http_client = triton_http.InferenceServerClient(
    url=f'{host}:{port}',
    verbose=False,
    concurrency=12
)

print(f"Is Server Ready: {http_client.is_server_ready()}")
print(f"Is FIL model ready: {http_client.is_model_ready('fil')}")

In [None]:
# Set up Triton input and output objects for both HTTP and GRPC
triton_input_http = triton_http.InferInput(
    'input__0',
    (samples, features),
    'FP32'
)

triton_input_http.set_data_from_numpy(data, binary_data=True)
triton_output_http = triton_http.InferRequestedOutput(
    'output__0',
    binary_data=True
)

# Submit inference requests (both HTTP and GRPC)
request_http = http_client.infer(
    'fil',
    model_version='1',
    inputs=[triton_input_http],
    outputs=[triton_output_http]
)

In [5]:
result_http = request_http.as_numpy('output__0')
result_http

array([[0.27678204, 0.72321796],
       [0.20834464, 0.79165536],
       [0.5546981 , 0.44530186],
       ...,
       [0.53438294, 0.4656171 ],
       [0.3188979 , 0.6811021 ],
       [0.74689895, 0.25310105]], dtype=float32)

In [18]:
grpc_client = triton_grpc.InferenceServerClient(
    url=f'{host}:{port}',
    verbose = False
)

In [35]:
print(f"Is Server Ready: {grpc_client.is_server_ready()}")
print(f"Is FIL model ready: {grpc_client.is_model_ready('fil')}")
metadata = grpc_client.get_model_metadata('fil')

print(metadata)

Is Server Ready: True
Is FIL model ready: True
name: "fil"
versions: "1"
platform: "fil"
inputs {
  name: "input__0"
  datatype: "FP32"
  shape: -1
  shape: 500
}
outputs {
  name: "output__0"
  datatype: "FP32"
  shape: -1
  shape: 2
}



In [39]:
data = numpy.random.rand(samples, features).astype('float32')
triton_input_grpc = triton_grpc.InferInput(
    'input__0',
    [samples, features],
    'FP32'
)

triton_input_grpc.set_data_from_numpy(data)
triton_output_grpc = triton_grpc.InferRequestedOutput('output__0')

request_grpc = grpc_client.infer(
    'fil',
    model_version='1',
    inputs=[triton_input_grpc],
    outputs=[triton_output_grpc]
)

In [41]:
result_grpc = request_grpc.as_numpy('output__0')
result_grpc

array([[0.26589543, 0.7341046 ],
       [0.6134304 , 0.38656965],
       [0.706275  , 0.293725  ],
       ...,
       [0.6513818 , 0.3486182 ],
       [0.7315393 , 0.2684607 ],
       [0.5951021 , 0.4048979 ]], dtype=float32)

In [None]:
# Check that we got the same result with both GRPC and HTTP
numpy.testing.assert_almost_equal(result_http, result_grpc)