# Triton Client

Note: just exploratory code

## Http Client

In [101]:
import tritonclient.http as httpclient

# # Note: not all methods available via Executor
# url = "localhost:8003/seldon/seldon/v2-triton-mnist"
url = "localhost:8000"

http_triton_client = httpclient.InferenceServerClient(
    url=url,
    verbose=False,
    concurrency=1,
)

In [102]:
print("Is Server Ready:", http_triton_client.is_server_ready())
print("Is Server Live:", http_triton_client.is_server_live())
print("Server Metadata:", http_triton_client.get_server_metadata())
print("MNIST model ready:", http_triton_client.is_model_ready("mnist"))

Is Server Ready: True
Is Server Live: True
Server Metadata: {'name': 'triton', 'version': '2.13.0', 'extensions': ['classification', 'sequence', 'model_repository', 'model_repository(unload_dependents)', 'schedule_policy', 'model_configuration', 'system_shared_memory', 'cuda_shared_memory', 'binary_tensor_data', 'statistics']}
MNIST model ready: True


In [103]:
print(http_triton_client.get_model_metadata("mnist"))

{'name': 'mnist', 'versions': ['1'], 'platform': 'tensorflow_savedmodel', 'inputs': [{'name': 'conv2d_input', 'datatype': 'FP32', 'shape': [-1, 28, 28, 1]}], 'outputs': [{'name': 'dense_1', 'datatype': 'FP32', 'shape': [-1, 10]}]}


In [104]:
# print(http_triton_client.get_model_config("mnist"))

In [105]:
import numpy as np

binary_data = False

inputs = [httpclient.InferInput("conv2d_input", (5, 28, 28, 1), "FP32")]
inputs[0].set_data_from_numpy(np.random.rand(5, 28, 28, 1).astype("float32"), binary_data=binary_data)
outputs = [httpclient.InferRequestedOutput("dense_1", binary_data=binary_data)]


result = http_triton_client.infer("mnist", inputs, outputs=outputs)
result.as_numpy("dense_1")

array([[7.8109936e-03, 1.6749991e-04, 4.3998000e-01, 2.5843871e-03,
        2.8441547e-04, 3.8541239e-04, 1.9684564e-03, 7.0706295e-04,
        5.4516971e-01, 9.4206532e-04],
       [4.1001481e-03, 2.8203041e-04, 3.2225168e-01, 7.0531778e-03,
        1.4055093e-03, 5.2019940e-03, 2.2412206e-03, 2.0930546e-03,
        6.5015137e-01, 5.2198074e-03],
       [2.3214463e-02, 8.1608095e-04, 2.8792626e-01, 9.2498166e-03,
        1.8826844e-03, 1.3457134e-02, 1.5644446e-02, 2.6651174e-03,
        6.4210683e-01, 3.0372196e-03],
       [1.4550821e-03, 3.5082394e-05, 1.4485511e-02, 2.3372934e-04,
        3.4631207e-04, 4.3974174e-04, 1.8960210e-04, 2.7477610e-04,
        9.8204190e-01, 4.9826084e-04],
       [6.3246260e-03, 3.1383260e-04, 3.7626114e-02, 5.0798138e-03,
        8.4333651e-04, 5.0137374e-03, 3.2386994e-03, 1.8026867e-03,
        9.1177511e-01, 2.7981961e-02]], dtype=float32)

## Grpc Client

In [106]:
import tritonclient.grpc as grpcclient

# # Note: not all methods available via Executor
# headers = {"seldon": "v2-triton-mnist", "namespace": "seldon"}
# url = "localhost:8003"

url = "localhost:8001"
headers = dict()

grpc_triton_client = grpcclient.InferenceServerClient(
    url=url,
    verbose=False,
)

In [107]:
print("Is Server Ready:", grpc_triton_client.is_server_ready(headers=headers))
print("Is Server Live:", grpc_triton_client.is_server_live(headers=headers))
grpc_triton_client.get_server_metadata(headers=headers)

Is Server Ready: True
Is Server Live: True


name: "triton"
version: "2.13.0"
extensions: "classification"
extensions: "sequence"
extensions: "model_repository"
extensions: "model_repository(unload_dependents)"
extensions: "schedule_policy"
extensions: "model_configuration"
extensions: "system_shared_memory"
extensions: "cuda_shared_memory"
extensions: "binary_tensor_data"
extensions: "statistics"

In [108]:

print("MNIST model ready:", grpc_triton_client.is_model_ready("mnist", headers=headers))

MNIST model ready: True


In [109]:
grpc_triton_client.get_model_metadata("mnist", headers=headers)

name: "mnist"
versions: "1"
platform: "tensorflow_savedmodel"
inputs {
  name: "conv2d_input"
  datatype: "FP32"
  shape: -1
  shape: 28
  shape: 28
  shape: 1
}
outputs {
  name: "dense_1"
  datatype: "FP32"
  shape: -1
  shape: 10
}

In [110]:
import numpy as np

inputs = [grpcclient.InferInput("conv2d_input", (5, 28, 28, 1), "FP32")]
inputs[0].set_data_from_numpy(np.random.rand(5, 28, 28, 1).astype("float32"))
outputs = [grpcclient.InferRequestedOutput("dense_1")]


result = grpc_triton_client.infer("mnist", inputs, outputs=outputs, headers=headers)
result.as_numpy("dense_1")

array([[1.5252707e-02, 2.6560088e-03, 6.3741118e-01, 2.2145616e-01,
        3.6019515e-04, 3.7998840e-02, 7.7487421e-03, 6.9566141e-03,
        6.8815462e-02, 1.3439893e-03],
       [1.2870033e-02, 9.8890497e-04, 5.9165972e-01, 7.4792365e-03,
        3.5227824e-03, 1.1102542e-03, 1.7923594e-03, 9.7923726e-03,
        3.6278358e-01, 8.0007548e-03],
       [5.8509228e-03, 1.9079042e-04, 3.9114609e-02, 4.9245800e-04,
        7.5394701e-04, 2.1845701e-03, 7.2474396e-03, 3.6165887e-04,
        9.4347650e-01, 3.2706687e-04],
       [8.9087533e-03, 8.0969045e-04, 8.5668162e-02, 3.7207045e-03,
        6.1010523e-04, 2.6622151e-03, 1.7552607e-03, 3.2632998e-03,
        8.9178741e-01, 8.1442646e-04],
       [4.9480617e-02, 1.2994569e-04, 5.2092540e-01, 1.7507779e-03,
        1.5797348e-04, 6.0777506e-04, 8.1396336e-04, 2.5525091e-03,
        4.2290476e-01, 6.7631720e-04]], dtype=float32)