### Query an Ensemble model with Triton over gRPC

In [1]:
!pip -q install tritonclient


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
# Imports and parameters (adapt to your endpoint and model)
import numpy as np
import tritonclient.grpc as grpcclient
import subprocess

# Name of the model to query on the server
model_name = "ensemble_merger_google_xgb"

# Hostname of the inference endpoint (no scheme, no port)
host = "ensemble-kserve-triton-ensemble.apps.cluster-kgqjf.dynamic.redhatworkshops.io"
# Port of the inference endpoint
port = 443
# Whether to open the connection with SSL/TLS
ssl_connection = True

In [3]:
# With a self-signed certificate the client cannot rely on the system trust store,
# so the certificate presented by the server is fetched and saved locally.
def save_cert_chain(url, port, filename):
    """Fetch the certificate served at ``url:port`` and save it to ``filename`` as PEM.

    The work is done by two ``openssl`` invocations: ``s_client -showcerts``
    performs the TLS handshake and prints what the server presented, and
    ``x509 -outform PEM`` re-encodes it. ``openssl x509`` only reads the first
    certificate of its input, so the file ends up holding that single
    certificate (the first one printed by ``s_client``), not the whole chain.

    The commands are run without a shell, so ``url``, ``port`` and ``filename``
    are never interpreted as shell syntax.

    Args:
        url: Host name of the server (no scheme).
        port: TCP port of the server.
        filename: Path of the PEM file to write.

    Raises:
        subprocess.CalledProcessError: If no certificate could be extracted
            (for instance because the connection failed).
        FileNotFoundError: If the ``openssl`` executable is not available.
    """
    # The handshake output is parsed by the next step; its exit status is
    # deliberately not checked, a failed connection surfaces there as
    # "no certificate found". The single newline on stdin mirrors `echo |`
    # and makes s_client terminate right after the handshake.
    handshake = subprocess.run(
        ["openssl", "s_client", "-showcerts", "-connect", f"{url}:{port}"],
        input=b"\n",
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        check=False,
    )

    # Re-encode the first certificate found in the handshake output as PEM
    pem_certificate = subprocess.run(
        ["openssl", "x509", "-outform", "PEM"],
        input=handshake.stdout,
        stdout=subprocess.PIPE,
        check=True,
    ).stdout

    # Only touch the target file once a certificate was actually obtained
    with open(filename, "wb") as cert_file:
        cert_file.write(pem_certificate)

In [4]:
# Initialize client
if ssl_connection:
    # Fetch the certificate from the configured endpoint (same host AND port the
    # client connects to) and use it as the trusted root for the TLS channel.
    root_certificates = 'cert_chain.pem'
    save_cert_chain(host, port, root_certificates)
else:
    root_certificates = None

# The client expects the target as "host:port"; passing the port explicitly
# keeps the `port` parameter effective instead of relying on the gRPC default.
triton_client = grpcclient.InferenceServerClient(
    url=f"{host}:{port}",
    ssl=ssl_connection,
    root_certificates=root_certificates,
)

In [5]:
# Prepare input and output objects

# Build a deterministic test vector: the 63 values 0.0, 1.0, ..., 62.0 as float32
input_data = np.arange(start=0, stop=63, dtype=np.float32)

# Expand the input into a batch (size=1)
input_data = np.expand_dims(input_data, axis=0)

# Declare the input tensor with the shape of the actual data, so the declared
# shape and the payload cannot drift apart
inputs = [grpcclient.InferInput("INPUT", list(input_data.shape), "FP32")]
outputs = [grpcclient.InferRequestedOutput("OUTPUT")]

# Attach the data to the input tensor
inputs[0].set_data_from_numpy(input_data)

<tritonclient.grpc._infer_input.InferInput at 0x7f0834a9dbb0>

In [6]:
# Send the prepared tensors to the model and keep the server's response
results = triton_client.infer(
    model_name=model_name,
    inputs=inputs,
    outputs=outputs,
    headers={"test": "1"},
)

In [7]:
# Get and display results
# Extract the tensor named "OUTPUT" from the response as a NumPy array
output_data = results.as_numpy("OUTPUT")
# Bare trailing expression: the notebook displays the array as the cell output
output_data

array([[-2.8066086e+03,  2.8076086e+03,  1.0000000e+00,  0.0000000e+00,
         1.0000000e+00,  0.0000000e+00,  1.0000000e+00]], dtype=float32)

In [8]:
# Get and display statistics from the server
# Request the inference statistics the server reports for this model
# NOTE(review): the name `statistics` would shadow the standard-library module
# of the same name if that module were ever imported in this notebook.
statistics = triton_client.get_inference_statistics(model_name=model_name)
print(statistics)

model_stats {
  name: "ensemble_merger_google_xgb"
  version: "1"
  last_inference: 1709756530871
  inference_count: 5
  execution_count: 5
  inference_stats {
    success {
      count: 5
      ns: 87987477
    }
    fail {
    }
    queue {
      count: 5
      ns: 19785
    }
    compute_input {
      count: 5
      ns: 1393285
    }
    compute_infer {
      count: 5
      ns: 78572193
    }
    compute_output {
      count: 5
      ns: 3233917
    }
    cache_hit {
    }
    cache_miss {
    }
  }
  batch_stats {
    batch_size: 1
    compute_input {
      count: 5
      ns: 1393285
    }
    compute_infer {
      count: 5
      ns: 78572193
    }
    compute_output {
      count: 5
      ns: 3233917
    }
  }
}

