In [None]:
import torch
import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException

from transformers import BertTokenizer

Inference on ResNet

In [None]:
# Connection settings: the Triton server is assumed to be running on this
# machine, and the HTTP client is pointed at it.
URL = "localhost:8000"
VERBOSE = False

# Name of the served model that the requests below are sent to.
model_name = "resnet"
triton_client = httpclient.InferenceServerClient(verbose=VERBOSE, url=URL)

In [None]:
# Generate random data.
# A dedicated, seeded generator makes the dummy input identical on every
# "Restart & Run All" without modifying torch's global RNG state.
# NOTE(review): shape is presumably (batch, channels, height, width) -- confirm
# against the served model's configuration.
SEED = 0
img = torch.rand(1, 3, 160, 160, generator=torch.Generator().manual_seed(SEED))

# Convert input to numpy; the request below is filled via set_data_from_numpy.
img = img.numpy()

In [None]:
# Describe the request tensor and attach the image data to it.
inputs = [
    httpclient.InferInput(name="INPUT__0", shape=img.shape, datatype="FP32"),
]
inputs[0].set_data_from_numpy(img, binary_data=False)

# Ask the server to return the tensor named OUTPUT__0.
outputs = [httpclient.InferRequestedOutput(name="OUTPUT__0")]

# Run inference and keep only the requested output as a numpy array.
result = triton_client.infer(
    model_name=model_name, inputs=inputs, outputs=outputs
).as_numpy("OUTPUT__0")

Inference on BERT

In [None]:
# Tokenizer loaded from the pretrained "bert-base-uncased" checkpoint.
enc = BertTokenizer.from_pretrained("bert-base-uncased")

# Same endpoint settings as the earlier section, now targeting the model
# named 'bert'.
URL, VERBOSE = "localhost:8000", False
model_name = "bert"
triton_client = httpclient.InferenceServerClient(verbose=VERBOSE, url=URL)

In [None]:

# Sample sentence with the [CLS]/[SEP] markers written out by hand.
text = "[CLS] My profile picture is an orange cat [SEP]"

# Split the sentence into tokens, then look up the id of each token.
tokenized_text = enc.tokenize(text)
indexed_tokens = enc.convert_tokens_to_ids(tokenized_text)

# Segment ids: one zero per token.
segments_ids = [0 for _ in tokenized_text]


In [None]:
# Store each id list as an int32 array with a leading axis of size 1.
token_np = np.expand_dims(np.array(indexed_tokens, dtype="int32"), axis=0)
mask_np = np.expand_dims(np.array(segments_ids, dtype="int32"), axis=0)

In [None]:
# Two INT32 request tensors: token ids first, segment ids second.
inputs = [
    httpclient.InferInput(name="INPUT__0", shape=token_np.shape, datatype="INT32"),
    httpclient.InferInput(name="INPUT__1", shape=mask_np.shape, datatype="INT32"),
]
inputs[0].set_data_from_numpy(token_np, binary_data=False)
inputs[1].set_data_from_numpy(mask_np, binary_data=False)

# Request both output tensors from the server.
outputs = [
    httpclient.InferRequestedOutput(name=output_name)
    for output_name in ("OUTPUT__0", "OUTPUT__1")
]

result = triton_client.infer(model_name=model_name, inputs=inputs, outputs=outputs)

# Pull each requested output out of the response as a numpy array.
text_emb = result.as_numpy("OUTPUT__0")
pool_output = result.as_numpy("OUTPUT__1")