share the code for deploying TensorRT with the NVIDIA Triton Server. #3

Open
imenselmi opened this issue Aug 10, 2023 · 0 comments

@imenselmi

Please share the code for deploying TensorRT with the NVIDIA Triton Server.
I wrote the code below, but I ran into an issue with it:

import argparse
from concurrent.futures import ThreadPoolExecutor, wait
import time
import cv2
import tritonclient.http as httpclient
from tqdm import tqdm
from PIL import Image
import numpy as np


def test_infer(req_id, image_file, model_name, print_output=False):
    """img = np.array(Image.open(image_file))
    img = np.ascontiguousarray(img.transpose(2, 0, 1))"""
    img=cv2.imread(str(image_file))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.array(img, dtype=np.float32)
    img /= 255
    img = np.transpose(img, (2, 0, 1))
    # Define model's inputs
    inputs = []
    inputs.append(httpclient.InferInput('input_tensor', img.shape, "FP32"))
    print(inputs)
    inputs[0].set_data_from_numpy(img)
    # Define model's outputs
    outputs = []
    outputs.append(httpclient.InferRequestedOutput('detection_boxes_box_outputs'))
    outputs.append(httpclient.InferRequestedOutput('detection_classes_box_outputs'))
    outputs.append(httpclient.InferRequestedOutput('detection_masks'))
    outputs.append(httpclient.InferRequestedOutput('detection_scores_box_outputs'))
    outputs.append(httpclient.InferRequestedOutput('num_detections_box_outputs'))
    # Send request to Triton server
    triton_client = httpclient.InferenceServerClient(
        url="localhost:8000", verbose=False)
    results = triton_client.infer(model_name, inputs=inputs, outputs=outputs)
    response_info = results.get_response()
    outputs = {}
    for output_info in response_info['outputs']:
        output_name = output_info['name']
        outputs[output_name] = results.as_numpy(output_name)

    if print_output:
        print(req_id, outputs)


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--image', required=True)
    parser.add_argument('--model', required=True)
    parser.add_argument('--mode', default='sequential', choices=['sequential', 'concurrent'])
    parser.add_argument('--num-reqs', default='1')
    parser.add_argument('--print-output', action='store_true')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    image_file = args.image
    model_name = args.model
    mode = args.mode
    n_reqs = int(args.num_reqs)

    if mode == 'sequential':
        for i in tqdm(range(n_reqs)):
            test_infer(i, image_file, model_name, args.print_output)
    elif mode == 'concurrent':
        s = time.time()
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [
                executor.submit(test_infer,
                                i,
                                image_file,
                                model_name,
                                args.print_output)
                for i in range(n_reqs)
            ]
            wait(futures)
            for f in futures:
                f.result()  # re-raise any exception from the worker thread
        e = time.time()
        print('FPS:', n_reqs/(e - s))

Error:

raise error
tritonclient.utils.InferenceServerException: [400] [request id: <id_unknown>] inference request batch-size must be <= 1 for 'd2_1_b_trt'
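
This error usually means the TensorRT model is served with a maximum batch size of 1, so Triton reads the first axis of the (3, H, W) request tensor as a batch of 3. Below is a minimal sketch of one possible fix, assuming the engine actually expects an explicit NCHW input of shape (1, 3, H, W); the input name 'input_tensor' is taken from the code above, and the 1344x1344 resolution is only a placeholder that should be checked against the model's config.pbtxt:

import numpy as np
import tritonclient.http as httpclient

triton_client = httpclient.InferenceServerClient(url="localhost:8000", verbose=False)

# Inspect the deployed model's expected input dims and max_batch_size.
print(triton_client.get_model_config("d2_1_b_trt"))

# img stands in for the preprocessed CHW float32 array built in test_infer().
img = np.random.rand(3, 1344, 1344).astype(np.float32)  # placeholder data and resolution
img = np.expand_dims(img, axis=0)                        # add a leading batch axis -> (1, 3, H, W)

inputs = [httpclient.InferInput('input_tensor', list(img.shape), "FP32")]
inputs[0].set_data_from_numpy(img)

If get_model_config shows max_batch_size: 1 with dims [3, H, W], the request shape has to carry that explicit leading batch axis of 1; if batching is disabled (max_batch_size: 0), the request shape must match the configured dims exactly instead.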
