share the code for deploying TensorRT with the NVIDIA Triton Server. #3

Open
imenselmi opened this issue Aug 10, 2023 · 0 comments

@imenselmi

Please share the code for deploying TensorRT with the NVIDIA Triton Server.
I wrote the code below, but I ran into an issue with it:

import argparse
from concurrent.futures import ThreadPoolExecutor, wait
import time
import cv2
import tritonclient.http as httpclient
from tqdm import tqdm
from PIL import Image
import numpy as np


def test_infer(req_id, image_file, model_name, print_output=False):
    """img = np.array(Image.open(image_file))
    img = np.ascontiguousarray(img.transpose(2, 0, 1))"""
    img=cv2.imread(str(image_file))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.array(img, dtype=np.float32)
    img /= 255
    img = np.transpose(img, (2, 0, 1))
    # Define model's inputs
    inputs = []
    inputs.append(httpclient.InferInput('input_tensor', img.shape, "FP32"))
    print(inputs)
    inputs[0].set_data_from_numpy(img)
    # Define model's outputs
    outputs = []
    outputs.append(httpclient.InferRequestedOutput('detection_boxes_box_outputs'))
    outputs.append(httpclient.InferRequestedOutput('detection_classes_box_outputs'))
    outputs.append(httpclient.InferRequestedOutput('detection_masks'))
    outputs.append(httpclient.InferRequestedOutput('detection_scores_box_outputs'))
    outputs.append(httpclient.InferRequestedOutput('num_detections_box_outputs'))
    # Send request to Triton server
    triton_client = httpclient.InferenceServerClient(
        url="localhost:8000", verbose=False)
    results = triton_client.infer(model_name, inputs=inputs, outputs=outputs)
    response_info = results.get_response()
    outputs = {}
    for output_info in response_info['outputs']:
        output_name = output_info['name']
        outputs[output_name] = results.as_numpy(output_name)

    if print_output:
        print(req_id, outputs)


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--image', required=True)
    parser.add_argument('--model', required=True)
    parser.add_argument('--mode', default='sequential', choices=['sequential', 'concurrent'])
    parser.add_argument('--num-reqs', default='1')
    parser.add_argument('--print-output', action='store_true')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    image_file = args.image
    model_name = args.model
    mode = args.mode
    n_reqs = int(args.num_reqs)

    if mode == 'sequential':
        for i in tqdm(range(n_reqs)):
            test_infer(i, image_file, model_name, args.print_output)
    elif mode == 'concurrent':
        s = time.time()
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [
                executor.submit(test_infer,
                                i,
                                image_file,
                                model_name,
                                args.print_output)
                for i in range(n_reqs)
            ]
            wait(futures)
            for f in futures:
                f.result()  # re-raise any exception from the worker thread
        e = time.time()
        print('FPS:', n_reqs/(e - s))

Error:

raise error
tritonclient.utils.InferenceServerException: [400] [request id: <id_unknown>] inference request batch-size must be <= 1 for 'd2_1_b_trt'
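
This error usually means the TensorRT model is served with a maximum batch size of 1, so Triton reads the first axis of the (3, H, W) request tensor as a batch of 3. Below is a minimal sketch of one possible fix, assuming the engine actually expects an explicit NCHW input of shape (1, 3, H, W); the input name 'input_tensor' is taken from the code above, and the 1344x1344 resolution is only a placeholder that should be checked against the model's config.pbtxt:

import numpy as np
import tritonclient.http as httpclient

triton_client = httpclient.InferenceServerClient(url="localhost:8000", verbose=False)

# Inspect the deployed model's expected input dims and max_batch_size.
print(triton_client.get_model_config("d2_1_b_trt"))

# img stands in for the preprocessed CHW float32 array built in test_infer().
img = np.random.rand(3, 1344, 1344).astype(np.float32)  # placeholder data and resolution
img = np.expand_dims(img, axis=0)                        # add a leading batch axis -> (1, 3, H, W)

inputs = [httpclient.InferInput('input_tensor', list(img.shape), "FP32")]
inputs[0].set_data_from_numpy(img)

If get_model_config shows max_batch_size: 1 with dims [3, H, W], the request shape has to carry that explicit leading batch axis of 1; if batching is disabled (max_batch_size: 0), the request shape must match the configured dims exactly instead.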
