Please share example code for deploying a TensorRT model with the NVIDIA Triton Inference Server. I wrote the code below, but I ran into some issues with it:
```python
import argparse
import time
from concurrent.futures import ThreadPoolExecutor, wait

import cv2
import numpy as np
import tritonclient.http as httpclient
from PIL import Image
from tqdm import tqdm


def test_infer(req_id, image_file, model_name, print_output=False):
    # Alternative PIL-based loading (unused):
    # img = np.array(Image.open(image_file))
    # img = np.ascontiguousarray(img.transpose(2, 0, 1))

    # Load the image and convert it to a normalized CHW float32 tensor
    img = cv2.imread(str(image_file))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.array(img, dtype=np.float32)
    img /= 255
    img = np.transpose(img, (2, 0, 1))

    # Define the model's inputs
    inputs = []
    inputs.append(httpclient.InferInput('input_tensor', img.shape, "FP32"))
    print(inputs)
    inputs[0].set_data_from_numpy(img)

    # Define the model's requested outputs
    outputs = []
    outputs.append(httpclient.InferRequestedOutput('detection_boxes_box_outputs'))
    outputs.append(httpclient.InferRequestedOutput('detection_classes_box_outputs'))
    outputs.append(httpclient.InferRequestedOutput('detection_masks'))
    outputs.append(httpclient.InferRequestedOutput('detection_scores_box_outputs'))
    outputs.append(httpclient.InferRequestedOutput('num_detections_box_outputs'))

    # Send the request to the Triton server
    triton_client = httpclient.InferenceServerClient(
        url="localhost:8000", verbose=False)
    results = triton_client.infer(model_name, inputs=inputs, outputs=outputs)

    # Collect the returned tensors by output name
    response_info = results.get_response()
    outputs = {}
    for output_info in response_info['outputs']:
        output_name = output_info['name']
        outputs[output_name] = results.as_numpy(output_name)

    if print_output:
        print(req_id, outputs)


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--image', required=True)
    parser.add_argument('--model', required=True)
    parser.add_argument('--mode', default='sequential',
                        choices=['sequential', 'concurrent'])
    parser.add_argument('--num-reqs', default='1')
    parser.add_argument('--print-output', action='store_true')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    image_file = args.image
    model_name = args.model
    mode = args.mode
    n_reqs = int(args.num_reqs)

    if mode == 'sequential':
        # Issue the requests one after another
        for i in tqdm(range(n_reqs)):
            test_infer(i, image_file, model_name, args.print_output)
    elif mode == 'concurrent':
        # Issue the requests from a thread pool and measure throughput
        s = time.time()
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [
                executor.submit(test_infer, i, image_file, model_name, args.print_output)
                for i in range(n_reqs)
            ]
            wait(futures)
            for f in futures:
                f.result()  # propagate any exception raised inside a worker
        e = time.time()
        print('FPS:', n_reqs / (e - s))
```
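For reference, the script is invoked roughly as follows (the script file name and image path are placeholders; `d2_1_b_trt` is the model name from the error below):

```
python triton_client.py --image test.jpg --model d2_1_b_trt --mode sequential --num-reqs 1 --print-output
```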
Error:

```
    raise error
tritonclient.utils.InferenceServerException: [400] [request id: <id_unknown>] inference request batch-size must be <= 1 for 'd2_1_b_trt'
```
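For context on the error: when a Triton model's `config.pbtxt` declares `max_batch_size >= 1`, the first axis of the request shape is interpreted as the batch dimension, so a `(3, H, W)` tensor looks to the server like a batch of 3 and gets rejected when the limit is 1. A minimal sketch of the preprocessing with an explicit batch axis of size 1 (this assumes the model really does declare `max_batch_size: 1` and uses the `input_tensor` name and FP32 dtype from the script above; `image.jpg` is a placeholder path):

```python
import cv2
import numpy as np
import tritonclient.http as httpclient

# Sketch only: assumes a config.pbtxt with max_batch_size: 1 and an input
# named 'input_tensor' with dims [3, H, W]; 'image.jpg' is a placeholder.
img = cv2.cvtColor(cv2.imread("image.jpg"), cv2.COLOR_BGR2RGB)
img = img.astype(np.float32) / 255.0
img = np.transpose(img, (2, 0, 1))       # HWC -> CHW
batch = np.expand_dims(img, axis=0)      # CHW -> NCHW; leading axis is the batch (N = 1)

inputs = [httpclient.InferInput('input_tensor', batch.shape, "FP32")]
inputs[0].set_data_from_numpy(batch)
```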