In [None]:
import os
import psutil

# Logical CPUs known to the OS (each hyper-thread counts as one).
max_workers_cpu = os.cpu_count()
print(f"Logical CPU cores (incl. Hyper-Threading): {max_workers_cpu}")

# Physical cores. os.sched_getaffinity() is NOT a physical-core count: it
# returns the set of logical CPUs this process may run on (and it does not
# exist on macOS), so the number is taken from psutil instead.
# psutil may return None when the value cannot be determined.
physical_cores = psutil.cpu_count(logical=False)
print(f"Physical CPU cores: {physical_cores}")

# Logical CPUs this process is allowed to use according to its CPU affinity
# mask; falls back to the OS-wide count where the call is unavailable.
if hasattr(os, 'sched_getaffinity'):
    usable_cpus = len(os.sched_getaffinity(0))
else:
    usable_cpus = max_workers_cpu
print(f"Logical CPUs usable by this process: {usable_cpus}")

In [None]:
# Read the memory counters once so both figures come from the same snapshot.
BYTES_PER_GIB = 1024 ** 3
memory = psutil.virtual_memory()
total_ram_gb = memory.total / BYTES_PER_GIB
available_ram_gb = memory.available / BYTES_PER_GIB
print(f"Total RAM: {total_ram_gb:.2f} GB")
print(f"Available RAM: {available_ram_gb:.2f} GB")

In [2]:
from ultralytics import YOLO
import cv2

In [3]:
# cv2.imread reports failure by returning None instead of raising, so stop
# here with a clear message rather than failing later inside a model call.
img = cv2.imread('frame.jpg')
if img is None:
    raise FileNotFoundError("cv2.imread could not read 'frame.jpg'")

In [None]:
# Load the exported ONNX file through the YOLO wrapper and run the same
# frame 20 times as a single batch.
model = YOLO("tmp/triton_repo/yolo/1/model.onnx")
results = model.predict([img] * 20, imgsz=640, conf=0.5)

# Last expression of the cell: the plot of the first result is displayed.
results[0].plot()

In [None]:
def postprocess(input_image_size, model_input_size, output, confidence_thres=0.4, iou_thres=0.4):
    """Decode raw detector output into NMS-filtered boxes in image pixels.

    Args:
        input_image_size: ``(width, height)`` of the original image. Index 0
            scales x coordinates, index 1 scales y coordinates.
        model_input_size: ``(width, height)`` the image was resized to before
            it was fed to the model.
        output: Batched raw output; only ``output[0]`` is decoded. After
            squeezing it is read as ``(4 + num_classes, num_candidates)``
            with rows ``cx, cy, w, h, class scores...``.
        confidence_thres: Minimum best-class score for a candidate to be kept
            (also passed to NMS as its score threshold).
        iou_thres: IoU threshold passed to ``cv2.dnn.NMSBoxes``.

    Returns:
        A list of ``[x1, y1, x2, y2, score, class_id]`` entries. Coordinates
        are Python ints in original-image pixels; ``score`` and ``class_id``
        are NumPy scalars. NMS is run once over all classes together.
    """
    # (4 + C, N) -> (N, 4 + C): one row per candidate box.
    predictions = np.transpose(np.squeeze(output[0]))
    x_factor = input_image_size[0] / model_input_size[0]
    y_factor = input_image_size[1] / model_input_size[1]

    # Vectorised confidence filter instead of a Python loop over every row.
    class_scores = predictions[:, 4:]
    best_scores = class_scores.max(axis=1)
    keep = best_scores >= confidence_thres
    if not keep.any():
        # Nothing to suppress; skip the NMS call entirely.
        return []

    scores = best_scores[keep]
    class_ids = class_scores[keep].argmax(axis=1)
    cx, cy, w, h = predictions[keep, :4].T

    # Centre/size in model pixels -> [left, top, width, height] in image
    # pixels. astype(int) truncates toward zero, like int().
    boxes = np.stack(
        [(cx - w / 2) * x_factor, (cy - h / 2) * y_factor, w * x_factor, h * y_factor],
        axis=1,
    ).astype(int).tolist()

    indices = cv2.dnn.NMSBoxes(boxes, scores.tolist(), confidence_thres, iou_thres)

    # NMSBoxes may hand back an empty tuple, an Nx1 array or a 1-D array
    # depending on the OpenCV version; normalise to a flat index array.
    detections = []
    for i in np.asarray(indices, dtype=int).reshape(-1):
        left, top, width, height = boxes[i]
        detections.append([left, top, left + width, top + height, scores[i], class_ids[i]])

    return detections

In [None]:
# NOTE(review): `preprocessed_imgs` is only assigned in the next cell; no
# earlier cell in view defines it, so on a fresh kernel run top-to-bottom
# this cell would raise NameError. Consider moving it below that cell.
preprocessed_imgs.shape

In [None]:
import cv2
import numpy as np
import onnxruntime as ort

# cv2.imread returns None (no exception) when the file cannot be read.
img = cv2.imread("frame.jpg")
if img is None:
    raise FileNotFoundError("cv2.imread could not read 'frame.jpg'")

def preprocess(img, input_size=(640, 640)):
    """Turn a BGR image into a normalised NCHW float32 batch of one.

    The original version never resized the frame, although the rest of the
    notebook (``postprocess(..., (640, 640), ...)``) assumes the model saw a
    640x640 input. The resize is a plain stretch (no letterboxing), which is
    what ``postprocess`` undoes with its independent x / y scale factors.

    Args:
        img: ``H x W x 3`` BGR image, e.g. from ``cv2.imread``. Not modified.
        input_size: ``(width, height)`` to resize to before inference.

    Returns:
        C-contiguous ``float32`` array of shape ``(1, 3, height, width)`` in
        RGB channel order with values divided by 255.
    """
    resized = cv2.resize(img, tuple(input_size))
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    # HWC -> CHW, then add the batch axis; force a contiguous float32 buffer
    # because the transpose only produces a strided view.
    chw = np.transpose(rgb, (2, 0, 1))
    batch = np.ascontiguousarray(chw[np.newaxis], dtype=np.float32)

    # Out-of-place division so a float32 input is never mutated.
    return batch / 255.0
preprocessed_imgs = preprocess(img)
print(preprocessed_imgs.shape)

# Run inference with ONNX Runtime
sess = ort.InferenceSession("tmp/triton_repo/yolo/1/model.onnx")
onnx_outputs = sess.run(
    output_names=["output0"],
    input_feed={"images": preprocessed_imgs},
)

In [None]:
img.shape[:2]

In [None]:
# Use the frame's real (width, height) instead of a hard-coded 3840x2160 so
# the boxes are scaled correctly for any input image.
det = postprocess((img.shape[1], img.shape[0]), (640, 640), onnx_outputs[0])

In [None]:
len(det)

In [None]:
det

In [None]:
def draw_boxes(image, boxes, scale=(1.0, 1.0), class_names=None, color=(0, 255, 0)):
    """Draw detection rectangles and their labels on a copy of ``image``.

    Args:
        image: Image to annotate; it is copied, the input is not modified.
        boxes: Iterable of ``[x1, y1, x2, y2]``, optionally followed by a
            score (index 4) and a class id (index 5).
        scale: ``(scale_x, scale_y)`` multipliers applied to the box
            coordinates before drawing. The default leaves them unchanged.
        class_names: Optional lookup indexed by class id for the label text.
        color: Rectangle colour passed to ``cv2.rectangle``.

    Returns:
        The annotated copy of ``image``.
    """
    annotated = image.copy()
    scale_x, scale_y = scale

    for box in boxes:
        x1, y1 = int(box[0] * scale_x), int(box[1] * scale_y)
        x2, y2 = int(box[2] * scale_x), int(box[3] * scale_y)
        score = box[4] if len(box) > 4 else None
        class_id = int(box[5]) if len(box) > 5 else None

        # Label: "<name> <score>" when names are known, else the score alone.
        if class_names and class_id is not None:
            name = str(class_names[class_id])
            label = f"{name} {score:.2f}" if score is not None else name
        elif score is not None:
            label = f"{score:.2f}"
        else:
            label = ''

        cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)
        # Previously str(score) was drawn here, ignoring the label built
        # above and printing "None" for boxes that carry no score.
        if label:
            cv2.putText(annotated, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

    return annotated

In [None]:
# Draw the current `det` boxes on a copy of the frame and write the result
# to output.jpg; the cell displays the value returned by cv2.imwrite.
cv2.imwrite('output.jpg', draw_boxes(img, det))

In [None]:
len(det)

In [None]:
import tensorrt as trt
from polygraphy.logger import G_LOGGER
from polygraphy.backend.common import BytesFromPath
from polygraphy.backend.trt import EngineFromBytes, TrtRunner

# TensorRT logger: only messages of WARNING severity and above are shown.
cuda_logger = trt.Logger(trt.Logger.WARNING)

# TensorRT runtime bound to that logger. Despite the variable name this is a
# trt.Runtime, not the CUDA runtime; it is handed to EngineFromBytes when the
# engine is loaded.
cuda_runtime = trt.Runtime(cuda_logger)

# Same threshold for Polygraphy's own logger (WARNING is severity 30).
G_LOGGER.module_severity = G_LOGGER.WARNING

In [None]:
# Polygraphy runner built from the serialized engine file model.plan, using
# the TensorRT runtime created earlier.
model = TrtRunner(
    EngineFromBytes(
        serialized_engine=BytesFromPath("model_trt/model_det/1/model.plan"),
        runtime=cuda_runtime,
    )
)

In [None]:
import numpy as np

input_dict = {"images": preprocessed_imgs}

# Run inference inside the runner's context manager so it is deactivated
# again afterwards; the previous code called activate() and never
# deactivate().
with model:
    output = model.infer(feed_dict=input_dict)

    # Copy the output tensor while the runner is still active, in case the
    # runner reuses or releases its output buffers.
    tensor_output = np.array(output["output0"])

In [None]:
tensor_output.shape

In [None]:
# Use the frame's real (width, height) instead of a hard-coded 3840x2160 so
# the boxes are scaled correctly for any input image.
det = postprocess((img.shape[1], img.shape[0]), (640, 640), tensor_output)

In [None]:
det

In [None]:
# Draw the current `det` boxes on a copy of the frame and write the result
# to output_trt.jpg; the cell displays the value returned by cv2.imwrite.
cv2.imwrite('output_trt.jpg', draw_boxes(img, det))

In [None]:
import numpy as np
from tritonclient.utils import triton_to_np_dtype
from tritonclient.http import InferenceServerClient, InferInput

# Initialize the Triton client
# NOTE(review): `client` is not referenced by any later cell in view; a
# second client (`triton_client`) is created further down for the same URL.
client = InferenceServerClient(url='localhost:8000')

In [None]:
def preprocess(image, input_size=(640, 640)):
    """Build the Triton ``InferInput`` named ``images`` for one BGR frame.

    NOTE: this redefines the ``preprocess`` helper declared earlier in the
    notebook; after this cell runs, ``preprocess`` returns an ``InferInput``
    rather than a NumPy array.

    Changes from the previous version: the frame is converted BGR -> RGB
    (as the earlier ``preprocess`` already did), the unused locals and the
    debug ``print`` calls are gone, and the target size is a parameter.

    Args:
        image: ``H x W x 3`` BGR image, e.g. from ``cv2.imread``.
        input_size: ``(width, height)`` the frame is resized to.

    Returns:
        ``InferInput`` of datatype ``FP32`` and shape
        ``[1, 3, height, width]`` holding the RGB frame divided by 255.
    """
    input_name = 'images'
    input_dtype = 'FP32'
    width, height = input_size

    resized = cv2.resize(image, (width, height))
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    # Normalise, then HWC -> (1, C, H, W) in a contiguous buffer.
    input_data = (rgb / 255.0).astype(triton_to_np_dtype(input_dtype))
    input_data = np.ascontiguousarray(input_data.transpose(2, 0, 1)[np.newaxis])

    infer_input = InferInput(input_name, list(input_data.shape), input_dtype)
    infer_input.set_data_from_numpy(input_data)

    return infer_input

In [None]:
import contextlib

In [None]:
import ffmpegcv
import torch.nn.functional as F

In [None]:
# Open video.mp4 with ffmpegcv's VideoCaptureNV (nv12 pixel format) and wrap
# it with toCUDA, requesting frames in 'chw' tensor layout.
cap = ffmpegcv.toCUDA(ffmpegcv.VideoCaptureNV('video.mp4', pix_fmt='nv12'), tensor_format='chw')

In [None]:
# Read one frame as a torch tensor; stop with a clear error when the capture
# reports that no frame was returned instead of failing on the next line.
ret, frame_CHW_CUDA = cap.read_torch()
if not ret:
    raise RuntimeError("cap.read_torch() returned no frame from 'video.mp4'")

# Add a leading batch axis (CHW -> NCHW) for F.interpolate.
frame_CHW_CUDA = frame_CHW_CUDA.unsqueeze(0)

# Bilinear resize to 640x640, then divide by 255.
frame_resized = F.interpolate(frame_CHW_CUDA, size=(640, 640), mode="bilinear", align_corners=False)/255

In [None]:
frame_CHW_CUDA.shape

In [None]:
# NOTE(review): in top-to-bottom order the most recent assignment to `model`
# is the TrtRunner created earlier, while the YOLO objects are (re)created
# in later cells. Confirm which model this cell is meant to call.
model.predict(frame_resized)

In [4]:
# YOLO wrapper pointed at the Triton HTTP endpoint of the 'yolo' model.
# Rebinds `model`, replacing whatever object the name held before.
model = YOLO("http://localhost:8000/yolo", task="detect")

In [None]:
# Rebinds `model` to the 'model_seg' endpoint, replacing the 'yolo' one.
# NOTE(review): the endpoint is named model_seg but task is "detect";
# presumably a segmentation model would need task="segment" - confirm.
model = YOLO("http://localhost:8000/model_seg", task="detect")

In [5]:
# Send the same frame 20 times as one batch; the returned list of results is
# the cell's displayed value.
model.predict([img] * 20)


0: 384x640 17 potatos, 9.3ms
1: 384x640 17 potatos, 9.3ms
2: 384x640 17 potatos, 9.3ms
3: 384x640 17 potatos, 9.3ms
4: 384x640 17 potatos, 9.3ms
5: 384x640 17 potatos, 9.3ms
6: 384x640 17 potatos, 9.3ms
7: 384x640 17 potatos, 9.3ms
8: 384x640 17 potatos, 9.3ms
9: 384x640 17 potatos, 9.3ms
10: 384x640 17 potatos, 9.3ms
11: 384x640 17 potatos, 9.3ms
12: 384x640 17 potatos, 9.3ms
13: 384x640 17 potatos, 9.3ms
14: 384x640 17 potatos, 9.3ms
15: 384x640 17 potatos, 9.3ms
16: 384x640 17 potatos, 9.3ms
17: 384x640 17 potatos, 9.3ms
18: 384x640 17 potatos, 9.3ms
19: 384x640 17 potatos, 9.3ms
Speed: 3.2ms preprocess, 9.3ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)


[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'potato'}
 obb: None
 orig_img: array([[[ 95, 133,  57],
         [ 95, 133,  57],
         [ 95, 133,  57],
         ...,
         [121, 150,  95],
         [121, 150,  95],
         [121, 150,  95]],
 
        [[ 95, 133,  57],
         [ 95, 133,  57],
         [ 95, 133,  57],
         ...,
         [121, 150,  95],
         [121, 150,  95],
         [121, 150,  95]],
 
        [[ 95, 133,  57],
         [ 95, 133,  57],
         [ 95, 133,  57],
         ...,
         [121, 150,  95],
         [121, 150,  95],
         [121, 150,  95]],
 
        ...,
 
        [[117, 146,  73],
         [117, 146,  73],
         [117, 146,  73],
         ...,
         [133, 170, 100],
         [133, 170, 102],
         [133, 170, 100]],
 
        [[117, 145,  75],
         [117, 146,  73],
         [117, 146,  73],
         ...,
         [133, 170

In [None]:
# Send the same frame 20 times as one batch; the returned list of results is
# the cell's displayed value.
model.predict([img] * 20)

In [None]:
import time

In [None]:
# Client for the Triton HTTP endpoint.
triton_client = InferenceServerClient(url="localhost:8000", verbose=False, ssl=False)

# Poll up to 10 times, one second apart, until Triton reports the model as
# ready. Unlike the previous suppress()/assert version this does not fall
# through silently when the model never becomes ready, and it keeps working
# when assertions are disabled.
last_error = None
for _ in range(10):
    try:
        if triton_client.is_model_ready('yolo'):
            break
    except Exception as exc:  # the server may not accept connections yet
        last_error = exc
    time.sleep(1)
else:
    raise TimeoutError("Triton model 'yolo' was not ready after 10 attempts") from last_error

In [None]:
# Retry a real prediction up to 10 times, one second apart, until it returns
# a non-empty result. Raises instead of silently continuing when every
# attempt fails, and does not depend on assertions being enabled.
last_error = None
for _ in range(10):
    try:
        if model.predict(img):
            break
    except Exception as exc:  # the endpoint may not be serving yet
        last_error = exc
    time.sleep(1)
else:
    raise TimeoutError("model.predict(img) did not succeed after 10 attempts") from last_error

In [None]:
import torch

# preprocess() wraps the frame as the Triton input; send it to the 'yolo'
# model and read the 'output0' tensor back as a NumPy array.
infer_input = preprocess(img)
response = triton_client.infer('yolo', [infer_input])
output0 = response.as_numpy('output0')  # output0[0] assumed to be detections

In [None]:
output0

In [None]:
# Use the frame's real (width, height) instead of a hard-coded 3840x2160 so
# the boxes are scaled correctly for any input image.
det = postprocess((img.shape[1], img.shape[0]), (640, 640), output0)

In [None]:
det