In [1]:
import cv2,imutils
import torch
import torch.backends.cudnn as cudnn
from utils.augmentations import letterbox
import numpy as np
from models.common import DetectMultiBackend
from utils.general import (LOGGER, check_img_size,cv2, non_max_suppression, scale_coords)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import select_device, time_sync

In [2]:
class Inference(object):
    """Run a YOLOv5 detector on single frames and draw the detections on them.

    The model is loaded and warmed up once in the constructor; `infer` is then
    called once per frame.

    Args:
        weights: Path of the weights file handed to `DetectMultiBackend`.
        imgsz: Requested (height, width) of the network input; it is validated
            against the model stride with `check_img_size`.
        conf_thres: Confidence threshold passed to `non_max_suppression`.
        iou_thres: IoU threshold passed to `non_max_suppression`.
        max_det: Maximum number of detections kept per image.

    Attributes:
        dt: Accumulated seconds spent in [pre-processing, forward pass, NMS].
        seen: Number of frames processed by `infer`.
    """

    def __init__(self, weights='yolov5s.pt', imgsz=(640, 640),
                 conf_thres=0.25, iou_thres=0.45, max_det=1000):
        # Use the first CUDA device when one is available, otherwise the CPU.
        self.device = select_device('0' if torch.cuda.is_available() else 'cpu')
        self.conf_thres = conf_thres
        self.iou_thres = iou_thres
        self.max_det = max_det

        self.model = DetectMultiBackend(weights, device=self.device, dnn=False)
        self.stride, self.names = self.model.stride, self.model.names
        self.pt, self.jit = self.model.pt, self.model.jit
        self.onnx, self.engine = self.model.onnx, self.model.engine
        self.imgsz = check_img_size(imgsz, s=self.stride)  # check image size
        self.model.warmup(imgsz=(1, 3, *self.imgsz))  # warmup
        self.dt, self.seen = [0.0, 0.0, 0.0], 0

    def infer(self, image):
        """Detect objects in one frame and return the annotated frame.

        Args:
            image: HWC image array in BGR channel order. It is drawn on in
                place, so pass a copy if the caller needs the clean frame.

        Returns:
            The annotated image, with boxes, labels and a 'FrameDelay' overlay
            (milliseconds elapsed since pre-processing started).
        """
        # Pad/resize to the validated network size instead of a hard-coded 640.
        # NOTE(review): auto=self.pt mirrors upstream detect.py (minimal
        # rectangle padding only for PyTorch weights) - confirm for other backends.
        img = letterbox(image, self.imgsz, stride=self.stride, auto=self.pt)[0]
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        t1 = time_sync()
        im = torch.from_numpy(img).to(self.device)
        im = im.half() if self.model.fp16 else im.float()  # uint8 to fp16/32
        im /= 255  # 0 - 255 to 0.0 - 1.0
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        t2 = time_sync()
        self.dt[0] += t2 - t1

        # Inference only: no autograd bookkeeping is needed.
        with torch.no_grad():
            pred = self.model(im, augment=False, visualize=False)
            t3 = time_sync()
            self.dt[1] += t3 - t2
            pred = non_max_suppression(pred, self.conf_thres, self.iou_thres,
                                       None, False, max_det=self.max_det)
        self.dt[2] += time_sync() - t3
        self.seen += 1

        # Exactly one image is fed per call, so only its result is used.
        det = pred[0]
        annotator = Annotator(image, line_width=3, example='exp')
        if len(det):
            # Rescale boxes from the network input size to the original frame size.
            det[:, :4] = scale_coords(im.shape[2:], det[:, :4], image.shape).round()

            # Draw every detection with its class name and confidence.
            for *xyxy, conf, cls in reversed(det):
                c = int(cls)  # integer class
                label = f'{self.names[c]} {conf:.2f}'
                annotator.box_label(xyxy, label, color=colors(c, True))

        im0 = annotator.result()
        delay_ms = round((time_sync() - t1) * 1000, 1)
        im0 = cv2.putText(im0, f'FrameDelay: {delay_ms} ms', (10, 40),
                          cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        return im0

In [3]:
# RTMP source stream that the capture cell opens with cv2.VideoCapture.
filePath = 'rtmp://103.85.20.95:1935/live/VR'

# Build the detector once; the constructor loads the weights and warms the model up.
Infer = Inference()

YOLOv5  v6.1-291-g2b227d2 Python-3.9.12 torch-1.12.0 CUDA:0 (NVIDIA GeForce RTX 3080 Ti, 12288MiB)

Fusing layers... 
YOLOv5s_v6 summary: 213 layers, 7225885 parameters, 0 gradients


In [8]:
import subprocess as sp

# Output endpoints for the two ffmpeg pushes.
# Change the host to the local machine's IP; keep the port number unchanged;
# the trailing path segment can be chosen freely.
_RELAY_BASE = 'http://192.168.0.115:8081'
AIUrl = _RELAY_BASE + '/AI'  # annotated (detection) stream
VRUrl = _RELAY_BASE + '/VR'  # untouched source stream


In [9]:
# Open the source stream and read the geometry / frame rate that ffmpeg will be
# configured with in the next cell.
camera = cv2.VideoCapture(filePath)
if not camera.isOpened():
    # Fail early: a closed capture reports 0x0, which would only surface later
    # as an obscure ffmpeg error.
    camera.release()
    raise RuntimeError(f'Unable to open video stream: {filePath}')

size = (int(camera.get(cv2.CAP_PROP_FRAME_WIDTH)), int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))
sizeStr = f'{size[0]}x{size[1]}'  # 'WIDTHxHEIGHT', the format of ffmpeg's -s option

# VideoCapture.get returns 0 for properties the backend cannot report, so fall
# back to a sane default instead of passing '-r 0' to ffmpeg. The comparison is
# also False for NaN, which would otherwise make int() raise.
DEFAULT_FPS = 25
reported_fps = camera.get(cv2.CAP_PROP_FPS)
fps = int(reported_fps) if reported_fps >= 1 else DEFAULT_FPS

In [10]:
def _ffmpeg_push_command(output_url, size_str, frame_rate):
    """Build the ffmpeg argv that reads raw BGR frames from stdin and pushes MPEG-TS.

    Args:
        output_url: Destination URL appended as ffmpeg's output.
        size_str: Frame size as 'WIDTHxHEIGHT'.
        frame_rate: Frame rate used for both the raw input and the encoded output.

    Returns:
        The argument list, ready for `subprocess.Popen`.
    """
    rate = str(frame_rate)
    return [
        'ffmpeg',
        # Options that were tried and left disabled:
        #   '-y' (overwrite output without asking), '-pix_fmt yuv420p', '-preset ultrafast'
        '-f', 'rawvideo',        # force the input format
        '-vcodec', 'rawvideo',   # input video codec (alias of -codec:v)
        '-pix_fmt', 'bgr24',     # input pixel format
        '-s', size_str,          # input frame size
        '-r', rate,              # input frame rate
        '-i', '-',               # read the input from stdin
        '-c:v', 'mpeg1video',    # output video codec
        '-r', rate,              # output frame rate
        '-f', 'mpegts',          # force the output format
        output_url,
    ]


# Both streams share the same encoding settings and differ only in destination.
commandAI = _ffmpeg_push_command(AIUrl, sizeStr, fps)
commandVR = _ffmpeg_push_command(VRUrl, sizeStr, fps)
pipeAI = sp.Popen(commandAI, stdin=sp.PIPE)
pipeVR = sp.Popen(commandVR, stdin=sp.PIPE)

In [None]:
import copy
import time

RECONNECT_DELAY_S = 1.0  # pause between reconnect attempts so a dead stream is not hammered

# Read the source frame by frame, push the raw frame to the VR pipe and the
# annotated frame to the AI pipe. The loop only ends through an exception
# (e.g. a kernel interrupt or a broken ffmpeg pipe); the `finally` block then
# releases the capture and shuts both ffmpeg processes down.
try:
    while True:
        ret, frame = camera.read()  # capture the video stream frame by frame
        if not ret:
            # Release the dead capture before reconnecting, otherwise every
            # retry leaks a handle.
            camera.release()
            time.sleep(RECONNECT_DELAY_S)
            camera = cv2.VideoCapture(filePath)
            continue

        # ffmpeg was configured with a fixed raw frame size; keep the frames
        # consistent with it in case the stream comes back at another resolution.
        if (frame.shape[1], frame.shape[0]) != size:
            frame = cv2.resize(frame, size)

        # infer() draws on its argument, so give it a copy and keep `frame` clean.
        img = Infer.infer(copy.deepcopy(frame))
        pipeAI.stdin.write(img.tobytes())
        pipeVR.stdin.write(frame.tobytes())
finally:
    camera.release()
    for pipe in (pipeAI, pipeVR):
        try:
            pipe.stdin.close()  # signals end-of-input to ffmpeg
        except OSError:
            pass  # ffmpeg already exited and the pipe is gone; nothing left to flush
        try:
            pipe.wait(timeout=5)
        except sp.TimeoutExpired:
            pipe.kill()