In [3]:
import torch
import numpy as np
import cv2
import pafy
from time import time

In [2]:
class ObjectDetection:
    """Run YOLOv5 object detection on a YouTube video and save the result.

    Calling an instance streams the video frame by frame, draws a bounding
    box and label for every sufficiently confident detection, and writes the
    annotated frames to ``out_file``.

    Class attributes:
        CONF_THRESHOLD: minimum confidence a detection needs to be drawn.
        OUTPUT_FPS: frame rate written into the output video container.
    """

    CONF_THRESHOLD = 0.2
    OUTPUT_FPS = 20

    def __init__(self, url, out_file):
        """Load the model and remember the input URL and output path.

        Args:
            url: YouTube URL of the video to run predictions on.
            out_file: name of the output video file (expected to be ``*.avi``).
        """
        self._URL = url
        self.model = self.load_model()
        self.classes = self.model.names
        self.out_file = out_file
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        # Move the model to the target device once here instead of on every
        # frame inside score_frame; the device does not change afterwards.
        self.model.to(self.device)

    def get_video_from_url(self):
        """Create a new video-capture object for the configured URL.

        Returns:
            A ``cv2.VideoCapture`` opened on the URL of the last entry of the
            stream list that pafy reports for the video.
        """
        play = pafy.new(self._URL).streams[-1]
        assert play is not None, "no stream found for the given URL"
        return cv2.VideoCapture(play.url)

    def load_model(self):
        """Load the pretrained YOLOv5s model through ``torch.hub``."""
        return torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

    def score_frame(self, frame):
        """Run the model on a single frame.

        Args:
            frame: a single frame (numpy array / list / tuple).

        Returns:
            ``(labels, cord)`` as numpy arrays: ``labels`` is the last column
            of the first result in ``results.xyxyn`` and ``cord`` holds the
            remaining columns of each row.
        """
        results = self.model([frame])
        detections = results.xyxyn[0].cpu().numpy()
        return detections[:, -1], detections[:, :-1]

    def class_to_label(self, x):
        """Convert a numeric class label into its string name."""
        return self.classes[int(x)]

    def plot_boxes(self, results, frame):
        """Draw bounding boxes and labels onto a frame.

        Args:
            results: ``(labels, cord)`` as returned by ``score_frame``.
            frame: the frame that was scored; it is drawn on in place.

        Returns:
            The same frame with boxes and labels drawn for every detection
            whose confidence (``row[4]``) is at least ``CONF_THRESHOLD``.
        """
        labels, cord = results
        x_shape, y_shape = frame.shape[1], frame.shape[0]
        bgr = (0, 255, 0)
        for label, row in zip(labels, cord):
            if row[4] < self.CONF_THRESHOLD:
                continue
            # Coordinates are scaled by the frame width/height to get pixels.
            x1, y1 = int(row[0] * x_shape), int(row[1] * y_shape)
            x2, y2 = int(row[2] * x_shape), int(row[3] * y_shape)
            cv2.rectangle(frame, (x1, y1), (x2, y2), bgr, 2)
            # The caption lists the coordinates in the order x1, x2, y1, y2.
            caption = f"{self.class_to_label(label)}: {x1}, {x2}, {y1}, {y2}"
            cv2.putText(frame, caption, (x1, y1),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, bgr, 2)
        return frame

    def __call__(self):
        """Process the whole video when the instance is called.

        Reads the stream frame by frame, annotates each frame, prints the
        processing rate and appends the frame to the output file. The loop
        ends when the stream yields no more frames or when the user
        interrupts it. The capture and the writer are always released, so the
        output file is finalised even if processing fails; errors other than
        ``KeyboardInterrupt`` are re-raised after the cleanup.
        """
        player = self.get_video_from_url()
        assert player.isOpened(), "could not open the video stream"
        out = None
        try:
            x_shape = int(player.get(cv2.CAP_PROP_FRAME_WIDTH))
            y_shape = int(player.get(cv2.CAP_PROP_FRAME_HEIGHT))
            four_cc = cv2.VideoWriter_fourcc(*"MJPG")
            out = cv2.VideoWriter(self.out_file, four_cc, self.OUTPUT_FPS,
                                  (x_shape, y_shape))
            while True:
                start_time = time()
                ret, frame = player.read()
                if not ret:
                    # End of stream (or read failure): stop cleanly instead
                    # of letting a None frame raise further down.
                    break
                results = self.score_frame(frame)
                frame = self.plot_boxes(results, frame)
                elapsed = round(time() - start_time, 3)
                # A frame handled in under half a millisecond rounds to 0.
                fps = 1 / elapsed if elapsed > 0 else float('inf')
                print(f"Frames Per Second : {fps}")
                out.write(frame)
        except KeyboardInterrupt:
            # A manual interrupt is treated as a normal stop.
            pass
        finally:
            player.release()
            if out is not None:
                out.release()
        print("종료되었습니다.")

In [8]:
# Build the detector for the given YouTube URL and output file, then call the
# instance (ObjectDetection.__call__) to process the video and write the
# annotated frames to the output file.
# NOTE(review): the `video/` directory presumably has to exist beforehand --
# confirm that cv2.VideoWriter does not create missing directories.
Video = ObjectDetection("https://youtu.be/tL3TwpPUips", "video/test2_YOLOv5.avi")
Video()

Using cache found in C:\Users\smhrd/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2022-4-8 torch 1.8.0+cu111 CUDA:0 (NVIDIA GeForce RTX 2070, 8192MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.5 GFLOPs
Adding AutoShape... 


Frames Per Second : 6.410256410256411
Frames Per Second : 30.3030303030303
Frames Per Second : 34.48275862068965
Frames Per Second : 24.390243902439025
Frames Per Second : 22.22222222222222
Frames Per Second : 3.558718861209964
Frames Per Second : 41.666666666666664
Frames Per Second : 7.633587786259541
Frames Per Second : 4.62962962962963
Frames Per Second : 13.88888888888889
Frames Per Second : 2.1231422505307855
Frames Per Second : 52.631578947368425
Frames Per Second : 4.149377593360996
Frames Per Second : 8.0
Frames Per Second : 55.55555555555556
Frames Per Second : 6.369426751592357
Frames Per Second : 30.3030303030303
Frames Per Second : 3.773584905660377
Frames Per Second : 7.8125
Frames Per Second : 18.867924528301888
Frames Per Second : 6.578947368421053
Frames Per Second : 22.72727272727273
Frames Per Second : 3.0864197530864197
Frames Per Second : 50.0
Frames Per Second : 45.45454545454546
Frames Per Second : 3.4482758620689657
Frames Per Second : 47.61904761904761
Frames P