In [52]:
# 모듈 임포트
import cv2
import numpy as np
import time

In [53]:
# Model loading: YOLOv3 network files, input video path, and detection threshold
yolo_weights, yolo_config = "yolo/yolov3.weights", "yolo/yolov3.cfg"
net = cv2.dnn.readNet(yolo_weights, yolo_config)

# Video to process and the minimum class score a detection must exceed to be kept
file_name, min_confidence = "video/yolo_01.mp4", 0.5

In [54]:
# Class names and output-layer setup
# One class label per line; the line position is the class id used to index `classes`.
with open("yolo/coco.names", "r", encoding="utf-8") as f:
    classes = [line.strip() for line in f]

layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer ids. Depending on the OpenCV
# version the result is shaped (N, 1) or (N,), so flatten it before indexing
# instead of assuming each element is itself indexable.
output_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
output_layers = [layer_names[int(layer_id) - 1] for layer_id in output_layer_ids]

# One random BGR colour per class (one row per class, three channels).
colors = np.random.uniform(0, 255, size=(len(classes), 3))

In [59]:
# Build detectAndDisplay
def detectAndDisplay(frame, scale=0.4, nms_threshold=0.4):
    """Run the network on one frame, draw the kept detections, and show the result.

    Reads the module-level ``net``, ``output_layers``, ``min_confidence``,
    ``classes`` and ``colors``. The annotated image is displayed in the
    "YOLO Video" window and the per-frame processing time is printed.

    Args:
        frame: Image array as returned by ``cv2.VideoCapture.read``.
        scale: Factor applied to both axes when resizing the frame before
            inference and display (default 0.4, the original fixed value).
        nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``
            (default 0.4, the original fixed value).

    Returns:
        None.
    """
    # Record when work on this frame starts
    start_time = time.time()

    img = cv2.resize(frame, None, fx=scale, fy=scale)
    # Only height and width are needed; slicing avoids an unused channel count.
    height, width = img.shape[:2]

    # Convert the image to a 416x416 network input blob; 0.00392 is ~1/255.
    blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    # Feed the blob to the model
    net.setInput(blob)
    # Collect raw detections from every output layer
    outs = net.forward(output_layers)

    class_ids = []
    confidences = []
    boxes = []

    for out in outs:
        for detection in out:
            # Entries from index 5 onward are treated as per-class scores.
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > min_confidence:
                # The first four entries are used as centre x/y and width/height
                # relative to the image size, then converted to a top-left box.
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(int(class_id))

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, min_confidence, nms_threshold)
    # NMSBoxes may return an empty tuple, an (N, 1) array or an (N,) array
    # depending on input and OpenCV version; flatten to a plain list of ints.
    # Sorting keeps the drawing order the same as iterating over all boxes.
    kept = sorted(np.array(indexes, dtype=int).flatten().tolist())

    font = cv2.FONT_HERSHEY_PLAIN
    for i in kept:
        x, y, w, h = boxes[i]
        label = "{}: {:.2f}".format(classes[class_ids[i]], confidences[i]*100)
        # colors has one row per class, so it must be indexed by class id;
        # indexing by box position overruns it when many boxes are found.
        color = colors[class_ids[i]].tolist()
        cv2.rectangle(img, (x, y), (x+w, y+h), color, 2)
        cv2.putText(img, label, (x, y+30), font, 2, (0, 255, 0), 1)
    cv2.imshow("YOLO Video", img)

    # Record when work on this frame ends and report the elapsed time
    end_time = time.time()
    process_time = end_time - start_time
    print("=== A frame took {:.3f} seconds".format(process_time))

In [61]:
# Pass each frame extracted from the video to detectAndDisplay
cap = cv2.VideoCapture(file_name)
# isOpened is a method: without the call parentheses the check is always False.
if not cap.isOpened():
    cap.release()
    # Raise instead of exit(0): exit() would shut down the notebook kernel
    # and report success even though the video could not be opened.
    raise IOError('--(!)Error opening video file: {}'.format(file_name))

try:
    while True:
        # Read frames one at a time; keep the frame in its own name so the
        # configured file_name is not overwritten and the cell can be re-run.
        ret, frame = cap.read()
        if not ret or frame is None:
            print('--(!) No captured frame -- Break!')
            break

        detectAndDisplay(frame)
        # Stop early when the user presses 'q' in the display window
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture handle and close the window, even on error.
    cap.release()
    cv2.destroyAllWindows()

=== A frame took 0.575 seconds
=== A frame took 0.420 seconds
=== A frame took 0.411 seconds
=== A frame took 0.393 seconds
=== A frame took 0.378 seconds
=== A frame took 0.395 seconds
=== A frame took 0.387 seconds
=== A frame took 0.433 seconds
=== A frame took 0.382 seconds
=== A frame took 0.402 seconds
=== A frame took 0.426 seconds
=== A frame took 0.390 seconds
=== A frame took 0.384 seconds
=== A frame took 0.380 seconds
=== A frame took 0.379 seconds
=== A frame took 0.382 seconds
