In [9]:
import cv2
import numpy as np

# Loaded networks, keyed by the ``gpu`` flag, so the weight file is parsed
# once instead of on every call to yolo().
_YOLO_NET_CACHE = {}


def _load_yolo_net(gpu):
    """Return the YOLOv4 network for the requested backend, loading it on first use."""
    key = bool(gpu)
    net = _YOLO_NET_CACHE.get(key)
    if net is None:
        net = cv2.dnn.readNet("yolo_weights_cfg/yolov4.weights", "yolo_weights_cfg/yolov4.cfg")
        if key:
            net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
            net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
        _YOLO_NET_CACHE[key] = net
    return net


def _output_layer_names(net):
    """Return the names of the network's unconnected output layers."""
    layer_names = net.getLayerNames()
    # Depending on the OpenCV version the ids come back as an Nx1 array or a
    # flat array; flattening handles both. The ids are 1-based.
    out_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
    return [layer_names[int(i) - 1] for i in out_ids]


def _decode_detections(outs, width, height, min_confidence):
    """Turn raw network outputs into candidate boxes, confidences and class ids.

    Each detection row is read as ``[cx, cy, w, h, <unused>, score_0, score_1, ...]``
    with the geometry expressed as fractions of the frame size. Rows whose best
    class score is not above ``min_confidence`` are dropped.
    """
    boxes, confidences, class_ids = [], [], []
    scale = np.array([width, height, width, height], dtype=np.float64)
    for out in outs:
        out = np.asarray(out)
        if out.size == 0:
            continue
        rows = out.reshape(-1, out.shape[-1])
        scores = rows[:, 5:]
        best = np.argmax(scores, axis=1)
        best_conf = scores[np.arange(rows.shape[0]), best]
        keep = best_conf > min_confidence
        if not keep.any():
            continue

        # Centre coordinates and size in pixels (truncated like int()).
        geometry = (rows[keep, :4].astype(np.float64) * scale).astype(int)
        center_x, center_y, w, h = geometry.T

        # Top-left corner of each bounding box.
        x = (center_x - w / 2).astype(int)
        y = (center_y - h / 2).astype(int)

        boxes.extend(np.stack([x, y, w, h], axis=1).tolist())
        confidences.extend(best_conf[keep].astype(np.float64).tolist())
        class_ids.extend(best[keep].tolist())
    return boxes, confidences, class_ids


def yolo(frame, size, score_threshold, nms_threshold, gpu, min_confidence=0.1):
    """Run YOLOv4 on ``frame``, draw the surviving detections on it and return them.

    Reads the module-level ``classes`` list to map class ids to label names.

    Args:
        frame: Image array; only its first two dimensions (height, width) are used.
            The frame is drawn on in place.
        size: Side length of the square network input passed to ``blobFromImage``.
        score_threshold: Score threshold forwarded to ``cv2.dnn.NMSBoxes``.
        nms_threshold: Overlap threshold forwarded to ``cv2.dnn.NMSBoxes``.
        gpu: If truthy, select the CUDA backend and target for the network.
        min_confidence: Candidates whose best class score is not above this
            value are discarded before non-maximum suppression.

    Returns:
        ``(frame, output_bbox, label_list)`` where ``output_bbox`` is a list of
        ``[x, y, w, h]`` boxes and ``label_list`` the matching
        ``"<class name> <confidence>"`` strings, both in candidate order.
    """
    # Load the YOLO network (cached after the first call).
    net = _load_yolo_net(gpu)
    output_layers = _output_layer_names(net)

    # One random RGB colour per class.
    colors = np.random.uniform(0, 255, size=(len(classes), 3))

    # Only height and width are needed; slicing also accepts 2-D frames.
    height, width = frame.shape[:2]

    # Pre-process the frame and feed it to the network.
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (size, size), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    boxes, confidences, class_ids = _decode_detections(outs, width, height, min_confidence)

    # Candidate boxes (x, y, width, height) and their confidences.
    print(f"boxes: {boxes}")
    print(f"confidences: {confidences}")

    # Non-maximum suppression: among overlapping boxes keep the most confident.
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold=score_threshold, nms_threshold=nms_threshold)

    # NMSBoxes may return (), an Nx1 array or a flat array; normalise to
    # ascending plain ints so results stay in candidate order.
    kept = sorted(int(i) for i in np.asarray(indexes).flatten())

    # Indices of the boxes that survived suppression.
    print("indexes: ", end='')
    for index in kept:
        print(index, end=' ')
    print("\n\n============================== classes ==============================")

    output_bbox = []
    label_list = []
    for i in kept:
        x, y, w, h = boxes[i]
        class_name = classes[class_ids[i]]
        label = f"{class_name} {confidences[i]:.2f}"
        color = colors[class_ids[i]].tolist()

        # Bounding box, filled label background and label text.
        cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
        cv2.rectangle(frame, (x - 1, y), (x + len(class_name) * 13 + 65, y - 25), color, -1)
        cv2.putText(frame, label, (x, y - 8), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), 2)

        # Report the detected object.
        print(f"[{class_name}({i})] conf: {confidences[i]} / x: {x} / y: {y} / width: {w} / height: {h}")
        output_bbox.append([x, y, w, h])
        label_list.append(label)

    return frame, output_bbox, label_list

In [10]:
# Label names looked up by class id in yolo() (80 entries, ten per row).
# NOTE(review): presumably the COCO label set in the order the weights expect — confirm.
classes = [
    # 0-9
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
    # 10-19
    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    # 20-29
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    # 30-39
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle",
    # 40-49
    "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
    # 50-59
    "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed",
    # 60-69
    "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
    # 70-79
    "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier",
    "toothbrush",
]

# Candidate network input sizes; the capture loop passes size_list[2] to yolo().
size_list = [320, 416, 608]

In [19]:
import time

# Play the video, running YOLO on every N_num-th frame and re-drawing the
# most recent detections on the frames in between.
cap = cv2.VideoCapture('walking_cut1.mp4')
if not cap.isOpened():
    print("could not open video: walking_cut1.mp4")

# Detection interval in frames; values below 1 are treated as 1 (every frame).
N_num = max(1, int(input("숫자를 입력하세요:")))

# Most recent detections. `confidence` holds the label strings returned by
# yolo() ("<class name> <score>"), not bare numbers.
boxes, confidence = [], []

frame_idx = 0
time_ = time.perf_counter()
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret or frame is None:
            break
        start = time.perf_counter()

        if frame_idx % N_num == 0:
            # Detection frame: yolo() draws on the frame and returns fresh boxes.
            yolo_frame, boxes, confidence = yolo(frame=frame, size=size_list[2], score_threshold=0.4, nms_threshold=0.4, gpu=False)
            cv2.imshow("Frame", yolo_frame)
        else:
            # In-between frame: overlay the boxes from the last detection.
            for box, confi in zip(boxes, confidence):
                x, y, w, h = box
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 255))
                cv2.putText(frame, confi, (x, y - 8), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 0, 0), 2)
            cv2.imshow("Frame", frame)

        print('time:', time.perf_counter() - start)

        frame_idx += 1
        # Esc stops playback.
        if cv2.waitKey(1) == 27:
            break
finally:
    # Always free the capture and the window, even if detection raised.
    cap.release()
    cv2.destroyAllWindows()

print('all time:', time.perf_counter() - time_)

숫자를 입력하세요:100
boxes: [[61, -1, 204, 36], [228, 0, 179, 45], [226, -1, 184, 46], [231, -1, 209, 46], [229, -1, 216, 46], [397, -2, 200, 60], [395, -3, 203, 61], [813, -1, 180, 52], [814, -1, 178, 52], [826, 0, 170, 51], [825, 0, 172, 51], [393, 0, 207, 61], [394, -1, 208, 62], [405, 0, 204, 61], [405, -1, 204, 62], [808, 0, 189, 56], [809, 0, 185, 57], [725, 29, 42, 164], [722, 30, 47, 161], [771, 32, 44, 157], [-2, 33, 76, 173], [-1, 33, 77, 173], [725, 37, 41, 153], [725, 37, 44, 154], [772, 38, 43, 162], [770, 39, 47, 159], [793, 274, 76, 214], [787, 269, 87, 224], [793, 277, 76, 220], [787, 275, 87, 223], [795, 282, 75, 210], [791, 279, 86, 215], [23, -1, 220, 37], [68, 0, 193, 37], [230, -2, 189, 48], [231, -2, 188, 48], [392, -2, 202, 59], [392, -2, 202, 58], [814, -1, 174, 52], [815, 0, 172, 51], [829, -1, 166, 52], [829, -1, 166, 52], [388, 0, 205, 62], [389, 0, 204, 62], [388, 0, 205, 62], [815, 0, 171, 57], [813, 0, 173, 57], [0, 11, 79, 179], [0, 11, 79, 180], [725, 30, 42, 1

time: 0.0009744167327880859
time: 0.0009737014770507812
time: 0.0009753704071044922
time: 0.0009791851043701172
time: 0.0009729862213134766
time: 0.0019981861114501953
time: 0.0009472370147705078
time: 0.0009984970092773438
time: 0.000997781753540039
time: 0.001997232437133789
time: 0.0019989013671875
time: 0.0009691715240478516
time: 0.0010006427764892578
time: 0.0009989738464355469
time: 0.0009737014770507812
time: 0.00096893310546875
time: 0.0009696483612060547
time: 0.0009720325469970703
time: 0.0010025501251220703
time: 0.0009722709655761719
time: 0.0010004043579101562
time: 0.0009999275207519531
time: 0.00099945068359375
time: 0.0009734630584716797
time: 0.0009725093841552734
time: 0.0009980201721191406
time: 0.0010266304016113281
time: 0.00099945068359375
time: 0.0019996166229248047
time: 0.001001119613647461
time: 0.0009999275207519531
time: 0.00102996826171875
time: 0.0009875297546386719
time: 0.0009732246398925781
time: 0.0009729862213134766
time: 0.0019736289978027344
time: 

time: 0.0007755756378173828
time: 0.001008749008178711
time: 0.0009992122650146484
time: 0.0010013580322265625
time: 0.0009710788726806641
time: 0.00197601318359375
time: 0.002000570297241211
time: 0.002002239227294922
time: 0.0010094642639160156
time: 0.0009970664978027344
time: 0.0009768009185791016
time: 0.0010013580322265625
time: 0.0009856224060058594
time: 0.0009741783142089844
time: 0.0009729862213134766
time: 0.0020017623901367188
time: 0.000995635986328125
time: 0.0009741783142089844
time: 0.0009992122650146484
time: 0.0019991397857666016
time: 0.0010061264038085938
time: 0.0019991397857666016
time: 0.0010001659393310547
time: 0.000972747802734375
time: 0.0009775161743164062
time: 0.0009770393371582031
time: 0.0009734630584716797
time: 0.0009996891021728516
time: 0.0009987354278564453
time: 0.0009729862213134766
time: 0.0009748935699462891
time: 0.0009734630584716797
boxes: [[31, -1, 217, 37], [59, -1, 207, 37], [228, -1, 175, 46], [226, -1, 179, 46], [231, 0, 210, 47], [229, 

time: 0.0009996891021728516
time: 0.0009801387786865234
time: 0.001972198486328125
time: 0.001024007797241211
time: 0.0009722709655761719
time: 0.0009746551513671875
time: 0.001971721649169922
time: 0.0019750595092773438
time: 0.0019989013671875
time: 0.000997781753540039
time: 0.0010001659393310547
time: 0.0009989738464355469
time: 0.0009734630584716797
time: 0.001972675323486328
time: 0.0010018348693847656
time: 0.0010001659393310547
time: 0.0010068416595458984
time: 0.0020024776458740234
time: 0.001005411148071289
time: 0.0009741783142089844
time: 0.0009737014770507812
time: 0.000972747802734375
time: 0.00197601318359375
time: 0.0019981861114501953
time: 0.001973867416381836
time: 0.0009942054748535156
time: 0.001973390579223633
time: 0.0019724369049072266
time: 0.0010254383087158203
time: 0.0009753704071044922
time: 0.0019719600677490234
time: 0.001973390579223633
time: 0.0009756088256835938
time: 0.0009996891021728516
time: 0.0010230541229248047
time: 0.001970052719116211
time: 0.

time: 0.0009772777557373047
time: 0.002001523971557617
time: 0.000972747802734375
time: 0.0019762516021728516
time: 0.0009758472442626953
time: 0.0019783973693847656
time: 0.001972675323486328
time: 0.0019969940185546875
time: 0.0009853839874267578
time: 0.0020012855529785156
time: 0.001970052719116211
time: 0.000978231430053711
time: 0.0009746551513671875
all time: 7.455550909042358
