In [1]:
import cv2
import numpy as np
import time
import os
import matplotlib.pyplot as plt


In [2]:
# Paths to the Darknet model files: class-name list, trained weights and
# network configuration.
with open('darknet/cfg/coco.names') as names_file:
    # One label per line. The context manager closes the file handle, which
    # the previous bare open(...).read() never did.
    LABELS = names_file.read().strip().split('\n')
weights_path = 'darknet/yolov4.weights'
# NOTE: the name is misspelled ("confing"), but the cell that loads the
# network reads it under this exact name, so it is kept as-is.
confing_path = 'darknet/cfg/yolov4.cfg'


In [3]:
# Load the network through the explicit Darknet loader: configuration file
# first, trained weights second.
net = cv2.dnn.readNetFromDarknet(confing_path, weights_path)


In [4]:
# Additional configuration for detection

# Fixed seed so every class gets the same box color on each run.
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

layer_names = net.getLayerNames()
# The reported indices are 1-based (hence the `- 1` below). Flattening makes
# the lookup work whether OpenCV hands them back as a flat array or as an
# (N, 1) array, which differs between OpenCV releases.
out_layer_ids = net.getUnconnectedOutLayers()
out_layer_names = [layer_names[i - 1] for i in np.array(out_layer_ids).flatten()]

print(f"""
Todas as camadas:
{layer_names}

Total: {len(layer_names)}

Camadas de saída:
{out_layer_ids}
{out_layer_names}
""")

# Names of the output layers; blob_img() passes this list to net.forward().
ln = out_layer_names



Todas as camadas:
('conv_0', 'bn_0', 'mish_1', 'conv_1', 'bn_1', 'mish_2', 'conv_2', 'bn_2', 'mish_3', 'identity_3', 'conv_4', 'bn_4', 'mish_5', 'conv_5', 'bn_5', 'mish_6', 'conv_6', 'bn_6', 'mish_7', 'shortcut_7', 'conv_8', 'bn_8', 'mish_9', 'concat_9', 'conv_10', 'bn_10', 'mish_11', 'conv_11', 'bn_11', 'mish_12', 'conv_12', 'bn_12', 'mish_13', 'identity_13', 'conv_14', 'bn_14', 'mish_15', 'conv_15', 'bn_15', 'mish_16', 'conv_16', 'bn_16', 'mish_17', 'shortcut_17', 'conv_18', 'bn_18', 'mish_19', 'conv_19', 'bn_19', 'mish_20', 'shortcut_20', 'conv_21', 'bn_21', 'mish_22', 'concat_22', 'conv_23', 'bn_23', 'mish_24', 'conv_24', 'bn_24', 'mish_25', 'conv_25', 'bn_25', 'mish_26', 'identity_26', 'conv_27', 'bn_27', 'mish_28', 'conv_28', 'bn_28', 'mish_29', 'conv_29', 'bn_29', 'mish_30', 'shortcut_30', 'conv_31', 'bn_31', 'mish_32', 'conv_32', 'bn_32', 'mish_33', 'shortcut_33', 'conv_34', 'bn_34', 'mish_35', 'conv_35', 'bn_35', 'mish_36', 'shortcut_36', 'conv_37', 'bn_37', 'mish_38', 'conv_

### Funções para detecção e processamento do vídeo

In [5]:
# Display images
def show(img):
    """Render an image inline with matplotlib.

    The image is converted from BGR to RGB channel order before plotting,
    drawn on the current figure (resized to 16x10 inches) with the axes
    hidden.

    Args:
        img: Image array in BGR channel order.
    """
    rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    figure = plt.gcf()
    figure.set_size_inches(16, 10)
    plt.axis("off")
    plt.imshow(rgb_img)
    plt.show()

# Build the image blob and run it through the network
def blob_img(net, img, show_text=True, input_size=(416, 416), out_layers=None):
    """Run one forward pass of the network on an image.

    The image is turned into a blob (pixel values scaled by 1/255, resized
    to ``input_size`` without cropping, red and blue channels swapped), set
    as the network input and forwarded through the requested output layers.

    Args:
        net: Network object exposing ``setInput`` and ``forward``.
        img: Input image passed to ``cv2.dnn.blobFromImage``.
        show_text: When true, print how long the forward pass took.
        input_size: ``(width, height)`` the blob is resized to. Defaults to
            the previously hard-coded ``(416, 416)``.
        out_layers: Names of the layers to collect outputs from. When
            ``None``, the notebook-level ``ln`` list is used, which keeps
            existing ``blob_img(net, img)`` calls working unchanged.

    Returns:
        Tuple ``(net, img, layer_outputs)``; ``net`` and ``img`` are the
        objects passed in, ``layer_outputs`` is what ``net.forward`` returned.
    """
    if out_layers is None:
        # Backward-compatible fallback to the notebook-level layer list.
        out_layers = ln

    # perf_counter is a monotonic clock, so the measured interval cannot be
    # skewed by wall-clock adjustments.
    start = time.perf_counter()
    blob = cv2.dnn.blobFromImage(img, 1 / 255.0, input_size, swapRB=True, crop=False)
    net.setInput(blob)
    layer_outputs = net.forward(out_layers)
    elapsed = time.perf_counter() - start

    if show_text:
        print("YOLO took {:.2f} seconds.".format(elapsed))
    return net, img, layer_outputs


#### Realizando a detecção

In [6]:
def detections(detection, _threshold, boxes, confidences, id_classes,
               frame_width=None, frame_height=None):
    """Record one raw detection if its best class score passes the threshold.

    Args:
        detection: 1-D array for a single candidate. Elements 0-3 are read as
            (center x, center y, width, height) and are multiplied by the
            frame size; elements from index 5 onward are the per-class
            scores. Index 4 is not used here.
        _threshold: Minimum best-class score (exclusive) to keep the
            detection.
        boxes: List that receives ``[x, y, width, height]`` (top-left corner,
            integer pixels). Mutated in place.
        confidences: List that receives the best-class score as a float.
            Mutated in place.
        id_classes: List that receives the best-class index as an int.
            Mutated in place.
        frame_width: Frame width in pixels. When ``None``, the notebook-level
            global ``W`` is used (the original behavior).
        frame_height: Frame height in pixels. When ``None``, the
            notebook-level global ``H`` is used (the original behavior).

    Returns:
        The same ``(boxes, confidences, id_classes)`` lists that were passed
        in.
    """
    scores = detection[5:]
    class_id = np.argmax(scores)
    confidence = scores[class_id]

    if confidence > _threshold:
        # Resolve the frame size only when it is needed, so a rejected
        # detection never touches the globals.
        if frame_width is None:
            frame_width = W
        if frame_height is None:
            frame_height = H

        scale = np.array([frame_width, frame_height, frame_width, frame_height])
        box = detection[0:4] * scale
        (centerX, centerY, width, height) = box.astype("int")
        # Convert from box center to top-left corner.
        x = int(centerX - (width / 2))
        y = int(centerY - (height / 2))
        boxes.append([x, y, int(width), int(height)])
        confidences.append(float(confidence))
        # Store a plain int rather than a NumPy integer scalar.
        id_classes.append(int(class_id))

    return boxes, confidences, id_classes


def draw_boxes(img, i, confidences, boxes, COLORS, LABELS, show_text=True, class_ids=None):
    """Draw one detection (rectangle plus label) onto an image.

    Args:
        img: Image to draw on; modified in place.
        i: Index of the detection inside ``boxes`` / ``confidences`` /
            ``class_ids``.
        confidences: Sequence of confidence scores, indexed by ``i``.
        boxes: Sequence of ``[x, y, w, h]`` boxes, indexed by ``i``.
        COLORS: Per-class colors, indexed by class id.
        LABELS: Per-class label strings, indexed by class id.
        show_text: When true, print the label and box coordinates.
        class_ids: Sequence of class ids, indexed by ``i``. When ``None``,
            the notebook-level global ``id_classes`` is used, which is what
            the function previously always did.

    Returns:
        Tuple ``(img, x, y, w, h)``: the annotated image and the box that was
        drawn.
    """
    if class_ids is None:
        # Backward-compatible fallback to the notebook-level list.
        class_ids = id_classes

    (x, y) = (boxes[i][0], boxes[i][1])
    (w, h) = (boxes[i][2], boxes[i][3])

    class_id = class_ids[i]
    # Convert the color channels to plain ints before passing them to OpenCV.
    color = [int(c) for c in COLORS[class_id]]
    cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
    text = "{}: {:.4f}".format(LABELS[class_id], confidences[i])

    if show_text:
        print(">", text)
        print(x, y, w, h)
    # Label is placed 5 px above the top-left corner of the box.
    cv2.putText(img, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    return img, x, y, w, h


#### Carregando video

In [7]:
!wget https://github.com/gabevr/yolo/raw/master/videos/video_pessoas01.mp4


--2026-01-26 16:46:01--  https://github.com/gabevr/yolo/raw/master/videos/video_pessoas01.mp4


Resolving github.com (github.com)... 4.228.31.150
Connecting to github.com (github.com)|4.228.31.150|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/gabevr/yolo/master/videos/video_pessoas01.mp4 [following]
--2026-01-26 16:46:01--  https://raw.githubusercontent.com/gabevr/yolo/master/videos/video_pessoas01.mp4
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.108.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 6129513 (5,8M) [application/octet-stream]
Saving to: ‘video_pessoas01.mp4.3’


2026-01-26 16:46:02 (9,47 MB/s) - ‘video_pessoas01.mp4.3’ saved [6129513/6129513]



#### Lendo o arquivo de vídeo com OpenCV

In [8]:
video_file = 'video_pessoas01.mp4'
cap = cv2.VideoCapture(video_file)
# Fail fast with a clear message instead of a confusing error further down
# when the frame turns out to be None.
if not cap.isOpened():
    raise IOError(f"Could not open video file: {video_file}")

# Read the first frame to inspect the video's properties. This consumes the
# frame from `cap`, so later reads on the same capture start at the next one.
connected, video = cap.read()
connected


True

In [9]:
video.shape # video resolution (height, width) and number of channels


(720, 1280, 3)

In [10]:
# The first two entries of the frame shape are (height, width).
video_height, video_width = video.shape[:2]

print(
    f"Video: {video_file}",
    f"Resolução: {video_width}x{video_height}",
    f"FPS: {cap.get(cv2.CAP_PROP_FPS)}",
    sep="\n",
)


Video: video_pessoas01.mp4
Resolução: 1280x720
FPS: 25.0


In [11]:
# Resize the video

def resize(width, height, max_width=600):
    """Compute a frame size capped at ``max_width``, keeping the aspect ratio.

    Args:
        width: Original width.
        height: Original height.
        max_width: Largest width allowed.

    Returns:
        ``(new_width, new_height)``. The input size is returned unchanged
        when ``width`` does not exceed ``max_width``; otherwise the width
        becomes ``max_width`` and the height is scaled by the same ratio and
        truncated to an int.
    """
    if width <= max_width:
        return width, height

    aspect_ratio = width / height
    return max_width, int(max_width / aspect_ratio)

# Target frame size, computed from the source resolution.
n_width, n_height = resize(width=video_width, height=video_height)
print(n_width, n_height)


600 337


#### Definir as configurações do vídeo de saída

In [12]:
file_name = 'output.avi'
fourcc = cv2.VideoWriter_fourcc(*'XVID')  # use MP4V for an .mp4 file
# Write at the source frame rate so the output plays at the original speed
# (the input reports 25 FPS; a hard-coded 24 slows it down). Falls back to 24
# when the capture reports 0, e.g. if it is no longer open.
fps = cap.get(cv2.CAP_PROP_FPS) or 24
output_video = cv2.VideoWriter(file_name, fourcc, fps, (n_width, n_height))


#### Definindo Variaveis

In [13]:
# Detection thresholds
threshold = 0.5      # minimum class score for a detection to be kept
threshold_NMS = 0.3  # NMS threshold handed to cv2.dnn.NMSBoxes

# Text rendering
font = cv2.FONT_HERSHEY_SIMPLEX
small_font = 0.4
medium_font = 0.6

# Preview: how many frames are shown in a window, and how many were shown so far
show_samples = 20
current_sample = 0


#### Processamento do vídeo e exibição do resultado

In [14]:
# Process the video frame by frame: detect, draw the boxes, preview the first
# frames and append every annotated frame to the output file. The try/finally
# guarantees the capture, the writer and any preview window are released even
# if a frame raises part-way through.
try:
    # Loop until the video ends or a key is pressed in the preview window.
    while cv2.waitKey(1) < 0:
        connected, frame = cap.read()
        if not connected:
            break

        t = time.time()
        frame = cv2.resize(frame, (n_width, n_height))
        # H and W are module-level on purpose: detections() reads them as
        # globals to scale the boxes to pixel coordinates.
        (H, W) = frame.shape[:2]

        # Boxes are drawn on a copy; `frame` itself is only fed to the network.
        img_cp = frame.copy()
        net, frame, layer_outputs = blob_img(net, frame)
        boxes = []
        confidences = []
        # Also module-level on purpose: draw_boxes() reads id_classes as a global.
        id_classes = []

        for output in layer_outputs:
            for detection in output:
                boxes, confidences, id_classes = detections(detection, threshold, boxes, confidences, id_classes)

        # Non-maximum suppression drops overlapping boxes for the same object.
        objs = cv2.dnn.NMSBoxes(boxes, confidences, threshold, threshold_NMS)

        if len(objs) > 0:
            # np.array(...) makes flatten() work whatever container is returned.
            for i in np.array(objs).flatten():
                # draw_boxes returns the annotated copy; it becomes the frame
                # that is shown and written below.
                frame = draw_boxes(img_cp, i, confidences, boxes, COLORS, LABELS, show_text=False)[0]

        cv2.putText(frame, " frame processado em {:.2f} segundos".format(time.time() - t),
                    (20, n_height - 20), font, small_font, (250, 250, 250), 0, lineType=cv2.LINE_AA)

        # Only preview the first `show_samples` frames; all frames are written.
        if current_sample < show_samples:
            cv2.imshow("frame window", frame)
            current_sample += 1

        output_video.write(frame)

    print("Fim do processamento")
finally:
    output_video.release()
    cap.release()
    cv2.destroyAllWindows()


YOLO took 1.36 seconds.
YOLO took 0.36 seconds.
YOLO took 0.32 seconds.
YOLO took 0.37 seconds.
YOLO took 0.46 seconds.
YOLO took 0.62 seconds.
YOLO took 0.69 seconds.
YOLO took 0.66 seconds.
YOLO took 0.64 seconds.
YOLO took 0.80 seconds.
YOLO took 0.56 seconds.
YOLO took 0.52 seconds.
YOLO took 0.53 seconds.
YOLO took 0.56 seconds.
YOLO took 0.56 seconds.
YOLO took 0.55 seconds.
YOLO took 0.52 seconds.
YOLO took 0.52 seconds.
YOLO took 0.56 seconds.
YOLO took 0.48 seconds.
YOLO took 0.52 seconds.
YOLO took 0.68 seconds.
YOLO took 0.64 seconds.
YOLO took 0.53 seconds.
YOLO took 0.52 seconds.
YOLO took 0.61 seconds.
YOLO took 0.57 seconds.
YOLO took 0.50 seconds.
YOLO took 0.56 seconds.
YOLO took 0.52 seconds.
YOLO took 0.51 seconds.
YOLO took 0.52 seconds.
YOLO took 0.49 seconds.
YOLO took 0.49 seconds.
YOLO took 0.55 seconds.
YOLO took 0.84 seconds.
YOLO took 0.67 seconds.
YOLO took 0.55 seconds.
YOLO took 0.53 seconds.
YOLO took 0.54 seconds.
YOLO took 0.53 seconds.
YOLO took 0.53 s