In [15]:
# YOLOv8
import ultralytics
from ultralytics import YOLO

import numpy as np
import cv2
import torch
import torch.backends.cudnn as cudnn

import supervision as sv

from collections import defaultdict

In [16]:
print('CUDA disponível' if torch.cuda.is_available() else 'CUDA indisponível')
print(f'Ultralytics: {ultralytics.__version__}')
print(f'Torch: {torch.__version__}')
print(f'supervision: {sv.__version__}')

CUDA disponível
Ultralytics: 8.0.106
Torch: 2.1.1+cu121
supervision: 0.16.0


In [17]:
# Carrega o vídeo
VIDEO = './dataset/road_video001.mp4'
OUTPUT = 'output_supervision.mp4'

cap = cv2.VideoCapture(VIDEO)

video_info = sv.VideoInfo.from_video_path(VIDEO)
print(video_info)

# Linha para contagem de veiculos
START = sv.Point(0, 2*video_info.height//3)
END = sv.Point(video_info.width, 2*video_info.height//3)

# Cria o contador de objetos
veiculos = set()

track_history = defaultdict(lambda: [])

VideoInfo(width=1280, height=720, fps=25, total_frames=750)


In [18]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

# Carregando o modelo pré-treinado
model = YOLO('yolov8n.pt')

cuda


In [19]:
print(model.model.names)

{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microw

In [20]:
# Selecionando apenas as classes relevantes pro problema
classes = [2, 7]

In [21]:
video_info = sv.VideoInfo.from_video_path("./dataset/road_video001.mp4")

with sv.VideoSink("output_supervision1.mp4", video_info) as sink:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        results = model.track(frame, classes=classes, persist=True, tracker='bytetrack.yaml', device=0, verbose=False)
        
        if results[0].boxes.id is not None:
            boxes = results[0].boxes.xywh.cpu()
            tracks_ids = results[0].boxes.id.int().cpu().tolist()
            
            annotated_frame = results[0].plot()
                        
            detections = sv.Detections.from_ultralytics(results[0])
            
            for box, track_id in zip(boxes, tracks_ids):
                x, y, w,h = box
                track = track_history[track_id]
                track.append((float(x), float(y)))
                
                if START.x < x < END.x and abs(y - START.y) < 5:
                    if track_id not in veiculos:
                        veiculos.add(track_id)
                
                cv2.rectangle(annotated_frame, (int(x - w / 2), int(y - h / 2)), (int(x + w / 2), int(y + h / 2)), (0, 255, 0), 2)
            
        cv2.line(annotated_frame, (START.x, START.y), (END.x, END.y), (0, 255, 0), 2)
        
        cv2.putText(annotated_frame, f'Veiculos: {len(veiculos)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        
        sink.write_frame(annotated_frame)
    
cap.release()

print(f'Veículos: {len(veiculos)}')

Veículos: 68
