In [1]:
import os
from ultralytics import YOLO
import torch
import cv2
from tqdm import tqdm
import pyttsx3
import numpy as np
import time

In [2]:
# Load the trained detector from the "yolov8n_fold_3_BEST" run's best checkpoint.
# A raw string keeps the Windows path readable without doubled backslashes.
model = YOLO(r"D:\Documents\pythonProjectLabelimg\yolo_dataset\res\yolov8n_fold_3_BEST\weights\best.pt")

In [3]:
# Text-to-speech engine shared by the whole notebook (used by `speak`).
engine = pyttsx3.init()
# Pick the first voice the engine reports.
# NOTE(review): which voice/language this is depends on the machine — confirm it
# can pronounce the Russian phrases passed to `speak`.
voices = engine.getProperty('voices')
engine.setProperty('voice', voices[0].id)
# Speech rate setting passed to the engine.
engine.setProperty('rate', 150)

In [4]:
def speak(text):
    """Say `text` aloud through the module-level pyttsx3 `engine`.

    The phrase is queued with `engine.say` and then `engine.runAndWait()` is
    called, so this function does not return until `runAndWait` returns.
    """
    engine.say(text)
    engine.runAndWait()

In [8]:
def calculate_iou(box1, box2):
    """Return the fraction of `box1`'s area that lies inside `box2`.

    Despite the name, this is NOT a symmetric intersection-over-union: the
    intersection area is divided by the area of `box1` only, so the result
    answers "how much of the detection is covered by the reference rectangle".

    Args:
        box1: (x1, y1, x2, y2) of the detected bounding box.
        box2: (x1, y1, x2, y2) of the main (reference) rectangle.

    Returns:
        A value in [0, 1]; 0.0 when the boxes do not overlap or when `box1`
        has zero area (a degenerate box would otherwise divide by zero).
    """
    x1_intersect = max(box1[0], box2[0])
    y1_intersect = max(box1[1], box2[1])
    x2_intersect = min(box1[2], box2[2])
    y2_intersect = min(box1[3], box2[3])

    # Clamp each side at 0 so disjoint boxes yield an empty intersection.
    intersection_area = max(0, x2_intersect - x1_intersect) * max(0, y2_intersect - y1_intersect)
    box1_area = abs(box1[2] - box1[0]) * abs(box1[3] - box1[1])

    # Degenerate box (zero width or height): nothing can be covered.
    if box1_area == 0:
        return 0.0

    return intersection_area / box1_area

In [11]:
video_path = "D:\\Documents\\pythonProjectLabelimg\\yolo_dataset\\video\\video_2025-04-27_18-31-56.mp4"
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    # Abort with an exception: `exit()` is an interactive-shell helper and is
    # not a reliable way to stop execution from inside a notebook cell.
    raise SystemExit("Ошибка при открытии видеофайла")

# Source video FPS and frame size.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Region of interest: a fixed-size rectangle centred horizontally and shifted
# 100 px down from the vertical centre of the frame.
rect_width = 550
rect_height = 1100
roi_x1 = (frame_width - rect_width) // 2
roi_y1 = (frame_height - rect_height) // 2 + 100
roi_box = (roi_x1, roi_y1, roi_x1 + rect_width, roi_y1 + rect_height)
color = (0, 255, 0)
thickness = 2

conf_threshold = 0.6     # minimum detection confidence to consider a box
overlap_threshold = 0.7  # minimum share of the box that must lie inside the ROI

# Spoken (Russian) names indexed by class id.
# English equivalents: ['car', 'curb', 'door', 'wall', 'ladder', 'down', 'pit', 'hatch', 'fence', 'tram tracks']
classes = ['машина', 'бордюр', 'дверь', 'стена', 'лестница', 'спуск', 'яма', 'люк', 'ограждение', 'трамвайные пути']

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
output_path = "D:\\Documents\\pythonProjectLabelimg\\yolo_dataset\\res\\videos\\video_1.mp4"
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

spoken_objects = {}  # class name -> time of its last spoken announcement
objects = []         # class names announced at least once (no duplicates)
cooldown_duration = 8  # seconds before the same class may be announced again

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        start_time = time.time()

        # With stream=True the call returns a lazy generator: inference runs
        # while iterating below. Overlays are therefore drawn only after the
        # results are consumed, so they are never part of the model input.
        results = model(frame, stream=True)

        for r in results:
            for box in r.boxes:
                conf = float(box.conf[0])  # detection confidence
                if conf < conf_threshold:
                    continue

                # Bounding-box corners as integer pixel coordinates.
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
                if x2 <= x1 or y2 <= y1:
                    # Degenerate box after truncation: nothing to measure or draw.
                    continue
                bbox = (x1, y1, x2, y2)

                # Resolve the class name up front: the label drawn below needs
                # it for every confident box, not only for those inside the ROI.
                class_id = int(box.cls[0])
                class_name = classes[class_id]

                square = calculate_iou(bbox, roi_box)
                print(bbox)
                if square >= overlap_threshold:
                    print(square, class_name)
                    now = time.time()
                    last_spoken = spoken_objects.get(class_name)
                    if last_spoken is None or now - last_spoken >= cooldown_duration:
                        spoken_objects[class_name] = now
                        if class_name not in objects:
                            objects.append(class_name)
                        speak(f"Впереди {class_name}")

                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 255), 3)
                # NOTE(review): Hershey fonts cover a limited character set, so
                # the Cyrillic class name may not render correctly — confirm.
                label = f'{class_name} {conf:.2f}'
                cv2.putText(frame, label, (x1, y1 + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 255), 2)

        # Draw the ROI rectangle once inference for this frame is finished.
        cv2.rectangle(frame, (roi_box[0], roi_box[1]), (roi_box[2], roi_box[3]), color, thickness)

        # Per-frame processing rate; kept separate from the source video `fps`.
        elapsed = time.time() - start_time
        processing_fps = 1 / elapsed if elapsed > 0 else 0.0
        cv2.putText(frame, f"FPS: {processing_fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        cv2.imshow('YOLOv8 Detections', frame)
        out.write(frame)

        # Quit when 'q' is pressed.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture, finalise the output file and close the
    # window, including when the cell is interrupted or an error occurs.
    cap.release()
    out.release()
    cv2.destroyAllWindows()


0: 640x384 2  s, 18.6ms
Speed: 1357.6ms preprocess, 18.6ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 2  s, 13.3ms
(0, 409, 575, 1280)
0.8521739130434782
Speed: 3.2ms preprocess, 13.3ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 2  s, 22.4ms
(0, 412, 561, 1280)
0.8484848484848485
Speed: 3.7ms preprocess, 22.4ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 2  s, 19.1ms
(0, 399, 492, 1261)
0.8272357723577236
Speed: 3.5ms preprocess, 19.1ms inference, 6.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 2  s, 18.6ms
(0, 420, 551, 1280)
0.8457350272232305
Speed: 2.4ms preprocess, 18.6ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1  , 17.4ms
(0, 399, 512, 1264)
0.833984375
Speed: 2.4ms preprocess, 17.4ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 2  s, 13.2ms
(0, 408, 570, 1280)
0.8508771929824561
Speed: 2.3ms p

KeyboardInterrupt: 