In [12]:
import os
from ultralytics import YOLO
import torch
import cv2
from tqdm import tqdm
import pyttsx3
import numpy as np
import time

In [13]:
model = YOLO("D:\\Documents\\pythonProjectLabelimg\\yolo_dataset\\res\\yolov8n_fold_3_BEST\\weights\\best.pt")

In [14]:
engine = pyttsx3.init()
voices = engine.getProperty('voices')
engine.setProperty('voice', voices[0].id)
engine.setProperty('rate', 150)

In [15]:
def speak(text):
    engine.say(text)
    engine.runAndWait()

In [16]:
def calculate_iou(box1, box2, class_name):
    #box1: (x1, y1, x2, y2) - bounding box
    #box2: (x1, y1, x2, y2) - основной прямоугольник
    x1_intersect = max(box1[0], box2[0])
    y1_intersect = max(box1[1], box2[1])
    x2_intersect = min(box1[2], box2[2])
    y2_intersect = min(box1[3], box2[3])

    intersection_area = max(0, x2_intersect - x1_intersect) * max(0, y2_intersect - y1_intersect)
    if class_name in ['люк', 'яма', 'бордюр', 'машина', 'дверь','трамвайные пути', 'спуск', 'лестница', 'стена']:
        square = abs(box1[2]-box1[0])*abs(box1[3]-box1[1])
    else:
        square = abs(box2[2]-box2[0])*abs(box2[3]-box2[1])
    return intersection_area/square

In [None]:
video_path = "D:\\Documents\\pythonProjectLabelimg\\yolo_dataset\\video\\video_2025-04-30_00-05-50.mp4" 
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print("Ошибка при открытии видеофайла")
    exit()

# FPS и размеры видео
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
fps = cap.get(cv2.CAP_PROP_FPS)

rect_width = 550
rect_height = 1100
roi_x1 = (frame_width - rect_width) // 2 
roi_y1 = (frame_height - rect_height) // 2 + 100
color = (0, 255, 0)
thickness = 2 

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
output_path = "D:\\Documents\\pythonProjectLabelimg\\yolo_dataset\\res\\videos\\video_1.mp4"
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

spoken_objects = {}  # словарь для хранения класса последнего объекта
objects = []
cooldown_duration = 10 # задержка для одного класса

while True:
    ret, frame = cap.read()
    if not ret:
        break

    start_time = time.time()

    results = model(frame, stream=True)

    cv2.rectangle(frame, (roi_x1, roi_y1), (roi_x1 + rect_width, roi_y1 + rect_height), color, thickness)#отрисовка ограничевающего прямоугольника
#    print(roi_box)
    cv2.ROTATE_90_CLOCKWISE
    for r in results:
        boxes = r.boxes # bounding boxes
        for box in boxes:
            x1, y1, x2, y2 = box.xyxy[0] # координаты bounding box
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            w, h = x2 - x1, y2 - y1 # ширина и высота

            conf = box.conf[0] # уверенность
            conf = float(conf)
            if conf >= 0.6:
                roi_box = (roi_x1, roi_y1, roi_x1 + rect_width, roi_y1 + rect_height)
                bbox = (x1, y1, x2, y2)
                class_id = int(box.cls[0])  #ID класса
                classes_box =  ['car', 'curb', 'door', 'wall', 'ladder', 'down', 'pit', 'hatch', 'fence', 'tram tracks']
                classes = ['машина', 'бордюр', 'дверь', 'стена', 'лестница', 'спуск', 'яма', 'люк', 'ограждение', 'трамвайные пути']
                class_name = classes[class_id]
                class_name_box = classes_box[class_id]
                square = calculate_iou(bbox, roi_box, class_name)
                if square >= 0.7:
                    if class_name not in objects and time.time() - spoken_objects[class_name] >= cooldown_duration:
                        spoken_objects[class_name] = time.time()
                        objects.append(class_name)
                        speak(f"Впереди {class_name}")


                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 255), 3)
                label = f'{class_name_box} {conf:.2f}'
                cv2.putText(frame, label, (x1, y1 + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 255), 2)

    end_time = time.time()
    fps = 1 / (end_time - start_time) # FPS
    cv2.putText(frame, f"FPS: {fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)


    cv2.imshow('YOLOv8 Detections', frame)
    out.write(frame)

    # выход по нажатию 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()


0: 640x384 1 , 39.5ms
Speed: 3.6ms preprocess, 39.5ms inference, 7.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 , 21.3ms
Speed: 2.5ms preprocess, 21.3ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 19.1ms
Speed: 2.2ms preprocess, 19.1ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 14.7ms
Speed: 3.2ms preprocess, 14.7ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 20.0ms
Speed: 2.2ms preprocess, 20.0ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 55.2ms
Speed: 2.3ms preprocess, 55.2ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 23.6ms
Speed: 2.5ms preprocess, 23.6ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 31.1ms
Speed: 4.7ms preprocess, 31.1ms inference, 4.0ms postproces