# Интеллектуальные методы обработки видео

## Модуль 4. Аннотирование видео


In [2]:
# Constants
# Root data folder; the cells below read the source video from its "src"
# subfolder and write annotated videos and logs to its "result" subfolder.
dataFolder = "./data"

In [6]:
import cv2
import datetime

# Paths of the input video, the annotated output video and the detection log.
file_src = f"{dataFolder}/src/Sequence 01.mp4"
# The f-prefix is required: without it the literal text "{dataFolder}" becomes
# part of the path instead of the configured data folder.
file_dst = f"{dataFolder}/result/annotated_output1.avi"
log_file = f"{dataFolder}/result/face_detection_log1.txt"

# Timestamp layout shared by every entry written to the log file.
TIME_FORMAT = "%Y-%m-%d %H:%M:%S"

face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
cap = cv2.VideoCapture(file_src)

# Read the frame size from the source so the VideoWriter is created with
# matching dimensions.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# NOTE(review): the output frame rate is fixed at 10 regardless of the source
# video's rate - confirm this is intended.
out = cv2.VideoWriter(file_dst, cv2.VideoWriter_fourcc("M", "J", "P", "G"), 10, (frame_width, frame_height))

# Logging state: whether the previous frame contained a face, and when the
# current run of consecutive face frames started.
faces_detected_prev = False
start_time = None

# Start a fresh log file with a short header.
with open(log_file, "w") as log:
    log.write("Face Detection Log\n")
    log.write("Format: [Start Time] - [End Time]\n")

# try/finally guarantees that the capture, the writer and the preview window
# are released even if processing fails part-way through.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No more frames could be read.
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.1, 4)

        if len(faces) > 0:
            if not faces_detected_prev:
                # First frame of a new run of detections. Timestamps are
                # wall-clock times taken while processing, not positions
                # inside the video.
                start_time = datetime.datetime.now().strftime(TIME_FORMAT)
                faces_detected_prev = True

            for x, y, w, h in faces:
                cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        elif faces_detected_prev:
            # The run of detections just ended: record its interval.
            end_time = datetime.datetime.now().strftime(TIME_FORMAT)
            with open(log_file, "a") as log:
                log.write(f"[{start_time}] - [{end_time}]\n")
            faces_detected_prev = False

        # Write the annotated frame to the output video.
        out.write(frame)

        cv2.imshow("Video Annotation", frame)

        # Pressing "q" in the preview window stops processing early.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    # A run that is still open when processing stops is closed here so that
    # it is not lost from the log.
    if faces_detected_prev:
        end_time = datetime.datetime.now().strftime(TIME_FORMAT)
        with open(log_file, "a") as log:
            log.write(f"[{start_time}] - [{end_time}]\n")
finally:
    cap.release()
    out.release()
    cv2.destroyAllWindows()

In [4]:
from facenet_pytorch import MTCNN
import cv2
import torch

# Paths of the input video, the annotated output video and the log file.
file_src = f"{dataFolder}/src/Sequence 01.mp4"
# The f-prefix is required: without it the literal text "{dataFolder}" becomes
# part of the path instead of the configured data folder.
file_dst = f"{dataFolder}/result/annotated_output2.avi"
# NOTE(review): log_file is assigned but never used in this cell.
log_file = f"{dataFolder}/result/face_detection_log.txt"

# Run the detector on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mtcnn = MTCNN(keep_all=True, device=device)

# Open the video and read its frame size for the writer.
cap = cv2.VideoCapture(file_src)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Create the VideoWriter for the annotated output.
# NOTE(review): the output frame rate is fixed at 10 regardless of the source
# video's rate - confirm this is intended.
out = cv2.VideoWriter(file_dst, cv2.VideoWriter_fourcc("M", "J", "P", "G"), 10, (frame_width, frame_height))

# try/finally guarantees that the capture and the writer are released even if
# detection fails part-way through the video.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No more frames could be read.
            break

        # Convert the frame from BGR to RGB before passing it to the detector.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Detect faces; the second return value is not needed here.
        boxes, _ = mtcnn.detect(frame_rgb)

        # Draw a rectangle around every detected face. `boxes` can be None
        # (presumably when nothing was detected - confirm against the
        # facenet-pytorch documentation).
        if boxes is not None:
            for box in boxes:
                cv2.rectangle(frame, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (255, 0, 0), 2)

        # Write the annotated frame to the output video.
        out.write(frame)

        # Live preview is disabled; uncomment to display frames while
        # processing and to allow stopping with the Q key.
        # cv2.imshow('Video Annotation', frame)
        # if cv2.waitKey(1) & 0xFF == ord('q'):
        #     break
finally:
    cap.release()
    out.release()
    cv2.destroyAllWindows()

In [5]:
from facenet_pytorch import MTCNN
import cv2
import torch
import numpy as np
from datetime import datetime

# Paths of the input video, the annotated output video and the log file.
# NOTE(review): the output and log file names are the same as in the
# Haar-cascade cell earlier in this notebook, so running both overwrites the
# earlier results - confirm whether distinct names were intended.
file_src = f"{dataFolder}/src/Sequence 01.mp4"
# The f-prefix is required: without it the literal text "{dataFolder}" becomes
# part of the path instead of the configured data folder.
file_dst = f"{dataFolder}/result/annotated_output1.avi"
log_file = f"{dataFolder}/result/face_detection_log1.txt"

# Initialisation: detector (GPU when available, otherwise CPU), video reader
# and a writer created with the source's frame size.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mtcnn = MTCNN(keep_all=True, device=device)
cap = cv2.VideoCapture(file_src)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# NOTE(review): the output frame rate is fixed at 10 regardless of the source
# video's rate - confirm this is intended.
out = cv2.VideoWriter(file_dst, cv2.VideoWriter_fourcc("M", "J", "P", "G"), 10, (frame_width, frame_height))

# Faces tracked in the previous frame, keyed by face ID.
faces_dict = {}
face_id = 1


def get_center(box):
    """Return the centre of a bounding box as a NumPy array ``[x, y]``.

    ``box`` is indexed as ``[x1, y1, x2, y2]``: elements 0 and 1 are one
    corner, elements 2 and 3 the opposite corner. Any further elements are
    ignored.
    """
    left, top = box[0], box[1]
    half_width = (box[2] - left) / 2
    half_height = (box[3] - top) / 2
    return np.array([left + half_width, top + half_height])


def find_closest_face(face_centers, new_face_center):
    """Return the index of the centre nearest to ``new_face_center``.

    ``face_centers`` is a 2-D array with one centre per row and
    ``new_face_center`` a single point. Distance is Euclidean; when several
    rows are equally close, the first of them is returned.
    """
    offsets = face_centers - new_face_center
    return np.linalg.norm(offsets, axis=1).argmin()


# Finished tracking intervals, one line per face ID; written to `log_file`
# after the whole video has been processed.
log = []

# Two detections in consecutive frames whose box centres are closer than this
# many pixels are treated as the same face.
MATCH_DISTANCE = 50
# Timestamp layout used for log entries.
TIME_FORMAT = "%Y-%m-%d %H:%M:%S"

# Next unused face ID. A dedicated counter keeps IDs unique: deriving a new ID
# from the previous frame's IDs gave every new face within one frame the same
# ID (so they overwrote each other) and reused the IDs of faces that had
# already left the picture.
next_face_id = 1


def _format_interval(track_id, track):
    """Return the log line for one track: its ID and first/last seen times."""
    first_seen = track["first_seen"].strftime(TIME_FORMAT)
    last_seen = track["last_seen"].strftime(TIME_FORMAT)
    return f"ID {track_id}: [{first_seen}] - [{last_seen}]"


# try/finally guarantees that the capture and the writer are released even if
# detection fails part-way through the video.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No more frames could be read.
            break

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        boxes, _ = mtcnn.detect(frame_rgb)

        # Faces found in this frame, keyed by face ID.
        current_faces = {}

        if boxes is not None:
            # IDs and centres of the faces from the previous frame, in the
            # same order, so an index into one is valid for the other.
            known_ids = list(faces_dict)
            face_centers = np.array([get_center(faces_dict[known_id]["box"]) for known_id in known_ids])
            # Wall-clock time of processing, not a position inside the video.
            now = datetime.now()

            for box in boxes:
                new_face_center = get_center(box)

                matched_id = None
                if len(face_centers) > 0:
                    idx = find_closest_face(face_centers, new_face_center)
                    distance = np.linalg.norm(face_centers[idx] - new_face_center)
                    # A previous face may be claimed by only one detection per
                    # frame; a second nearby detection becomes a new face.
                    if distance < MATCH_DISTANCE and known_ids[idx] not in current_faces:
                        matched_id = known_ids[idx]

                if matched_id is None:
                    face_id = next_face_id
                    next_face_id += 1
                    first_seen = now
                else:
                    face_id = matched_id
                    first_seen = faces_dict[face_id]["first_seen"]

                current_faces[face_id] = {"box": box, "first_seen": first_seen, "last_seen": now}
                cv2.rectangle(frame, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (255, 0, 0), 2)
                cv2.putText(frame, f"ID: {face_id}", (int(box[0]), int(box[1] - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

        # Faces tracked in the previous frame but absent from this one have
        # disappeared: record the interval during which each was visible.
        for lost_id, lost_track in faces_dict.items():
            if lost_id not in current_faces:
                log.append(_format_interval(lost_id, lost_track))

        faces_dict = current_faces

        out.write(frame)

        # Live preview is disabled; uncomment to display frames while
        # processing and to allow stopping with the Q key.
        # cv2.imshow('Video Annotation', frame)
        # if cv2.waitKey(1) & 0xFF == ord('q'):
        #     break
finally:
    cap.release()
    out.release()
    cv2.destroyAllWindows()

# Faces still tracked when the video ended never "disappeared"; close their
# intervals so they are not missing from the log.
log.extend(_format_interval(track_id, track) for track_id, track in faces_dict.items())

# Save log to file
with open(log_file, "w") as f:
    f.writelines(f"{entry}\n" for entry in log)