In [None]:
import cv2
import os
from pathlib import Path
from matplotlib import pyplot as plt
import math
import numpy as np
from tqdm import tqdm

In [None]:
# Dataset folder; every regular file directly inside it is treated as a video.
path = Path("Dataset_TP3")
# Sorted so the processing order (and therefore the notebook output) is
# deterministic: iterdir() yields entries in arbitrary order.
videos_paths = sorted(p for p in path.iterdir() if p.is_file())

# Maps each video path to its opened cv2.VideoCapture handle.
videos = {}
for file in videos_paths:
    # str() keeps this working on OpenCV builds whose Python bindings reject
    # pathlib.Path objects as the filename argument.
    vid = cv2.VideoCapture(str(file))
    if not vid.isOpened():
        # Name the offending file so the failure can actually be diagnosed.
        print(f"Error: Could not open video: {file}")
        vid.release()
        continue
    videos[file] = vid

In [None]:
# Per-frame colour histograms.
# `histograms` maps each video key of `videos` to a list holding one entry per
# decoded frame; each entry is [hist_r, hist_g, hist_b], the 256-bin
# cv2.calcHist result of the corresponding channel.
frame_number = 0  # running count of frames decoded across all videos
histograms = {}
for vid in videos:
    histograms_frame = []
    video_file = videos[vid]
    print(f"Computing histograms for {vid}")

    # Rewind first: the capture keeps its read position between executions,
    # so re-running this cell would otherwise find it exhausted and silently
    # store an empty histogram list.
    video_file.set(cv2.CAP_PROP_POS_FRAMES, 0)

    # The reported frame count only sizes the progress bar. Decoding stops
    # when read() fails, so a wrong or missing count in the container
    # metadata can no longer truncate the video.
    frame_count = int(video_file.get(cv2.CAP_PROP_FRAME_COUNT))
    with tqdm(total=frame_count if frame_count > 0 else None) as progress:
        while True:
            ret, frame = video_file.read()
            if not ret:
                break

            frame_number += 1
            # cv2.split returns the planes in the frame's channel order,
            # unpacked here as blue, green, red.
            b, g, r = cv2.split(frame)

            # Stored in R, G, B order; later cells unpack entries that way.
            histograms_frame.append(
                [
                    cv2.calcHist([channel], [0], None, [256], [0, 256])
                    for channel in (r, g, b)
                ]
            )
            progress.update(1)
    histograms[vid] = histograms_frame

In [None]:
def compute_euclidean_distance(hist_a, hist_b) -> float:
    """Return the Euclidean (L2) distance between two histograms.

    Args:
        hist_a: First histogram; any array-like of numbers.
        hist_b: Second histogram, with a shape that broadcasts against
            ``hist_a``.

    Returns:
        The square root of the sum of squared element-wise differences, as a
        built-in ``float``.
    """
    # Work in float64: subtracting unsigned-integer arrays wraps around
    # instead of going negative, and float32 accumulation of large squared
    # bin counts loses precision.
    diff = np.asarray(hist_a, dtype=np.float64) - np.asarray(hist_b, dtype=np.float64)
    return float(np.sqrt(np.sum(diff**2)))

In [None]:
# Threshold on the summed R+G+B histogram distance between two consecutive
# frames; a larger distance is reported as a scene change.
SEUIL = 45000
N = 2  # not referenced in this cell; kept in case another cell relies on it
# Maps each video key of `histograms` to the frame indices flagged as cuts.
cuts: dict[Path, list[int]] = {}

for histo in histograms:
    frames = histograms[histo]
    distance_r = []
    distance_g = []
    distance_b = []
    distance_total = []
    scene_changes = []

    for i in range(1, len(frames)):
        prev_r, prev_g, prev_b = frames[i - 1]
        curr_r, curr_g, curr_b = frames[i]

        dist_r = compute_euclidean_distance(prev_r, curr_r)
        dist_g = compute_euclidean_distance(prev_g, curr_g)
        dist_b = compute_euclidean_distance(prev_b, curr_b)
        total_dist = dist_r + dist_g + dist_b

        distance_r.append(dist_r)
        distance_g.append(dist_g)
        distance_b.append(dist_b)
        distance_total.append(total_dist)

        # The cut is attributed to the second frame of the pair.
        if total_dist > SEUIL:
            scene_changes.append(i)

    cuts[histo] = scene_changes

    # Plot against real frame indices: the k-th stored distance compares
    # frame k with frame k + 1 and belongs to frame k + 1, so the x axis
    # starts at 1 and cut markers need no offset.
    frame_indices = range(1, len(frames))

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(frame_indices, distance_r, color="tab:red", label="Rouge")
    ax.plot(frame_indices, distance_g, color="tab:green", label="Vert")
    ax.plot(frame_indices, distance_b, color="tab:blue", label="Bleu")
    # Show the quantity the decision is actually taken on, next to the real
    # threshold, instead of a per-channel SEUIL / 3 approximation.
    ax.plot(frame_indices, distance_total, color="k", linewidth=1, label="Total")
    ax.axhline(
        y=SEUIL, color="tab:orange", linestyle="--", label=f"Seuil ({SEUIL:.0f})"
    )

    for sc in scene_changes:
        ax.axvline(x=sc, color="k", linestyle=":", alpha=0.5)

    ax.set_xlabel("Frame Index")
    ax.set_ylabel("Distance Euclidienne")
    ax.set_title(f"{histo}: Distance d'histogramme entre frames consécutives")
    ax.legend()
    fig.tight_layout()
    plt.show()
    # One figure is created per video; release it explicitly.
    plt.close(fig)

    print(f"Changements de scène détectés aux frames: {scene_changes}")

In [None]:
# Maximum distance, in frames, between a detected cut and a ground-truth cut
# for the detection to count as correct.
DETECTION_THRESHOLD = 1

# Folder holding one "<video name>.txt" ground-truth file per video.
# (Path comes from the notebook's import cell.)
cuts_path = Path("./cuts")


def _match_cuts(detected, truth, tolerance):
    """Pair detected cuts with ground-truth cuts, one-to-one.

    Both lists are walked in increasing order. Each ground-truth cut can
    validate at most one detection, so a burst of detections around a single
    real cut yields one true positive and the rest count as false positives.

    Args:
        detected: Frame indices reported by the detector.
        truth: Ground-truth frame indices.
        tolerance: Largest accepted absolute offset, in frames.

    Returns:
        A ``(correct, incorrect, missed)`` tuple of lists: matched detections,
        unmatched detections and unmatched ground-truth cuts.
    """
    detected = sorted(detected)
    truth = sorted(truth)
    correct, incorrect, missed = [], [], []
    d_idx = t_idx = 0
    while d_idx < len(detected) and t_idx < len(truth):
        det, ref = detected[d_idx], truth[t_idx]
        if abs(det - ref) <= tolerance:
            correct.append(det)
            d_idx += 1
            t_idx += 1
        elif det < ref:
            # Too early for this and every later ground-truth cut.
            incorrect.append(det)
            d_idx += 1
        else:
            # Every remaining detection is already past this cut.
            missed.append(ref)
            t_idx += 1
    incorrect.extend(detected[d_idx:])
    missed.extend(truth[t_idx:])
    return correct, incorrect, missed


for cut_name in cuts:
    # Text before the first dot of the video file name.
    name_file = cut_name.name.split(".")[0]
    valid_cuts_path = cuts_path / f"{name_file}.txt"

    if not valid_cuts_path.exists():
        print(f"Ground truth file not found: {valid_cuts_path}")
        continue

    # read_text() closes the file, unlike a bare open(...).read().
    valid_cuts_str = valid_cuts_path.read_text()

    # Accept commas and/or any whitespace (including newlines) as separators;
    # non-numeric tokens are ignored. Duplicates are dropped so one real cut
    # is never counted twice.
    valid_cuts = sorted(
        {
            int(token)
            for token in valid_cuts_str.replace(",", " ").split()
            if token.isdigit()
        }
    )

    gen_cuts = cuts[cut_name]
    correct_cuts, incorrect_cuts, missed_cuts = _match_cuts(
        gen_cuts, valid_cuts, DETECTION_THRESHOLD
    )

    total_detected = len(gen_cuts)
    tp = len(correct_cuts)
    fp = len(incorrect_cuts)
    fn = len(missed_cuts)

    # Precision and recall share the same true-positive count, which keeps
    # the F-score consistent.
    precision = tp / (tp + fp) * 100 if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) * 100 if (tp + fn) > 0 else 0.0

    if total_detected > 0:
        pct_correct = tp / total_detected * 100
        pct_incorrect = fp / total_detected * 100
    else:
        pct_correct = 0.0
        pct_incorrect = 0.0

    # Computed separately from recall so an empty ground truth reports 0 %
    # missed rather than 100 %.
    pct_missed = fn / len(valid_cuts) * 100 if valid_cuts else 0.0

    print(f"\n{'=' * 50}")
    print(f"Vidéo: {cut_name.name}")
    print(f"{'=' * 50}")
    print(f"Cuts détectés: {total_detected} | Ground truth: {len(valid_cuts)}")
    print(f"\n✓ Détections correctes: {len(correct_cuts)} ({pct_correct:.1f}%)")
    print(f"  Frames: {correct_cuts}")
    print(f"\n✗ Faux positifs: {len(incorrect_cuts)} ({pct_incorrect:.1f}%)")
    print(f"  Frames: {incorrect_cuts}")
    print(f"\n⊘ Cuts manqués: {len(missed_cuts)} ({pct_missed:.1f}% du ground truth)")
    print(f"  Frames: {missed_cuts}")
    print(f"\nRappel: {recall:.1f}%")
    print(f"\nPrécision: {precision:.1f}%")

    if precision + recall > 0:
        precision_frac = precision / 100
        recall_frac = recall / 100
        f1 = 2 * (precision_frac * recall_frac) / (precision_frac + recall_frac) * 100
    else:
        f1 = 0.0

    print(f"\nF-score (F1): {f1:.1f}%")