In [19]:
import pandas as pd
import numpy as np

def merge_intervals(intervals):
    """Merge overlapping or touching ``(start, end)`` intervals.

    The input is never modified: a sorted copy is built instead of sorting
    the caller's list in place.

    Args:
        intervals: iterable of ``(start, end)`` pairs, or ``None``.

    Returns:
        A new list of intervals ordered by start. Intervals that were merged
        are returned as tuples; intervals that needed no merging are returned
        as the original objects.
    """
    if intervals is None:
        return []

    # sorted() copies, so the caller's ordering is preserved.
    ordered = sorted(intervals, key=lambda pair: pair[0])
    if not ordered:
        return []

    merged = [ordered[0]]
    for current in ordered[1:]:
        prev_start, prev_end = merged[-1]
        curr_start, curr_end = current

        # "<=" means intervals that merely touch are merged as well.
        if curr_start <= prev_end:
            merged[-1] = (prev_start, max(prev_end, curr_end))
        else:
            merged.append(current)

    return merged

# Load the detector output and keep only the rows labelled 'call'.
detections = pd.read_csv('../dataset/detections.csv')
detections_call = detections.loc[detections['label'] == 'call']
# merge_intervals is deliberately not applied here: for now it only hurts the result.
predicted_intervals = [
    (start_s, end_s)
    for start_s, end_s in zip(detections_call['start_s'], detections_call['end_s'])
]

# 2. Parse true_label.txt by hand: tab-separated fields, the first two being start and end.
true_intervals = []
with open('../dataset/true_label.txt', 'r') as label_file:
    for raw_line in label_file:
        fields = raw_line.strip().split('\t')
        # Lines with fewer than two fields are skipped.
        if len(fields) >= 2:
            # Decimal commas are replaced with points before conversion.
            start, end = (float(text.replace(',', '.')) for text in fields[:2])
            true_intervals.append((start, end))

# 3. Global IoU threshold
iou_global = 0.3
# Print both interval lists as a sanity check
print("True intervals (call):", true_intervals)
print("Detected intervals (call):", predicted_intervals)

True intervals (call): [(16.64595, 17.64888), (24.368349, 25.349689), (42.17569, 42.778351), (43.80769, 44.47702), (41.309021, 42.06636), (51.514622, 52.214931), (61.05003, 61.72472), (75.898003, 77.025993), (84.827797, 85.286003), (109.514198, 110.776497), (116.905998, 117.618103), (128.7603, 129.434601), (131.129303, 131.864105), (134.044998, 134.736801), (158.687897, 159.206604), (167.8526, 168.423004), (198.684097, 199.358398), (203.811203, 204.407898), (252.985703, 253.841599), (49.790119, 50.428871), (157.111206, 157.822998), (158.680695, 159.958206), (160.797699, 161.746704), (172.183502, 172.779694), (186.304596, 187.077194), (187.624695, 188.518906), (189.139404, 189.723404), (222.542496, 223.023102), (254.650299, 255.349899), (279.752411, 280.257294)]
Detected intervals (call): [(16.5, 17.5), (17.0, 18.0), (24.0, 25.0), (24.5, 25.5), (41.0, 42.0), (41.5, 42.5), (42.0, 43.0), (42.5, 43.5), (43.0, 44.0), (43.5, 44.5), (44.0, 45.0), (48.5, 49.5), (49.0, 50.0), (49.5, 50.5), (51.

In [20]:
import numpy as np
from tqdm import tqdm

def calculate_iou(interval_a, interval_b):
    """Intersection-over-union of two 1-D intervals given as ``(start, end)``.

    Returns 0.0 when the union has no positive length.
    """
    (a_lo, a_hi), (b_lo, b_hi) = interval_a, interval_b

    # Length of the shared part; anything not strictly positive counts as no overlap.
    raw_overlap = min(a_hi, b_hi) - max(a_lo, b_lo)
    overlap = raw_overlap if raw_overlap > 0 else 0

    combined = (a_hi - a_lo) + (b_hi - b_lo) - overlap
    if combined > 0:
        return overlap / combined
    return 0.0

def calculate_ap(true_segments, pred_segments, iou_thresholds):
    """Average precision (101-point interpolation) for each IoU threshold.

    Predictions are processed in the order given: no confidence score is
    used, so the precision-recall curve depends on the order of
    ``pred_segments``. Each prediction is greedily matched to the
    not-yet-matched ground-truth segment with the highest IoU, and counts as
    a true positive when that IoU reaches the threshold.

    Args:
        true_segments: sequence of ground-truth ``(start, end)`` intervals.
        pred_segments: sequence of predicted ``(start, end)`` intervals.
        iou_thresholds: iterable of IoU thresholds.

    Returns:
        A list with one AP value per threshold, in the same order. When
        there are no ground-truth segments every AP is 0.0.
    """
    n_true = len(true_segments)
    n_pred = len(pred_segments)

    # Without ground truth recall is undefined; return zeros explicitly
    # instead of dividing by zero (which used to emit a RuntimeWarning).
    if n_true == 0:
        return [0.0 for _ in iou_thresholds]

    # IoU values do not depend on the threshold, so compute them once.
    iou_table = [[calculate_iou(pred, true) for true in true_segments]
                 for pred in pred_segments]
    recall_levels = [r / 100 for r in range(101)]

    aps = []
    for thresh in iou_thresholds:
        # Greedy one-to-one matching of predictions to ground truth.
        matched_true = set()
        tp = np.zeros(n_pred)

        for i, row in enumerate(iou_table):
            best_iou = 0.0
            best_idx = -1
            for j, iou in enumerate(row):
                if j in matched_true:
                    continue
                if iou > best_iou:
                    best_iou = iou
                    best_idx = j

            if best_iou >= thresh:
                matched_true.add(best_idx)
                tp[i] = 1

        # Every prediction that is not a true positive is a false positive.
        fp = 1 - tp

        # Precision-recall curve over the prediction sequence.
        tp_cumsum = np.cumsum(tp)
        fp_cumsum = np.cumsum(fp)
        recalls = tp_cumsum / n_true
        precisions = tp_cumsum / (tp_cumsum + fp_cumsum + 1e-12)

        # Interpolated precision: best precision at recall >= each level.
        interp_precision = np.zeros(101)
        for r, level in enumerate(recall_levels):
            precision_vals = precisions[recalls >= level]
            interp_precision[r] = precision_vals.max() if precision_vals.size > 0 else 0

        # AP is the mean of the interpolated precisions.
        aps.append(np.mean(interp_precision))

    return aps

def evaluate_metrics(true_segments, pred_segments, iou_threshold=iou_global):  # key coefficient, worth tuning
    """Detection metrics for predicted vs. ground-truth intervals.

    Each prediction, in the order given, is greedily matched to the
    not-yet-matched ground-truth segment with the highest IoU and counted as
    a true positive when that IoU is at least ``iou_threshold``.

    Returns:
        dict with keys 'Precision', 'Recall', 'F1-score' (at
        ``iou_threshold``) and 'AP50', 'AP75', 'AP50-95' (from
        ``calculate_ap``).
    """
    used_true = set()
    hits = 0

    for candidate in pred_segments:
        top_iou, top_idx = 0.0, -1
        for idx, reference in enumerate(true_segments):
            if idx not in used_true:
                overlap = calculate_iou(candidate, reference)
                if overlap > top_iou:
                    top_iou, top_idx = overlap, idx

        if top_iou >= iou_threshold:
            hits += 1
            used_true.add(top_idx)

    # TP + FP is the number of predictions; TP + FN is the number of true segments.
    n_pred = len(pred_segments)
    n_true = len(true_segments)

    precision = hits / n_pred if n_pred > 0 else 0.0
    recall = hits / n_true if n_true > 0 else 0.0
    if (precision + recall) > 0:
        f1_score = 2 * (precision * recall) / (precision + recall)
    else:
        f1_score = 0.0

    report = {
        'Precision': precision,
        'Recall': recall,
        'F1-score': f1_score,
    }
    report['AP50'] = calculate_ap(true_segments, pred_segments, [0.5])[0]
    report['AP75'] = calculate_ap(true_segments, pred_segments, [0.75])[0]
    report['AP50-95'] = np.mean(calculate_ap(true_segments, pred_segments, np.arange(0.5, 1.0, 0.05)))
    return report

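# Usage example. The triple-quoted block below is disabled sample data; the call after it uses the intervals loaded above.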
"""
true_intervals = [
(0.592229  ,1.262479),
(7.773479  ,8.491604),
(29.272560  ,29.863020),
(33.896339  ,34.614460),
(36.090611  ,36.593288),
(37.319401  ,37.806129),
(49.592899  ,50.366879),
(51.084999  ,51.866959),
(52.304119  ,52.727020),
(64.454803  ,65.069199),
(66.313957  ,66.872513),
(67.590607  ,68.133209),
(104.301201  ,105.083199),
(111.745796  ,112.288399),
(113.694801  ,114.321503)
]
predicted_intervals = [
 (0.496, 1.328),
 (7.834666666666666, 8.802666666666667),
 (29.24266666666667, 30.466666666666665),
 (33.91466666666667, 34.909333333333336),
 (35.89066666666667, 36.792),
 (37.42133333333334, 37.824),
 (43.58133333333333, 43.59733333333333),
 (49.584, 50.42133333333334),
 (50.99466666666667, 52.925333333333334),
 (64.39466666666667, 65.02666666666667),
 (66.344, 68.11733333333333),
 (104.37333333333333, 105.34933333333333),
 (111.75733333333334, 112.25333333333333),
 (113.58666666666667, 114.30133333333333),
 (115.55733333333333, 115.576)  
]
"""
# Score the loaded detections against the reference labels and print each metric to 4 decimals.
metrics = evaluate_metrics(true_intervals, predicted_intervals)
for metric in metrics:
    value = metrics[metric]
    print(f"{metric}: {value:.4f}")

Precision: 0.4328
Recall: 0.9667
F1-score: 0.5979
AP50: 0.4306
AP75: 0.0215
AP50-95: 0.1439


In [21]:
def bootstrap_confidence_intervals(true_segments, pred_segments, iou_threshold=iou_global,
                                  n_bootstrap=100, confidence_level=0.95, seed=None):
    """Percentile-bootstrap confidence intervals for every metric of ``evaluate_metrics``.

    On each iteration the ground-truth and predicted segments are resampled
    with replacement, independently of each other, and the metrics are
    recomputed on the resampled lists.

    NOTE(review): resampling with replacement creates duplicate segments,
    while the matching in ``evaluate_metrics`` is one-to-one, so a duplicated
    prediction cannot match the same ground-truth segment twice. This
    presumably biases the resampled precision/recall downwards -- confirm
    that this resampling scheme is the intended one.

    Args:
        true_segments: sequence of ground-truth ``(start, end)`` intervals.
        pred_segments: sequence of predicted ``(start, end)`` intervals.
        iou_threshold: IoU threshold forwarded to ``evaluate_metrics``.
        n_bootstrap: number of bootstrap iterations.
        confidence_level: two-sided confidence level, e.g. 0.95.
        seed: optional seed for reproducible resampling. ``None`` (default)
            keeps the previous behaviour and draws from NumPy's global
            random state.

    Returns:
        dict mapping ``"<metric>_CI"`` to an array ``[lower, upper]``.
    """
    n_true = len(true_segments)
    n_pred = len(pred_segments)

    # np.random (module) and Generator both expose choice(a, size, replace=...).
    rng = np.random if seed is None else np.random.default_rng(seed)

    metric_names = ('Precision', 'Recall', 'F1-score', 'AP50', 'AP75', 'AP50-95')
    metrics_samples = {name: [] for name in metric_names}

    for _ in tqdm(range(n_bootstrap), desc="Bootstrapping"):
        true_bootstrap = [true_segments[i] for i in rng.choice(n_true, n_true, replace=True)]
        pred_bootstrap = [pred_segments[i] for i in rng.choice(n_pred, n_pred, replace=True)]

        metrics = evaluate_metrics(true_bootstrap, pred_bootstrap, iou_threshold)
        for k in metrics_samples:
            metrics_samples[k].append(metrics[k])

    # Two-sided percentile interval: cut alpha from each tail.
    alpha = (1 - confidence_level) / 2
    ci = {}

    for metric, samples in metrics_samples.items():
        ci[metric + "_CI"] = np.percentile(samples, [alpha*100, (1-alpha)*100])

    return ci

# Run the bootstrap on the loaded intervals; show the raw dict, then one interval per line.
bootstrap = bootstrap_confidence_intervals(true_intervals, predicted_intervals)
print(bootstrap)
for metric, bounds in bootstrap.items():
    print(f"{metric}: {bounds}")
    
import math
from scipy.stats import norm

def wilson_interval(k, n, alpha=0.05):
    """Wilson score interval for ``k`` successes in ``n`` trials.

    Args:
        k: number of successes.
        n: number of trials.
        alpha: significance level; the interval has nominal coverage ``1 - alpha``.

    Returns:
        ``(lo, hi)`` bounds clamped to [0, 1]. With no trials (``n <= 0``)
        nothing is known about the proportion, so ``(0.0, 1.0)`` is returned
        instead of dividing by zero.
    """
    if n <= 0:
        return 0.0, 1.0

    z = norm.ppf(1 - alpha/2)
    p = k / n
    denom = 1 + z*z/n
    centre = p + z*z/(2*n)
    half = z * math.sqrt(p*(1-p)/n + z*z/(4*n*n))
    # Clamp: floating-point round-off can push the bounds marginally
    # outside [0, 1] when k == 0 or k == n.
    lo = max(0.0, (centre - half) / denom)
    hi = min(1.0, (centre + half) / denom)
    return lo, hi

def evaluate_wilson_ci(true_segments, pred_segments, iou_threshold = iou_global, alpha= 0.05):
    """Analytic confidence intervals for precision, recall and F1.

    Predictions are matched to ground truth with the same greedy one-to-one
    IoU matching as ``evaluate_metrics``. Precision and recall get Wilson
    score intervals; F1 gets a normal-approximation interval whose standard
    error comes from the delta method.

    Args:
        true_segments: sequence of ground-truth ``(start, end)`` intervals.
        pred_segments: sequence of predicted ``(start, end)`` intervals.
        iou_threshold: minimum IoU for a prediction to count as a true positive.
        alpha: significance level of the intervals.

    Returns:
        dict with keys 'Precision_Ci_Wilson', 'Recall_Ci_Wilson' and
        'F1_Ci_Wilson', each a ``(lower, upper)`` pair. When there are no
        predictions (or no ground truth) the corresponding interval is the
        uninformative ``(0.0, 1.0)``.
    """
    # 1) Greedy one-to-one matching of predictions to ground truth.
    true_positives = 0
    matched_true_indices = set()

    for pred in pred_segments:
        best_iou = 0.0
        best_true_idx = -1

        for i, true in enumerate(true_segments):
            if i in matched_true_indices:
                continue

            iou = calculate_iou(pred, true)
            if iou > best_iou:
                best_iou = iou
                best_true_idx = i

        if best_iou >= iou_threshold:
            true_positives += 1
            matched_true_indices.add(best_true_idx)

    false_positives = len(pred_segments) - true_positives
    false_negatives = len(true_segments) - true_positives

    n_p = true_positives + false_positives
    n_r = true_positives + false_negatives
    p_hat = true_positives / n_p if n_p > 0 else 0.0
    r_hat = true_positives / n_r if n_r > 0 else 0.0

    # 2) Wilson intervals for precision and recall. With zero trials the
    #    proportion is unconstrained, so skip the call (it would divide by zero).
    if n_p > 0:
        p_lo, p_hi = wilson_interval(true_positives, n_p, alpha=alpha)
    else:
        p_lo, p_hi = 0.0, 1.0
    if n_r > 0:
        r_lo, r_hi = wilson_interval(true_positives, n_r, alpha=alpha)
    else:
        r_lo, r_hi = 0.0, 1.0

    # 3) F1 estimate and its standard error.
    if p_hat + r_hat == 0:
        f1_hat = 0.0
        se_f1 = 0.0
    else:
        f1_hat = 2 * p_hat * r_hat / (p_hat + r_hat)
        # Delta method: Var[F1] ~ (dg/dp)^2 Var[p] + (dg/dr)^2 Var[r],
        # where g(p, r) = 2pr / (p + r). The covariance term between p and r
        # is not included in this approximation.
        dg_dp = 2 * r_hat**2 / (p_hat + r_hat)**2
        dg_dr = 2 * p_hat**2 / (p_hat + r_hat)**2
        # p_hat + r_hat > 0 implies at least one true positive, so n_p, n_r > 0 here.
        var_p = p_hat * (1 - p_hat) / n_p
        var_r = r_hat * (1 - r_hat) / n_r
        var_f1 = dg_dp**2 * var_p + dg_dr**2 * var_r
        se_f1 = math.sqrt(var_f1)

    # 4) Normal-approximation CI for F1, clipped to [0, 1].
    z = norm.ppf(1 - alpha/2)
    f1_lo = max(0.0, f1_hat - z * se_f1)
    f1_hi = min(1.0, f1_hat + z * se_f1)

    return {
        'Precision_Ci_Wilson': ( p_lo, p_hi),
        'Recall_Ci_Wilson': (r_lo, r_hi),
        'F1_Ci_Wilson': (f1_lo, f1_hi)
    }

# Analytic (Wilson / delta-method) intervals for the loaded intervals, one per line.
wilson = evaluate_wilson_ci(true_intervals, predicted_intervals)
for metric, bounds in wilson.items():
    print(f"{metric}: {bounds}")


Bootstrapping: 100%|██████████| 100/100 [00:00<00:00, 163.43it/s]

{'Precision_CI': array([0.20895522, 0.35820896]), 'Recall_CI': array([0.46666667, 0.8       ]), 'F1-score_CI': array([0.28865979, 0.49484536]), 'AP50_CI': array([0.0612737 , 0.26700133]), 'AP75_CI': array([0.       , 0.0535607]), 'AP50-95_CI': array([0.01205379, 0.09114842])}
Precision_CI: [0.20895522 0.35820896]
Recall_CI: [0.46666667 0.8       ]
F1-score_CI: [0.28865979 0.49484536]
AP50_CI: [0.0612737  0.26700133]
AP75_CI: [0.        0.0535607]
AP50-95_CI: [0.01205379 0.09114842]
Precision_Ci_Wilson: (np.float64(0.3210431149754174), np.float64(0.5519126350920707))
Recall_Ci_Wilson: (np.float64(0.8332960900859081), np.float64(0.9940914096183874))
F1_Ci_Wilson: (np.float64(0.4840688717112453), np.float64(0.7118074169485485))



