In [None]:
!pip install pyannote.metrics --quiet

In [1]:
import json
import glob
import os
from pyannote.metrics.diarization import DiarizationErrorRate
from pyannote.core import Segment, Annotation

metric = DiarizationErrorRate()

In [2]:
def load_reference_rttm(rttm_path):
    """Parses an RTTM file into a pyannote Annotation object."""
    annotation = Annotation()
    with open(rttm_path, "r") as f:
        for line in f:
            if line.startswith("SPEAKER"):
                parts = line.strip().split()
                start = float(parts[3])
                duration = float(parts[4])
                speaker = parts[7]
                annotation[Segment(start, start + duration)] = speaker
    return annotation

def load_hypothesis_json(json_path):
    """Parses your specific JSON format into a pyannote Annotation object."""
    annotation = Annotation()
    with open(json_path, "r") as f:
        segments = json.load(f)
    
    for seg in segments:
        start = seg["start"]
        end = seg["end"]
        speaker = seg["speaker"]
        annotation[Segment(start, end)] = speaker
    return annotation

In [3]:
def evaluate_models(reference_path, json_pattern):
    """
    Args:
        reference_path: Path to the Ground Truth RTTM.
        json_pattern: A wildcard pattern (e.g., "results/*.json") or list of files.
    """
    try:
        reference = load_reference_rttm(reference_path)
        print(f"Reference loaded: {os.path.basename(reference_path)}")
    except Exception as e:
        print(f"Could not load reference: {e}")
        return

    if isinstance(json_pattern, list):
        files = json_pattern
    else:
        files = glob.glob(json_pattern)
        
    print(f"Found {len(files)} model outputs to test.\n")

    print(f"{'MODEL / FILE':<40} | {'DER':<10}")
    print("-" * 55)

    results = {}
    for file_path in files:
        try:
            hypothesis = load_hypothesis_json(file_path)
            
            der = metric(reference, hypothesis)
            results[file_path] = der
            
            filename = os.path.basename(file_path)
            print(f"{filename:<40} | {der:.2%}")
            
        except Exception as e:
            print(f"{os.path.basename(file_path):<40} | ERROR: {e}")

    if results:
        best_model = min(results, key=results.get)
        print("-" * 55)
        print(f"Best Model: {os.path.basename(best_model)} (DER: {results[best_model]:.2%})")

In [9]:
# English
model_3_path = "../../data/raw/audio/diarization_output/model_3.1/english_3.json"
model_2_path = "../../data/raw/audio/diarization_output/model_2.1/english_2.json"
model_percision_path = "../../data/raw/audio/diarization_output/precision_model/english_precision.json"
model_community_path = "../../data/raw/audio/diarization_output/community_model/english_community.json"

evaluate_models(
    reference_path="../../data/raw/audio/audio_rttms/english_reference.rttm", 
    json_pattern=[model_3_path, model_2_path, model_percision_path, model_community_path] 
)

Reference loaded: english_reference.rttm
Found 4 model outputs to test.

MODEL / FILE                             | DER       
-------------------------------------------------------
english_3.json                           | 17.65%
english_2.json                           | 6.75%
english_precision.json                   | 0.00%
english_community.json                   | 21.05%
-------------------------------------------------------
Best Model: english_precision.json (DER: 0.00%)


In [10]:
# Noise English
model_3_path = "../../data/raw/audio/diarization_output/model_3.1/noise_english_3.json"
model_2_path = "../../data/raw/audio/diarization_output/model_2.1/noise_english_2.json"
model_percision_path = "../../data/raw/audio/diarization_output/precision_model/noise_english_precision.json"
mdoel_community_path = "../../data/raw/audio/diarization_output/community_model/noise_english_community.json"

evaluate_models(
    reference_path="../../data/raw/audio/audio_rttms/english_reference.rttm", 
    json_pattern=[model_3_path, model_2_path, model_percision_path, model_community_path] 
)

Reference loaded: english_reference.rttm
Found 4 model outputs to test.

MODEL / FILE                             | DER       
-------------------------------------------------------
noise_english_3.json                     | 18.14%
noise_english_2.json                     | 6.22%
noise_english_precision.json             | 0.35%
english_community.json                   | 21.05%
-------------------------------------------------------
Best Model: noise_english_precision.json (DER: 0.35%)


In [11]:
# Arabic
model_3_path = "../../data/raw/audio/diarization_output/model_3.1/arabic_3.json"
model_2_path = "../../data/raw/audio/diarization_output/model_2.1/arabic_2.json"
model_percision_path = "../../data/raw/audio/diarization_output/precision_model/arabic_precision.json"
mdoel_community_path = "../../data/raw/audio/diarization_output/community_model/arabic_community.json"

evaluate_models(
    reference_path="../../data/raw/audio/audio_rttms/arabic_reference.rttm", 
    json_pattern=[model_3_path, model_2_path, model_percision_path, model_community_path]  
)

Reference loaded: arabic_reference.rttm
Found 4 model outputs to test.

MODEL / FILE                             | DER       
-------------------------------------------------------
arabic_3.json                            | 8.15%
arabic_2.json                            | 51.65%
arabic_precision.json                    | 0.00%
english_community.json                   | 89.29%
-------------------------------------------------------
Best Model: arabic_precision.json (DER: 0.00%)


In [12]:
# Noise Arabic
model_3_path = "../../data/raw/audio/diarization_output/model_3.1/noise_arabic_3.json"
model_2_path = "../../data/raw/audio/diarization_output/model_2.1/noise_arabic_2.json"
model_percision_path = "../../data/raw/audio/diarization_output/precision_model/noise_arabic_precision.json"
mdoel_community_path = "../../data/raw/audio/diarization_output/community_model/noise_arabic_community.json"

evaluate_models(
    reference_path="../../data/raw/audio/audio_rttms/arabic_reference.rttm", 
    json_pattern=[model_3_path, model_2_path, model_percision_path, model_community_path]  
)

Reference loaded: arabic_reference.rttm
Found 4 model outputs to test.

MODEL / FILE                             | DER       
-------------------------------------------------------
noise_arabic_3.json                      | 9.92%
noise_arabic_2.json                      | 8.70%
noise_arabic_precision.json              | 0.71%
english_community.json                   | 89.29%
-------------------------------------------------------
Best Model: noise_arabic_precision.json (DER: 0.71%)


### NOTE : All Code switched attemps were used by adding (num_speakers = 2) except for Precision model

In [14]:
# Code Switch
model_3_path = "../../data/raw/audio/diarization_output/model_3.1/code_switch_3.json"
model_2_path = "../../data/raw/audio/diarization_output/model_2.1/code_switch_2.json"
model_percision_path = "../../data/raw/audio/diarization_output/precision_model/code_switch_precision.json"
mdoel_community_path = "../../data/raw/audio/diarization_output/community_model/code_switch_community.json"

evaluate_models(
    reference_path="../../data/raw/audio/audio_rttms/code_switch_reference.rttm", 
    json_pattern=[model_3_path, model_2_path, model_percision_path, model_community_path] 
)

Reference loaded: code_switch_reference.rttm
Found 4 model outputs to test.

MODEL / FILE                             | DER       
-------------------------------------------------------
code_switch_3.json                       | 19.97%
code_switch_2.json                       | 16.88%
code_switch_precision.json               | 0.00%
english_community.json                   | 100.63%
-------------------------------------------------------
Best Model: code_switch_precision.json (DER: 0.00%)


In [15]:
# Noise Code Switch
model_3_path = "../../data/raw/audio/diarization_output/model_3.1/noise_code_switch_3.json"
model_2_path = "../../data/raw/audio/diarization_output/model_2.1/noise_code_switch_2.json"
model_percision_path = "../../data/raw/audio/diarization_output/precision_model/noise_code_switch_precision.json"
mdoel_community_path = "../../data/raw/audio/diarization_output/community_model/noise_code_switch_community.json"

evaluate_models(
    reference_path="../../data/raw/audio/audio_rttms/code_switch_reference.rttm", 
    json_pattern=[model_3_path, model_2_path, model_percision_path, model_community_path] 
)

Reference loaded: code_switch_reference.rttm
Found 4 model outputs to test.

MODEL / FILE                             | DER       
-------------------------------------------------------
noise_code_switch_3.json                 | 19.93%
noise_code_switch_2.json                 | 15.65%
noise_code_switch_precision.json         | 0.31%
english_community.json                   | 100.63%
-------------------------------------------------------
Best Model: noise_code_switch_precision.json (DER: 0.31%)
