In [1]:
from pyannote.core import Segment, Annotation
from typing import TextIO, Optional

class Annotation(Annotation):
    """Subclass of the imported ``Annotation`` that adds an RTTM loader.

    NOTE: this class reuses (and therefore shadows) the ``Annotation`` name
    imported above, so every later use of ``Annotation`` in this notebook
    refers to this subclass.
    """

    @classmethod
    def from_rttm(
        cls, rttm_file: TextIO, uri: Optional[str] = None, modality: Optional[str] = None,
    ) -> "Annotation":
        """
        Create annotation from RTTM file.

        Only ``SPEAKER`` records are used. Blank lines, ``;;`` comment lines,
        records of any other type, and records too short to carry a speaker
        name (fewer than 8 fields) are skipped.

        Parameters
        ----------
        rttm_file : TextIO
            File object for the RTTM file.
        uri : str, optional
            Name of the annotated resource (e.g., audio or video file).
        modality : str, optional
            Name of the annotated modality.

        Returns
        -------
        annotation : Annotation
            New annotation object with parsed segments.

        Raises
        ------
        ValueError
            If the start time or duration of a ``SPEAKER`` record is not a
            valid number.
        """
        # Create an empty annotation
        annotation = cls(uri=uri, modality=modality)

        for track, raw_line in enumerate(rttm_file):
            fields = raw_line.split()

            # Skip blank lines and ";;" comment lines.
            if not fields or fields[0].startswith(";"):
                continue

            # Other record types (e.g. SPKR-INFO) carry "<NA>" in the time
            # fields and would make float() fail; they are not speech turns.
            if fields[0] != "SPEAKER":
                continue

            # Index 7 (speaker name) is the last field read, so 8 fields are
            # enough; trailing optional fields may be absent.
            if len(fields) < 8:
                continue

            # Parse RTTM fields
            start_time = float(fields[3])
            duration = float(fields[4])
            speaker_label = fields[7]

            segment = Segment(start=start_time, end=start_time + duration)
            # Use the line index as track name: with the default track, two
            # speakers sharing the exact same segment would overwrite each other.
            annotation[segment, track] = speaker_label

        return annotation


In [5]:
from pathlib import Path

from pyannote.metrics.diarization import DiarizationErrorRate

# Build paths with pathlib so the cell does not depend on the Windows-only
# backslash separator.
hyp_path = Path("whisper-pyannote-audio.rttm")
ref_path = Path("diarization") / "audio_new-gt.rttm"

# Hypothesis: diarization output to be scored.
with open(hyp_path) as f:
    hyp = Annotation.from_rttm(f)

# Reference: ground-truth diarization.
with open(ref_path) as f:
    ref = Annotation.from_rttm(f)

der = DiarizationErrorRate()
der_result = der(ref, hyp)  # argument order is (reference, hypothesis)
der_result



0.8005054862513825

In [9]:
from pathlib import Path

from pyannote.metrics.diarization import DiarizationErrorRate

# Build paths with pathlib so the cell does not depend on the Windows-only
# backslash separator.
hyp_path = Path("transcription-error-correction") / "rttm" / "audio_new" / "audio_new.rttm"
ref_path = Path("diarization") / "audio_new-gt.rttm"

# Hypothesis: diarization output to be scored.
with open(hyp_path) as f:
    hyp = Annotation.from_rttm(f)

# Reference: ground-truth diarization.
with open(ref_path) as f:
    ref = Annotation.from_rttm(f)

der = DiarizationErrorRate()
# detailed=True returns the individual error components instead of only the rate.
der_result = der(ref, hyp, detailed=True)
der_result



{'missed detection': 162.13,
 'correct': 960.2959999999996,
 'total': 1674.822999999998,
 'confusion': 552.3969999999983,
 'false alarm': 204.26700000000062,
 'diarization error rate': 0.5485917019290993}

In [8]:
from pathlib import Path

from pyannote.metrics.diarization import DiarizationErrorRate

# Build paths with pathlib so the cell does not depend on the Windows-only
# backslash separator.
hyp_path = Path("transcription-error-correction") / "rttm" / "audio" / "audio.rttm"
ref_path = Path("diarization") / "audio-pyannote-replaced.rttm"

# Hypothesis: diarization output to be scored.
with open(hyp_path) as f:
    hyp = Annotation.from_rttm(f)

# Reference annotation to score against.
with open(ref_path) as f:
    ref = Annotation.from_rttm(f)

# `detailed` is an option of the metric call, not of the constructor, so it
# is passed only where it takes effect.
der = DiarizationErrorRate()
der_result = der(ref, hyp, detailed=True)
der_result



{'missed detection': 31.058999999999827,
 'correct': 461.6959999999999,
 'total': 790.5019999999996,
 'confusion': 297.7469999999999,
 'false alarm': 134.3969999999998,
 'diarization error rate': 0.5859605668296851}

In [17]:
# Debugging aid: dump the instance attributes of the hypothesis annotation.
vars(hyp)

{'_uri': None,
 'modality': None,
 '_tracks': SortedDict({}),
 '_labels': {},
 '_labelNeedsUpdate': {},
 '_timeline': <Timeline(uri=None, segments=[])>,
 '_timelineNeedsUpdate': False}

In [19]:
# Sanity check: add a single labelled one-second segment to a fresh
# annotation and print it.
probe_segment = Segment(0, 1)
test_annotation = Annotation()
test_annotation[probe_segment] = "TEST_SPEAKER"
print(test_annotation)

[ 00:00:00.000 -->  00:00:01.000] _ TEST_SPEAKER


audio_test  for downloading 


'audio_test'