In [1]:
import logging
import asyncio
from jiwer import wer

In [2]:
from src.eval.Dataset import Dataset
from src.eval.StreamingTranscriber import StreamingTranscriber
from src.eval.DebugTranscriberAdapter import DebugTranscriberAdapter
from src.eval.WebsocketTranscriberAdapter import WebsocketTranscriberAdapter

from src.melvin.Transcriber import Transcriber as WhisperTranscriber
from src.melvin.WhisperStreamingTranscriberAdapter import WhisperStreamingTranscriberAdapter

from src.helper.logging import init_logger, set_global_loglevel

# Initialise the project's logging helpers first, then request INFO as the
# global log level (the exact effect is defined in src.helper.logging).
init_logger()
set_global_loglevel("INFO")

# Logger used by the notebook cells themselves; its name is what shows up as
# the source of the notebook's own log lines.
logger = logging.getLogger("src.Main")

In [3]:
# Build the evaluation dataset and take its first element through the iterator
# protocol. Later cells read `element` directly.
dataset = Dataset()
element = next(dataset)
# element[0] is logged as the element identifier and element[1] as the payload
# whose length is reported in bytes; element[2] is used further down as the
# reference text for the WER computation.
logger.info(f"Evaluating dataset element {element[0]} with length {len(element[1])} bytes ")

[37m[2025-04-07 15:14:56,027 src.eval.Dataset:20] INFO Loaded dataset librispeech-pc-test-clean with 83 elements[0m
[34m[2025-04-07 15:14:56,028 src.eval.Dataset:29] DEBUG Loading dataset element with ID 2961-961-0000[0m


[34m[2025-04-07 15:14:56,188 src.eval.Dataset:50] DEBUG Read 6469632 bytes from data/librispeech-pc-test-clean/2961-961-0000/2961-961-0000.mp3[0m
[37m[2025-04-07 15:14:56,190 src.Main:3] INFO Evaluating dataset element 2961-961-0000 with length 6469632 bytes [0m


In [4]:
# Whisper transcriber for the "large-v3-turbo" model.
# NOTE(review): the [0] argument is presumably the list of GPU device indices —
# confirm against Transcriber.for_gpu.
w = WhisperTranscriber.for_gpu("large-v3-turbo", [0])
# Adapter that lets the evaluation-side StreamingTranscriber drive the model.
adapter = WhisperStreamingTranscriberAdapter(w)

# Streaming front end used by the evaluation cell below.
# NOTE(review): chunk_length_ms=1000 presumably means the audio is fed in
# one-second chunks — verify in StreamingTranscriber.
transcriber = StreamingTranscriber(adapter, chunk_length_ms=1000)

[37m[2025-04-07 15:14:59,008 src.helper.model_handler:19] INFO Setting up model.. large-v3-turbo[0m
[34m[2025-04-07 15:14:59,011 src.helper.model_handler:26] DEBUG Model large-v3-turbo found, skipping download..[0m


In [6]:
async def run_transcription():
    """Transcribe the selected dataset element twice and log both word error rates.

    The element is first sent through the streaming ``transcriber`` (awaited),
    then through the underlying model ``w`` in a single offline call. For each
    result the text and its WER against ``element[2]`` are logged via
    ``logger``. Nothing is returned.

    Reads the notebook globals ``element``, ``transcriber``, ``w`` and
    ``logger``.
    """
    audio = element[1]

    # Streaming pass.
    transcription = await transcriber.transcribe(audio)
    logger.info(f'Transcription result: "{transcription}"')
    reference = element[2]
    transcription_wer = wer(reference, transcription)
    logger.info(f"Transcription WER: {transcription_wer}")

    # Offline pass: the model returns a pair whose first item is iterated for
    # objects exposing `.text`; the second item is ignored.
    segments, _ = w.transcribe(audio)
    offline_transcription = " ".join(segment.text for segment in segments)
    logger.info(f"Offline Transcription result: {offline_transcription}")
    offline_wer = wer(reference, offline_transcription)
    logger.info(f"Offline Transcription WER: {offline_wer}")

run_transcription()

<coroutine object run_transcription at 0x7f22e5148c40>