In [None]:
from vj.voice.stt import WhisperSTT, MoonshineSTT, ParakeetSTT
from vj.voice.base import BaseSTT
from vj.evals.utils import (
    load_audio,
    ASSETS_DIR,
    RESULTS_DIR,
    chunk_audio,
    save_evals,
)

try:
    import jiwer
    import optuna
except ModuleNotFoundError:
    raise ModuleNotFoundError(
        "In order to run notebooks, please install dev dependencies with `uv sync --group dev`"
    )

import time
import io

[0;93m2025-12-27 15:19:37.489157969 [W:onnxruntime:Default, device_discovery.cc:164 DiscoverDevicesForPlatform] GPU device discovery failed: device_discovery.cc:89 ReadFileContents Failed to open file: "/sys/class/drm/card0/device/vendor"[m
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def create_whisper_model(
    model: str,
    greedy: int,
    beam_size: int,
    patience: float,
    use_ctx: bool,
    n_threads: int,
) -> tuple[WhisperSTT, io.BytesIO, dict[str, int | str | float | bool], str]:

    params = {
        "no_context": use_ctx,
        "beam_search": {"beam_size": beam_size, "patience": patience},
        "greedy": {"best_of": greedy},
        "n_threads": n_threads,
        "print_progress": False,
    }

    return (
        *create_model(WhisperSTT),
        params,
        model,
    )


def create_model[ModelT: BaseSTT = BaseSTT](
    model_t: type[ModelT], **params
) -> tuple[ModelT, io.BytesIO]:

    buf = io.BytesIO()

    def save_response(text: str) -> None:
        buf.seek(0)
        buf.write(text.encode())

    return model_t(callback_fn=save_response, **params), buf

In [3]:
transformation = jiwer.Compose(
    [
        jiwer.ToLowerCase(),
        jiwer.RemovePunctuation(),
        jiwer.RemoveWhiteSpace(replace_by_space=True),
        jiwer.RemoveMultipleSpaces(),
        jiwer.Strip(),
    ]
)


def calculate_metrics(reference: str, hypothesis: str):
    """
    reference: The ground truth text
    hypothesis: What the model produced
    """
    # Clean both strings so capitalization/commas don't ruin the score
    ref_clean = transformation(reference)
    hyp_clean = transformation(hypothesis)

    wer = jiwer.wer(ref_clean, hyp_clean)
    wil = jiwer.wil(ref_clean, hyp_clean)  # Word Information Lost

    return {"wer": wer, "accuracy": 1 - wer, "wil": wil}

In [4]:
def run_evals(model: BaseSTT, trans_buf: io.BytesIO, **params):
    files = {
        "download.wav": "so what's going to happen now?",
        "download (1).wav": "you work amazingly but the transcription part is a little bit too slow",
    }
    scores = []
    print(f"----- Model: {model} -----")
    for file in files.keys():
        audio = load_audio(ASSETS_DIR / file)
        audio_duration = len(audio) / 16000  # Duration in seconds
        start_t = time.perf_counter()
        model.transcribe(audio, **params)
        end_t = time.perf_counter()
        latency = end_t - start_t
        rtf = audio_duration / latency
        trans = trans_buf.getvalue().decode()
        metrics = calculate_metrics(files[file], trans)
        scores.append(
            {
                "wer": metrics["wer"],
                "accuracy": metrics["accuracy"],
                "wil": metrics["wil"],
                "rtf": rtf,
                "latency": latency,
                "transcription": trans,
            }
        )
    return scores

In [5]:
stt, trans_buf, params, model = create_whisper_model("tiny", -1, -1, -1.0, True, 4)
scores = run_evals(stt, trans_buf)

whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 3 (small)
whisper_model_load: 

----- Model: <voxtral.stt.whisper.WhisperSTT object at 0x753ad69ce510> -----


In [6]:
scores

[{'wer': 0.3333333333333333,
  'accuracy': 0.6666666666666667,
  'wil': 0.4666666666666667,
  'rtf': 1.1817821140801237,
  'latency': 2.589309792001586,
  'transcription': "So what's gonna happen now?"},
 {'wer': 0.0,
  'accuracy': 1.0,
  'wil': 0.0,
  'rtf': 2.9281507570129643,
  'latency': 2.7867417620000197,
  'transcription': 'You work amazingly but the transcription part is a little bit too slow.'}]

In [7]:
from typing import Literal, TypedDict


models = [
    "base",
    "base-q5_1",
    "base-q8_0",
    "small",
    "small-q5_1",
    "small-q8_0",
    "tiny",
    "tiny-q5_1",
    "tiny-q8_0",
]
greedies = Literal[0, 1, 2, -1]
beam_sizes = Literal[-1, 1, 2, 3, 4, 5]
patiences = Literal[-1.0, 0.0, 0.5, 1.0]
use_ctxs = Literal[False, True]
n_threads_l = Literal[4, 8, 16]

In [8]:
def whisper_objective(trial: optuna.Trial) -> tuple[float, float]:
    model = trial.suggest_categorical("model", models)
    greedy = trial.suggest_int("greedy", -1, 2)
    beam_size = trial.suggest_int("beam_size", -1, 5)
    patience = trial.suggest_float("patience", -1.0, 1.0, step=0.5)
    use_ctx = trial.suggest_int("use_ctx", 0, 1)
    n_threads = trial.suggest_int("n_threads", 4, 16, step=4)

    scores = []

    stt, trans_buf, params, _ = create_whisper_model(
        model, greedy, beam_size, patience, bool(use_ctx), n_threads
    )
    for score in run_evals(stt, trans_buf, **params):
        scores.append(score)

    return scores[1]["rtf"], scores[1]["wer"]

In [None]:
study = optuna.create_study(
    storage=f"sqlite:///{RESULTS_DIR}/db.sqlite3",
    directions=["maximize", "minimize"],  # maximize rtf and minimize wer
)
study.optimize(whisper_objective, 100, n_jobs=4)

[I 2025-12-27 10:57:32,129] A new study created in RDB with name: no-name-97f34b9a-0a9f-4ea6-a410-a806bca8ce8f


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 3 (small)
whisper_model_load: 

----- Model: base-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 1, 'patience': -0.5}, 'greedy': {'best_of': 1}, 'n_threads': 4, 'print_progress': False} ---
----- Model: small -----


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 3 (small)
whisper_model_l

---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 4, 'patience': -1.0}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---
----- Model: small-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': 1.0}, 'greedy': {'best_of': -1}, 'n_threads': 8, 'print_progress': False} ---
----- Model: small -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': 1.0}, 'greedy': {'best_of': 1}, 'n_threads': 4, 'print_progress': False} ---


whisper_model_load: model size    =  487.01 MB
whisper_backend_init_gpu: no GPU found
whisper_init_state: kv self size  =   18.87 MB
whisper_init_state: kv cross size =   56.62 MB
whisper_init_state: kv pad  size  =    4.72 MB
whisper_init_state: compute buffer (conv)   =   22.42 MB
whisper_init_state: compute buffer (encode) =   33.85 MB
whisper_init_state: compute buffer (cross)  =    6.20 MB
whisper_init_state: compute buffer (decode) =   97.28 MB


Evaluation finished in: 4.3196645110001555
Final trascription: 
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 1, 'patience': -0.5}, 'greedy': {'best_of': 1}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 7.569701282000096
Final trascription: 
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 4, 'patience': -1.0}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---


[I 2025-12-27 10:57:41,611] Trial 1 finished with values: [0.5998935427696244, 1.0] and parameters: {'model': 'base-q5_1', 'greedy': 1, 'beam_size': 1, 'patience': -0.5, 'use_ctx': 0, 'n_threads': 4}.


Evaluation finished in: 4.895131309000135
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 5, 'patience': -1.0}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 10.158550665999883
Final trascription: 
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': 1.0}, 'greedy': {'best_of': -1}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 11.263049452999894
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': 1.0}, 'greedy': {'best_of': 1}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 4.318469610000193
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics

[I 2025-12-27 10:57:47,967] Trial 0 finished with values: [0.9512505116421666, 0.07692307692307693] and parameters: {'model': 'small', 'greedy': 2, 'beam_size': 4, 'patience': -1.0, 'use_ctx': 0, 'n_threads': 16}.


Evaluation finished in: 7.762204175000079
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 3, 'patience': -0.5}, 'greedy': {'best_of': 1}, 'n_threads': 16, 'print_progress': False} ---


[I 2025-12-27 10:57:50,384] Trial 4 finished with values: [0.5160674457107907, 0.0] and parameters: {'model': 'base-q5_1', 'greedy': -1, 'beam_size': 5, 'patience': -1.0, 'use_ctx': 0, 'n_threads': 4}.


Evaluation finished in: 4.211110357000052
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
----- Model: base-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 2, 'patience': -0.5}, 'greedy': {'best_of': 1}, 'n_threads': 16, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 2 (base)
whisper_model_load: a

Evaluation finished in: 2.4378210820000277
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 2.0, 'accuracy': -1.0, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 3, 'patience': -0.5}, 'greedy': {'best_of': 1}, 'n_threads': 16, 'print_progress': False} ---


[I 2025-12-27 10:57:51,789] Trial 3 finished with values: [1.0914769436274305, 1.0] and parameters: {'model': 'small-q5_1', 'greedy': -1, 'beam_size': 1, 'patience': 1.0, 'use_ctx': 1, 'n_threads': 8}.


Evaluation finished in: 8.906451859999834
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 2 (base)
whisper_model_load: adding

----- Model: base -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 2, 'patience': 1.0}, 'greedy': {'best_of': 2}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 2.412062393999804
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 2, 'patience': -0.5}, 'greedy': {'best_of': 1}, 'n_threads': 16, 'print_progress': False} ---


[I 2025-12-27 10:57:53,398] Trial 5 finished with values: [0.3318399122549056, 0.0] and parameters: {'model': 'base-q8_0', 'greedy': 1, 'beam_size': 3, 'patience': -0.5, 'use_ctx': 1, 'n_threads': 16}.


Evaluation finished in: 2.7078136840000298
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 3 (small)
whisper_model_l

----- Model: small-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 3, 'patience': 1.0}, 'greedy': {'best_of': -1}, 'n_threads': 8, 'print_progress': False} ---


[I 2025-12-27 10:57:54,458] Trial 2 finished with values: [1.2728639131127546, 1.0] and parameters: {'model': 'small', 'greedy': 1, 'beam_size': 4, 'patience': 1.0, 'use_ctx': 1, 'n_threads': 4}.


Evaluation finished in: 10.386569531000077
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 2 (base)
whisper_model_load: adding

----- Model: base -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 3, 'patience': 0.0}, 'greedy': {'best_of': 1}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 2.604997422999986
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 2, 'patience': 1.0}, 'greedy': {'best_of': 2}, 'n_threads': 4, 'print_progress': False} ---


[I 2025-12-27 10:57:55,402] Trial 6 finished with values: [0.29309342046573483, 0.07692307692307693] and parameters: {'model': 'base-q8_0', 'greedy': 1, 'beam_size': 2, 'patience': -0.5, 'use_ctx': 0, 'n_threads': 16}.


Evaluation finished in: 2.391642311000396
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': 0.5}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 3.001343770999938
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 3, 'patience': 0.0}, 'greedy': {'best_of': 1}, 'n_threads': 4, 'print_progress': False} ---


[I 2025-12-27 10:57:58,018] Trial 7 finished with values: [0.39523250257352227, 0.07692307692307693] and parameters: {'model': 'base', 'greedy': 2, 'beam_size': 2, 'patience': 1.0, 'use_ctx': 1, 'n_threads': 4}.


Evaluation finished in: 3.225097220999942
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 1 (tiny)
whisper_model_load: a

----- Model: tiny-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 5, 'patience': 0.0}, 'greedy': {'best_of': 2}, 'n_threads': 12, 'print_progress': False} ---
Evaluation finished in: 3.3103847929996846
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': 0.5}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 1.158941089000109
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 5, 'patience': 0.0}, 'greedy': {'best_of': 2}, 'n_threads': 12, 'print_progress': False} ---
Evaluation finished in: 6.095199871000204
Final trascription: You work amazingly bu

[I 2025-12-27 10:58:00,683] Trial 11 finished with values: [0.15818849926472245, 1.0] and parameters: {'model': 'tiny-q8_0', 'greedy': 2, 'beam_size': 5, 'patience': 0.0, 'use_ctx': 0, 'n_threads': 12}.


Evaluation finished in: 1.290818154000135
Final trascription: So what's gonna happen now?ow?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}
----- Model: tiny-q8_0 -----


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 1 (tiny)
whisper_model_load: a

---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': -1.0}, 'greedy': {'best_of': 1}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 3.153432630000225
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': 0.0}, 'greedy': {'best_of': -1}, 'n_threads': 8, 'print_progress': False} ---


[I 2025-12-27 10:58:02,095] Trial 10 finished with values: [0.37559583541666614, 1.0] and parameters: {'model': 'base-q8_0', 'greedy': -1, 'beam_size': -1, 'patience': 0.5, 'use_ctx': 1, 'n_threads': 4}.


Evaluation finished in: 3.064862016999996
Final trascription: So what's gonna happen now?ow?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}
----- Model: tiny-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 2, 'patience': 0.5}, 'greedy': {'best_of': 0}, 'n_threads': 4, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 1 (tiny)
whisper_model_load: a

Evaluation finished in: 1.4521902350002165
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 2.0, 'accuracy': -1.0, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': -1.0}, 'greedy': {'best_of': 1}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 1.5509259880000172
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 0.3333333333333333, 'accuracy': 0.6666666666666667, 'wil': 0.4666666666666667}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 2, 'patience': 0.5}, 'greedy': {'best_of': 0}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 2.633794197000043
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 2.0, 'accuracy': -1.0, 'wil': 1.0}
---- File: downl

[I 2025-12-27 10:58:04,098] Trial 12 finished with values: [0.21401812671568926, 0.46153846153846156] and parameters: {'model': 'tiny-q8_0', 'greedy': 1, 'beam_size': 4, 'patience': -1.0, 'use_ctx': 1, 'n_threads': 16}.


Evaluation finished in: 1.7463879140000245
Final trascription: So what's gonna happen now? transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.6858974358974359}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 1 (tiny)
whisper_model_load: adding

----- Model: tiny -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': -1.0}, 'greedy': {'best_of': 0}, 'n_threads': 12, 'print_progress': False} ---


[I 2025-12-27 10:58:05,675] Trial 14 finished with values: [0.2290316792892536, 1.0] and parameters: {'model': 'tiny-q8_0', 'greedy': 0, 'beam_size': 2, 'patience': 0.5, 'use_ctx': 1, 'n_threads': 4}.


Evaluation finished in: 1.8688985030003096
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}
Evaluation finished in: 1.4080001090001133
Final trascription: So what's gonna happen now? transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 1.5, 'accuracy': -0.5, 'wil': 0.7777777777777778}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': -1.0}, 'greedy': {'best_of': 0}, 'n_threads': 12, 'print_progress': False} ---
----- Model: tiny-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 3, 'patience': 0.0}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 1 (tiny)
whisper_model_load: a

Evaluation finished in: 2.486968777000129
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}
----- Model: base-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': -1, 'patience': -0.5}, 'greedy': {'best_of': 2}, 'n_threads': 4, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 2 (base)
whisper_model_load: a

Evaluation finished in: 0.8332886250000229
Final trascription: So what's gonna happen now? transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.6858974358974359}
Evaluation finished in: 6.771188417999838
Final trascription: You work amazingly, but the transcription part is a little bit slow.ow.
Evaluated metrics: metrics={'wer': 0.15384615384615385, 'accuracy': 0.8461538461538461, 'wil': 0.22435897435897445}
Evaluation finished in: 0.9124679020001167
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 2.0, 'accuracy': -1.0, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 3, 'patience': 0.0}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 3 (small)
whisper_model_l

----- Model: small-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 4, 'patience': -0.5}, 'greedy': {'best_of': 1}, 'n_threads': 8, 'print_progress': False} ---


whisper_model_load: model size    =  263.87 MB
whisper_backend_init_gpu: no GPU found
whisper_init_state: kv self size  =   18.87 MB
whisper_init_state: kv cross size =   56.62 MB
whisper_init_state: kv pad  size  =    4.72 MB
whisper_init_state: compute buffer (conv)   =   22.42 MB
whisper_init_state: compute buffer (encode) =   33.85 MB
whisper_init_state: compute buffer (cross)  =    6.20 MB
whisper_init_state: compute buffer (decode) =   97.28 MB


----- Model: small-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': 0.5}, 'greedy': {'best_of': -1}, 'n_threads': 16, 'print_progress': False} ---


[I 2025-12-27 10:58:08,625] Trial 16 finished with values: [0.22716208958333, 0.07692307692307693] and parameters: {'model': 'tiny-q8_0', 'greedy': -1, 'beam_size': 3, 'patience': 0.0, 'use_ctx': 1, 'n_threads': 4}.


Evaluation finished in: 1.8536426509999728
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 3 (small)
whisper_model_load: 

----- Model: small -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 3, 'patience': -0.5}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 4.361388352000176
Final trascription: So what's gonna happen now?transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 1.6666666666666667, 'accuracy': -0.6666666666666667, 'wil': 0.9444444444444444}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': -1, 'patience': -0.5}, 'greedy': {'best_of': 2}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 7.9195596179997665
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 2.0, 'accuracy': -1.0, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': 0.5}, 'greedy': {'best_of': -1}, 'n_thr

[I 2025-12-27 10:58:16,320] Trial 17 finished with values: [0.6654437596813412, 0.46153846153846156] and parameters: {'model': 'base-q5_1', 'greedy': 2, 'beam_size': -1, 'patience': -0.5, 'use_ctx': 0, 'n_threads': 4}.


Evaluation finished in: 5.430021078999744
Final trascription: So what's gonna happen now?transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.6858974358974359}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 2 (base)
whisper_model_load: adding

----- Model: base -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': -1.0}, 'greedy': {'best_of': 0}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 7.909921791000215
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 3, 'patience': -0.5}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 10.524036555000293
Final trascription: 
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 4, 'patience': -0.5}, 'greedy': {'best_of': 1}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 4.625416608000251
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'w

[I 2025-12-27 10:58:22,831] Trial 19 finished with values: [0.9481260453431636, 0.07692307692307693] and parameters: {'model': 'small-q8_0', 'greedy': -1, 'beam_size': 4, 'patience': 0.5, 'use_ctx': 1, 'n_threads': 16}.


Evaluation finished in: 7.7367085300002145
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 3 (small)
whisper_model_l

----- Model: small-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': 0.5}, 'greedy': {'best_of': 2}, 'n_threads': 12, 'print_progress': False} ---


[I 2025-12-27 10:58:25,197] Trial 21 finished with values: [0.4756983688725081, 0.0] and parameters: {'model': 'base', 'greedy': 0, 'beam_size': -1, 'patience': -1.0, 'use_ctx': 1, 'n_threads': 4}.


Evaluation finished in: 3.8816986899996664
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
----- Model: base-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 1, 'patience': -1.0}, 'greedy': {'best_of': 0}, 'n_threads': 8, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 2 (base)
whisper_model_load: a

Evaluation finished in: 8.687075993000235
Final trascription: So what's gonna happen now?ow?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 2 (base)
whisper_model_load: adding

----- Model: base -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 3, 'patience': 0.5}, 'greedy': {'best_of': -1}, 'n_threads': 12, 'print_progress': False} ---


[I 2025-12-27 10:58:28,412] Trial 18 finished with values: [1.3397994700980296, 1.0] and parameters: {'model': 'small-q5_1', 'greedy': 1, 'beam_size': 4, 'patience': -0.5, 'use_ctx': 0, 'n_threads': 8}.


Evaluation finished in: 10.932763675999922
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 5, 'patience': -1.0}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 3.421420647999639
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 2.1666666666666665, 'accuracy': -1.1666666666666665, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 1, 'patience': -1.0}, 'greedy': {'best_of': 0}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 2.848330629999964
Final trascription: You work amazingly, but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 2.1666666666666665, 'accuracy': -1.1666666666666665, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 3, 'patience': 0.5}, 'greedy': {

[I 2025-12-27 10:58:32,250] Trial 24 finished with values: [0.38639895330882557, 0.38461538461538464] and parameters: {'model': 'base', 'greedy': -1, 'beam_size': 3, 'patience': 0.5, 'use_ctx': 0, 'n_threads': 12}.
[I 2025-12-27 10:58:32,337] Trial 23 finished with values: [0.42301766004902003, 0.46153846153846156] and parameters: {'model': 'base-q5_1', 'greedy': 0, 'beam_size': 1, 'patience': -1.0, 'use_ctx': 0, 'n_threads': 8}.


Evaluation finished in: 3.1530154590000166
Final trascription: So what's gonna happen now? transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.38461538461538464, 'accuracy': 0.6153846153846154, 'wil': 0.621301775147929}
Evaluation finished in: 3.4518241060000037
Final trascription: So what's going to happen now?nscription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.7100591715976332}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 1 (tiny)
whisper_model_load: adding

----- Model: tiny -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': -1.0}, 'greedy': {'best_of': 0}, 'n_threads': 4, 'print_progress': False} ---
----- Model: tiny -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': -1.0}, 'greedy': {'best_of': -1}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 4.125875740000083
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 5, 'patience': -1.0}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 1.448136935999628
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'be

[I 2025-12-27 10:58:35,804] Trial 27 finished with values: [0.20240678762251282, 1.0] and parameters: {'model': 'tiny', 'greedy': -1, 'beam_size': 1, 'patience': -1.0, 'use_ctx': 1, 'n_threads': 8}.


Evaluation finished in: 1.6516393869997046
Final trascription: So what's gonna happen now?ow?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 3 (small)
whisper_model_l

----- Model: small-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': -1, 'patience': -1.0}, 'greedy': {'best_of': 1}, 'n_threads': 16, 'print_progress': False} ---


[I 2025-12-27 10:58:36,673] Trial 26 finished with values: [0.2665353476715484, 0.07692307692307693] and parameters: {'model': 'tiny', 'greedy': 0, 'beam_size': -1, 'patience': -1.0, 'use_ctx': 1, 'n_threads': 4}.


Evaluation finished in: 2.174928436999835
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 3 (small)
whisper_model_load: 

Evaluation finished in: 4.253766581000036
Final trascription: So what's gonna happen now?ow?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}
----- Model: small -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': -0.5}, 'greedy': {'best_of': 1}, 'n_threads': 8, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 2 (base)
whisper_model_load: m

----- Model: base-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 5, 'patience': 0.5}, 'greedy': {'best_of': 1}, 'n_threads': 8, 'print_progress': False} ---


[I 2025-12-27 10:58:38,754] Trial 22 finished with values: [0.9611796987745074, 0.07692307692307693] and parameters: {'model': 'small-q5_1', 'greedy': 2, 'beam_size': 4, 'patience': 0.5, 'use_ctx': 1, 'n_threads': 12}.


Evaluation finished in: 7.84322634199998
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': 0.5}, 'greedy': {'best_of': 2}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 3.6452689600000667
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 2.1666666666666665, 'accuracy': -1.1666666666666665, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 5, 'patience': 0.5}, 'greedy': {'best_of': 1}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 4.420936413000163
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': 0.5}, 'greedy': {'best_of': 2}, 'n_threads': 4, 'print_progress': False} ---
Evaluation fini

[I 2025-12-27 10:58:44,206] Trial 30 finished with values: [0.39420338921567966, 0.46153846153846156] and parameters: {'model': 'base-q8_0', 'greedy': 1, 'beam_size': 5, 'patience': 0.5, 'use_ctx': 1, 'n_threads': 8}.


Evaluation finished in: 3.216699655999946
Final trascription: So what's going to happen now?nscription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.7100591715976332}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 3 (small)
whisper_model_l

----- Model: small-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': 1.0}, 'greedy': {'best_of': 2}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 7.847442050000154
Final trascription: 
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': -0.5}, 'greedy': {'best_of': 1}, 'n_threads': 8, 'print_progress': False} ---


[I 2025-12-27 10:58:48,052] Trial 31 finished with values: [0.557689258088192, 1.0] and parameters: {'model': 'base-q5_1', 'greedy': 2, 'beam_size': -1, 'patience': 0.5, 'use_ctx': 1, 'n_threads': 4}.


Evaluation finished in: 4.550744345999647
Final trascription: So what's gonna happen now?ow?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}
----- Model: tiny-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 2, 'patience': 1.0}, 'greedy': {'best_of': -1}, 'n_threads': 16, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 1 (tiny)
whisper_model_load: a

Evaluation finished in: 2.0047036700002536
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 0.3333333333333333, 'accuracy': 0.6666666666666667, 'wil': 0.4666666666666667}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 2, 'patience': 1.0}, 'greedy': {'best_of': -1}, 'n_threads': 16, 'print_progress': False} ---


[I 2025-12-27 10:58:52,077] Trial 33 finished with values: [0.22539168272057358, 0.07692307692307693] and parameters: {'model': 'tiny-q5_1', 'greedy': -1, 'beam_size': 2, 'patience': 1.0, 'use_ctx': 1, 'n_threads': 16}.


Evaluation finished in: 1.8391961309998806
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 5, 'patience': -0.5}, 'greedy': {'best_of': 2}, 'n_threads': 8, 'print_progress': False} ---


[I 2025-12-27 10:58:53,172] Trial 28 finished with values: [1.1372847678921525, 0.07692307692307693] and parameters: {'model': 'small-q5_1', 'greedy': 1, 'beam_size': -1, 'patience': -1.0, 'use_ctx': 0, 'n_threads': 16}.
[I 2025-12-27 10:58:53,182] Trial 29 finished with values: [0.9854447281862893, 0.07692307692307693] and parameters: {'model': 'small', 'greedy': 1, 'beam_size': -1, 'patience': -0.5, 'use_ctx': 1, 'n_threads': 8}.


Evaluation finished in: 9.280243705999965
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}
Evaluation finished in: 8.04122898200012
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 3 (small)
whisper_model_load: 

----- Model: base-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 5, 'patience': -0.5}, 'greedy': {'best_of': -1}, 'n_threads': 16, 'print_progress': False} ---


whisper_model_load: model size    =  487.01 MB
whisper_backend_init_gpu: no GPU found
whisper_init_state: kv self size  =   18.87 MB
whisper_init_state: kv cross size =   56.62 MB
whisper_init_state: kv pad  size  =    4.72 MB
whisper_init_state: compute buffer (conv)   =   22.42 MB
whisper_init_state: compute buffer (encode) =   33.85 MB
whisper_init_state: compute buffer (cross)  =    6.20 MB
whisper_init_state: compute buffer (decode) =   97.28 MB


----- Model: small -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 0, 'patience': -1.0}, 'greedy': {'best_of': 1}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 1.805428025999845
Final trascription: You work amazingly but the transcription part is a little bit too slow..
Evaluated metrics: metrics={'wer': 2.1666666666666665, 'accuracy': -1.1666666666666665, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 5, 'patience': -0.5}, 'greedy': {'best_of': 2}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 2.810652282999854
Final trascription: 
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 5, 'patience': -0.5}, 'greedy': {'best_of': -1}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 12.32208677699964
Fin

[I 2025-12-27 10:58:57,323] Trial 34 finished with values: [0.38766968627451026, 0.46153846153846156] and parameters: {'model': 'base-q8_0', 'greedy': 2, 'beam_size': 5, 'patience': -0.5, 'use_ctx': 1, 'n_threads': 8}.


Evaluation finished in: 3.163384640000004
Final trascription: So what's going to happen now?nscription part is a little bit too slow..
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.7100591715976332}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 1 (tiny)
whisper_model_load: a

----- Model: tiny-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 4, 'patience': 0.0}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---


[I 2025-12-27 10:58:59,512] Trial 35 finished with values: [0.384334164093159, 1.0] and parameters: {'model': 'base-q8_0', 'greedy': -1, 'beam_size': 5, 'patience': -0.5, 'use_ctx': 0, 'n_threads': 16}.


Evaluation finished in: 1.8537899359998846
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 0.3333333333333333, 'accuracy': 0.6666666666666667, 'wil': 0.4666666666666667}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 4, 'patience': 0.0}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 3.1361667790001775
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 2 (base)
whisper_model_load: adding

----- Model: base -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 5, 'patience': -0.5}, 'greedy': {'best_of': 2}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 6.328429719999804
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 0, 'patience': -1.0}, 'greedy': {'best_of': 1}, 'n_threads': 16, 'print_progress': False} ---


[I 2025-12-27 10:59:01,290] Trial 37 finished with values: [0.22833151948527847, 0.07692307692307693] and parameters: {'model': 'tiny-q5_1', 'greedy': -1, 'beam_size': 4, 'patience': 0.0, 'use_ctx': 0, 'n_threads': 4}.


Evaluation finished in: 1.8631851989998722
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 1 (tiny)
whisper_model_load: adding

----- Model: tiny -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 3, 'patience': 0.0}, 'greedy': {'best_of': -1}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 3.3906289990000005
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 0.3333333333333333, 'accuracy': 0.6666666666666667, 'wil': 0.4666666666666667}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 5, 'patience': -0.5}, 'greedy': {'best_of': 2}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 1.7808154800000011
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 3, 'patience': 0.0}, 'greedy': {'best_of': -1}, 'n_threads': 16, 'print_progress': False} ---


[I 2025-12-27 10:59:05,401] Trial 39 finished with values: [0.25989685465682283, 0.07692307692307693] and parameters: {'model': 'tiny', 'greedy': -1, 'beam_size': 3, 'patience': 0.0, 'use_ctx': 0, 'n_threads': 16}.


Evaluation finished in: 2.1207583339996745
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}
----- Model: tiny -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 4, 'patience': -0.5}, 'greedy': {'best_of': 0}, 'n_threads': 16, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 1 (tiny)
whisper_model_load: adding

Evaluation finished in: 3.312958407000224
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}
Evaluation finished in: 1.0312465120000525
Final trascription: So what's gonna happen now? transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 1.5, 'accuracy': -0.5, 'wil': 0.7777777777777778}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 4, 'patience': -0.5}, 'greedy': {'best_of': 0}, 'n_threads': 16, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 3 (small)
whisper_model_l

----- Model: small-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': 1.0}, 'greedy': {'best_of': 0}, 'n_threads': 8, 'print_progress': False} ---


[I 2025-12-27 10:59:07,480] Trial 36 finished with values: [0.8939198981617361, 0.07692307692307693] and parameters: {'model': 'small', 'greedy': 1, 'beam_size': 0, 'patience': -1.0, 'use_ctx': 0, 'n_threads': 16}.


Evaluation finished in: 7.294386368999767
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 1 (tiny)
whisper_model_load: a

----- Model: tiny-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 3, 'patience': 0.5}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---


[I 2025-12-27 10:59:08,200] Trial 40 finished with values: [0.18902085477941008, 0.46153846153846156] and parameters: {'model': 'tiny', 'greedy': 0, 'beam_size': 4, 'patience': -0.5, 'use_ctx': 0, 'n_threads': 16}.


Evaluation finished in: 1.5424101749999863
Final trascription: So what's gonna happen now? transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.6858974358974359}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 3 (small)
whisper_model_load: 

----- Model: small -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': 1.0}, 'greedy': {'best_of': 0}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 11.927034074999938
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}
Evaluation finished in: 1.302174075000039
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 2.0, 'accuracy': -1.0, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 3, 'patience': 0.5}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---
----- Model: tiny -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': -1, 'patience': 1.0}, 'greedy': {'best_

whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 1 (tiny)
whisper_model_load: adding

Evaluation finished in: 1.5968936079998457
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 0.3333333333333333, 'accuracy': 0.6666666666666667, 'wil': 0.4666666666666667}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': -1, 'patience': 1.0}, 'greedy': {'best_of': -1}, 'n_threads': 12, 'print_progress': False} ---


[I 2025-12-27 10:59:11,103] Trial 42 finished with values: [0.25789395821075856, 0.07692307692307693] and parameters: {'model': 'tiny-q5_1', 'greedy': 2, 'beam_size': 3, 'patience': 0.5, 'use_ctx': 1, 'n_threads': 16}.


Evaluation finished in: 2.10441469899979
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}
----- Model: tiny-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': 0.0}, 'greedy': {'best_of': 1}, 'n_threads': 12, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 1 (tiny)
whisper_model_load: a

Evaluation finished in: 1.643163676000313
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}
Evaluation finished in: 1.334683516000041
Final trascription: So what's gonna happen now? transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 1.5, 'accuracy': -0.5, 'wil': 0.7777777777777778}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': 0.0}, 'greedy': {'best_of': 1}, 'n_threads': 12, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 3 (small)
whisper_model_l

----- Model: small-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 3, 'patience': 0.5}, 'greedy': {'best_of': 0}, 'n_threads': 16, 'print_progress': False} ---


[I 2025-12-27 10:59:14,396] Trial 45 finished with values: [0.21290341973037902, 0.46153846153846156] and parameters: {'model': 'tiny-q8_0', 'greedy': 1, 'beam_size': 1, 'patience': 0.0, 'use_ctx': 1, 'n_threads': 12}.


Evaluation finished in: 1.7372919049998927
Final trascription: So what's gonna happen now? transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.6858974358974359}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 2 (base)
whisper_model_load: adding

----- Model: base -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 2, 'patience': -1.0}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 8.31804596100028
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 2.1666666666666665, 'accuracy': -1.1666666666666665, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': 1.0}, 'greedy': {'best_of': 0}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 3.213990322999962
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 0.3333333333333333, 'accuracy': 0.6666666666666667, 'wil': 0.4666666666666667}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 2, 'patience': -1.0}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_

[I 2025-12-27 10:59:21,215] Trial 47 finished with values: [0.39920111556371546, 1.0] and parameters: {'model': 'base', 'greedy': 2, 'beam_size': 2, 'patience': -1.0, 'use_ctx': 1, 'n_threads': 16}.


Evaluation finished in: 8.177360714000315
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 2.0, 'accuracy': -1.0, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 3, 'patience': 0.5}, 'greedy': {'best_of': 0}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 3.257481102999918
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 2 (base)
whisper_model_load: adding

----- Model: base -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': 1.0}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---


[I 2025-12-27 10:59:25,356] Trial 41 finished with values: [1.2545061454656603, 0.46153846153846156] and parameters: {'model': 'small-q5_1', 'greedy': 0, 'beam_size': 4, 'patience': 1.0, 'use_ctx': 1, 'n_threads': 8}.


Evaluation finished in: 10.236770146999788
Final trascription: So what's going to happen now?nscription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.7100591715976332}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 1 (tiny)
whisper_model_load: a

Evaluation finished in: 4.0119446710000375
Final trascription: So what's going to happen now?nscription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 1.3333333333333333, 'accuracy': -0.33333333333333326, 'wil': 0.6794871794871795}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': 1.0}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---
----- Model: tiny-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': 1.0}, 'greedy': {'best_of': 1}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 1.8190968180001619
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 0.3333333333333333, 'accuracy': 0.6666666666666667, 'wil': 0.4666666666666667}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': 1.0}, 'greedy': {'best_of': 1}, 'n

[I 2025-12-27 10:59:28,717] Trial 49 finished with values: [0.1546203210784713, 0.07692307692307693] and parameters: {'model': 'tiny-q5_1', 'greedy': 1, 'beam_size': 1, 'patience': 1.0, 'use_ctx': 1, 'n_threads': 8}.
whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
wh

Evaluation finished in: 1.2617018200003258
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}
Evaluation finished in: 3.2425554289998217
Final trascription: So what's going to happen now?nscription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.7100591715976332}
----- Model: tiny-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 1, 'patience': 1.0}, 'greedy': {'best_of': 1}, 'n_threads': 8, 'print_progress': False} ---


whisper_model_load: adding 1608 extra tokens
whisper_model_load: n_langs       = 99
whisper_model_load:          CPU total size =    31.57 MB
whisper_model_load: model size    =   31.57 MB
whisper_backend_init_gpu: no GPU found
whisper_init_state: kv self size  =    3.15 MB
whisper_init_state: kv cross size =    9.44 MB
whisper_init_state: kv pad  size  =    2.36 MB
whisper_init_state: compute buffer (conv)   =   13.21 MB
whisper_init_state: compute buffer (encode) =   17.72 MB
whisper_init_state: compute buffer (cross)  =    3.89 MB
whisper_init_state: compute buffer (decode) =   95.91 MB
whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_param

----- Model: tiny-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': -1.0}, 'greedy': {'best_of': -1}, 'n_threads': 12, 'print_progress': False} ---
Evaluation finished in: 7.937970702000257
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 3 (small)
whisper_model_load: 

----- Model: small -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 3, 'patience': 0.0}, 'greedy': {'best_of': 1}, 'n_threads': 16, 'print_progress': False} ---


[I 2025-12-27 10:59:29,930] Trial 43 finished with values: [1.2517990602941387, 0.46153846153846156] and parameters: {'model': 'small', 'greedy': 0, 'beam_size': 1, 'patience': 1.0, 'use_ctx': 1, 'n_threads': 4}.


Evaluation finished in: 10.214680332000171
Final trascription: So what's gonna happen now?transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.6858974358974359}
Evaluation finished in: 1.0674161389997607
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 2.1666666666666665, 'accuracy': -1.1666666666666665, 'wil': 1.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 2 (base)
whisper_model_load: a

---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': -1.0}, 'greedy': {'best_of': -1}, 'n_threads': 12, 'print_progress': False} ---
Evaluation finished in: 1.2532552809998379
Final trascription: 
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 1, 'patience': 1.0}, 'greedy': {'best_of': 1}, 'n_threads': 8, 'print_progress': False} ---
----- Model: base-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': -1, 'patience': -0.5}, 'greedy': {'best_of': 0}, 'n_threads': 4, 'print_progress': False} ---


[I 2025-12-27 10:59:31,979] Trial 51 finished with values: [0.22594739338236045, 0.46153846153846156] and parameters: {'model': 'tiny-q8_0', 'greedy': -1, 'beam_size': 1, 'patience': -1.0, 'use_ctx': 1, 'n_threads': 12}.


Evaluation finished in: 1.8437307300000612
Final trascription: So what's gonna happen now?transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.6858974358974359}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 2 (base)
whisper_model_load: adding

----- Model: base -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 5, 'patience': -1.0}, 'greedy': {'best_of': 1}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 2.3227950799996506
Final trascription: 
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 2 (base)
whisper_model_load: adding

----- Model: base -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 2, 'patience': -1.0}, 'greedy': {'best_of': 0}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 4.599580719999722
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 2.0, 'accuracy': -1.0, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': -1, 'patience': -0.5}, 'greedy': {'best_of': 0}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 2.413138846000038
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 2, 'patience': -1.0}, 'greedy': {'best_of': 0}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 3.1088253859998076
Final 

[I 2025-12-27 10:59:38,201] Trial 55 finished with values: [0.35376469105390734, 1.0] and parameters: {'model': 'base', 'greedy': 0, 'beam_size': 2, 'patience': -1.0, 'use_ctx': 1, 'n_threads': 16}.


Evaluation finished in: 2.886719878999884
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 3 (small)
whisper_model_load: 

----- Model: small -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 4, 'patience': -1.0}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 4.04249692600024
Final trascription: So what's going to happen now?anscription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.5384615384615384, 'accuracy': 0.46153846153846156, 'wil': 0.7692307692307692}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': -1.0}, 'greedy': {'best_of': 0}, 'n_threads': 4, 'print_progress': False} ---


[I 2025-12-27 10:59:39,458] Trial 54 finished with values: [0.4859359569852519, 0.46153846153846156] and parameters: {'model': 'base', 'greedy': 1, 'beam_size': 5, 'patience': -1.0, 'use_ctx': 0, 'n_threads': 4}.


Evaluation finished in: 3.9652374089996556
Final trascription: So what's gonna happen now? transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.6858974358974359}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 3 (small)
whisper_model_l

----- Model: small-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 3, 'patience': 0.5}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 4.2603062500002125
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 2.0, 'accuracy': -1.0, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': -1.0}, 'greedy': {'best_of': 0}, 'n_threads': 4, 'print_progress': False} ---


[I 2025-12-27 10:59:44,522] Trial 52 finished with values: [1.0485191310049031, 0.15384615384615385] and parameters: {'model': 'small', 'greedy': 1, 'beam_size': 3, 'patience': 0.0, 'use_ctx': 1, 'n_threads': 16}.


Evaluation finished in: 8.555916109000009
Final trascription: You work amazingly, but the transcription part is a little bit slow.ow.
Evaluated metrics: metrics={'wer': 0.15384615384615385, 'accuracy': 0.8461538461538461, 'wil': 0.22435897435897445}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': -0.5}, 'greedy': {'best_of': -1}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 7.812328463000085
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 2.0, 'accuracy': -1.0, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 4, 'patience': -1.0}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---


[I 2025-12-27 10:59:47,409] Trial 57 finished with values: [0.4887419689950713, 0.0] and parameters: {'model': 'base-q8_0', 'greedy': 0, 'beam_size': -1, 'patience': -1.0, 'use_ctx': 1, 'n_threads': 4}.


Evaluation finished in: 2.565356707999854
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 0.3333333333333333, 'accuracy': 0.6666666666666667, 'wil': 0.4666666666666667}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': -0.5}, 'greedy': {'best_of': -1}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 3.9881344669997816
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 1 (tiny)
whisper_model_load: adding

----- Model: tiny -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 0, 'patience': -1.0}, 'greedy': {'best_of': 2}, 'n_threads': 12, 'print_progress': False} ---
Evaluation finished in: 8.215987713999766
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 3, 'patience': 0.5}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 1.9303394630001094
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 0.3333333333333333, 'accuracy': 0.6666666666666667, 'wil': 0.4666666666666667}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 0, 'patience': -1.0}, 'greedy': {'best_of': 2}, 'n_threads': 12, 'print_progress': False} ---


[I 2025-12-27 10:59:51,133] Trial 59 finished with values: [0.451700772058853, 0.07692307692307693] and parameters: {'model': 'base-q8_0', 'greedy': -1, 'beam_size': 4, 'patience': -0.5, 'use_ctx': 1, 'n_threads': 16}.


Evaluation finished in: 3.6858783000002404
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': 1.0}, 'greedy': {'best_of': 1}, 'n_threads': 4, 'print_progress': False} ---


[I 2025-12-27 10:59:51,888] Trial 60 finished with values: [0.2769289692401892, 0.07692307692307693] and parameters: {'model': 'tiny', 'greedy': 2, 'beam_size': 0, 'patience': -1.0, 'use_ctx': 1, 'n_threads': 12}.


Evaluation finished in: 2.259740388999944
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 3 (small)
whisper_model_l

----- Model: small-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 0, 'patience': 1.0}, 'greedy': {'best_of': -1}, 'n_threads': 8, 'print_progress': False} ---


[I 2025-12-27 10:59:54,737] Trial 56 finished with values: [0.9986552301470778, 0.5384615384615384] and parameters: {'model': 'small', 'greedy': 2, 'beam_size': 4, 'patience': -1.0, 'use_ctx': 0, 'n_threads': 16}.


Evaluation finished in: 8.149026678000155
Final trascription: So what's going to happen now?anscription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.5384615384615384, 'accuracy': 0.46153846153846156, 'wil': 0.7692307692307692}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': -1, 'patience': 1.0}, 'greedy': {'best_of': -1}, 'n_threads': 12, 'print_progress': False} ---
Evaluation finished in: 3.6978590290000284
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 2.0, 'accuracy': -1.0, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': 1.0}, 'greedy': {'best_of': 1}, 'n_threads': 4, 'print_progress': False} ---


[I 2025-12-27 10:59:57,523] Trial 58 finished with values: [1.1498586149509809, 1.0] and parameters: {'model': 'small-q5_1', 'greedy': 2, 'beam_size': 3, 'patience': 0.5, 'use_ctx': 0, 'n_threads': 16}.


Evaluation finished in: 9.382846298000004
Final trascription: So what's gonna happen now?ow?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}
Evaluation finished in: 2.7327854810000645
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': -1, 'patience': 1.0}, 'greedy': {'best_of': -1}, 'n_threads': 12, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 3 (small)
whisper_model_load: 

----- Model: small -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 5, 'patience': -0.5}, 'greedy': {'best_of': 2}, 'n_threads': 8, 'print_progress': False} ---


[I 2025-12-27 10:59:58,962] Trial 61 finished with values: [0.45992377365196796, 0.46153846153846156] and parameters: {'model': 'base-q8_0', 'greedy': 1, 'beam_size': 4, 'patience': 1.0, 'use_ctx': 1, 'n_threads': 4}.


Evaluation finished in: 3.7529779930000586
Final trascription: So what's going to happen now?anscription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.7100591715976332}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 5, 'patience': -1.0}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---


[I 2025-12-27 11:00:01,048] Trial 63 finished with values: [0.40974579816181583, 1.0] and parameters: {'model': 'base-q5_1', 'greedy': -1, 'beam_size': -1, 'patience': 1.0, 'use_ctx': 0, 'n_threads': 12}.


Evaluation finished in: 8.606836853999994
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 2.1666666666666665, 'accuracy': -1.1666666666666665, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 0, 'patience': 1.0}, 'greedy': {'best_of': -1}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 3.3435257130004175
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 3 (small)
whisper_model_load: 

----- Model: small -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': -0.5}, 'greedy': {'best_of': 1}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 3.9824298200001067
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 0.3333333333333333, 'accuracy': 0.6666666666666667, 'wil': 0.4666666666666667}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 5, 'patience': -1.0}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 7.167161149000094
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 2.0, 'accuracy': -1.0, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 5, 'patience': -0.5}, 'greedy': {'best_of': 2}, 'n_threads': 8, 'print_progress': False} ---


[I 2025-12-27 11:00:07,948] Trial 65 finished with values: [0.5793736883578272, 1.0] and parameters: {'model': 'base-q5_1', 'greedy': -1, 'beam_size': 5, 'patience': -1.0, 'use_ctx': 0, 'n_threads': 4}.


Evaluation finished in: 4.72768929699987
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': 1.0}, 'greedy': {'best_of': 2}, 'n_threads': 4, 'print_progress': False} ---


[I 2025-12-27 11:00:10,057] Trial 62 finished with values: [1.1121724480392123, 0.15384615384615385] and parameters: {'model': 'small-q5_1', 'greedy': -1, 'beam_size': 0, 'patience': 1.0, 'use_ctx': 1, 'n_threads': 8}.


Evaluation finished in: 9.075327175999973
Final trascription: You work amazingly, but the transcription part is a little bit slow.ow.
Evaluated metrics: metrics={'wer': 0.15384615384615385, 'accuracy': 0.8461538461538461, 'wil': 0.22435897435897445}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 2 (base)
whisper_model_load: adding

----- Model: base -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 2, 'patience': -1.0}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 3.8281226150002112
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 2.1666666666666665, 'accuracy': -1.1666666666666665, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': 1.0}, 'greedy': {'best_of': 2}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 10.440508710999893
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': -0.5}, 'greedy': {'best_of': 1}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finish

[I 2025-12-27 11:00:13,236] Trial 64 finished with values: [0.9647619287990188, 0.07692307692307693] and parameters: {'model': 'small', 'greedy': 2, 'beam_size': 5, 'patience': -0.5, 'use_ctx': 1, 'n_threads': 8}.


Evaluation finished in: 7.872457338999993
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 3 (small)
whisper_model_l

----- Model: small-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 5, 'patience': -1.0}, 'greedy': {'best_of': 1}, 'n_threads': 8, 'print_progress': False} ---


[I 2025-12-27 11:00:15,124] Trial 68 finished with values: [0.32934835330885176, 0.0] and parameters: {'model': 'base', 'greedy': 2, 'beam_size': 2, 'patience': -1.0, 'use_ctx': 1, 'n_threads': 16}.


Evaluation finished in: 2.6874825630002306
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 3 (small)
whisper_model_l

----- Model: small-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 5, 'patience': 0.0}, 'greedy': {'best_of': 2}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 3.581560760999764
Final trascription: So what's gonna happen now?transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.6858974358974359}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 1 (tiny)
whisper_model_load: a

----- Model: tiny-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': 1.0}, 'greedy': {'best_of': 1}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 1.9357714919997306
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 0.3333333333333333, 'accuracy': 0.6666666666666667, 'wil': 0.4666666666666667}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': 1.0}, 'greedy': {'best_of': 1}, 'n_threads': 8, 'print_progress': False} ---


[I 2025-12-27 11:00:19,845] Trial 71 finished with values: [0.2525726910538892, 0.07692307692307693] and parameters: {'model': 'tiny-q5_1', 'greedy': 1, 'beam_size': 4, 'patience': 1.0, 'use_ctx': 1, 'n_threads': 8}.


Evaluation finished in: 2.060993158999736
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 3 (small)
whisper_model_load: 

----- Model: small -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 4, 'patience': -1.0}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 9.119165666999834
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 2.0, 'accuracy': -1.0, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 5, 'patience': -1.0}, 'greedy': {'best_of': 1}, 'n_threads': 8, 'print_progress': False} ---


[I 2025-12-27 11:00:23,306] Trial 66 finished with values: [1.3670666259803752, 1.0] and parameters: {'model': 'small', 'greedy': 1, 'beam_size': 4, 'patience': -0.5, 'use_ctx': 1, 'n_threads': 4}.


Evaluation finished in: 11.155263667999861
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 3, 'patience': -1.0}, 'greedy': {'best_of': -1}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 9.911386111999946
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 2.0, 'accuracy': -1.0, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 5, 'patience': 0.0}, 'greedy': {'best_of': 2}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 3.5888865929996427
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 0.3333333333333333, 'accuracy': 0.6666666666666667, 'wil': 0.4666666666666667}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 3, 'patience': -1.0}, 'greedy': {'best_of': -1}, 'n_threads': 16, 'print_progress': False} ---


[I 2025-12-27 11:00:31,211] Trial 73 finished with values: [0.4963138148284253, 1.0] and parameters: {'model': 'base-q5_1', 'greedy': -1, 'beam_size': 3, 'patience': -1.0, 'use_ctx': 0, 'n_threads': 16}.


Evaluation finished in: 4.04992072899995
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 0, 'patience': 0.5}, 'greedy': {'best_of': 1}, 'n_threads': 12, 'print_progress': False} ---
Evaluation finished in: 3.1568817249999483
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 0, 'patience': 0.5}, 'greedy': {'best_of': 1}, 'n_threads': 12, 'print_progress': False} ---


[I 2025-12-27 11:00:34,957] Trial 69 finished with values: [1.4877350889705376, 0.0] and parameters: {'model': 'small-q5_1', 'greedy': 1, 'beam_size': 5, 'patience': -1.0, 'use_ctx': 0, 'n_threads': 8}.


Evaluation finished in: 12.139918325999588
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 1 (tiny)
whisper_model_load: adding

----- Model: tiny -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': 1.0}, 'greedy': {'best_of': 1}, 'n_threads': 12, 'print_progress': False} ---


[I 2025-12-27 11:00:36,015] Trial 70 finished with values: [1.297854769240238, 0.5384615384615384] and parameters: {'model': 'small-q5_1', 'greedy': 2, 'beam_size': 5, 'patience': 0.0, 'use_ctx': 1, 'n_threads': 8}.


Evaluation finished in: 10.590494917000342
Final trascription: So what's going to happen now?anscription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.5384615384615384, 'accuracy': 0.46153846153846156, 'wil': 0.7692307692307692}
Evaluation finished in: 0.8260389110000688
Final trascription: So what's gonna happen now?transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 1.6666666666666667, 'accuracy': -0.6666666666666667, 'wil': 0.9444444444444444}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': 1.0}, 'greedy': {'best_of': 1}, 'n_threads': 12, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 2 (base)
whisper_model_load: adding

----- Model: base -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 2, 'patience': -1.0}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---


[I 2025-12-27 11:00:36,535] Trial 74 finished with values: [0.22143950171571794, 0.0] and parameters: {'model': 'base-q8_0', 'greedy': 1, 'beam_size': 0, 'patience': 0.5, 'use_ctx': 0, 'n_threads': 12}.


Evaluation finished in: 1.8069463340002585
Final trascription: You work amazingly, but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': -0.5}, 'greedy': {'best_of': 1}, 'n_threads': 4, 'print_progress': False} ---


[I 2025-12-27 11:00:37,118] Trial 72 finished with values: [1.0635696425245165, 0.07692307692307693] and parameters: {'model': 'small', 'greedy': 2, 'beam_size': 4, 'patience': -1.0, 'use_ctx': 0, 'n_threads': 16}.


Evaluation finished in: 8.678728283000055
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 1 (tiny)
whisper_model_load: adding

Evaluation finished in: 1.2678973160000169
Final trascription: So what's gonna happen now?transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.6858974358974359}
----- Model: tiny -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': 0.5}, 'greedy': {'best_of': -1}, 'n_threads': 16, 'print_progress': False} ---


whisper_model_load: model size    =   59.12 MB
whisper_backend_init_gpu: no GPU found
whisper_init_state: kv self size  =    6.29 MB
whisper_init_state: kv cross size =   18.87 MB
whisper_init_state: kv pad  size  =    3.15 MB
whisper_init_state: compute buffer (conv)   =   16.28 MB
whisper_init_state: compute buffer (encode) =   23.09 MB
whisper_init_state: compute buffer (cross)  =    4.66 MB
whisper_init_state: compute buffer (decode) =   96.37 MB


----- Model: base-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': -0.5}, 'greedy': {'best_of': 0}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 1.7039066400002412
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 2.0, 'accuracy': -1.0, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 2, 'patience': -1.0}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 1.204304498000056
Final trascription: 
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': 0.5}, 'greedy': {'best_of': -1}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 2.9496466870000404
Final trascription: You work amazi

[I 2025-12-27 11:00:40,067] Trial 78 finished with values: [0.1803820977941354, 1.0] and parameters: {'model': 'tiny', 'greedy': -1, 'beam_size': 4, 'patience': 0.5, 'use_ctx': 1, 'n_threads': 16}.
whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n

Evaluation finished in: 1.4719179180001447
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}
Evaluation finished in: 2.226680776000194
Final trascription: So what's gonna happen now? transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.6858974358974359}
----- Model: base-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 5, 'patience': 0.5}, 'greedy': {'best_of': 2}, 'n_threads': 8, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 3 (small)
whisper_model_load: 

Evaluation finished in: 3.1661318870001196
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': -0.5}, 'greedy': {'best_of': 0}, 'n_threads': 4, 'print_progress': False} ---
----- Model: small -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 4, 'patience': 1.0}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---


whisper_model_load: model size    =  487.01 MB
whisper_backend_init_gpu: no GPU found
whisper_init_state: kv self size  =   18.87 MB
whisper_init_state: kv cross size =   56.62 MB
whisper_init_state: kv pad  size  =    4.72 MB
whisper_init_state: compute buffer (conv)   =   22.42 MB
whisper_init_state: compute buffer (encode) =   33.85 MB
whisper_init_state: compute buffer (cross)  =    6.20 MB
whisper_init_state: compute buffer (decode) =   97.28 MB
[I 2025-12-27 11:00:42,428] Trial 77 finished with values: [0.32648171176474, 0.15384615384615385] and parameters: {'model': 'base-q8_0', 'greedy': 1, 'beam_size': -1, 'patience': -0.5, 'use_ctx': 1, 'n_threads': 4}.


Evaluation finished in: 2.12070285999971
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 5, 'patience': 0.5}, 'greedy': {'best_of': 2}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 2.6640907680002783
Final trascription: You work amazingly, but the transcription part is a little bit slow.ow.
Evaluated metrics: metrics={'wer': 0.15384615384615385, 'accuracy': 0.8461538461538461, 'wil': 0.22435897435897445}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': 0.5}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---


[I 2025-12-27 11:00:44,737] Trial 79 finished with values: [0.49581149926470347, 1.0] and parameters: {'model': 'base-q5_1', 'greedy': 0, 'beam_size': 1, 'patience': -0.5, 'use_ctx': 1, 'n_threads': 4}.


Evaluation finished in: 4.04582183399998
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}
----- Model: base-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 3, 'patience': -0.5}, 'greedy': {'best_of': 1}, 'n_threads': 4, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 2 (base)
whisper_model_load: a

Evaluation finished in: 2.6469189399999777
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}
Evaluation finished in: 2.3739518040001713
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 2.0, 'accuracy': -1.0, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': 0.5}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 3 (small)
whisper_model_load: 

----- Model: small -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': -1, 'patience': 0.5}, 'greedy': {'best_of': 2}, 'n_threads': 8, 'print_progress': False} ---


[I 2025-12-27 11:00:48,122] Trial 82 finished with values: [0.37257449240196455, 0.07692307692307693] and parameters: {'model': 'base-q5_1', 'greedy': 2, 'beam_size': -1, 'patience': 0.5, 'use_ctx': 1, 'n_threads': 16}.


Evaluation finished in: 3.034737039999982
Final trascription: So what's going to happen now?anscription part is a little bit slow.
Evaluated metrics: metrics={'wer': 1.1666666666666667, 'accuracy': -0.16666666666666674, 'wil': 0.6527777777777777}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 3, 'patience': -0.5}, 'greedy': {'best_of': 1}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 3.0402078580000307
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 3 (small)
whisper_model_l

----- Model: small-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': -1, 'patience': 0.5}, 'greedy': {'best_of': 1}, 'n_threads': 12, 'print_progress': False} ---
Evaluation finished in: 9.497677720999945
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 4, 'patience': 1.0}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---


[I 2025-12-27 11:00:52,135] Trial 83 finished with values: [0.5072770132353057, 0.07692307692307693] and parameters: {'model': 'base-q8_0', 'greedy': 1, 'beam_size': 3, 'patience': -0.5, 'use_ctx': 1, 'n_threads': 4}.


Evaluation finished in: 4.139380428000095
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 3 (small)
whisper_model_l

----- Model: small-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 3, 'patience': 1.0}, 'greedy': {'best_of': -1}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 7.81479708300003
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': -1, 'patience': 0.5}, 'greedy': {'best_of': 2}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 8.529180924999764
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 0.3333333333333333, 'accuracy': 0.6666666666666667, 'wil': 0.4666666666666667}
---- File: download (1).wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': -1, 'patience': 0.5}, 'greedy': {'best_of': 1}, 'n_threads': 12, 'print_progress': False} ---
Evaluation finished in: 8.759309849000147

[I 2025-12-27 11:01:03,033] Trial 81 finished with values: [1.555351296568621, 0.07692307692307693] and parameters: {'model': 'small', 'greedy': -1, 'beam_size': 4, 'patience': 1.0, 'use_ctx': 0, 'n_threads': 4}.


Evaluation finished in: 12.691666579999946
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.07692307692307693, 'accuracy': 0.9230769230769231, 'wil': 0.07692307692307687}
----- Model: tiny-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 3, 'patience': -1.0}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-tiny-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 384
whisper_model_load: n_audio_head  = 6
whisper_model_load: n_audio_layer = 4
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 384
whisper_model_load: n_text_head   = 6
whisper_model_load: n_text_layer  = 4
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 1 (tiny)
whisper_model_load: a

Evaluation finished in: 10.03635565400009
Final trascription: So what's gonna happen now?ow?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 3 (small)
whisper_model_load: 

----- Model: small -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': -1.0}, 'greedy': {'best_of': 1}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 1.5678114669999559
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 2.1666666666666665, 'accuracy': -1.1666666666666665, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 3, 'patience': -1.0}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---


[I 2025-12-27 11:01:06,098] Trial 85 finished with values: [1.1095545719362743, 1.0] and parameters: {'model': 'small-q5_1', 'greedy': 1, 'beam_size': -1, 'patience': 0.5, 'use_ctx': 0, 'n_threads': 12}.


Evaluation finished in: 9.053965306999999
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 3 (small)
whisper_model_l

----- Model: small-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': 0.5}, 'greedy': {'best_of': -1}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 1.6248935450003046
Final trascription: You work amazingly, but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}


whisper_model_load: model size    =   77.11 MB
whisper_backend_init_gpu: no GPU found
whisper_init_state: kv self size  =    3.15 MB
whisper_init_state: kv cross size =    9.44 MB
whisper_init_state: kv pad  size  =    2.36 MB
whisper_init_state: compute buffer (conv)   =   13.21 MB
whisper_init_state: compute buffer (encode) =   17.72 MB
whisper_init_state: compute buffer (cross)  =    3.89 MB
whisper_init_state: compute buffer (decode) =   95.91 MB


----- Model: tiny -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': -1.0}, 'greedy': {'best_of': -1}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 1.3210196639997775
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 0.3333333333333333, 'accuracy': 0.6666666666666667, 'wil': 0.4666666666666667}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': -1.0}, 'greedy': {'best_of': -1}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 6.865491042000031
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}


[I 2025-12-27 11:01:08,217] Trial 86 finished with values: [0.8413591963235332, 0.0] and parameters: {'model': 'small-q8_0', 'greedy': -1, 'beam_size': 3, 'patience': 1.0, 'use_ctx': 0, 'n_threads': 8}.
whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_mode

----- Model: small-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 2, 'patience': 1.0}, 'greedy': {'best_of': 2}, 'n_threads': 4, 'print_progress': False} ---


[I 2025-12-27 11:01:09,652] Trial 90 finished with values: [0.2004959507352882, 0.0] and parameters: {'model': 'tiny', 'greedy': -1, 'beam_size': -1, 'patience': -1.0, 'use_ctx': 1, 'n_threads': 8}.


Evaluation finished in: 1.6360469579999517
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 3 (small)
whisper_model_load: 

----- Model: small -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 5, 'patience': -0.5}, 'greedy': {'best_of': 2}, 'n_threads': 4, 'print_progress': False} ---
Evaluation finished in: 6.753703619999669
Final trascription: You work amazingly, but the transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 2.0, 'accuracy': -1.0, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': 0.5}, 'greedy': {'best_of': -1}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 9.154081703999964
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 0.3333333333333333, 'accuracy': 0.6666666666666667, 'wil': 0.4666666666666667}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': -1, 'patience': -1.0}, 'greedy': {'best_of': 1}, 'n_threads': 4, 'print_progress': False} ---
Evaluatio

[I 2025-12-27 11:01:20,882] Trial 89 finished with values: [0.9401631946078579, 0.46153846153846156] and parameters: {'model': 'small-q8_0', 'greedy': -1, 'beam_size': 1, 'patience': 0.5, 'use_ctx': 1, 'n_threads': 8}.


Evaluation finished in: 7.67173166800012
Final trascription: So what's gonna happen now? transcription part is a little bit slow.
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.6858974358974359}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 3 (small)
whisper_model_load: 

----- Model: small -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': 1.0}, 'greedy': {'best_of': 1}, 'n_threads': 12, 'print_progress': False} ---


[I 2025-12-27 11:01:23,675] Trial 88 finished with values: [1.2656324427696268, 1.0] and parameters: {'model': 'small', 'greedy': 1, 'beam_size': -1, 'patience': -1.0, 'use_ctx': 1, 'n_threads': 4}.


Evaluation finished in: 10.327560733000155
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 3, 'patience': -0.5}, 'greedy': {'best_of': -1}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 3.3473093679999693
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 3, 'patience': -0.5}, 'greedy': {'best_of': -1}, 'n_threads': 8, 'print_progress': False} ---
Evaluation finished in: 6.6719136520000575
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 2.1666666666666665, 'accuracy': -1.1666666666666665, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': 1.0}, 'greedy': {'best_of': 1}, 'n_threads': 12, 'print_progress': False} ---


[I 2025-12-27 11:01:30,209] Trial 94 finished with values: [0.3568330040441397, 1.0] and parameters: {'model': 'base-q8_0', 'greedy': -1, 'beam_size': 3, 'patience': -0.5, 'use_ctx': 1, 'n_threads': 8}.


Evaluation finished in: 2.91175731300018
Final trascription: So what's gonna happen now?ow?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 3 (small)
whisper_model_l

Evaluation finished in: 10.421739475999857
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}
----- Model: small-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': False, 'beam_search': {'beam_size': 0, 'patience': 1.0}, 'greedy': {'best_of': -1}, 'n_threads': 8, 'print_progress': False} ---


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-small-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 3 (small)
whisper_model_l

----- Model: small-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': 0.5}, 'greedy': {'best_of': 0}, 'n_threads': 16, 'print_progress': False} ---


[I 2025-12-27 11:01:32,754] Trial 91 finished with values: [1.512608871078446, 0.0] and parameters: {'model': 'small-q5_1', 'greedy': 2, 'beam_size': 2, 'patience': 1.0, 'use_ctx': 1, 'n_threads': 4}.


Evaluation finished in: 12.34288838800012
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q5_1.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 9
whisper_model_load: qntvr         = 1
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q5_1 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': 0.5}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---


[I 2025-12-27 11:01:36,009] Trial 93 finished with values: [0.964814016053901, 0.0] and parameters: {'model': 'small', 'greedy': 1, 'beam_size': 4, 'patience': 1.0, 'use_ctx': 1, 'n_threads': 12}.


Evaluation finished in: 7.872882370999832
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base-q8_0.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 7
whisper_model_load: qntvr         = 2
whisper_model_load: type          = 2 (base)
whisper_model_load: a

----- Model: base-q8_0 -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': 0.5}, 'greedy': {'best_of': 0}, 'n_threads': 12, 'print_progress': False} ---
Evaluation finished in: 3.4473685749999277
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 2.1666666666666665, 'accuracy': -1.1666666666666665, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 4, 'patience': 0.5}, 'greedy': {'best_of': 2}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 3.0852237130002322
Final trascription: So what's going to happen now?
Evaluated metrics: metrics={'wer': 0.0, 'accuracy': 1.0, 'wil': 0.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': 0.5}, 'greedy': {'best_of': 0}, 'n_threads': 12, 'print_progress': False} ---
Evaluation fi

[I 2025-12-27 11:01:39,892] Trial 97 finished with values: [0.4202070235294286, 0.46153846153846156] and parameters: {'model': 'base-q5_1', 'greedy': 2, 'beam_size': 4, 'patience': 0.5, 'use_ctx': 1, 'n_threads': 16}.


Evaluation finished in: 8.675360103999992
Final trascription: You work amazingly but the transcription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 2.1666666666666665, 'accuracy': -1.1666666666666665, 'wil': 1.0}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 1, 'patience': 0.5}, 'greedy': {'best_of': 0}, 'n_threads': 16, 'print_progress': False} ---
Evaluation finished in: 3.4288893120001376
Final trascription: So what's going to happen now?nscription part is a little bit too slow.
Evaluated metrics: metrics={'wer': 0.46153846153846156, 'accuracy': 0.5384615384615384, 'wil': 0.7100591715976332}


whisper_init_from_file_with_params_no_state: loading model from '/home/marco/.local/share/pywhispercpp/models/ggml-base.bin'
whisper_init_with_params_no_state: use gpu    = 1
whisper_init_with_params_no_state: flash attn = 1
whisper_init_with_params_no_state: gpu_device = 0
whisper_init_with_params_no_state: dtw        = 0
whisper_init_with_params_no_state: devices    = 1
whisper_init_with_params_no_state: backends   = 1
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head  = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 512
whisper_model_load: n_text_head   = 8
whisper_model_load: n_text_layer  = 6
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 2 (base)
whisper_model_load: adding

----- Model: base -----
---- File: download.wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 5, 'patience': -1.0}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---


[I 2025-12-27 11:01:42,424] Trial 98 finished with values: [0.37553284485290855, 1.0] and parameters: {'model': 'base-q8_0', 'greedy': 0, 'beam_size': 1, 'patience': 0.5, 'use_ctx': 1, 'n_threads': 12}.


Evaluation finished in: 3.0643480139997337
Final trascription: So what's gonna happen now?ow?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}
Evaluation finished in: 4.1429417170002125
Final trascription: So what's going to happen now?anscription part is a little bit slow.
Evaluated metrics: metrics={'wer': 1.1666666666666667, 'accuracy': -0.16666666666666674, 'wil': 0.6527777777777777}
---- File: download (1).wav ----
--- Params: {'no_context': True, 'beam_search': {'beam_size': 5, 'patience': -1.0}, 'greedy': {'best_of': -1}, 'n_threads': 4, 'print_progress': False} ---


[I 2025-12-27 11:01:46,764] Trial 95 finished with values: [0.8910649485294047, 1.0] and parameters: {'model': 'small-q8_0', 'greedy': -1, 'beam_size': 0, 'patience': 1.0, 'use_ctx': 0, 'n_threads': 8}.


Evaluation finished in: 7.271089979999942
Final trascription: So what's gonna happen now?
Evaluated metrics: metrics={'wer': 1.0, 'accuracy': 0.0, 'wil': 1.0}


[I 2025-12-27 11:01:46,967] Trial 99 finished with values: [0.31208607683821604, 0.15384615384615385] and parameters: {'model': 'base', 'greedy': -1, 'beam_size': 5, 'patience': -1.0, 'use_ctx': 1, 'n_threads': 4}.
[I 2025-12-27 11:01:46,970] Trial 96 finished with values: [0.87582146495099, 0.15384615384615385] and parameters: {'model': 'small-q5_1', 'greedy': 0, 'beam_size': 1, 'patience': 0.5, 'use_ctx': 1, 'n_threads': 16}.


Evaluation finished in: 7.146703154000079
Final trascription: You work amazingly, but the transcription part is a little bit slow.ow.
Evaluated metrics: metrics={'wer': 0.15384615384615385, 'accuracy': 0.8461538461538461, 'wil': 0.22435897435897445}
Evaluation finished in: 2.546622386999843
Final trascription: You work amazingly, but the transcription part is a little bit slow.ow.
Evaluated metrics: metrics={'wer': 0.15384615384615385, 'accuracy': 0.8461538461538461, 'wil': 0.22435897435897445}


In [9]:
def moonshine_obj(trial: optuna.Trial) -> tuple[float, float]:
    model = trial.suggest_categorical("model", ["tiny", "base"])

    scores = []
    stt, trans_buf = create_model(MoonshineSTT, **{"model": model})
    for score in run_evals(stt, trans_buf):
        scores.append(score)

    return scores[1]["rtf"], scores[1]["wer"]

In [14]:
moonshine_study = optuna.create_study(
    storage=f"sqlite:///{RESULTS_DIR}/moonshine_db.sqlite3",
    directions=["maximize", "minimize"],  # maximize rtf and minimize wer
)
moonshine_study.optimize(moonshine_obj, 10, n_jobs=4)

[I 2025-12-27 14:13:04,897] A new study created in RDB with name: no-name-452cfa95-2baf-4f11-ae32-4825e0b4db0a


----- Model: <voxtral.stt.moonshine.MoonshineSTT object at 0x749f85bb1d30> -----
----- Model: <voxtral.stt.moonshine.MoonshineSTT object at 0x749f841b1550> -----
----- Model: <voxtral.stt.moonshine.MoonshineSTT object at 0x749f85ba5c70> -----
----- Model: <voxtral.stt.moonshine.MoonshineSTT object at 0x749f85ba6b70> -----


[I 2025-12-27 14:13:09,779] Trial 3 finished with values: [20.106960008703545, 0.38461538461538464] and parameters: {'model': 'tiny'}.
[I 2025-12-27 14:13:09,943] Trial 0 finished with values: [15.635117649164055, 0.38461538461538464] and parameters: {'model': 'tiny'}.
[I 2025-12-27 14:13:10,090] Trial 2 finished with values: [13.620024659681384, 0.15384615384615385] and parameters: {'model': 'base'}.
[I 2025-12-27 14:13:10,841] Trial 1 finished with values: [6.124928412928033, 0.15384615384615385] and parameters: {'model': 'base'}.


----- Model: <voxtral.stt.moonshine.MoonshineSTT object at 0x749fb6f0a5b0> -----


[I 2025-12-27 14:13:12,218] Trial 4 finished with values: [6.490488522742609, 0.38461538461538464] and parameters: {'model': 'tiny'}.


----- Model: <voxtral.stt.moonshine.MoonshineSTT object at 0x74a02a10cad0> -----
----- Model: <voxtral.stt.moonshine.MoonshineSTT object at 0x74a02a10f1d0> -----
----- Model: <voxtral.stt.moonshine.MoonshineSTT object at 0x749f859ee780> -----
----- Model: <voxtral.stt.moonshine.MoonshineSTT object at 0x749f859ee200> -----


[I 2025-12-27 14:13:15,890] Trial 8 finished with values: [26.50542911744428, 0.38461538461538464] and parameters: {'model': 'tiny'}.
[I 2025-12-27 14:13:16,027] Trial 5 finished with values: [5.322828397504541, 0.15384615384615385] and parameters: {'model': 'base'}.
[I 2025-12-27 14:13:16,129] Trial 7 finished with values: [11.008488568650407, 0.15384615384615385] and parameters: {'model': 'base'}.
[I 2025-12-27 14:13:16,212] Trial 6 finished with values: [9.832843006445552, 0.15384615384615385] and parameters: {'model': 'base'}.


----- Model: <voxtral.stt.moonshine.MoonshineSTT object at 0x749f841f3750> -----


[I 2025-12-27 14:13:17,779] Trial 9 finished with values: [34.43327969741979, 0.15384615384615385] and parameters: {'model': 'base'}.


In [15]:
def parakeet_objective(trial: optuna.Trial) -> tuple[float, float]:
    model = trial.suggest_categorical(
        "model",
        [
            "nemo-parakeet-ctc-0.6b",
            "nemo-parakeet-rnnt-0.6b",
            "nemo-parakeet-tdt-0.6b-v2",
            "nemo-parakeet-tdt-0.6b-v3",
        ],
    )
    quant = trial.suggest_categorical("quant", [None, "int8"])
    stt, trans_buf = create_model(ParakeetSTT, **{"model": model, "quant": quant})

    scores = []
    for score in run_evals(stt, trans_buf):
        scores.append(score)
    return scores[1]["rtf"], scores[1]["wer"]

In [17]:
parakeet_study = optuna.create_study(
    storage=f"sqlite:///{RESULTS_DIR}/parakeet_db.sqlite3",
    directions=["maximize", "minimize"],  # maximize rtf and minimize wer
)
parakeet_study.optimize(parakeet_objective, 50, n_jobs=4)

[I 2025-12-27 16:25:46,525] A new study created in RDB with name: no-name-064f4994-e041-44d2-9f75-93ab9874d01d


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a201459d0> -----
----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a2d3a6ed0> -----
----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a20102ab0> -----
----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a2d26ef90> -----


[I 2025-12-27 16:25:56,740] Trial 0 finished with values: [4.499938177137236, 0.0] and parameters: {'model': 'nemo-parakeet-rnnt-0.6b', 'quant': 'int8'}.
[I 2025-12-27 16:25:59,568] Trial 1 finished with values: [2.0827432865467963, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v2', 'quant': 'int8'}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a2d232810> -----
----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a200e9bb0> -----


[I 2025-12-27 16:26:02,114] Trial 2 finished with values: [1.4308803361108757, 0.0] and parameters: {'model': 'nemo-parakeet-rnnt-0.6b', 'quant': None}.
[I 2025-12-27 16:26:04,216] Trial 3 finished with values: [1.03451587991619, 0.0] and parameters: {'model': 'nemo-parakeet-rnnt-0.6b', 'quant': None}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a200eb3b0> -----


[I 2025-12-27 16:26:04,353] Trial 4 finished with values: [3.280971150059515, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v2', 'quant': None}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a239a6990> -----
----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a20100230> -----


[I 2025-12-27 16:26:09,922] Trial 6 finished with values: [2.1042023592359, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v2', 'quant': None}.
[I 2025-12-27 16:26:13,532] Trial 7 finished with values: [2.116664415129755, 0.07692307692307693] and parameters: {'model': 'nemo-parakeet-ctc-0.6b', 'quant': 'int8'}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a200e9d90> -----


[I 2025-12-27 16:26:15,530] Trial 8 finished with values: [4.616656472909089, 0.0] and parameters: {'model': 'nemo-parakeet-ctc-0.6b', 'quant': None}.
[I 2025-12-27 16:26:15,542] Trial 5 finished with values: [0.8456532045814158, 0.0] and parameters: {'model': 'nemo-parakeet-rnnt-0.6b', 'quant': None}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a239534d0> -----
----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a200e9d30> -----
----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a200cc230> -----


[I 2025-12-27 16:26:20,200] Trial 9 finished with values: [1.8364788802791567, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v3', 'quant': None}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a2516c3b0> -----


[I 2025-12-27 16:26:22,298] Trial 10 finished with values: [3.4447990213502195, 0.0] and parameters: {'model': 'nemo-parakeet-rnnt-0.6b', 'quant': 'int8'}.
[I 2025-12-27 16:26:24,158] Trial 11 finished with values: [4.054590120905226, 0.07692307692307693] and parameters: {'model': 'nemo-parakeet-ctc-0.6b', 'quant': 'int8'}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a23906090> -----
----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a20147d10> -----


[I 2025-12-27 16:26:27,457] Trial 12 finished with values: [2.564965968047655, 0.0] and parameters: {'model': 'nemo-parakeet-rnnt-0.6b', 'quant': None}.
[I 2025-12-27 16:26:29,202] Trial 14 finished with values: [3.4012316738538373, 0.0] and parameters: {'model': 'nemo-parakeet-ctc-0.6b', 'quant': None}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a239056d0> -----


[I 2025-12-27 16:26:31,534] Trial 15 finished with values: [3.620009022798679, 0.0] and parameters: {'model': 'nemo-parakeet-ctc-0.6b', 'quant': None}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a200cc890> -----


[I 2025-12-27 16:26:33,427] Trial 13 finished with values: [1.272076045952137, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v2', 'quant': None}.
[I 2025-12-27 16:26:33,480] Trial 16 finished with values: [4.5700480815089435, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v2', 'quant': 'int8'}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a200cedb0> -----
----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a20147710> -----


[I 2025-12-27 16:26:35,455] Trial 17 finished with values: [4.056162228660734, 0.0] and parameters: {'model': 'nemo-parakeet-ctc-0.6b', 'quant': None}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a23951610> -----
----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a201472f0> -----


[I 2025-12-27 16:26:40,101] Trial 18 finished with values: [3.487128019257497, 0.0] and parameters: {'model': 'nemo-parakeet-rnnt-0.6b', 'quant': 'int8'}.
[I 2025-12-27 16:26:42,116] Trial 19 finished with values: [3.6549105990950403, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v2', 'quant': None}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a2d2ce5d0> -----


[I 2025-12-27 16:26:42,333] Trial 20 finished with values: [3.453714256492293, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v2', 'quant': None}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a239a53d0> -----


[I 2025-12-27 16:26:45,585] Trial 22 finished with values: [5.757171071089889, 0.8461538461538461] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v3', 'quant': 'int8'}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a2d26c710> -----
----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a20147cb0> -----


[I 2025-12-27 16:26:47,096] Trial 21 finished with values: [1.6909524602230834, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v3', 'quant': None}.
[I 2025-12-27 16:26:48,960] Trial 24 finished with values: [4.16660203157139, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v2', 'quant': 'int8'}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a20144950> -----


[I 2025-12-27 16:26:50,949] Trial 23 finished with values: [3.879471536182997, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v2', 'quant': None}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a2d232810> -----


[I 2025-12-27 16:26:52,479] Trial 25 finished with values: [3.662109465393689, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v2', 'quant': 'int8'}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a2d2ce5d0> -----


[I 2025-12-27 16:26:54,668] Trial 26 finished with values: [5.362383492138233, 0.8461538461538461] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v3', 'quant': 'int8'}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a239522d0> -----
----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a2d406150> -----


[I 2025-12-27 16:26:56,579] Trial 28 finished with values: [4.942495356657566, 0.8461538461538461] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v3', 'quant': 'int8'}.
[I 2025-12-27 16:26:56,593] Trial 27 finished with values: [2.109548029837627, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v3', 'quant': None}.
[I 2025-12-27 16:27:00,378] Trial 30 finished with values: [3.7178603140911273, 0.07692307692307693] and parameters: {'model': 'nemo-parakeet-ctc-0.6b', 'quant': 'int8'}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a20146810> -----
----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a20145fd0> -----


[I 2025-12-27 16:27:02,434] Trial 29 finished with values: [1.3905262617262697, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v2', 'quant': None}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a23905b50> -----
----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a2d26ee10> -----


[I 2025-12-27 16:27:04,764] Trial 31 finished with values: [3.6589267735575777, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v2', 'quant': None}.
[I 2025-12-27 16:27:06,348] Trial 32 finished with values: [2.1930449753904244, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v3', 'quant': None}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a20103c50> -----


[I 2025-12-27 16:27:08,084] Trial 34 finished with values: [5.2199454329973065, 0.8461538461538461] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v3', 'quant': 'int8'}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a23953890> -----


[I 2025-12-27 16:27:09,466] Trial 35 finished with values: [5.526950242837394, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v2', 'quant': 'int8'}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a239a53d0> -----


[I 2025-12-27 16:27:11,387] Trial 36 finished with values: [4.028536097933422, 0.07692307692307693] and parameters: {'model': 'nemo-parakeet-ctc-0.6b', 'quant': 'int8'}.
[I 2025-12-27 16:27:11,519] Trial 33 finished with values: [1.2343826302922858, 0.0] and parameters: {'model': 'nemo-parakeet-rnnt-0.6b', 'quant': None}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a239a7f50> -----


[I 2025-12-27 16:27:13,740] Trial 37 finished with values: [3.5231887862221183, 0.07692307692307693] and parameters: {'model': 'nemo-parakeet-ctc-0.6b', 'quant': 'int8'}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a2d406150> -----
----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a239056d0> -----


[I 2025-12-27 16:27:17,486] Trial 39 finished with values: [3.4510212366729056, 0.0] and parameters: {'model': 'nemo-parakeet-ctc-0.6b', 'quant': None}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a239a6150> -----


[I 2025-12-27 16:27:19,590] Trial 40 finished with values: [3.9481454734538657, 0.07692307692307693] and parameters: {'model': 'nemo-parakeet-ctc-0.6b', 'quant': 'int8'}.
[I 2025-12-27 16:27:19,776] Trial 38 finished with values: [1.7880287515131847, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v2', 'quant': None}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a239a77d0> -----


[I 2025-12-27 16:27:21,022] Trial 41 finished with values: [5.904751166628857, 0.0] and parameters: {'model': 'nemo-parakeet-ctc-0.6b', 'quant': None}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a20103890> -----
----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a200cdb50> -----
----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a239a6990> -----


[I 2025-12-27 16:27:24,681] Trial 42 finished with values: [3.9397405519979, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v3', 'quant': None}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a200e9a90> -----


[I 2025-12-27 16:27:26,979] Trial 43 finished with values: [2.904153517835191, 0.0] and parameters: {'model': 'nemo-parakeet-rnnt-0.6b', 'quant': 'int8'}.
[I 2025-12-27 16:27:26,980] Trial 44 finished with values: [3.0742190589861673, 0.8461538461538461] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v3', 'quant': 'int8'}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a239056d0> -----


[I 2025-12-27 16:27:29,509] Trial 45 finished with values: [4.755376126665514, 0.07692307692307693] and parameters: {'model': 'nemo-parakeet-ctc-0.6b', 'quant': 'int8'}.


----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a239a53d0> -----
----- Model: <voxtral.stt.parakeet.ParakeetSTT object at 0x753a2d406150> -----


[I 2025-12-27 16:27:31,801] Trial 46 finished with values: [3.954596375657883, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v2', 'quant': None}.
[I 2025-12-27 16:27:32,863] Trial 47 finished with values: [6.382097266108188, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v2', 'quant': 'int8'}.
[I 2025-12-27 16:27:32,944] Trial 48 finished with values: [5.831056883257508, 0.0] and parameters: {'model': 'nemo-parakeet-tdt-0.6b-v2', 'quant': 'int8'}.
[I 2025-12-27 16:27:32,999] Trial 49 finished with values: [9.59798681660592, 0.07692307692307693] and parameters: {'model': 'nemo-parakeet-ctc-0.6b', 'quant': 'int8'}.
