In [4]:
import json
import os
import soundfile as sf

# Build a JSON-lines manifest: one {"audio_filepath", "duration", "text"}
# object per line for every transcript entry whose audio file exists on disk.

# Explicit UTF-8 so the transcripts load the same way on every platform
# instead of depending on the locale's default encoding.
with open("./creolese-audio-dataset/transcripts.json", "r", encoding="utf-8") as f:
    dataset = json.load(f)

with open("manifest.json", "w", encoding="utf-8") as fout:
    for item in dataset:
        # Only the base name of the recorded path is used; the audio is
        # looked up inside the local "Audio Files" directory.
        filename = os.path.basename(item["audio"])
        audio_path = f"./creolese-audio-dataset/Audio Files/{filename}"

        # Report and skip entries whose audio file is missing
        if not os.path.exists(audio_path):
            print(f"Skipping missing file: {audio_path}")
            continue

        # Read only the file header: frames / samplerate is the same value as
        # len(samples) / samplerate, without decoding the whole recording.
        info = sf.info(audio_path)
        duration = info.frames / info.samplerate

        manifest_entry = {
            "audio_filepath": audio_path,
            "duration": duration,
            "text": item["text"]
        }

        # One JSON object per line, each terminated by a newline
        fout.write(json.dumps(manifest_entry) + "\n")

    

Source: https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_multilingual_fastconformer_hybrid_large_pc

In [7]:
!python /path/to/nemo/examples/asr/speech_to_text_transducer/train_speech_to_text_transducer.py \
  --config-path="./" \
  --config-name="fastconformer_config.yaml" \
  model.pretrained_model_name="stt_multilingual_fastconformer_hybrid_large_pc" \
  exp_manager.exp_dir="./nemo_experiments"


python: can't open file '/path/to/nemo/examples/asr/speech_to_text_transducer/train_speech_to_text_transducer.py': [Errno 2] No such file or directory


In [None]:
import nemo.collections.asr as nemo_asr

# Load the pretrained hybrid RNNT-CTC BPE model by its published name.
PRETRAINED_MODEL_NAME = "stt_multilingual_fastconformer_hybrid_large_pc"
asr_model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
    PRETRAINED_MODEL_NAME
)

# Transcribe a single recording and print the raw result as a quick check.
audio = "./creolese-audio-dataset/Audio Files/3.wav"
output = asr_model.transcribe([audio])
print(output)

In [12]:
import os
import json
import gc

In [20]:
# Transcribe every dataset entry with `asr_model` and save the lowercased
# reference/hypothesis pairs to JSON for later scoring.
ground_truth = []
predictions = []
evaluated_files = []  # audio file name for each saved pair, in the same order

transcription_path = "creolese-audio-dataset/transcripts.json"

# Explicit UTF-8 so the transcripts load the same way on every platform
with open(transcription_path, "r", encoding="utf-8") as f:
    dataset = json.load(f)

for item in dataset:
    filename = os.path.basename(item["audio"])
    audio_path = os.path.join("./creolese-audio-dataset/Audio Files", filename)
    reference = item["text"]

    try:
        # Transcribe one file per call
        result = asr_model.transcribe([audio_path])
        hypothesis = result[0].text

        # Compute both normalized strings before touching the result lists so
        # a failure here cannot leave the lists with different lengths.
        reference_lower = reference.lower()
        hypothesis_lower = hypothesis.lower()
    except Exception as e:
        # Best-effort evaluation: report the failure and move to the next file
        print(f"Skipping {audio_path}: {e}")
        continue
    else:
        ground_truth.append(reference_lower)
        predictions.append(hypothesis_lower)
        evaluated_files.append(filename)

        # Clear memory-heavy objects
        del result
        gc.collect()

    # Progress report (only reached for successfully transcribed files)
    print(f"Processed: {filename}")

# Final cleanup
gc.collect()

# "ref"/"hyp" are the keys the scoring cell reads; "audio" records which file
# each pair came from so skipped files can be identified afterwards.
output_data = [
    {"audio": name, "ref": ref, "hyp": hyp}
    for name, ref, hyp in zip(evaluated_files, ground_truth, predictions)
]

with open("./nvidia_pretrained_predictions.json", "w", encoding="utf-8") as out_file:
    json.dump(output_data, out_file, indent=4)

[NeMo I 2025-05-08 02:34:55 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:34:55 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:17<00:00, 17.71s/it]


Processed: 1.wav
[NeMo I 2025-05-08 02:35:16 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:35:16 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:02<00:00,  2.42s/it]


Processed: 2.wav
[NeMo I 2025-05-08 02:35:19 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:35:19 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:03<00:00,  3.33s/it]


Processed: 3.wav
[NeMo I 2025-05-08 02:35:23 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:35:23 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:19<00:00, 19.82s/it]


Processed: 4.wav
[NeMo I 2025-05-08 02:35:43 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:35:43 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [01:35<00:00, 95.79s/it]


Processed: 5.wav
[NeMo I 2025-05-08 02:37:20 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:37:20 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|██████████████████████████████| 1/1 [02:04<00:00, 124.65s/it]


Processed: 6.wav
[NeMo I 2025-05-08 02:39:25 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:39:25 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:00<00:00,  1.18it/s]


Processed: 7.wav
[NeMo I 2025-05-08 02:39:26 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:39:26 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [01:08<00:00, 68.39s/it]


Processed: 8.wav
[NeMo I 2025-05-08 02:40:35 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:40:35 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:03<00:00,  3.83s/it]


Processed: 9.wav
[NeMo I 2025-05-08 02:40:40 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:40:40 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:02<00:00,  2.97s/it]


Processed: 10.wav
[NeMo I 2025-05-08 02:40:43 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:40:43 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:05<00:00,  5.54s/it]


Processed: 11.wav
[NeMo I 2025-05-08 02:40:49 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:40:49 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:01<00:00,  1.63s/it]


Processed: 12.wav
[NeMo I 2025-05-08 02:40:51 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:40:51 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:00<00:00,  1.54it/s]


Processed: 13.wav
[NeMo I 2025-05-08 02:40:52 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:40:52 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:00<00:00,  1.68it/s]


Processed: 14.wav
[NeMo I 2025-05-08 02:40:53 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:40:53 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:02<00:00,  2.68s/it]


Processed: 15.wav
[NeMo I 2025-05-08 02:40:56 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:40:56 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:06<00:00,  6.55s/it]


Processed: 16.wav
[NeMo I 2025-05-08 02:41:04 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:41:04 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:08<00:00,  8.32s/it]


Processed: 17.wav
[NeMo I 2025-05-08 02:41:12 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:41:12 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|██████████████████████████████| 1/1 [09:11<00:00, 551.68s/it]


Processed: 18.wav
[NeMo I 2025-05-08 02:50:34 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 02:50:34 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|██████████████████████████████| 1/1 [09:39<00:00, 579.90s/it]


Processed: 19.wav
[NeMo I 2025-05-08 03:00:15 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 03:00:15 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:05<00:00,  5.96s/it]


Processed: 20.wav
[NeMo I 2025-05-08 03:00:22 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 03:00:22 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:08<00:00,  8.98s/it]


Processed: 21.wav
[NeMo I 2025-05-08 03:00:32 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 03:00:32 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:25<00:00, 25.57s/it]


Processed: 22.wav
[NeMo I 2025-05-08 03:00:59 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 03:00:59 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [01:27<00:00, 87.01s/it]


Processed: 23.wav
[NeMo I 2025-05-08 03:02:27 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 03:02:27 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:16<00:00, 16.25s/it]


Processed: 24.wav
[NeMo I 2025-05-08 03:02:44 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 03:02:44 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:20<00:00, 20.04s/it]


Processed: 25.wav
[NeMo I 2025-05-08 03:03:04 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 03:03:04 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:31<00:00, 31.65s/it]


Processed: 26.wav
[NeMo I 2025-05-08 03:03:37 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 03:03:37 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:05<00:00,  5.57s/it]


Processed: 27.wav
[NeMo I 2025-05-08 03:03:43 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 03:03:43 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [01:06<00:00, 66.94s/it]


Processed: 28.wav
[NeMo I 2025-05-08 03:04:51 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 03:04:51 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:39<00:00, 39.63s/it]


Processed: 29.wav
[NeMo I 2025-05-08 03:05:31 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 03:05:31 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [01:10<00:00, 70.72s/it]


Processed: 30.wav
[NeMo I 2025-05-08 03:06:42 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 03:06:42 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:32<00:00, 32.16s/it]


Processed: 31.wav
[NeMo I 2025-05-08 03:07:15 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 03:07:15 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:27<00:00, 27.09s/it]


Processed: 32.wav
[NeMo I 2025-05-08 03:07:43 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 03:07:43 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:02<00:00,  2.33s/it]


Processed: 33.wav
[NeMo I 2025-05-08 03:07:46 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 03:07:46 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:03<00:00,  3.80s/it]


Processed: 34.wav
[NeMo I 2025-05-08 03:07:50 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 03:07:50 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:13<00:00, 13.93s/it]


Processed: 35.wav
[NeMo I 2025-05-08 03:08:05 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}


[NeMo W 2025-05-08 03:08:05 nemo_logging:405] No conditional node support for Cuda.
    Cuda graphs with while loops are disabled, decoding speed will be slower
    Reason: CUDA is not available
Transcribing: 100%|███████████████████████████████| 1/1 [00:04<00:00,  4.43s/it]


Processed: 36.wav


In [40]:
import json
import jiwer

# Read the saved reference/hypothesis pairs back from disk.
with open("nvidia_pretrained_predictions.json", "r") as predictions_file:
    data = json.load(predictions_file)

# Split the records into two parallel lists: references and hypotheses.
ground_truth, predictions = [], []
for entry in data:
    ground_truth.append(entry['ref'])
    predictions.append(entry['hyp'])
        

In [41]:
def custom_transform(text):
    """Normalize a transcript string for scoring.

    Lowercases the text, deletes the punctuation characters
    , . : ; ! ? " ' ( ) [ ] { } and collapses every run of whitespace into a
    single space (leading and trailing whitespace is dropped).

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    # Translation table that maps each listed punctuation character to nothing
    strip_punctuation = str.maketrans("", "", ",.:;!?\"'()[]{}")
    lowered = text.lower()
    without_punctuation = lowered.translate(strip_punctuation)
    # split() with no argument drops all whitespace runs; join with single spaces
    return " ".join(without_punctuation.split())

# Normalize every reference and hypothesis string with custom_transform
ground_truth_transformed = list(map(custom_transform, ground_truth))
predictions_transformed = list(map(custom_transform, predictions))

# Score the hypotheses against the references with jiwer's wer, cer and mer
wer = jiwer.wer(ground_truth_transformed, predictions_transformed)
cer = jiwer.cer(ground_truth_transformed, predictions_transformed)
mer = jiwer.mer(ground_truth_transformed, predictions_transformed)

# Report each metric on its own line, formatted to four decimal places
print(f"WER: {wer:.4f}", f"CER: {cer:.4f}", f"MER: {mer:.4f}", sep="\n")


WER: 0.9098
CER: 0.4313
MER: 0.8973
