# Evaluate malaya-speech

#### Import packages

In [1]:
import malaya_speech
import numpy as np
from malaya_speech import Pipeline
from pyctcdecode.language_model import LanguageModel
import kenlm
from pyctcdecode import build_ctcdecoder
from malaya_speech.utils.char import CTC_VOCAB
# from ctc_decoders import Scorer

  'Cannot import beam_search_ops from Tensorflow Addons, `deep_model` for stemmer will not available to use, make sure Tensorflow Addons version >= 0.12.0'


#### GPU

In [2]:
# Optional: list the devices malaya-speech reports.
# malaya_speech.utils.available_device()

import os
# Choose which CUDA device index this process is allowed to see.
# Swap to the "-1" line to hide every GPU (presumably forcing CPU execution).
# NOTE(review): malaya_speech is imported in an earlier cell and may already have
# imported TensorFlow; confirm the variable still takes effect when set here.
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import tensorflow as tf

# Enable memory growth on every GPU TensorFlow lists; skipped when no GPU is found.
# Presumably this keeps TensorFlow from reserving all GPU memory up front — confirm
# against the TensorFlow GPU guide.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
       tf.config.experimental.set_memory_growth(gpu,True)

#### Cache related: https://malaya-speech.readthedocs.io/en/latest/load-cache.html

In [3]:
# Show the default cache location reported by malaya-speech.
print('Default cache location: {}'.format(malaya_speech.__home__))

# Cache maintenance helpers below are left commented out so that running the
# notebook never deletes anything.

# Print the cache subdirectories:
# malaya_speech.utils.print_cache()

# Delete one specific model:
# malaya_speech.utils.delete_cache('speech-to-text-ctc-v2/best-rq-conformer-large')

# Purge the cache (delete all models).
# NOTE(review): the line below has no call parentheses, so uncommenting it as-is
# would only reference the function — confirm the intended call against the docs.
# malaya_speech.utils.delete_all_cache

Default cache location: C:\Users\shuling\Malaya-Speech


#### Functions

In [4]:
from pathlib import Path

def load_audio_list(audio_dir_str):
    """Load every ``*.wav`` file found directly inside a directory.

    Args:
        audio_dir_str: Directory to scan (string or path-like). Only the
            directory itself is searched, not its sub-directories.

    Returns:
        A tuple ``(audio_path, loaded_audio)`` of two index-aligned lists:
        ``audio_path[i]`` is the ``Path`` of the i-th file and
        ``loaded_audio[i]`` is the first element of what
        ``malaya_speech.load`` returns for that file. Both lists are empty
        when the directory contains no ``.wav`` file.
    """
    # Path.glob yields entries in an arbitrary, filesystem-dependent order.
    # Sorting keeps the "Audio file N" numbering in the printed results
    # stable across runs and machines.
    audio_path = sorted(Path(audio_dir_str).glob("*.wav"))
    loaded_audio = [malaya_speech.load(str(audio_file))[0] for audio_file in audio_path]
    return audio_path, loaded_audio


def write_to_file(stt_result, audio_path, desc):
    """Append transcripts and their reference labels to ``./stt_result.txt``.

    For every transcript the reference label is read from a ``.txt`` file that
    sits next to the audio file and shares its stem (``clip.wav`` ->
    ``clip.txt``). Each call appends a dashed separator, the ``desc`` heading
    and then one ``Label:`` / ``Result:`` pair per transcript.

    Args:
        stt_result: Transcripts, one per audio file.
        audio_path: Audio file paths (``str`` or ``Path``), index-aligned with
            ``stt_result``.
        desc: Heading line describing this batch of results.

    Raises:
        IndexError: If ``audio_path`` has fewer entries than ``stt_result``.
        FileNotFoundError: If a label file does not exist.
    """
    # UTF-8 is set explicitly so that non-ASCII transcripts or labels do not
    # depend on (or fail under) the platform's locale encoding.
    with open("./stt_result.txt", "a+", encoding="utf-8") as report:
        report.write("-" * 30 + "\n")
        report.write(desc + "\n")
        for i, transcript in enumerate(stt_result):
            # Swap only the file extension. A plain str.replace(".wav", ".txt")
            # would also rewrite ".wav" inside directory names and would miss
            # extensions spelled in a different case (e.g. ".WAV").
            label_file = Path(audio_path[i]).with_suffix(".txt")
            with open(label_file, "r", encoding="utf-8") as reader:
                label = reader.read()
            report.write("Label: {}\n".format(label))
            report.write("Result: {}\n".format(transcript))


def evaluate(model, loaded_audio, audio_path, audio_type='malay', isRnnt=False, with_lm=False, lm=None, writeToFile=False):
    """Transcribe a batch of audio with several decoders and report the results.

    Runs the model's greedy decoder and beam decoder, prints both result
    lists, and optionally appends them to the results file. When ``with_lm``
    is true and ``lm`` is given, a language-model-assisted decode is run,
    printed and (optionally) written as well.

    Args:
        model: Loaded speech-to-text model exposing ``greedy_decoder``,
            ``beam_decoder`` and a ``__model__`` name attribute.
        loaded_audio: Loaded audio clips passed to the decoders.
        audio_path: Paths of the clips, index-aligned with ``loaded_audio``;
            only used when writing to file.
        audio_type: Short dataset tag used in the printed/written headings.
        isRnnt: Forwarded to ``evaluate_with_LM`` to select the LM decoding path.
        with_lm: Whether to run the language-model decode.
        lm: Language model handed to ``evaluate_with_LM``; the LM step is
            skipped when this is ``None``.
        writeToFile: Whether to append every result list via ``write_to_file``.
    """
    # Greedy decoding.
    greedy_out = model.greedy_decoder(loaded_audio)
    run_tag = (model.__model__, audio_type)
    print_result(greedy_out, '{} GREEDY on {}: '.format(*run_tag))

    # Beam-search decoding.
    beam_out = model.beam_decoder(loaded_audio)
    print_result(beam_out, '{} BEAM on {}: '.format(*run_tag))

    if writeToFile:
        write_to_file(greedy_out, audio_path, desc="{} GREEDY on {}".format(*run_tag))
        write_to_file(beam_out, audio_path, desc="{} BEAM on {}".format(*run_tag))

    # Language-model decoding only happens when requested AND a model is supplied.
    if not with_lm or lm is None:
        return

    lm_out = evaluate_with_LM(model, loaded_audio, lm, isRnnt)
    lm_heading = "{} with LM {} on {}".format(model.__model__, lm, audio_type)
    print_result(lm_out, lm_heading)

    if writeToFile:
        write_to_file(lm_out, audio_path, desc=lm_heading)


def print_result(result_list, desc):
    """Print a dashed separator, the heading ``desc``, then each result numbered from 1."""
    separator = "-" * 30
    print(separator)
    print(desc)
    for position, text in enumerate(result_list, start=1):
        print('Audio file {}: {}'.format(position, text))
    

def evaluate_with_LM(model, loaded_audio, lm, isRnnt=False):
    """Decode a batch of audio with the help of a KenLM language model.

    Args:
        model: Loaded speech-to-text model. Needs ``beam_decoder_lm`` when
            ``isRnnt`` is true, otherwise ``predict_logits``.
        loaded_audio: Loaded audio clips to transcribe.
        lm: Value passed straight to ``kenlm.Model`` to load the language model.
        isRnnt: Selects the decoding path (see below).

    Returns:
        For ``isRnnt=True``, whatever ``model.beam_decoder_lm`` returns.
        Otherwise a list with the text of the first beam for each clip.
    """
    scorer = kenlm.Model(lm)

    if isRnnt:
        # Transducer path: wrap the KenLM model for pyctcdecode and let the
        # model's own LM-aware beam decoder do the work.
        wrapped_lm = LanguageModel(scorer, alpha=0.01, beta=0.5)
        return model.beam_decoder_lm(loaded_audio, wrapped_lm)

    # CTC path: decode the raw logits with a pyctcdecode beam decoder.
    logits = model.predict_logits(loaded_audio)
    # '_' is appended after the vocabulary, so its index is len(CTC_VOCAB);
    # that index is the one passed as ctc_token_idx.
    ctc_decoder = build_ctcdecoder(
        CTC_VOCAB + ['_'],
        scorer,
        alpha=0.2,
        beta=1.0,
        ctc_token_idx=len(CTC_VOCAB),
    )

    transcripts = []
    for idx in range(len(loaded_audio)):
        beams = ctc_decoder.decode_beams(logits[idx], prune_history=True)
        # Only the text of the first beam is kept; the other fields are unused.
        best_text, _lm_state, _timesteps, _logit_score, _lm_score = beams[0]
        transcripts.append(best_text)
    return transcripts

#### Load audio samples

In [5]:
# Root folder that holds one sub-folder of .wav clips (plus matching .txt labels)
# per evaluation set. Set the MALAYA_EVAL_DATA_DIR environment variable to run
# the notebook on another machine; the default is the original local folder.
DATASET_ROOT = Path(os.environ.get("MALAYA_EVAL_DATA_DIR", r"D:\Datasonic\Coding\voice\_Dataset"))

# Each call returns (list of clip paths, list of loaded clips).
malay_syn_path, malay_syn_audio = load_audio_list(str(DATASET_ROOT / "malay_synthetic"))
malay_youtube_path, malay_youtube_audio = load_audio_list(str(DATASET_ROOT / "malay_youtube"))
eng_syn_path, eng_syn_audio = load_audio_list(str(DATASET_ROOT / "eng_synthetic"))

### Language Model: https://malaya-speech.readthedocs.io/en/latest/ctc-language-model.html

In [6]:
# List the language models malaya-speech offers (rendered as the table below).
malaya_speech.stt.available_language_model()

Unnamed: 0,Size (MB),LM order,Description,Command
bahasa,17.0,3,Gathered from malaya-speech ASR bahasa transcript,[./lmplz --text text.txt --arpa out.arpa -o 3 ...
bahasa-news,24.0,3,Gathered from malaya-speech bahasa ASR transcr...,[./lmplz --text text.txt --arpa out.arpa -o 3 ...
bahasa-combined,29.0,3,Gathered from malaya-speech ASR bahasa transcr...,[./lmplz --text text.txt --arpa out.arpa -o 3 ...
redape-community,887.1,4,Mirror for https://github.com/redapesolutions/...,[./lmplz --text text.txt --arpa out.arpa -o 4 ...
dump-combined,310.0,3,Academia + News + IIUM + Parliament + Watpadd ...,[./lmplz --text text.txt --arpa out.arpa -o 3 ...
manglish,202.0,3,Manglish News + Manglish Reddit + Manglish for...,[./lmplz --text text.txt --arpa out.arpa -o 3 ...
bahasa-manglish-combined,608.0,3,Combined `dump-combined` and `manglish`.,[./lmplz --text text.txt --arpa out.arpa -o 3 ...


In [7]:
# Numbered menu (starting at 1) of the language-model names, in the same order
# as the table printed by malaya_speech.stt.available_language_model().
lm_dict = dict(enumerate(
    [
        "bahasa",
        "bahasa-news",
        "bahasa-combined",
        "redape-community",
        "dump-combined",
        "manglish",
        "bahasa-manglish-combined",
    ],
    start=1,
))

# Pick the language model by its menu number (5 -> "dump-combined").
chosen_lm = lm_dict[5]
# The printed results further down show `lm` formatted as a local path ending in
# `model.trie.klm`, so this call presumably fetches/caches the model and returns
# its path — TODO confirm against the malaya-speech docs.
lm = malaya_speech.stt.language_model(model = chosen_lm)

### RNN-T models: https://malaya-speech.readthedocs.io/en/latest/load-stt-transducer-model.html#List-available-RNNT-model

In [8]:
# List the available RNN-T (transducer) models (rendered as the table below).
malaya_speech.stt.available_transducer()

Unnamed: 0,Size (MB),Quantized Size (MB),WER,CER,WER-LM,CER-LM,Language
tiny-conformer,24.4,9.14,0.212811,0.081369,0.199683,0.077004,[malay]
small-conformer,49.2,18.1,0.198533,0.074495,0.185361,0.071143,[malay]
conformer,125.0,37.1,0.163602,0.058744,0.156182,0.05719,[malay]
large-conformer,404.0,107.0,0.156684,0.061971,0.148622,0.05901,[malay]
conformer-stack-2mixed,130.0,38.5,0.137672,0.071751,,,"[malay, singlish]"
conformer-stack-3mixed,130.0,38.5,0.277588,0.163121,,,"[malay, singlish, mandarin]"
small-conformer-singlish,49.2,18.1,0.12771,0.070395,,,[singlish]
conformer-singlish,125.0,37.1,0.096339,0.054553,,,[singlish]
large-conformer-singlish,404.0,107.0,0.083952,0.044562,,,[singlish]


In [9]:
# Numbered menu (starting at 1) of the RNN-T model names, in the same order as
# the table printed by malaya_speech.stt.available_transducer().
rnnt_dict = dict(enumerate(
    [
        "tiny-conformer",
        "small-conformer",
        "conformer",
        "large-conformer",
        "conformer-stack-2mixed",
        "conformer-stack-3mixed",
        "small-conformer-singlish",
        "conformer-singlish",
        "large-conformer-singlish",
    ],
    start=1,
))

#### tiny-conformer [malay]

In [12]:
# tiny-conformer (RNN-T): greedy, beam and beam + LM on both Malay sets.
model = malaya_speech.stt.deep_transducer(model=rnnt_dict[1], device='GPU:0')
for clips, paths, tag in (
    (malay_syn_audio, malay_syn_path, 'malay_syn'),
    (malay_youtube_audio, malay_youtube_path, 'malay_youtube'),
):
    evaluate(model, clips, paths, tag, isRnnt=True, with_lm=True, lm=lm, writeToFile=True)

------------------------------
tiny-conformer GREEDY on malay_syn: 
Audio file 1: datuk roslin bin yusof iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projek austombekalan kad pengenalan 2020 di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamera
------------------------------
tiny-conformer BEAM on malay_syn: 
Audio file 1: datuk roslin bin yusof iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projek austombekalan kad pengenalan 2020 di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamera
------------------------------
tiny-conformer with LM C:\Users\shuling\Malaya-Speech\language-model\dump-combined\model.trie.klm on malay_syn
Audio file 1: datuk roslin bin yusof iaitu ketua pengarah pendaftaran negara telah disahka

#### small-conformer [malay]

In [10]:
# small-conformer (RNN-T): greedy, beam and beam + LM on both Malay sets.
model = malaya_speech.stt.deep_transducer(model=rnnt_dict[2], device='GPU:0')
for clips, paths, tag in (
    (malay_syn_audio, malay_syn_path, 'malay_syn'),
    (malay_youtube_audio, malay_youtube_path, 'malay_youtube'),
):
    evaluate(model, clips, paths, tag, isRnnt=True, with_lm=True, lm=lm, writeToFile=True)

------------------------------
small-conformer GREEDY on malay_syn: 
Audio file 1: datuk ruslin bin jusoh iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: project of pembekalan kad pengenalan 2022 di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamera
------------------------------
small-conformer BEAM on malay_syn: 
Audio file 1: datuk ruslin bin jusoh iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projek tiket pembekalan kad pengenalan 2022 di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamera
------------------------------
small-conformer with LM C:\Users\shuling\Malaya-Speech\language-model\dump-combined\model.trie.klm on malay_syn
Audio file 1: datuk ruslin bin jusoh iaitu ketua pengarah pendaftaran negara telah 

#### conformer [malay]

In [10]:
# conformer (RNN-T): greedy, beam and beam + LM on both Malay sets.
model = malaya_speech.stt.deep_transducer(model=rnnt_dict[3], device='GPU:0')
for clips, paths, tag in (
    (malay_syn_audio, malay_syn_path, 'malay_syn'),
    (malay_youtube_audio, malay_youtube_path, 'malay_youtube'),
):
    evaluate(model, clips, paths, tag, isRnnt=True, with_lm=True, lm=lm, writeToFile=True)

------------------------------
conformer GREEDY on malay_syn: 
Audio file 1: datuk ruslin bin jusoh iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projek kilo pembekalan kad pengenalan dua ribu dua puluh dua di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamera
------------------------------
conformer BEAM on malay_syn: 
Audio file 1: datuk ruslin bin jusoh iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projek kek open bekalan kad pengenalan dua ribu dua puluh dua di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamera
------------------------------
conformer with LM C:\Users\shuling\Malaya-Speech\language-model\dump-combined\model.trie.klm on malay_syn
Audio file 1: datuk ruslin bin jusoh iaitu ketua pengarah pendaf

#### large-conformer [malay]

In [10]:
# large-conformer (RNN-T): greedy, beam and beam + LM on both Malay sets.
model = malaya_speech.stt.deep_transducer(model=rnnt_dict[4], device='GPU:0')
for clips, paths, tag in (
    (malay_syn_audio, malay_syn_path, 'malay_syn'),
    (malay_youtube_audio, malay_youtube_path, 'malay_youtube'),
):
    evaluate(model, clips, paths, tag, isRnnt=True, with_lm=True, lm=lm, writeToFile=True)

------------------------------
large-conformer GREEDY on malay_syn: 
Audio file 1: datuk roslin bin jusoh iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projek of pembekalan kad pengenalan dua ribu dua puluh dua di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamera
------------------------------
large-conformer BEAM on malay_syn: 
Audio file 1: datuk roslin bin jusoh iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projek of pembekalan kad pengenalan dua ribu dua puluh dua di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamera
------------------------------
large-conformer with LM C:\Users\shuling\Malaya-Speech\language-model\dump-combined\model.trie.klm on malay_syn
Audio file 1: datuk roslin bin jusoh iaitu ketua pe

#### conformer-stack-2mixed [malay, singlish]

In [21]:
# conformer-stack-2mixed (RNN-T): all three sets; the LM decode is enabled
# for the synthetic Malay set only.
model = malaya_speech.stt.deep_transducer(model=rnnt_dict[5], device='GPU:0')
for clips, paths, tag, use_lm in (
    (malay_syn_audio, malay_syn_path, 'malay_syn', True),
    (malay_youtube_audio, malay_youtube_path, 'malay_youtube', False),
    (eng_syn_audio, eng_syn_path, 'eng_syn', False),
):
    evaluate(model, clips, paths, tag, isRnnt=True, with_lm=use_lm, lm=lm, writeToFile=True)

------------------------------
conformer-stack-2mixed GREEDY on malay_syn: 
Audio file 1: dato ruslin bin jusoh iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projek kick of pembekalan kad pengenalan 202 di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamera
------------------------------
conformer-stack-2mixed BEAM on malay_syn: 
Audio file 1: dato ruslin bin jusoh iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projek of pembekalan kad pengenalan 202 di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamera
------------------------------
conformer-stack-2mixed with LM C:\Users\shuling\Malaya-Speech\language-model\dump-combined\model.trie.klm on malay_syn
Audio file 1: dato ruslin bin jusoh iaitu ketua pengarah pendafta

#### conformer-stack-3mixed [malay, singlish, mandarin]

In [11]:
# conformer-stack-3mixed (RNN-T): all three sets; the LM decode is enabled
# for the synthetic Malay set only.
model = malaya_speech.stt.deep_transducer(model=rnnt_dict[6], device='GPU:0')
for clips, paths, tag, use_lm in (
    (malay_syn_audio, malay_syn_path, 'malay_syn', True),
    (malay_youtube_audio, malay_youtube_path, 'malay_youtube', False),
    (eng_syn_audio, eng_syn_path, 'eng_syn', False),
):
    evaluate(model, clips, paths, tag, isRnnt=True, with_lm=use_lm, lm=lm, writeToFile=True)

------------------------------
conformer-stack-3mixed GREEDY on malay_syn: 
Audio file 1: datuk ruslin bintulu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pemersahan sedang dijalankan
Audio file 4: projek off bekalan pengenalan 2020 negara telah berjaya
Audio file 5: sila berdiri di hadapan kamera
------------------------------
conformer-stack-3mixed BEAM on malay_syn: 
Audio file 1: datuk ruslin bin jusoh iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pemersahan sedang dijalankan
Audio file 4: projek kos pembekalan202 dijabatkan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamera
------------------------------
conformer-stack-3mixed with LM C:\Users\shuling\Malaya-Speech\language-model\dump-combined\model.trie.klm on malay_syn
Audio file 1: datuk ruslin bin jusoh iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan mu

#### small-conformer-singlish

In [10]:
# small-conformer-singlish (RNN-T): English synthetic set only; the Malay-set
# evaluations are disabled for this model.
model = malaya_speech.stt.deep_transducer(model=rnnt_dict[7], device='GPU:0')
evaluate(
    model,
    eng_syn_audio,
    eng_syn_path,
    'eng_syn',
    isRnnt=True,
    with_lm=True,
    lm=lm,
    writeToFile=True,
)

------------------------------
small-conformer-singlish GREEDY on eng_syn: 
Audio file 1: continue speech to tax than displaying the resolut
Audio file 2: can worry speech to text and this playing the result
------------------------------
small-conformer-singlish BEAM on eng_syn: 
Audio file 1: converting speech to tax than displaying the resolut
Audio file 2: converting speech to text and displaying the result
------------------------------
small-conformer-singlish with LM C:\Users\shuling\Malaya-Speech\language-model\dump-combined\model.trie.klm on eng_syn
Audio file 1: continue speech to tax than displaying the resort
Audio file 2: converting speech to text and displaying the result


#### conformer-singlish

In [10]:
# conformer-singlish (RNN-T): English synthetic set only; the Malay-set
# evaluations are disabled for this model.
model = malaya_speech.stt.deep_transducer(model=rnnt_dict[8], device='GPU:0')
evaluate(
    model,
    eng_syn_audio,
    eng_syn_path,
    'eng_syn',
    isRnnt=True,
    with_lm=True,
    lm=lm,
    writeToFile=True,
)

------------------------------
conformer-singlish GREEDY on eng_syn: 
Audio file 1: converting speech to tax and displaying the result
Audio file 2: can wording speech to tax and displaying the result
------------------------------
conformer-singlish BEAM on eng_syn: 
Audio file 1: converting speech to tax and displaying the result
Audio file 2: can wording speech to tax and displaying the result
------------------------------
conformer-singlish with LM C:\Users\shuling\Malaya-Speech\language-model\dump-combined\model.trie.klm on eng_syn
Audio file 1: converting speech to tax and displaying the result
Audio file 2: can wording speech to tax and displaying the result


#### large-conformer-singlish

In [10]:
# large-conformer-singlish (RNN-T): English synthetic set only; the Malay-set
# evaluations are disabled for this model.
model = malaya_speech.stt.deep_transducer(model=rnnt_dict[9], device='GPU:0')
evaluate(
    model,
    eng_syn_audio,
    eng_syn_path,
    'eng_syn',
    isRnnt=True,
    with_lm=True,
    lm=lm,
    writeToFile=True,
)

------------------------------
large-conformer-singlish GREEDY on eng_syn: 
Audio file 1: converding speech to text and displaying the result
Audio file 2: involving speech to text and just flying the result
------------------------------
large-conformer-singlish BEAM on eng_syn: 
Audio file 1: conguding speech to text and displaying the result
Audio file 2: involving speech to text and just flying the result
------------------------------
large-conformer-singlish with LM C:\Users\shuling\Malaya-Speech\language-model\dump-combined\model.trie.klm on eng_syn
Audio file 1: conguding speech to tax and displaying the result
Audio file 2: involving speech to text and just flying the result


### CTC models: https://malaya-speech.readthedocs.io/en/latest/load-stt-ctc-model.html

In [8]:
# List the available CTC models (rendered as the table below).
malaya_speech.stt.available_ctc()

Unnamed: 0,Size (MB),Quantized Size (MB),WER,CER,WER-LM,CER-LM,Language
hubert-conformer-tiny,36.6,10.3,0.335968,0.088257,0.199227,0.063522,[malay]
hubert-conformer,115.0,31.1,0.238714,0.0609,0.141479,0.045075,[malay]
hubert-conformer-large,392.0,100.0,0.220314,0.054927,0.128006,0.038533,[malay]
hubert-conformer-large-3mixed,392.0,100.0,0.241126,0.078794,0.132761,0.057482,"[malay, singlish, mandarin]"
best-rq-conformer-tiny,36.6,10.3,0.319291,0.078988,0.179582,0.055521,[malay]
best-rq-conformer,115.0,31.1,0.253678,0.065805,0.154206,0.048228,[malay]
best-rq-conformer-large,392.0,100.0,0.234651,0.06016,0.130082,0.044521,[malay]


In [8]:
# Numbered menu (starting at 1) of the CTC model names, in the same order as
# the table printed by malaya_speech.stt.available_ctc().
ctc_dict = dict(enumerate(
    [
        "hubert-conformer-tiny",
        "hubert-conformer",
        "hubert-conformer-large",
        "hubert-conformer-large-3mixed",
        "best-rq-conformer-tiny",
        "best-rq-conformer",
        "best-rq-conformer-large",
    ],
    start=1,
))

#### hubert-conformer-tiny [malay]

In [9]:
# hubert-conformer-tiny (CTC): greedy, beam and beam + LM on both Malay sets;
# the eng_syn evaluation is disabled for this model.
model = malaya_speech.stt.deep_ctc(model=ctc_dict[1])
for clips, paths, tag in (
    (malay_syn_audio, malay_syn_path, 'malay_syn'),
    (malay_youtube_audio, malay_youtube_path, 'malay_youtube'),
):
    evaluate(model, clips, paths, tag, isRnnt=False, with_lm=True, lm=lm, writeToFile=True)

Instructions for updating:
Use `tf.cast` instead.
------------------------------
hubert-conformer-tiny GREEDY on malay_syn: 
Audio file 1: datoruslin bin jusa iaitu ketua pengarah pendaftara negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projekti koferm berkalan kapengenalan 202 lu dua dijabatan pendaftaran negara atalah berjaya
Audio file 5: sila berdiri di hadapan kamera
------------------------------
hubert-conformer-tiny BEAM on malay_syn: 
Audio file 1: datoruslin bin jusah iaitu ketua pengarah pendaftara negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projekti koferm berkalan ka pengenalan 202 luh dua dijabatan pendaftaran negara atalah berjaya
Audio file 5: sila berdiri di hadapan kamera
------------------------------
hubert-conformer-tiny with LM C:\Users\shuling\Malaya-Speech\language-model\dump-combined\model.trie.klm on malay_syn
Audio fi

#### hubert-conformer [malay]

In [9]:
# hubert-conformer (CTC): greedy, beam and beam + LM on both Malay sets;
# the eng_syn evaluation is disabled for this model.
model = malaya_speech.stt.deep_ctc(model=ctc_dict[2])
for clips, paths, tag in (
    (malay_syn_audio, malay_syn_path, 'malay_syn'),
    (malay_youtube_audio, malay_youtube_path, 'malay_youtube'),
):
    evaluate(model, clips, paths, tag, isRnnt=False, with_lm=True, lm=lm, writeToFile=True)

Instructions for updating:
Use `tf.cast` instead.
------------------------------
hubert-conformer GREEDY on malay_syn: 
Audio file 1: datuk ruslim bin jusoh iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projek ki opom bekalan kan pengenalan dua ribu dua puluh dua di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamerah
------------------------------
hubert-conformer BEAM on malay_syn: 
Audio file 1: datuk ruslim bin jusoh iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projek ki of pom bekalan kan pengenalan dua ribu dua puluh dua di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamerah
------------------------------
hubert-conformer with LM C:\Users\shuling\Malaya-Speech\language-model\dump-combined\model.trie.klm on 

#### hubert-conformer-large [malay]

In [10]:
# hubert-conformer-large (CTC): greedy, beam and beam + LM on both Malay sets;
# the eng_syn evaluation is disabled for this model.
model = malaya_speech.stt.deep_ctc(model=ctc_dict[3])
for clips, paths, tag in (
    (malay_syn_audio, malay_syn_path, 'malay_syn'),
    (malay_youtube_audio, malay_youtube_path, 'malay_youtube'),
):
    evaluate(model, clips, paths, tag, isRnnt=False, with_lm=True, lm=lm, writeToFile=True)

------------------------------
hubert-conformer-large GREEDY on malay_syn: 
Audio file 1: datu ruslin bin jusof iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projek tik opem bekalankad pengenalan 202 di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamerah
------------------------------
hubert-conformer-large BEAM on malay_syn: 
Audio file 1: datu ruslin bin jusof iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projek tik opem bekalankad pengenalan 202 di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamerah
------------------------------
hubert-conformer-large with LM C:\Users\shuling\Malaya-Speech\language-model\dump-combined\model.trie.klm on malay_syn
Audio file 1: datuk ruslin bin jusof iaitu ketua pengarah pendaf

#### hubert-conformer-large-3mixed [malay, singlish, mandarin]

In [9]:
# hubert-conformer-large-3mixed (CTC): greedy, beam and beam + LM on all three sets.
model = malaya_speech.stt.deep_ctc(model=ctc_dict[4])
for clips, paths, tag in (
    (malay_syn_audio, malay_syn_path, 'malay_syn'),
    (malay_youtube_audio, malay_youtube_path, 'malay_youtube'),
    (eng_syn_audio, eng_syn_path, 'eng_syn'),
):
    evaluate(model, clips, paths, tag, isRnnt=False, with_lm=True, lm=lm, writeToFile=True)

Instructions for updating:
Use `tf.cast` instead.
------------------------------
hubert-conformer-large-3mixed GREEDY on malay_syn: 
Audio file 1: datuk ruslin bin jusoh iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projek kikofpem berkalankat pengenalan 202 di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamerah
------------------------------
hubert-conformer-large-3mixed BEAM on malay_syn: 
Audio file 1: datuk ruslin bin jusoh iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projek kik ofpem berkalan kat pengenalan 202 di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamerah
------------------------------
hubert-conformer-large-3mixed with LM C:\Users\shuling\Malaya-Speech\language-model\dump-combined\model.trie.klm 

#### best-rq-conformer-tiny [malay]

In [10]:
# best-rq-conformer-tiny (CTC): greedy, beam and beam + LM on both Malay sets;
# the eng_syn evaluation is disabled for this model.
model = malaya_speech.stt.deep_ctc(model=ctc_dict[5])
for clips, paths, tag in (
    (malay_syn_audio, malay_syn_path, 'malay_syn'),
    (malay_youtube_audio, malay_youtube_path, 'malay_youtube'),
):
    evaluate(model, clips, paths, tag, isRnnt=False, with_lm=True, lm=lm, writeToFile=True)

------------------------------
best-rq-conformer-tiny GREEDY on malay_syn: 
Audio file 1: dato ruslim bin jusof iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projekik opem bekalan ka pengenalan 202u2 di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan ka merah
------------------------------
best-rq-conformer-tiny BEAM on malay_syn: 
Audio file 1: dato ruslim bin jusof iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projek kik opem bekalan ka pengenalan 202u2 di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan ka merah
------------------------------
best-rq-conformer-tiny with LM C:\Users\shuling\Malaya-Speech\language-model\dump-combined\model.trie.klm on malay_syn
Audio file 1: dato ruslan bin jusof iaitu ketua pengarah pen

#### best-rq-conformer [malay]

In [11]:
# best-rq-conformer (CTC): greedy, beam and beam + LM on both Malay sets;
# the eng_syn evaluation is disabled for this model.
model = malaya_speech.stt.deep_ctc(model=ctc_dict[6])
for clips, paths, tag in (
    (malay_syn_audio, malay_syn_path, 'malay_syn'),
    (malay_youtube_audio, malay_youtube_path, 'malay_youtube'),
):
    evaluate(model, clips, paths, tag, isRnnt=False, with_lm=True, lm=lm, writeToFile=True)

------------------------------
best-rq-conformer GREEDY on malay_syn: 
Audio file 1: datuk ruslin bin jusof iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projekiofom bekalan kan pengenalan 2a0u2 ud di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamerah
------------------------------
best-rq-conformer BEAM on malay_syn: 
Audio file 1: datuk ruslin bin jusof iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projekiofom bekalan kan pengenalan 2a0u2 d di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamerah
------------------------------
best-rq-conformer with LM C:\Users\shuling\Malaya-Speech\language-model\dump-combined\model.trie.klm on malay_syn
Audio file 1: datuk ruslin bin yusof iaitu ketua pengarah pendaftaran nega

#### best-rq-conformer-large [malay]

In [12]:
# best-rq-conformer-large (CTC): greedy, beam and beam + LM on both Malay sets;
# the eng_syn evaluation is disabled for this model.
model = malaya_speech.stt.deep_ctc(model=ctc_dict[7])
for clips, paths, tag in (
    (malay_syn_audio, malay_syn_path, 'malay_syn'),
    (malay_youtube_audio, malay_youtube_path, 'malay_youtube'),
):
    evaluate(model, clips, paths, tag, isRnnt=False, with_lm=True, lm=lm, writeToFile=True)

------------------------------
best-rq-conformer-large GREEDY on malay_syn: 
Audio file 1: datuk ruslim bin jusof iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projek kif oftom bekalan kan pengenalan dua ribu dua puluh dua di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamerah
------------------------------
best-rq-conformer-large BEAM on malay_syn: 
Audio file 1: datuk ruslim bin jusof iaitu ketua pengarah pendaftaran negara telah disahkan
Audio file 2: pengesahan muka berjaya
Audio file 3: pengesahan sedang dijalankan
Audio file 4: projek kif oftom bekalan kan pengenalan dua ribu dua puluh dua di jabatan pendaftaran negara telah berjaya
Audio file 5: sila berdiri di hadapan kamerah
------------------------------
best-rq-conformer-large with LM C:\Users\shuling\Malaya-Speech\language-model\dump-combined\model.trie.klm on malay_syn
Audio file 1: dat

In [13]:
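# TODO: predict alignments.
# NOTE: `tiny_conformer_malay` and `malay1` are not defined anywhere in this notebook;
# substitute a loaded model and a single loaded clip before enabling the line below.
# tiny_conformer_malay.predict_alignment(malay1, combined=False)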

In [14]:
# TODO: transcribe long audio (> 20 s): https://malaya-speech.readthedocs.io/en/latest/transcribe-long-audio.html