In [2]:
import fnmatch
import json
import math
import os
from os import environ, path, walk

# Wildcard imports come first in this group so the explicit import after them
# cannot be shadowed by a name they export.
from pocketsphinx import *
from sphinxbase import *
from jiwer import wer

relative_path = "../../td_corpus_digits"
# Project root: the parent of the directory the notebook runs from, kept with a
# trailing separator because the rest of the notebook concatenates sub-paths
# directly onto `pwd`. Derived with os.path.dirname instead of chopping
# len("1LM") characters off the cwd, which only worked when the notebook
# folder name was exactly three characters long.
pwd = os.path.dirname(os.getcwd()) + os.sep
# Shell command for scoring a .ref/.hyp pair (not executed by this notebook):
#wer -i SNR35dB_Manrulen.ref SNR35dB_Manrulen.hyp > SNR35dB_Manrulen.res

In [53]:
def find_files(filename, search_path):
    """Recursively collect files under ``search_path`` whose name contains ``filename``.

    The original implementation ignored ``filename`` and always matched the
    hard-coded substring ".raw"; the parameter is now honoured, so existing
    calls that pass ".raw" behave exactly as before.

    Args:
        filename: Substring that a file name must contain to be selected
            (e.g. ".raw").
        search_path: Directory that is walked top-down with ``os.walk``.

    Returns:
        list[str]: ``os.path.join(root, name)`` for every matching file, in
        the order ``os.walk`` yields them. Empty when nothing matches.
    """
    matches = []

    # Walking top-down from the root
    for root, _dirs, files in os.walk(search_path):
        matches.extend(
            os.path.join(root, name) for name in files if filename in name
        )
    return matches

def calculate_CI(wer, noSen):
    """Return the half-width of a normal-approximation confidence interval.

    Computes ``1.960 * sqrt(wer * (1 - wer) / noSen)``; 1.960 is the standard
    normal quantile for a two-sided 95% interval.

    Args:
        wer: Error rate expressed as a fraction (the formula treats it as a
            proportion).
        noSen: Sample size used as the denominator (presumably the number of
            scored sentences -- confirm against the caller).

    Returns:
        float: The interval half-width, in the same unit as ``wer``.
    """
    z_score = 1.960
    spread = (wer * (1 - wer)) / noSen
    return z_score * math.sqrt(spread)


def create_decoder_ngram(dic, grammar):
    """Build a decoder that uses the n-gram language model.

    Args:
        dic: Lexicon file name, resolved under ``ps_data/lex/``.
        grammar: Accepted for signature parity with the grammar-based
            factory; this function does not use it.

    Returns:
        A ``Decoder`` configured with the en-us acoustic model, the given
        dictionary and the ``en-us.lm.bin`` language model.
    """
    settings = Decoder.default_config()
    options = (
        ('-hmm', pwd + 'ps_data/model/en-us'),     # acoustic model
        ('-dict', pwd + 'ps_data/lex/' + dic),     # lexicon / dictionary
        ('-lm', pwd + 'ps_data/lm/en-us.lm.bin'),  # language model
    )
    for flag, value in options:
        settings.set_string(flag, value)
    return Decoder(settings)

def create_decoder_digits(dic, grammar, rule):
    """Build a decoder restricted to one rule of a JSGF digits grammar.

    Args:
        dic: Lexicon file name, resolved under ``ps_data/lex/``.
        grammar: JSGF grammar file name, resolved under ``ps_data/jsgf/``.
        rule: Rule name inside the grammar; looked up as ``'digits.' + rule``.

    Returns:
        A ``Decoder`` whose active search is the compiled grammar rule.

    Side effects:
        Writes the compiled grammar to ``'../outputs/' + grammar``.
    """
    settings = Decoder.default_config()
    settings.set_string('-hmm', pwd + 'ps_data/model/en-us')  # acoustic model
    settings.set_string('-dict', pwd + 'ps_data/lex/' + dic)  # lexicon / dictionary
    digit_decoder = Decoder(settings)

    # Replace the default language model with a grammar compiled from JSGF.
    grammar_set = Jsgf(pwd + 'ps_data/jsgf/' + grammar)
    chosen_rule = grammar_set.get_rule('digits.' + rule)
    # 7.5 is build_fsg's third argument (presumably the language weight -- confirm).
    compiled = grammar_set.build_fsg(chosen_rule, digit_decoder.get_logmath(), 7.5)

    # The same string is both the output file path and the name the search is
    # registered under in the decoder.
    compiled_path = '../outputs/' + grammar
    compiled.writefile(compiled_path)

    digit_decoder.set_fsg(compiled_path, compiled)
    digit_decoder.set_search(compiled_path)
    return digit_decoder

In [4]:
def rundecoder(file_path, decoder):
    """Decode one raw audio file and score it against its reference transcript.

    The reference is read from the file that shares ``file_path``'s name but
    carries the ``.ref`` extension; only its first line is used, both as the
    reported transcript and as the WER reference.

    Args:
        file_path: Path of the audio file whose bytes are handed to
            ``decoder.process_raw``.
        decoder: Decoder object exposing ``start_utt``, ``process_raw``,
            ``end_utt``, ``hyp`` and ``get_logmath``.

    Returns:
        dict with the keys ``fileName`` (base name of ``file_path``),
        ``prediction`` (hypothesis string, ``''`` when the decoder produced
        none), ``actual`` (first reference line without its newline),
        ``confidence`` (``exp`` of the hypothesis log-probability, ``0.0``
        when there is no hypothesis) and ``wer``.

    Raises:
        ValueError: If the audio file or the reference file is empty.
    """
    # Read the audio before starting the utterance so that an I/O failure does
    # not leave the decoder stuck mid-utterance; `with` closes the handle.
    with open(file_path, 'rb') as stream:
        uttbuf = stream.read()
    if not uttbuf:
        # Raise instead of calling exit(): a helper should not shut down the
        # interpreter / notebook kernel.
        raise ValueError("Error reading speech data: {} is empty".format(file_path))

    decoder.start_utt()
    decoder.process_raw(uttbuf, False, True)
    decoder.end_utt()

    # Query the hypothesis once; it is None when nothing was recognised, and in
    # that case there is no probability to read either.
    hypothesis = decoder.hyp()
    if hypothesis is None:
        best_hypothesis = ''
        confidence = 0.0
    else:
        best_hypothesis = hypothesis.hypstr
        confidence = decoder.get_logmath().exp(hypothesis.prob)

    # Reading the reference transcript for the WER computation.
    ref_path = os.path.splitext(file_path)[0] + ".ref"
    with open(ref_path) as f:
        ref_lines = f.readlines()
    if not ref_lines:
        raise ValueError("Reference file {} is empty".format(ref_path))
    actual = ref_lines[0].replace("\n", "")

    # Score against the same single line that is reported as "actual".
    wordErrorRate = wer(actual, best_hypothesis)

    return {
        "fileName": os.path.basename(file_path),
        "prediction": best_hypothesis,
        "actual": actual,
        "confidence": confidence,
        "wer": wordErrorRate,
    }

In [47]:
def processWav(waves, rule, group=""):
    """Decode ``waves`` with one digits-grammar rule and dump the transcripts.

    Args:
        waves: Iterable of audio file paths passed one by one to ``rundecoder``.
        rule: Grammar rule name handed to ``create_decoder_digits``; also part
            of the output file names.
        group: Prefix for the output file names.

    Side effects:
        Writes ``group + rule + '.hyp'`` (predictions) and
        ``group + rule + '.ref'`` (reference transcripts), one line per decoded
        file, in the order of ``waves``. Returns nothing.
    """
    digit_decoder = create_decoder_digits("digits.dict", "digits.jsgf", rule)
    decoded = [rundecoder(wave_path, digit_decoder) for wave_path in waves]

    out_stem = group + rule
    # First the hypotheses, then the references -- same layout for both dumps.
    for suffix, field in (('.hyp', "prediction"), ('.ref', "actual")):
        with open(out_stem + suffix, 'w') as out:
            out.writelines(entry[field] + "\n" for entry in decoded)

In [48]:
# Collect the .raw recordings of the three SNR35dB/man subsets
# (1-, 3- and 5-digit sequences), one list per subset.
wavFiles1, wavFiles3, wavFiles5 = [
    find_files(".raw", pwd + subset_dir)
    for subset_dir in (
        "td_corpus_digits/SNR35dB/man/seq1digit_200_files/",
        "td_corpus_digits/SNR35dB/man/seq3digits_100_files/",
        "td_corpus_digits/SNR35dB/man/seq5digits_100_files/",
    )
]
# folder_path is left pointing at the last folder searched.
folder_path = pwd + "td_corpus_digits/SNR35dB/man/seq5digits_100_files/"
#Generic ngram

In [None]:
#ruleN
# Build a new list rather than aliasing wavFiles1: `wavFiles = wavFiles1`
# followed by extend() would grow wavFiles1 itself, so any later use of
# wavFiles1 would also contain the 3- and 5-digit files, and re-running the
# cell would append them again.
wavFiles = wavFiles1 + wavFiles3 + wavFiles5
processWav(wavFiles, "rulen", group="SNR35dB_Man")

In [56]:
# Confidence-interval half-width for the ruleN run; the error rate is entered
# by hand (presumably taken from the external WER scoring -- confirm).
# NOTE(review): 600 is larger than the 200 + 100 + 100 files suggested by the
# corpus folder names -- confirm the sample count.
conf_interval_ruleN = calculate_CI(wer=0.273, noSen=600)
print(conf_interval_ruleN)

0.03564749158075502


In [50]:
# rule1: decode the single-digit recordings with grammar rule "rule1".
processWav(waves=wavFiles1, rule="rule1", group="SNR35dB_Man")

INFO: pocketsphinx.c(151): Parsed model-specific feature parameters from /home/user/Documents/GitHub/ASR/lab_material/Assignment/ps_data/model/en-us/feat.params
Current configuration:
[NAME]			[DEFLT]		[VALUE]
-agc			none		none
-agcthresh		2.0		2.000000e+00
-allphone				
-allphone_ci		yes		yes
-alpha			0.97		9.700000e-01
-ascale			20.0		2.000000e+01
-aw			1		1
-backtrace		no		no
-beam			1e-48		1.000000e-48
-bestpath		yes		yes
-bestpathlw		9.5		9.500000e+00
-ceplen			13		13
-cmn			live		batch
-cmninit		40,3,-1		41.00,-5.29,-0.12,5.09,2.48,-4.07,-1.37,-1.78,-5.08,-2.05,-6.45,-1.42,1.17
-compallsen		no		no
-dict					/home/user/Documents/GitHub/ASR/lab_material/Assignment/ps_data/lex/digits.dict
-dictcase		no		no
-dither			no		no
-doublebw		no		no
-ds			1		1
-fdict					
-feat			1s_c_d_dd	1s_c_d_dd
-featparams				
-fillprob		1e-8		1.000000e-08
-frate			100		100
-fsg					
-fsgusealtpron		yes		yes
-fsgusefiller		yes		yes
-fwdflat		yes		yes
-fwdflatbeam		1e-64		1.000000e-64
-fwdflatefwid		4		4

In [42]:
# Confidence-interval half-width for the rule1 run (hand-entered error rate,
# 200 samples).
conf_interval_rule1 = calculate_CI(wer=0.005, noSen=200)
print(conf_interval_rule1)

0.009775469298197403


In [51]:
# rule3: decode the three-digit recordings with grammar rule "rule3".
processWav(waves=wavFiles3, rule="rule3", group="SNR35dB_Man")

INFO: pocketsphinx.c(151): Parsed model-specific feature parameters from /home/user/Documents/GitHub/ASR/lab_material/Assignment/ps_data/model/en-us/feat.params
Current configuration:
[NAME]			[DEFLT]		[VALUE]
-agc			none		none
-agcthresh		2.0		2.000000e+00
-allphone				
-allphone_ci		yes		yes
-alpha			0.97		9.700000e-01
-ascale			20.0		2.000000e+01
-aw			1		1
-backtrace		no		no
-beam			1e-48		1.000000e-48
-bestpath		yes		yes
-bestpathlw		9.5		9.500000e+00
-ceplen			13		13
-cmn			live		batch
-cmninit		40,3,-1		41.00,-5.29,-0.12,5.09,2.48,-4.07,-1.37,-1.78,-5.08,-2.05,-6.45,-1.42,1.17
-compallsen		no		no
-dict					/home/user/Documents/GitHub/ASR/lab_material/Assignment/ps_data/lex/digits.dict
-dictcase		no		no
-dither			no		no
-doublebw		no		no
-ds			1		1
-fdict					
-feat			1s_c_d_dd	1s_c_d_dd
-featparams				
-fillprob		1e-8		1.000000e-08
-frate			100		100
-fsg					
-fsgusealtpron		yes		yes
-fsgusefiller		yes		yes
-fwdflat		yes		yes
-fwdflatbeam		1e-64		1.000000e-64
-fwdflatefwid		4		4

In [45]:
# NOTE(review): this cell follows the rule3 run but repeats the rule1 cell
# verbatim (same variable name, error rate and sample count), and its saved
# output does not match what the same code printed for rule1 -- confirm the
# intended rule3 values and re-run.
conf_interval_rule1 = calculate_CI(wer=0.005, noSen=200)
print(conf_interval_rule1)

100

In [52]:
# rule5: decode the five-digit recordings with grammar rule "rule5".
processWav(waves=wavFiles5, rule="rule5", group="SNR35dB_Man")

INFO: pocketsphinx.c(151): Parsed model-specific feature parameters from /home/user/Documents/GitHub/ASR/lab_material/Assignment/ps_data/model/en-us/feat.params
Current configuration:
[NAME]			[DEFLT]		[VALUE]
-agc			none		none
-agcthresh		2.0		2.000000e+00
-allphone				
-allphone_ci		yes		yes
-alpha			0.97		9.700000e-01
-ascale			20.0		2.000000e+01
-aw			1		1
-backtrace		no		no
-beam			1e-48		1.000000e-48
-bestpath		yes		yes
-bestpathlw		9.5		9.500000e+00
-ceplen			13		13
-cmn			live		batch
-cmninit		40,3,-1		41.00,-5.29,-0.12,5.09,2.48,-4.07,-1.37,-1.78,-5.08,-2.05,-6.45,-1.42,1.17
-compallsen		no		no
-dict					/home/user/Documents/GitHub/ASR/lab_material/Assignment/ps_data/lex/digits.dict
-dictcase		no		no
-dither			no		no
-doublebw		no		no
-ds			1		1
-fdict					
-feat			1s_c_d_dd	1s_c_d_dd
-featparams				
-fillprob		1e-8		1.000000e-08
-frate			100		100
-fsg					
-fsgusealtpron		yes		yes
-fsgusefiller		yes		yes
-fwdflat		yes		yes
-fwdflatbeam		1e-64		1.000000e-64
-fwdflatefwid		4		4

In [None]:
# NOTE(review): this cell follows the rule5 run but repeats the rule1 cell
# verbatim (same variable name, error rate and sample count) -- confirm the
# intended rule5 values before relying on the result.
conf_interval_rule1 = calculate_CI(wer=0.005, noSen=200)
print(conf_interval_rule1)

In [54]:
def processWavNgram(waves, group=""):
    """Decode ``waves`` with the n-gram decoder and dump the transcripts.

    Args:
        waves: Iterable of audio file paths passed one by one to ``rundecoder``.
        group: Stem of the output file names.

    Side effects:
        Writes ``group + '.hyp'`` (predictions) and ``group + '.ref'``
        (reference transcripts), one line per decoded file, in the order of
        ``waves``. Returns nothing.
    """
    ngram_decoder = create_decoder_ngram("digits.dict", "digits.jsgf")
    decoded = [rundecoder(wave_path, ngram_decoder) for wave_path in waves]

    # First the hypotheses, then the references -- same layout for both dumps.
    for suffix, field in (('.hyp', "prediction"), ('.ref', "actual")):
        with open(group + suffix, 'w') as out:
            out.writelines(entry[field] + "\n" for entry in decoded)

In [55]:
#NGram
# Build a new list rather than aliasing wavFiles1: `wavFiles = wavFiles1`
# followed by extend() would grow wavFiles1 itself, and every re-run of the
# cell would append the 3- and 5-digit files once more.
wavFiles = wavFiles1 + wavFiles3 + wavFiles5
processWavNgram(wavFiles, group="SNR35dB_Man")

INFO: pocketsphinx.c(151): Parsed model-specific feature parameters from /home/user/Documents/GitHub/ASR/lab_material/Assignment/ps_data/model/en-us/feat.params
Current configuration:
[NAME]			[DEFLT]		[VALUE]
-agc			none		none
-agcthresh		2.0		2.000000e+00
-allphone				
-allphone_ci		yes		yes
-alpha			0.97		9.700000e-01
-ascale			20.0		2.000000e+01
-aw			1		1
-backtrace		no		no
-beam			1e-48		1.000000e-48
-bestpath		yes		yes
-bestpathlw		9.5		9.500000e+00
-ceplen			13		13
-cmn			live		batch
-cmninit		40,3,-1		41.00,-5.29,-0.12,5.09,2.48,-4.07,-1.37,-1.78,-5.08,-2.05,-6.45,-1.42,1.17
-compallsen		no		no
-dict					/home/user/Documents/GitHub/ASR/lab_material/Assignment/ps_data/lex/digits.dict
-dictcase		no		no
-dither			no		no
-doublebw		no		no
-ds			1		1
-fdict					
-feat			1s_c_d_dd	1s_c_d_dd
-featparams				
-fillprob		1e-8		1.000000e-08
-frate			100		100
-fsg					
-fsgusealtpron		yes		yes
-fsgusefiller		yes		yes
-fwdflat		yes		yes
-fwdflatbeam		1e-64		1.000000e-64
-fwdflatefwid		4		4

In [58]:
# Confidence-interval half-width for the n-gram run, printed as a percentage.
# NOTE(review): 600 is larger than the 200 + 100 + 100 files suggested by the
# corpus folder names -- confirm the sample count.
conf_interval_Ngram = calculate_CI(wer=0.0311, noSen=600)
print(conf_interval_Ngram*100)

1.3889931969115854
