In [1]:
import fnmatch
import os
from os import environ, path, walk

from pocketsphinx import *
relative_path = "../ps_data"

In [2]:
def create_folder(path):
    """Create the directory ``path`` (and any missing parents) if needed.

    Args:
        path: Directory to create. Inside this function the parameter shadows
            the module-level ``path`` name (``os.path``), which is why the
            ``os`` module is referenced explicitly.

    Returns:
        None.

    Raises:
        FileExistsError: If ``path`` already exists but is not a directory.
    """
    # exist_ok=True makes the call idempotent and removes the window between
    # a separate "does it exist?" check and the actual creation.
    os.makedirs(path, exist_ok=True)


def find_files(directory, pattern='*.raw'):
    """Collect every file below ``directory`` whose name matches ``pattern``.

    The directory tree is walked recursively and each file name is tested
    against the shell-style ``pattern`` with ``fnmatch``.

    Args:
        directory: Root of the tree to search.
        pattern: Shell-style wildcard applied to file names (not full paths).

    Returns:
        A sorted list of paths (each folder joined with the matching file
        name). The list is empty when nothing matches.
    """
    matches = [
        path.join(folder, name)
        for folder, _subdirs, names in walk(directory)
        for name in fnmatch.filter(names, pattern)
    ]

    # A stable ordering keeps the inputs aligned with any output files
    matches.sort()
    return matches


def create_decoder_ngram():
    """Build a decoder that uses the en-us N-gram language model.

    All model files are looked up below the module-level ``relative_path``.
    To experiment with the smaller "turtle" models instead, point ``-dict``
    at ``/lex/turtle.dict`` and ``-lm`` at ``/lm/turtle.lm.bin``.

    Returns:
        A ``Decoder`` configured with the en-us acoustic model, the
        cmudict-en-us dictionary and the en-us N-gram language model.
    """
    settings = (
        ('-hmm', relative_path + '/model/en-us'),              # acoustic model
        ('-dict', relative_path + '/lex/cmudict-en-us.dict'),  # lexicon / dictionary
        ('-lm', relative_path + '/lm/en-us.lm.bin'),           # language model
    )

    config = Decoder.default_config()
    for option, value in settings:
        config.set_string(option, value)

    return Decoder(config)


def create_decoder_goforward():
    """Build a decoder restricted to the custom "goforward" JSGF grammar.

    The decoder is created with the en-us acoustic model and the turtle
    dictionary (both below the module-level ``relative_path``). The rule
    ``goforward.move2`` from ``/jsgf/goforward.jsgf`` is then compiled into a
    finite-state grammar, saved to ``../outputs/goforward.fsg`` and activated
    as the decoder's search.

    Side effects:
        Creates the ``../outputs`` directory if it is missing and (re)writes
        ``../outputs/goforward.fsg``.

    Returns:
        The configured ``Decoder``.
    """
    # NOTE(review): the notebook's captured stderr echoes the default_config /
    # set_fsg / set_search lines, which looks like deprecation warnings from
    # the installed pocketsphinx - confirm before upgrading the library.
    config = Decoder.default_config()
    config.set_string('-hmm', relative_path + '/model/en-us')  # acoustic model
    config.set_string('-dict', relative_path + '/lex/turtle.dict')  # lexicon / dictionary
    decoder_gofwd = Decoder(config)

    # Compile the chosen JSGF rule into a finite-state grammar
    jsgf = Jsgf(relative_path + '/jsgf/goforward.jsgf')  # load the grammar file
    rule = jsgf.get_rule('goforward.move2')  # choose the rule
    # 7.5 is presumably the language weight - TODO confirm against the API docs
    fsg = jsgf.build_fsg(rule, decoder_gofwd.get_logmath(), 7.5)

    # Make sure the output folder exists before saving the compiled grammar;
    # on a fresh checkout '../outputs' may not be there yet.
    os.makedirs('../outputs', exist_ok=True)
    fsg.writefile('../outputs/goforward.fsg')

    # Register the in-memory grammar under a search name, then activate it
    decoder_gofwd.set_fsg("../outputs/goforward", fsg)
    decoder_gofwd.set_search("../outputs/goforward")

    return decoder_gofwd

# End of helper function definitions


In [3]:

# Define the path of the file to process
file_path = relative_path + '/example/goforward.raw'

# Instantiate the decoder
# decoder = create_decoder_ngram()  # use the N-gram language model
decoder = create_decoder_goforward()  # use the custom grammar

# Read the whole recording up front; the context manager guarantees the
# file handle is closed even if reading fails.
with open(file_path, 'rb') as stream:
    uttbuf = stream.read()

# Abort before an utterance is opened, so the decoder is never left in a
# half-started state. Raising (rather than exit()) keeps the kernel alive.
if not uttbuf:
    raise RuntimeError("Error reading speech data")

# Decode the buffer as one complete utterance
# (the positional flags are presumably no_search=False, full_utt=True - TODO confirm)
decoder.start_utt()
decoder.process_raw(uttbuf, False, True)
decoder.end_utt()

# Fetch the hypothesis once; it is None when nothing was recognised, in which
# case there is no score or probability to report either.
hypothesis = decoder.hyp()
if hypothesis is None:
    best_hypothesis = ''
    model_score = None
    confidence = None
else:
    best_hypothesis = hypothesis.hypstr
    model_score = hypothesis.best_score
    confidence = decoder.get_logmath().exp(hypothesis.prob)

# Print the results
print('Best hypothesis: ', best_hypothesis,
      "\n model score: ", model_score,
      "\n confidence: ", confidence)

print('Best hypothesis segments: ', [seg.word for seg in decoder.seg()])

# Access N best decodings
N = 8
print('Best ' + str(N) + ' hypothesis: ')
# range(N) comes first so zip stops after exactly N alternatives are pulled
for i, best in zip(range(N), decoder.nbest()):
    print(best.hypstr, best.score)


Best hypothesis:  go forward ten meters 
 model score:  0.7239855458606713 
 confidence:  1.0
Best hypothesis segments:  ['<sil>', 'go', 'forward', 'ten', 'meters', '<sil>', '</s>']
Best 8 hypothesis: 
go forward ten meters 0.7239855458606713
go forward ten meters 0.7239855458606713
go forward ten meters 0.7239855458606713
go forward ten meters 0.6579153085207451
go forward ten meters 0.6579153085207451
go forward ten meters 0.6579153085207451
go forward ten meters 0.6579153085207451
go forward ten meters 0.6579153085207451


  config = Decoder.default_config()
ERROR: "dict.c", line 191: Line 68: Phone 'OH' is missing in the acoustic model; word 'oh(3)' ignored
  decoder_gofwd.set_fsg("../outputs/goforward", fsg)  # load the pre-recorded compiled grammar rule in the decoder
  decoder_gofwd.set_search("../outputs/goforward")  # and set it as the grammar to use


In [4]:

# Define the path of the file to process
file_path = relative_path + '/example/ex_digits.raw'

# Instantiate the decoder
decoder = create_decoder_ngram()  # use the N-gram language model
# decoder = create_decoder_goforward()  # use the custom grammar

# Read the whole recording up front; the context manager guarantees the
# file handle is closed even if reading fails.
with open(file_path, 'rb') as stream:
    uttbuf = stream.read()

# Abort before an utterance is opened, so the decoder is never left in a
# half-started state. Raising (rather than exit()) keeps the kernel alive.
if not uttbuf:
    raise RuntimeError("Error reading speech data")

# Decode the buffer as one complete utterance
# (the positional flags are presumably no_search=False, full_utt=True - TODO confirm)
decoder.start_utt()
decoder.process_raw(uttbuf, False, True)
decoder.end_utt()

# Fetch the hypothesis once; it is None when nothing was recognised, in which
# case there is no score or probability to report either.
hypothesis = decoder.hyp()
if hypothesis is None:
    best_hypothesis = ''
    model_score = None
    confidence = None
else:
    best_hypothesis = hypothesis.hypstr
    model_score = hypothesis.best_score
    confidence = decoder.get_logmath().exp(hypothesis.prob)

# Print the results
print('Best hypothesis: ', best_hypothesis,
      "\n model score: ", model_score,
      "\n confidence: ", confidence)

print('Best hypothesis segments: ', [seg.word for seg in decoder.seg()])

# Access N best decodings
N = 8
print('Best ' + str(N) + ' hypothesis: ')
# range(N) comes first so zip stops after exactly N alternatives are pulled
for i, best in zip(range(N), decoder.nbest()):
    print(best.hypstr, best.score)


  config = Decoder.default_config()


Best hypothesis:  one eight 
 model score:  0.6008112061264117 
 confidence:  1.0
Best hypothesis segments:  ['<s>', 'one(2)', 'eight', '</s>']
Best 8 hypothesis: 
one eight 0.076361900742479
one egg 0.07543599137899455
one mate 0.07520251355700776
one made 0.07487985309859328
one a 0.0747451973222464
one make 0.07436498618859971
one aide 0.07409035784661228
what made 0.07406813518393898
