In [5]:
####################################
####################################
####################################
####################################

In [6]:
####################################
##
## VERY IMPORTANT:
##     Logic assumes that --audio parameter is right at the end (after alphabet, lm, trie).
##
##     ---------------------------------------------
##                   SECTION 1
## Use this to create the Deepspeech inference commands first.
##     Edit the parameters for:
##          -- the Deepspeech command for model, lm, trie, alphabet.
##          -- the output file.
##          -- the audio file locations in the "wavLocsDict" dictionary.
## Logic:
##     Will populate the "wavFilesDict" dictionary with all the .wav files found in
##          each of the locations specified in the "wavLocsDict" dictionary.
##     Next will create the commands for each input .wav file and write all the
##          commands to the output file specified.
##          For each input .wav file, one command will be WITH language model and one
##              for WITHOUT language model.
##              Thus two commands per input .wav file.
##
##     ---------------------------------------------
##                   SECTION 2
## Use this to run each command, capture the output and write it all to a file.
##     Edit the parameters for:
##          -- the input command file.
##          -- the output file.
##          -- the flags to indicate which commands to run.
##
##     IMPORTANT: This section can be run independently of section 1.
##          You can input an already created commands file.
##          Else generate the commands using section 1 and then run section 2.
##
## Logic:
##     Reads in the commands file specified and creates two lists - for WITH and WITHOUT lm.
##     Issues each command, captures the response and writes to output file.
##     The output consists of the audio file name (from the command itself) and the output transcript.
##
####################################

In [7]:
###################################################################
####    SECTION 1    ####    SECTION 1    ####    SECTION 1    ####
###################################################################

In [8]:
import os
import re
import glob
#
class MyStopExecution(Exception):
    """Exception raised by this notebook to stop a cell after a fatal error has been printed."""

    def _render_traceback_(self):
        """Return None, i.e. supply no traceback text.

        NOTE(review): presumably this is the hook IPython consults when displaying an
        exception, so that only the printed error message is shown - confirm.
        """
        return None

In [9]:
#
## Section 1 settings. The data folder prefix shared by most paths is written once
##     and the individual paths are built from it.
_dataRootS1 = '/home/rohit/dpspTraining/data/domainSet_1_20-260total/'
_testDirS1 = _dataRootS1 + 'OutputTesting/'
#
## location and filename where the generated commands should be written to.
opFileLocationS1 = _testDirS1
opFileNameS1 = 'section1op.txt'
#
## command skeleton: the executable name plus one "--option value" fragment per option.
## audio_skel holds only the option name; the .wav path is appended per file later.
cmd_skel = 'deepspeech'
model_skel = '--model ' + '/home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb'
alpha_skel = '--alphabet ' + _dataRootS1 + 'alphabetDir/alpha_domainSet1_20_260total.txt'
lm_skel = '--lm ' + _dataRootS1 + 'lm/lm4gram/vocab_domainSet1_20_260total_4gram.klm'
trie_skel = '--trie ' + _dataRootS1 + 'trie/trie4gram/vocab_domainSet1_20_260total_4gram.trie'
audio_skel = '--audio'
#
## folders in which to look for the .wav files.
## a trailing / is expected on each entry; the next cell appends it where it is missing.
wavLocsDict = {
    'L1': _testDirS1 + 'train',
    'L2': _testDirS1 + 'dev/',
    'L3': _testDirS1 + 'test/',
}
#

In [10]:
#
## add the slash at end of each folder location if it's not there already.
## an empty entry is left untouched: indexing it with [-1] would raise IndexError, and
##     turning it into '/' would silently point at the filesystem root. The folder
##     check in the next cell then reports it as a location that was not found.
for key in wavLocsDict:
    currLoc = wavLocsDict[key]
    if currLoc and not currLoc.endswith('/'):
        wavLocsDict[key] = currLoc + '/'
print(f"{wavLocsDict}")

{'L1': '/home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/train/', 'L2': '/home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/dev/', 'L3': '/home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/test/'}


In [11]:
#
## for each folder specified in the wavLocs dictionary, collect the .wav files found directly
##     in it and store them in the wavFile dictionary under the same key.
#
wavFilesDict = {}
for key, currLoc in wavLocsDict.items():
    ## start with an empty list for this key.
    wavFilesDict[key] = []
    ## if any of the folder locations specified is not found then stop everything.
    if not os.path.isdir(currLoc):
        print(f"\nFATAL ERROR: Folder location not found = {currLoc}")
        raise MyStopExecution
    ## currLoc already ends with '/', so the pattern matches *.wav inside that folder.
    wavFilesDict[key].extend(glob.glob(currLoc + '*.wav'))
#
## sort each list in place once all the folders have been scanned.
for wavList in wavFilesDict.values():
    wavList.sort()
#
print("\n\nwavFiles dictionary is populated.\n\n")
print(f"{wavFilesDict}")



wavFiles dictionary is populated.


{'L1': ['/home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/train/File1.wav', '/home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/train/File2.wav'], 'L2': ['/home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/dev/File22.wav', '/home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/dev/File23.wav'], 'L3': ['/home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/test/File29.wav', '/home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/test/File30.wav']}


In [12]:
#
## Generate the commands for WITH and WITHOUT language models and create the output file.
## Each command will be written to one line: first all the WITHOUT lm commands, then
##     all the WITH lm commands.
#
opFileS1 = opFileLocationS1 + opFileNameS1
#
## command skeleton for both versions of the commands.
## both end with "--audio " so that the .wav path can simply be appended.
cmd_skeleton_NO_lm = ' '.join([cmd_skel, model_skel, alpha_skel, audio_skel]) + ' '
cmd_skeleton_WITH_lm = ' '.join([cmd_skel, model_skel, alpha_skel, lm_skel, trie_skel, audio_skel]) + ' '
#
## every .wav file, folder by folder, in the order of the wavLocs dictionary.
allWavFiles = [wavFile for key in wavLocsDict for wavFile in wavFilesDict[key]]
opCmdList_NO_lm   = [cmd_skeleton_NO_lm + wavFile for wavFile in allWavFiles]
opCmdList_WITH_lm = [cmd_skeleton_WITH_lm + wavFile for wavFile in allWavFiles]
#
print("\n\nThe WITHOUT lm commands............\n\n")
with open(opFileS1, 'w') as outF1:
    ## WITHOUT language model commands
    for cmd in opCmdList_NO_lm:
        print(f"{cmd}\n")
        outF1.write(cmd + '\n')
    #
    ## WITH language model commands
    print("\n\nThe WITH lm commands............\n\n")
    for cmd in opCmdList_WITH_lm:
        print(f"{cmd}\n")
        outF1.write(cmd + '\n')
#
print(f"\n\nSection 1 logic complete.\n\nOutput file created here:\n{opFileS1}")



The WITHOUT lm commands............


deepspeech --model /home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb --alphabet /home/rohit/dpspTraining/data/domainSet_1_20-260total/alphabetDir/alpha_domainSet1_20_260total.txt --audio /home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/train/File1.wav

deepspeech --model /home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb --alphabet /home/rohit/dpspTraining/data/domainSet_1_20-260total/alphabetDir/alpha_domainSet1_20_260total.txt --audio /home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/train/File2.wav

deepspeech --model /home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb --alphabet /home/rohit/dpspTraining/data/domainSet_1_20-260total/alphabetDir/alpha_domainSet1_20_260total.txt --audio /home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/dev/File22.wav

d

In [13]:
###################################################################
####    SECTION 2    ####    SECTION 2    ####    SECTION 2    ####
###################################################################

In [14]:
import os
import re
import glob
#
class MyStopExecution(Exception):
    """Exception raised by Section 2 to stop a cell after a fatal error has been printed.

    Defined again here because Section 2 can be run independently of Section 1.
    """

    def _render_traceback_(self):
        """Return None, i.e. supply no traceback text.

        NOTE(review): presumably this is the hook IPython consults when displaying an
        exception, so that only the printed error message is shown - confirm.
        """
        return None

In [15]:
#
## Section 2 settings. Input and output live in the same folder, so it is written once.
_testDirS2 = '/home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/'
#
## location and filename from where to pick up the already generated commands.
ipFileLocationS2 = _testDirS2
ipFileNameS2 = 'section1op.txt'
#
## location and filename where the outputs should be stored.
opFileLocationS2 = _testDirS2
opFileNameS2 = 'section2op.txt'
#
## set a flag to True to run that type of commands; False skips them.
flagRun_NOlm_cmds, flagRun_WITHlm_cmds = True, True
#

In [16]:
#
## read the commands from the input file and sort them into two lists:
##     commands containing "--lm " go to the WITH lm list, all others to the WITHOUT lm list.
#
ipFileS2 = ipFileLocationS2 + ipFileNameS2
#
opCmdList_NO_lm   = []
opCmdList_WITH_lm = []
#
with open(ipFileS2, 'r') as inF2:
    for line in inF2:
        ## remove only the line terminator. Slicing off the last character would damage
        ##     a final line that has no newline after it.
        line = line.rstrip('\r\n')
        ## blank lines are not commands, so they go into neither list.
        if not line.strip():
            continue
        ## add the with or without lm version of commands to the correct list
        if '--lm ' in line:
            opCmdList_WITH_lm.append(line)
        else:
            opCmdList_NO_lm.append(line)
#
print(f"\nCommands WITHOUT lm:\n{opCmdList_NO_lm}\n\nCommands WITH lm:\n{opCmdList_WITH_lm}")


Commands WITHOUT lm:
['deepspeech --model /home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb --alphabet /home/rohit/dpspTraining/data/domainSet_1_20-260total/alphabetDir/alpha_domainSet1_20_260total.txt --audio /home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/train/File1.wav', 'deepspeech --model /home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb --alphabet /home/rohit/dpspTraining/data/domainSet_1_20-260total/alphabetDir/alpha_domainSet1_20_260total.txt --audio /home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/train/File2.wav', 'deepspeech --model /home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb --alphabet /home/rohit/dpspTraining/data/domainSet_1_20-260total/alphabetDir/alpha_domainSet1_20_260total.txt --audio /home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/dev/File22.wav', 'deepspeech 

In [17]:
#
onlyWavfileDsTranscriptNOlmList = []
onlyWavfileDsTranscriptWITHlmList = []
#
## run the WITHOUT lm commands -- only if the flag is set true
#
if flagRun_NOlm_cmds:
    dsFullResponseNOlmList = []
    #
    ## run each command and capture the full response, along with the filename used, in a list.
    for currCmd in opCmdList_NO_lm:
        tempAns = {'audio': 'xxx', 'dsOpTran': 'xxx'}
        audioFile4mCmd = re.match(r".+--audio (.+)$", currCmd)
        if len(audioFile4mCmd.groups()) == 1:  ## there should be only one audio file specified in command
            audioFile4mCmd = audioFile4mCmd.groups()[0]
            tempAns['audio']= audioFile4mCmd
        else:
            print(f"\nFatal Error: Regex match to find audio file failed. Should have found exactly 1 match. But found = {len(audioFile4mCmd.groups())}\n")
            raise MyStopExecution
        #
        tempAns['dsOpTran']= %sx $currCmd
        dsFullResponseNOlmList.append(tempAns)
    #
    ## from each response extract only the transcript and store with the filename used in command into new list.
    for response in dsFullResponseNOlmList:
        tempAns = {}
        tempAns['audio'] = response['audio']
        if response['dsOpTran'][-2].startswith('Inference took'):
            tempAns['dsOpTran'] = response['dsOpTran'][-1]
        else:
            tempAns['dsOpTran'] = 'PROBLEM WITH DS OUTPUT'
        onlyWavfileDsTranscriptNOlmList.append(tempAns)
        #
    #
#
print(f"\n\nData for WITHOUT lm:\n{onlyWavfileDsTranscriptNOlmList}\n\n")
#
## run the WITH lm commands -- only if the flag is set true
#
if flagRun_WITHlm_cmds:
    dsFullResponseWITHlmList = []
    #
    ## run each command and capture the full response, along with the filename used, in a list.
    for currCmd in opCmdList_WITH_lm:
        tempAns = {'audio': 'xxx', 'dsOpTran': 'xxx'}
        audioFile4mCmd = re.match(r".+--audio (.+)$", currCmd)
        if len(audioFile4mCmd.groups()) == 1:  ## there should be only one audio file specified in command
            audioFile4mCmd = audioFile4mCmd.groups()[0]
            tempAns['audio']= audioFile4mCmd
        else:
            print(f"\nFatal Error: Regex match to find audio file failed. Should have found exactly 1 match. But found = {len(audioFile4mCmd.groups())}\n")
            raise MyStopExecution
        #
        tempAns['dsOpTran']= %sx $currCmd
        dsFullResponseWITHlmList.append(tempAns)
    #
    ## from each response extract only the transcript and store with the filename used in command into new list.
    for response in dsFullResponseWITHlmList:
        tempAns = {}
        tempAns['audio'] = response['audio']
        if response['dsOpTran'][-2].startswith('Inference took'):
            tempAns['dsOpTran'] = response['dsOpTran'][-1]
        else:
            tempAns['dsOpTran'] = 'PROBLEM WITH DS OUTPUT'
        onlyWavfileDsTranscriptWITHlmList.append(tempAns)
        #
    #
#
print(f"\n\nData for WITH lm:\n{onlyWavfileDsTranscriptWITHlmList}\n\n")



Data for WITHOUT lm:
[{'audio': '/home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/train/File1.wav', 'dsOpTran': 'or e  fls n nlle fo rl f toetll o or oriee t '}, {'audio': '/home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/train/File2.wav', 'dsOpTran': ' ro tes a n oarle  fon anennae formn ree at'}, {'audio': '/home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/dev/File22.wav', 'dsOpTran': 'sstmmr ar oet ten e pld rrit aaooeoro ensvlo e t'}, {'audio': '/home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/dev/File23.wav', 'dsOpTran': 'tsmrm  aptee le f teero ee tn re fon onet tenrai er aalle t '}, {'audio': '/home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/test/File29.wav', 'dsOpTran': 'sthe e oaa gt endal t e ees henn rnetin  elle  t ts'}, {'audio': '/home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/test/File30.wav', 'dsOpTran': 's mm rieit ons albmamtarllle ts wrnraie ormtae eind

In [18]:
#
## write the output file: a WITHOUT lm section followed by a WITH lm section.
## each result takes two lines - the audio file name and then its transcript.
#
opFileS2 = opFileLocationS2 + opFileNameS2
#
## one (header, results) pair per section, in the order they are written.
sectionsS2 = (
    (' --------- WITHOUT language model --------- \n\n', onlyWavfileDsTranscriptNOlmList),
    ('\n\n ---------- WITH language model ---------- \n\n', onlyWavfileDsTranscriptWITHlmList),
)
#
with open(opFileS2, 'w') as outF2:
    for sectionHeader, sectionRows in sectionsS2:
        outF2.write(sectionHeader)
        ## a section whose commands were not run has no results.
        if not sectionRows:
            outF2.write('No data to write\n\n')
            continue
        for response in sectionRows:
            outF2.write(response['audio'] + '\n' + response['dsOpTran'] + '\n')
#
print(f"\n\nSection 2 logic complete.\n\nOutput file created here:\n{opFileS2}")



Section 2 logic complete.

Output file created here:
/home/rohit/dpspTraining/data/domainSet_1_20-260total/OutputTesting/section2op.txt


In [None]:
####################################
####################################
####################################
####################################

In [5]:
opCmdList_NO_lm_one

['deepspeech --model /home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb --alphabet /home/rohit/dpspTraining/data/domainSet_1_20-260total/alphabetDir/alpha_domainSet1_20_260total.txt --audio /home/rohit/dpspTraining/data/wav33/train/File10.wav',
 'deepspeech --model /home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb --alphabet /home/rohit/dpspTraining/data/domainSet_1_20-260total/alphabetDir/alpha_domainSet1_20_260total.txt --audio /home/rohit/dpspTraining/data/wav33/dev/File27.wav',
 'deepspeech --model /home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb --alphabet /home/rohit/dpspTraining/data/domainSet_1_20-260total/alphabetDir/alpha_domainSet1_20_260total.txt --audio /home/rohit/dpspTraining/data/wav33/dev/File33.wav',
 'deepspeech --model /home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb --alphabe

In [6]:
opCmdList_NO_lm_one[4]

'deepspeech --model /home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb --alphabet /home/rohit/dpspTraining/data/domainSet_1_20-260total/alphabetDir/alpha_domainSet1_20_260total.txt --audio /home/rohit/dpspTraining/data/wav33/test/File30.wav'

In [7]:
cmd2Run = opCmdList_NO_lm_one[4]
cmd2Run

'deepspeech --model /home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb --alphabet /home/rohit/dpspTraining/data/domainSet_1_20-260total/alphabetDir/alpha_domainSet1_20_260total.txt --audio /home/rohit/dpspTraining/data/wav33/test/File30.wav'

In [8]:
## run it directly
%sx $cmd2Run

['TensorFlow: v1.13.1-10-g3e0cc53',
 'DeepSpeech: v0.5.1-0-g4b29b78',
 '2019-12-13 22:09:33.261472: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA',
 '2019-12-13 22:09:33.303226: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel (\'op: "UnwrapDatasetVariant" device_type: "CPU"\') for unknown op: UnwrapDatasetVariant',
 '2019-12-13 22:09:33.303308: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel (\'op: "WrapDatasetVariant" device_type: "GPU" host_memory_arg: "input_handle" host_memory_arg: "output_handle"\') for unknown op: WrapDatasetVariant',
 '2019-12-13 22:09:33.303334: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel (\'op: "WrapDatasetVariant" device_type: "CPU"\') for unknown op: WrapDatasetVariant',
 '2019-12-13 22:09:33.303518: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel (\'op: "UnwrapDatasetVariant" device_type: "GPU" host_memory_arg: "input_handl

In [9]:
## run it but save the answer
dsResp = %sx $cmd2Run

In [10]:
type(dsResp)

IPython.utils.text.SList

In [11]:
dsResp

['TensorFlow: v1.13.1-10-g3e0cc53',
 'DeepSpeech: v0.5.1-0-g4b29b78',
 '2019-12-13 22:09:36.982745: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA',
 '2019-12-13 22:09:36.993622: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel (\'op: "UnwrapDatasetVariant" device_type: "CPU"\') for unknown op: UnwrapDatasetVariant',
 '2019-12-13 22:09:36.993696: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel (\'op: "WrapDatasetVariant" device_type: "GPU" host_memory_arg: "input_handle" host_memory_arg: "output_handle"\') for unknown op: WrapDatasetVariant',
 '2019-12-13 22:09:36.993729: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel (\'op: "WrapDatasetVariant" device_type: "CPU"\') for unknown op: WrapDatasetVariant',
 '2019-12-13 22:09:36.993859: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel (\'op: "UnwrapDatasetVariant" device_type: "GPU" host_memory_arg: "input_handl

In [12]:
len(dsResp)

13

In [13]:
dsResp[-1]

's mm rieit ons albmamtarllle ts wrnraie ormtae eind opalle an e rle a s '

In [14]:
type(dsResp[-1])

str

In [None]:
####################################
####################################
####################################
####################################

In [44]:
opCmdList_NO_lm_one

['deepspeech --model /home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb --alphabet /home/rohit/dpspTraining/data/domainSet_1_20-260total/alphabetDir/alpha_domainSet1_20_260total.txt --audio /home/rohit/dpspTraining/data/wav33/train/File10.wav',
 'deepspeech --model /home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb --alphabet /home/rohit/dpspTraining/data/domainSet_1_20-260total/alphabetDir/alpha_domainSet1_20_260total.txt --audio /home/rohit/dpspTraining/data/wav33/dev/File27.wav',
 'deepspeech --model /home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb --alphabet /home/rohit/dpspTraining/data/domainSet_1_20-260total/alphabetDir/alpha_domainSet1_20_260total.txt --audio /home/rohit/dpspTraining/data/wav33/dev/File33.wav',
 'deepspeech --model /home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb --alphabe

In [70]:
import re, os

In [71]:
dsAnsList = []
for eachCmd in opCmdList_NO_lm_one:
    tempAns = {'audio': 'xxx', 'dsOpTran': 'xxx'}
    audioFile4mCmd = re.match(r".+--audio (.+)$", eachCmd)
    if len(audioFile4mCmd.groups()) == 1:
        audioFile4mCmd = audioFile4mCmd.groups()[0]
        tempAns['audio']= audioFile4mCmd
    else:
        print(f"\nFatal Error: Regex match to find audio file failed. Should have found exactly 1 match. But found = {len(audioFile4mCmd.groups())}\n")
        break
    tempAns['dsOpTran']= %sx $eachCmd
    dsAnsList.append(tempAns)
print(f"Done")

Done


In [None]:
## pull the audio file out of the command and report each captured name that is a real file.
audioFile4mCmd = re.match(r".+--audio (.+)$", cmdstr)
if audioFile4mCmd:
    capturedNames = audioFile4mCmd.groups()
    print(f"{capturedNames}")
    for audioFile in filter(os.path.isfile, capturedNames):
        print(f"{audioFile} IS A FILE -- ALL OK.")

In [72]:
dsAnsList

[{'audio': '/home/rohit/dpspTraining/data/wav33/train/File10.wav',
  'dsOpTran': ['TensorFlow: v1.13.1-10-g3e0cc53',
   'DeepSpeech: v0.5.1-0-g4b29b78',
   '2019-12-13 22:45:58.757440: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA',
   '2019-12-13 22:45:58.768720: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel (\'op: "UnwrapDatasetVariant" device_type: "CPU"\') for unknown op: UnwrapDatasetVariant',
   '2019-12-13 22:45:58.768796: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel (\'op: "WrapDatasetVariant" device_type: "GPU" host_memory_arg: "input_handle" host_memory_arg: "output_handle"\') for unknown op: WrapDatasetVariant',
   '2019-12-13 22:45:58.768830: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel (\'op: "WrapDatasetVariant" device_type: "CPU"\') for unknown op: WrapDatasetVariant',
   '2019-12-13 22:45:58.768961: E tensorflow/core/framework/op_kernel.cc:

In [73]:
len(dsAnsList)

5

In [74]:
dsAnsList[0]

{'audio': '/home/rohit/dpspTraining/data/wav33/train/File10.wav',
 'dsOpTran': ['TensorFlow: v1.13.1-10-g3e0cc53',
  'DeepSpeech: v0.5.1-0-g4b29b78',
  '2019-12-13 22:45:58.757440: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA',
  '2019-12-13 22:45:58.768720: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel (\'op: "UnwrapDatasetVariant" device_type: "CPU"\') for unknown op: UnwrapDatasetVariant',
  '2019-12-13 22:45:58.768796: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel (\'op: "WrapDatasetVariant" device_type: "GPU" host_memory_arg: "input_handle" host_memory_arg: "output_handle"\') for unknown op: WrapDatasetVariant',
  '2019-12-13 22:45:58.768830: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel (\'op: "WrapDatasetVariant" device_type: "CPU"\') for unknown op: WrapDatasetVariant',
  '2019-12-13 22:45:58.768961: E tensorflow/core/framework/op_kernel.cc:1325] Op

In [93]:
## keep only the transcript (last output line) for each audio file.
## the transcript is trusted only when the line before it starts with 'Inference took';
##     output of fewer than two lines is also treated as a problem instead of raising IndexError.
finalAns = []
for eachDsAns in dsAnsList:
    dsOutputLines = eachDsAns['dsOpTran']
    if len(dsOutputLines) >= 2 and dsOutputLines[-2].startswith('Inference took'):
        transcript = dsOutputLines[-1]
    else:
        transcript = 'PROBLEM WITH DS OUTPUT'
    finalAns.append({'audio': eachDsAns['audio'], 'dsOpTran': transcript})

In [94]:
finalAns

[{'audio': '/home/rohit/dpspTraining/data/wav33/train/File10.wav',
  'dsOpTran': 'or amarn ert a plre ae col or aearlt'},
 {'audio': '/home/rohit/dpspTraining/data/wav33/dev/File27.wav',
  'dsOpTran': 's ormr priena theellt eros te mritevoro e pallle t '},
 {'audio': '/home/rohit/dpspTraining/data/wav33/dev/File33.wav',
  'dsOpTran': 'tem f eaiseh ors ho ee ssfansysteee  s'},
 {'audio': '/home/rohit/dpspTraining/data/wav33/test/File29.wav',
  'dsOpTran': 'sthe e oaa gt endal t e ees henn rnetin  elle  t ts'},
 {'audio': '/home/rohit/dpspTraining/data/wav33/test/File30.wav',
  'dsOpTran': 's mm rieit ons albmamtarllle ts wrnraie ormtae eind opalle an e rle a s '}]

In [49]:
## keep only the last output line of each response.
## each dsAnsList entry built above is a dict, so the output lines are under 'dsOpTran';
##     indexing the entry itself with [-1] raises KeyError.
finalAns = [eachDsAns['dsOpTran'][-1] for eachDsAns in dsAnsList]

In [50]:
finalAns

['or amarn ert a plre ae col or aearlt',
 's ormr priena theellt eros te mritevoro e pallle t ',
 'tem f eaiseh ors ho ee ssfansysteee  s',
 'sthe e oaa gt endal t e ees henn rnetin  elle  t ts',
 's mm rieit ons albmamtarllle ts wrnraie ormtae eind opalle an e rle a s ']

In [90]:
a = 'rohitbewoor'

In [92]:
a.startswith('ro')

True

In [None]:
####################################
####################################
####################################
####################################

In [15]:
%sx pwd

['/home/rohit/dpspTraining/myCode']

In [16]:
someCmd = 'pwd'

In [17]:
!someCmd

/bin/sh: 1: someCmd: not found


In [18]:
%sx someCmd

['/bin/bash: someCmd: command not found']

In [19]:
# https://stackoverflow.com/questions/14409167/how-to-pass-a-variable-to-magic-%C2%B4run%C2%B4-function-in-ipython

In [20]:
%sx $someCmd

['/home/rohit/dpspTraining/myCode']

In [21]:
## trying to run shell commands and capture the output.
cmdList = ['pwd']

In [22]:
cmdList

['pwd']

In [25]:
cmdList[0]

'pwd'

In [26]:
%sx $cmdList[0]

['/bin/bash: [pwd][0]: command not found']

In [27]:
%sx ${cmdList[0]}

[]

In [28]:
## trying to run shell commands and capture the output.
cmdList = ['pwd']
actualCmd = cmdList[0]

In [29]:
actualCmd

'pwd'

In [30]:
%sx $actualCmd

['/home/rohit/dpspTraining/myCode']

In [42]:
## NOTE: this cell does not run - it fails with "SyntaxError: invalid syntax" on line 3,
##     the line that passes the %sx magic as the argument of append().
##     The next cell assigns the magic's result to a name first, and that form works.
testList = []
for i in range(3):
    testList.append(%sx $actualCmd)
print(f"{testList}")
print(f"{type(testList)}")
print(f"{type(testList[0])}")

SyntaxError: invalid syntax (<ipython-input-42-3c89609078d3>, line 3)

In [43]:
testList = []
for i in range(3):
    tempAns = %sx $actualCmd
    testList.append(tempAns)
print(f"{testList}")
print(f"{type(testList)}")
print(f"{type(testList[0])}")

[['/home/rohit/dpspTraining/myCode'], ['/home/rohit/dpspTraining/myCode'], ['/home/rohit/dpspTraining/myCode']]
<class 'list'>
<class 'IPython.utils.text.SList'>


In [None]:
####################################
####################################
####################################
####################################

In [1]:
!deepspeech --model /home/rohit/dpspTraining/models/v051/model11-domainSet1-pdf1_1_950/savedModel/output_graph_afterPDF1_20191212_1510.pb --alphabet /home/rohit/dpspTraining/data/wavFiles/domainSet1-pdf1_1_950/alphabetDir/alpha_combined_PDF1.txt --audio /home/rohit/dpspTraining/data/wavFiles/wav33/test/File30.wav

Loading model from file /home/rohit/dpspTraining/models/v051/model11-domainSet1-pdf1_1_950/savedModel/output_graph_afterPDF1_20191212_1510.pb
TensorFlow: v1.13.1-10-g3e0cc53
DeepSpeech: v0.5.1-0-g4b29b78
2019-12-13 16:34:19.295033: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2019-12-13 16:34:19.379159: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel ('op: "UnwrapDatasetVariant" device_type: "CPU"') for unknown op: UnwrapDatasetVariant
2019-12-13 16:34:19.379188: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel ('op: "WrapDatasetVariant" device_type: "GPU" host_memory_arg: "input_handle" host_memory_arg: "output_handle"') for unknown op: WrapDatasetVariant
2019-12-13 16:34:19.379195: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel ('op: "WrapDatasetVariant" device_type: "CPU"') for unknown op: WrapDatasetVariant
2019-12-13 16:34:19.379260: E tensorflow/core/framew

In [None]:
####################################
####################################
####################################
####################################

In [None]:
## https://jakevdp.github.io/PythonDataScienceHandbook/01.05-ipython-and-shell-commands.html

In [1]:
dsAns = !deepspeech --model /home/rohit/dpspTraining/models/v051/model11-domainSet1-pdf1_1_950/savedModel/output_graph_afterPDF1_20191212_1510.pb --alphabet /home/rohit/dpspTraining/data/wavFiles/domainSet1-pdf1_1_950/alphabetDir/alpha_combined_PDF1.txt --audio /home/rohit/dpspTraining/data/wavFiles/wav33/test/File30.wav

In [2]:
type(dsAns)

IPython.utils.text.SList

In [3]:
dsAns

['TensorFlow: v1.13.1-10-g3e0cc53',
 'DeepSpeech: v0.5.1-0-g4b29b78',
 '2019-12-13 17:09:23.573875: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA',
 '2019-12-13 17:09:23.658355: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel (\'op: "UnwrapDatasetVariant" device_type: "CPU"\') for unknown op: UnwrapDatasetVariant',
 '2019-12-13 17:09:23.658381: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel (\'op: "WrapDatasetVariant" device_type: "GPU" host_memory_arg: "input_handle" host_memory_arg: "output_handle"\') for unknown op: WrapDatasetVariant',
 '2019-12-13 17:09:23.658387: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel (\'op: "WrapDatasetVariant" device_type: "CPU"\') for unknown op: WrapDatasetVariant',
 '2019-12-13 17:09:23.658451: E tensorflow/core/framework/op_kernel.cc:1325] OpKernel (\'op: "UnwrapDatasetVariant" device_type: "GPU" host_memory_arg: "input_handl

In [None]:
####################################
####################################
####################################
####################################

In [None]:
## pull out the audio file name from command

In [53]:
import re
import os

In [52]:
cmdstr = 'deepspeech --model /home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb --alphabet /home/rohit/dpspTraining/data/domainSet_1_20-260total/alphabetDir/alpha_domainSet1_20_260total.txt --audio /home/rohit/dpspTraining/data/wav33/test/File30.wav'
cmdstr

'deepspeech --model /home/rohit/dpspTraining/models/v051/model2-domainSet_1_20-260total/savedModelDir/output_graph.pb --alphabet /home/rohit/dpspTraining/data/domainSet_1_20-260total/alphabetDir/alpha_domainSet1_20_260total.txt --audio /home/rohit/dpspTraining/data/wav33/test/File30.wav'

In [61]:
audioFile4mCmd = re.match(r".+--audio (.+)$", cmdstr)

In [67]:
## when the regex matched, show what was captured and report each captured name that is a real file.
if audioFile4mCmd:
    capturedNames = audioFile4mCmd.groups()
    print(f"{capturedNames}")
    for audioFile in filter(os.path.isfile, capturedNames):
        print(f"{audioFile} IS A FILE -- ALL OK.")

('/home/rohit/dpspTraining/data/wav33/test/File30.wav',)
/home/rohit/dpspTraining/data/wav33/test/File30.wav IS A FILE -- ALL OK.


In [68]:
len(audioFile4mCmd.groups())

1

In [69]:
audioFile4mCmd.groups()[0]

'/home/rohit/dpspTraining/data/wav33/test/File30.wav'