Merged PR 1225: Make training script take an options file as input so that a batch of training runs can be prepared and sent to Azure ML

Make training script take an options file as input so that a batch of training runs can be prepared and sent to Azure ML.

Related work items: #2042
microsoft · Apr 19, 2019 · 048e216 · 048e216
1 parent 76289c5
commit 048e216
Show file tree

Hide file tree

Showing 6 changed files with 250 additions and 42 deletions.
diff --git a/.gitignore b/.gitignore
@@ -267,3 +267,5 @@ Gemfile.lock
 
 # ctags tag file
 tags
+.amlignore
+aml_config/
diff --git a/requirements.txt b/requirements.txt
@@ -12,6 +12,7 @@ paramiko
 parse
 psutil
 pyaudio
+python_speech_features
 requests
 serial
 # optional (very large):

diff --git a/tools/utilities/pythonlibs/audio/play_audio.py b/tools/utilities/pythonlibs/audio/play_audio.py
@@ -19,22 +19,32 @@
 arg_parser.add_argument("filename", help="wav file to play ")
 arg_parser.add_argument("--sample_rate", "-s", help="Audio sample rate to use", default=16000, type=int)
 arg_parser.add_argument("--channels", "-c", help="Audio channels to use", default=1, type=int)
+arg_parser.add_argument("--buffer_size", help="Read buffer size", default=512, type=int)
+arg_parser.add_argument("--code", help="Output c-code for sample data", action="store_true")
 
 args = arg_parser.parse_args()
 
 # First tell the WavReader what sample rate and channels we want the audio converted to
-reader = wav_reader.WavReader(args.sample_rate, args.channels)
+reader = wav_reader.WavReader(args.sample_rate, args.channels, auto_scale=False)
 
 # Create a speaker object which we will give to the WavReader.  The WavReader will pass
 # the re-sampled audio to the Speaker so you can hear what it sounds like
 speaker = speaker.Speaker()
 
-# open the reader asking for 256 size chunks of audio, converted to floating point betweeo -1 and 1.
-reader.open(args.filename, 256, speaker)
+# open the reader asking for chunks of audio of the requested buffer size, converted to floating point between -1 and 1.
+reader.open(args.filename, args.buffer_size, speaker)
 
 print("wav file contains sample rate {} and {} channels".format(reader.actual_rate, reader.actual_channels))
 
+code = args.code
 # pump the reader until it returns None.  In a real app you would assign the results of read() to
 # a variable so you can process the audio chunks returned.
-while reader.read() is not None:
-    pass
+while True:
+    buffer = reader.read()
+    if buffer is None:
+        break
+    if code:
+        print("{", end='')
+        for x in buffer:
+            print("{}, ".format(x), end='')
+        print("},")
diff --git a/tools/utilities/pythonlibs/audio/training/add_metadata.py b/tools/utilities/pythonlibs/audio/training/add_metadata.py
@@ -17,16 +17,17 @@
 if __name__ == "__main__":
     parser = argparse.ArgumentParser("Add a metadata key/value pair to the input node of the given model")
     parser.add_argument("model", help="The *.ell model to edit)")
-    parser.add_argument("--name", "-n", help="The metadata key", default=None)
-    parser.add_argument("--value", "-v", help="The metadata value", default=None)
+    parser.add_argument("--names", "-n", help="One or more metadata keys", nargs="+")
+    parser.add_argument("--values", "-v", help="The same number of metadata values", nargs="+")
 
     args = parser.parse_args()
     filename = args.model
-    print("Adding metadata {}={}".format(args.name, args.value))
     editor = model_editor.ModelEditor(filename)
     node = editor.get_input_node()
     if node is not None:
-        node.SetMetadataValue(args.name, args.value)
+        for i in range(len(args.names)):
+            print("Adding metadata {}={}".format(args.names[i], args.values[i]))
+            node.SetMetadataValue(args.names[i], args.values[i])
         editor.save(filename)
     else:
         print("No InputNode found in model")
diff --git a/tools/utilities/pythonlibs/audio/training/make_dataset.py b/tools/utilities/pythonlibs/audio/training/make_dataset.py
@@ -153,7 +153,7 @@ def _get_dataset(entry_map, categories, transform, sample_rate, window_size, shi
     return Dataset(features, label_names, categories, parameters)
 
 
-def make_dataset(list_file, categories_path, featurizer_path, sample_rate, window_size, shift, auto_scale=True,
+def make_dataset(list_file, outdir, categories_path, featurizer_path, sample_rate, window_size, shift, auto_scale=True,
                  noise_path=None, max_noise_ratio=0.1, noise_selection=0.1, use_cache=False):
 
     """
@@ -163,6 +163,7 @@ def make_dataset(list_file, categories_path, featurizer_path, sample_rate, windo
     """
     dataset_name = os.path.basename(list_file)
     dataset_path = os.path.splitext(dataset_name)[0] + ".npz"
+    dataset_path = os.path.join(outdir, dataset_path)
     if use_cache and os.path.isfile(dataset_path):
       return
 
@@ -191,6 +192,8 @@ def make_dataset(list_file, categories_path, featurizer_path, sample_rate, windo
 
     # options
     arg_parser.add_argument("--list_file", "-l", help="The path to the list file to process")
+    arg_parser.add_argument("--outdir", "-o", help="The path where you want the *.npz files saved",
+                            default=os.getcwd())
     arg_parser.add_argument("--categories", "-c",
                             help="The full list of labels (a given list file might only see a subset of these)")
     arg_parser.add_argument("--featurizer", "-f", help="Compiled featurizer module to use", default="featurizer/mfcc")
@@ -211,5 +214,5 @@ def make_dataset(list_file, categories_path, featurizer_path, sample_rate, windo
         print("Noise dir '{}' not found".format(args.noise_path))
         sys.exit(1)
 
-    make_dataset(args.list_file, args.categories, args.featurizer, args.sample_rate, args.window_size, args.shift,
-                 args.auto_scale, args.noise_path, args.max_noise_ratio, args.noise_selection)
+    make_dataset(args.list_file, args.outdir, args.categories, args.featurizer, args.sample_rate, args.window_size,
+                 args.shift, args.auto_scale, args.noise_path, args.max_noise_ratio, args.noise_selection)