Commit

Merge branch 'speechbrain:develop' into RescueSpeech
sangeet2020 committed Sep 19, 2023
2 parents 284e347 + 7201866 commit fa25f82
Showing 13 changed files with 35 additions and 49 deletions.
1 change: 0 additions & 1 deletion recipes/AudioMNIST/diffusion/hparams/train.yaml
@@ -93,7 +93,6 @@ pad_level_db: -50.

# Model Parameters
model_channels: 128
model_norm_num_groups: 32
model_num_res_blocks: 4
diffusion_channels: 1

1 change: 0 additions & 1 deletion recipes/AudioMNIST/diffusion/hparams/train_latent.yaml
@@ -115,7 +115,6 @@ autoencoder_channels: 32
autoencoder_norm_num_groups: 32
autoencoder_num_res_blocks: 1
autoencoder_encoder_out_channels: 32
autoencoder_nom_num_groups: 32
autoencoder_latent_channels: 2
autoencoder_dropout: 0.1
latent_mask_value: -3.
7 changes: 6 additions & 1 deletion recipes/AudioMNIST/diffusion/train.py
@@ -1463,7 +1463,12 @@ def labels_pipeline(digit, speaker_id):

train_split = dataset_splits["train"]
data_count = None
train_split = apply_overfit_test(hparams, train_split)
train_split = apply_overfit_test(
hparams["overfit_test"],
hparams["overfit_test_sample_count"],
hparams["overfit_test_epoch_data_count"],
train_split,
)

if hparams["train_data_count"] is not None:
data_count = hparams["train_data_count"]
2 changes: 1 addition & 1 deletion recipes/BinauralWSJ0Mix/extra_requirements.txt
@@ -1,4 +1,4 @@
gitpython==3.1.34
gitpython==3.1.35
mir-eval==0.6
pyroomacoustics>=0.7.3

3 changes: 0 additions & 3 deletions recipes/LJSpeech/TTS/vocoder/diffwave/hparams/train.yaml
@@ -66,9 +66,6 @@ test_dataloader_opts:
batch_size: 1
num_workers: !ref <num_workers>

dataloader_options:
batch_size: !ref <batch_size>

use_tensorboard: False
tensorboard_logs: !ref <output_folder>/logs/

2 changes: 1 addition & 1 deletion recipes/LJSpeech/TTS/vocoder/diffwave/train.py
@@ -364,5 +364,5 @@ def check_tensorboard(hparams):
test_stats = diffusion_brain.evaluate(
test_set=datasets["test"],
min_key="error",
test_loader_kwargs=hparams["dataloader_options"],
test_loader_kwargs=hparams["test_dataloader_opts"],
)
@@ -29,7 +29,6 @@ data_folder: !PLACEHOLDER # e.g, /localscratch/LibriSpeech
# If RIRS_NOISES dir exists in /localscratch/xxx_corpus/RIRS_NOISES
# then data_folder_rirs should be /localscratch/xxx_corpus
# otherwise the dataset will automatically be downloaded
data_folder_rirs: !ref <data_folder>
train_splits: ["train-clean-100", "train-clean-360", "train-other-500"]
dev_splits: ["dev-clean"]
test_splits: ["test-clean", "test-other"]
@@ -158,6 +157,7 @@ speed_perturb: !new:speechbrain.processing.speech_augmentation.SpeedPerturb
speeds: [95, 100, 105]

# Uncomment if interested in env corruption
# data_folder_rirs: !ref <data_folder>
# env_corrupt: !new:speechbrain.lobes.augment.EnvCorrupt
# openrir_folder: !ref <data_folder_rirs>
# babble_prob: 0.0
13 changes: 0 additions & 13 deletions recipes/RescueSpeech/ASR/noise-robust/hparams/robust_asr_16k.yaml
@@ -44,9 +44,7 @@ avoid_if_longer_than: 10.0
## Model parameters- Enhance model
dereverberate: False
save_audio: True
resample: False
enhance_sample_rate: 16000
lr_enhance: 0.00015
limit_training_signal_len: False
training_signal_len: 64000
use_wavedrop: False
@@ -71,8 +69,6 @@ checkpoint_avg: 5
# Must be 6 per GPU to fit 16GB of VRAM
batch_size: 2
test_batch_size: 2
dataloader_num_workers: 4
test_num_workers: 4


# These values are only used for the searchers.
@@ -107,15 +103,6 @@ test_loader_kwargs:
sepformer_weight: 0.1
asr_weight: 1

# loss thresholding -- this thresholds the training loss
threshold_byloss: True
threshold: -30
clip_grad_norm: 5
loss_upper_lim: 999999 # this is the upper limit for an acceptable loss
optimizer: !name:torch.optim.Adam
lr: !ref <lr_enhance>
weight_decay: 0

# Functions and classes
speedperturb: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
perturb_prob: 1.0
11 changes: 8 additions & 3 deletions recipes/RescueSpeech/ASR/noise-robust/train.py
@@ -537,9 +537,14 @@ def save_results(self, test_data):
)

# Write enhanced wavs for sanity check
self.save_audio(
snt_id[0], batch.noisy_sig, clean, predictions[0], batch
)
if self.hparams.save_audio:
self.save_audio(
snt_id[0],
batch.noisy_sig,
clean,
predictions[0],
batch,
)

psq_mode = (
"wb"
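The enhanced-audio dump is now gated on the save_audio flag from the hyperparameters (set to True in robust_asr_16k.yaml above). Below is a simplified sketch of the same guard in isolation; the output folder, the 16 kHz rate, and the torchaudio writer are illustrative stand-ins for the recipe's own save_audio() method, not the actual implementation.

# Illustrative stand-in for the recipe's guarded call to self.save_audio().
import os
import torch
import torchaudio

def maybe_save_enhanced(save_audio: bool, snt_id: str, enhanced: torch.Tensor,
                        out_dir: str = "enhanced_wavs", sample_rate: int = 16000):
    if not save_audio:  # the new guard: skip disk I/O when the flag is off
        return
    os.makedirs(out_dir, exist_ok=True)
    # torchaudio expects (channels, time); the sketch assumes a 1-D waveform.
    torchaudio.save(os.path.join(out_dir, f"{snt_id}.wav"),
                    enhanced.unsqueeze(0).cpu(), sample_rate)
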
1 change: 0 additions & 1 deletion recipes/Voicebank/enhance/MetricGAN/hparams/train.yaml
@@ -12,7 +12,6 @@ seed: 4234
__set_seed: !!python/object/apply:torch.manual_seed [!ref <seed>]

data_folder: !PLACEHOLDER # e.g, /data/member1/user_jasonfu/noisy-vctk-16k
train_clean_folder: !ref <data_folder>/clean_trainset_28spk_wav_16k/

MetricGAN_folder: !ref <output_folder>/enhanced_wavs
output_folder: !ref ./results/MetricGAN/<seed>
@@ -34,10 +34,9 @@ test_annotation: !ref <output_folder>/test.json
number_of_epochs: 15
lr: 0.0001
lr_wav2vec: 0.00001
sorting: ascending
# auto_mix_prec: False
# do_resample: False
sample_rate: 16000
# sample_rate: 16000

# With data_parallel batch_size is split into N jobs
# With DDP batch_size is multiplied by N jobs
@@ -69,20 +68,13 @@ dataloader_options:
test_dataloader_opts:
batch_size: !ref <test_batch_size>

# Model parameters
activation: !name:torch.nn.LeakyReLU
# dnn_layers: 2

# # DER evaluation parameters
# ignore_overlap: True
# forgiveness_collar: 0.25

epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
limit: !ref <number_of_epochs>

augmentation: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
sample_rate: !ref <sample_rate>
speeds: [95, 100, 105]

input_norm: !new:speechbrain.processing.features.InputNormalization
norm_type: sentence
@@ -120,11 +112,6 @@ modules:
model: !new:torch.nn.ModuleList
- [!ref <output_mlp>]

model_opt_class: !name:torch.optim.Adadelta
lr: !ref <lr>
rho: 0.95
eps: 1.e-8

opt_class: !name:torch.optim.Adam
lr: !ref <lr>

@@ -157,8 +144,6 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
save_file: !ref <train_log>

error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats

error_stats: !name:speechbrain.utils.metric_stats.MetricStats
metric: !name:speechbrain.nnet.losses.classification_error
reduction: batch
22 changes: 16 additions & 6 deletions speechbrain/dataio/dataset.py
@@ -460,14 +460,24 @@ def set_output_keys(datasets, output_keys):
dataset.set_output_keys(output_keys)


def apply_overfit_test(hparams, dataset):
def apply_overfit_test(
overfit_test,
overfit_test_sample_count,
overfit_test_epoch_data_count,
dataset,
):
"""Applies the overfit test to the specified dataset,
as configured in the hyperparameters file
Arguments
---------
hparams: dict
the hyperparameters dictionary
overfit_test: bool
when True the overfitting test is performed
overfit_test_sample_count: int
number of samples for the overfitting test
overfit_test_epoch_data_count: int
number of data items per epoch for the overfitting test
dataset: DynamicItemDataset
the dataset
@@ -477,8 +487,8 @@ def apply_overfit_test(hparams, dataset):
dataset: DynamicItemDataset
the dataset, with the overfit test applied
"""
if hparams["overfit_test"]:
sample_count = hparams["overfit_test_sample_count"]
epoch_data_count = hparams["overfit_test_epoch_data_count"]
if overfit_test:
sample_count = overfit_test_sample_count
epoch_data_count = overfit_test_epoch_data_count
dataset = dataset.overfit_test(sample_count, epoch_data_count)
return dataset
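
For context, here is a minimal, self-contained sketch of the refactored helper in use; the toy dataset and the literal flag values are illustrative and not taken from any recipe. The behaviour is unchanged: only the call site now names each hyperparameter explicitly instead of receiving the whole hparams dictionary.

# Illustrative only: a tiny in-memory dataset standing in for a real recipe split.
from speechbrain.dataio.dataset import DynamicItemDataset, apply_overfit_test

data = {f"utt{i}": {"digit": i % 10} for i in range(20)}
train_data = DynamicItemDataset(data)

train_data = apply_overfit_test(
    True,  # overfit_test: enable the sanity check
    4,     # overfit_test_sample_count: repeat only the first 4 samples
    100,   # overfit_test_epoch_data_count: items the epoch should contain
    train_data,
)
print(len(train_data))  # ~100 repeated items instead of the original 20
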
2 changes: 1 addition & 1 deletion tests/consistency/test_yaml.py
@@ -22,6 +22,7 @@ def test_yaml_script_consistency(recipe_folder="tests/recipes"):

# Use this list to itemize special yaml for which we do not have to test
avoid_check = []
check = True

# Loop over all recipe CSVs
for recipe_csvfile in os.listdir(recipe_folder):
@@ -30,7 +31,6 @@ def test_yaml_script_consistency(recipe_folder="tests/recipes"):
with open(
os.path.join(recipe_folder, recipe_csvfile), newline=""
) as csvfile:
check = True
reader = csv.DictReader(
csvfile, delimiter=",", skipinitialspace=True
)
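The point of hoisting check = True above the loop is that a consistency failure found in one recipe CSV is no longer reset when the next file is opened; the flag can only move from True to False. A schematic sketch of that pattern follows; the validate() helper is a placeholder, not the real test code.

# Schematic only: validate() stands in for the real per-recipe checks,
# which read each CSV with csv.DictReader and compare yaml/script fields.
def validate(path: str) -> bool:
    return path.endswith(".csv")  # placeholder criterion

def check_all(files):
    check = True                  # initialised once, before the loop
    for path in files:
        if not validate(path):
            check = False         # a single failure now sticks
    return check

assert check_all(["ASR.csv", "TTS.csv"]) is True
assert check_all(["ASR.csv", "broken.txt"]) is False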
