Skip to content

Commit

Permalink
Revert "Revert "Fix dynamic batching" (#2173)"
Browse files Browse the repository at this point in the history
This reverts commit faa5e76.
  • Loading branch information
Adel-Moumen committed Sep 26, 2023
1 parent 8937a8f commit c719ece
Show file tree
Hide file tree
Showing 26 changed files with 131 additions and 197 deletions.
8 changes: 3 additions & 5 deletions recipes/AISHELL-1/ASR/CTC/hparams/train_with_wav2vec.yaml
Expand Up @@ -42,12 +42,10 @@ batch_size: 10
test_batch_size: 1

dynamic_batching: False

dynamic_batch_sampler:
feats_hop_size: 0.01
max_batch_len: 15 # in terms of "duration" in annotations by default, second here
left_bucket_len: 200 # old implementation attributes
multiplier: 1.1 # old implementation attributes
shuffle_ex: False # if true re-creates batches at each epoch shuffling examples.
max_batch_length: 15 # in terms of "duration" in annotations by default, second here
shuffle: False # if true re-creates batches at each epoch shuffling examples.
num_buckets: 10 # floor(log(max_batch_len/left_bucket_len, multiplier)) + 1
batch_ordering: ascending

Expand Down
15 changes: 2 additions & 13 deletions recipes/AISHELL-1/ASR/CTC/train_with_wav2vec.py
Expand Up @@ -293,24 +293,13 @@ def text_pipeline(wrd):
from speechbrain.dataio.sampler import DynamicBatchSampler # noqa

dynamic_hparams = hparams["dynamic_batch_sampler"]
num_buckets = dynamic_hparams["num_buckets"]

train_batch_sampler = DynamicBatchSampler(
train_data,
dynamic_hparams["max_batch_len"],
num_buckets=num_buckets,
length_func=lambda x: x["duration"],
shuffle=dynamic_hparams["shuffle_ex"],
batch_ordering=dynamic_hparams["batch_ordering"],
train_data, **dynamic_hparams, length_func=lambda x: x["duration"],
)

valid_batch_sampler = DynamicBatchSampler(
valid_data,
dynamic_hparams["max_batch_len"],
num_buckets=num_buckets,
length_func=lambda x: x["duration"],
shuffle=dynamic_hparams["shuffle_ex"],
batch_ordering=dynamic_hparams["batch_ordering"],
valid_data, **dynamic_hparams, length_func=lambda x: x["duration"],
)

return (
Expand Down
7 changes: 2 additions & 5 deletions recipes/AISHELL-1/ASR/seq2seq/hparams/train.yaml
Expand Up @@ -37,11 +37,8 @@ sorting: ascending

dynamic_batching: True
dynamic_batch_sampler:
feats_hop_size: 0.01
max_batch_len: 15 # in terms of "duration" in annotations by default, second here
left_bucket_len: 200 # old implementation attributes
multiplier: 1.1 # old implementation attributes
shuffle_ex: False # if true re-creates batches at each epoch shuffling examples.
max_batch_length: 15 # in terms of "duration" in annotations by default, second here
shuffle: False # if true re-creates batches at each epoch shuffling examples.
num_buckets: 10 # floor(log(max_batch_len/left_bucket_len, multiplier)) + 1
batch_ordering: ascending

Expand Down
15 changes: 2 additions & 13 deletions recipes/AISHELL-1/ASR/seq2seq/train.py
Expand Up @@ -246,24 +246,13 @@ def text_pipeline(wrd):
from speechbrain.dataio.sampler import DynamicBatchSampler # noqa

dynamic_hparams = hparams["dynamic_batch_sampler"]
num_buckets = dynamic_hparams["num_buckets"]

train_batch_sampler = DynamicBatchSampler(
train_data,
dynamic_hparams["max_batch_len"],
num_buckets=num_buckets,
length_func=lambda x: x["duration"],
shuffle=dynamic_hparams["shuffle_ex"],
batch_ordering=dynamic_hparams["batch_ordering"],
train_data, **dynamic_hparams, length_func=lambda x: x["duration"],
)

valid_batch_sampler = DynamicBatchSampler(
valid_data,
dynamic_hparams["max_batch_len"],
num_buckets=num_buckets,
length_func=lambda x: x["duration"],
shuffle=dynamic_hparams["shuffle_ex"],
batch_ordering=dynamic_hparams["batch_ordering"],
valid_data, **dynamic_hparams, length_func=lambda x: x["duration"],
)

return (
Expand Down
Expand Up @@ -36,11 +36,8 @@ sorting: random

dynamic_batching: False
dynamic_batch_sampler:
feats_hop_size: 0.01
max_batch_len: 15 # in terms of "duration" in annotations by default, second here
left_bucket_len: 200 # old implementation attributes
multiplier: 1.1 # old implementation attributes
shuffle_ex: False # if true re-creates batches at each epoch shuffling examples.
max_batch_length: 15 # in terms of "duration" in annotations by default, second here
shuffle: False # if true re-creates batches at each epoch shuffling examples.
num_buckets: 10 # floor(log(max_batch_len/left_bucket_len, multiplier)) + 1
batch_ordering: ascending

Expand Down
Expand Up @@ -41,11 +41,8 @@ ctc_weight: 0.3

dynamic_batching: False
dynamic_batch_sampler:
feats_hop_size: 0.01
max_batch_len: 15 # in terms of "duration" in annotations by default, second here
left_bucket_len: 200 # old implementation attributes
multiplier: 1.1 # old implementation attributes
shuffle_ex: False # if true re-creates batches at each epoch shuffling examples.
max_batch_length: 15 # in terms of "duration" in annotations by default, second here
shuffle: False # if true re-creates batches at each epoch shuffling examples.
num_buckets: 10 # floor(log(max_batch_len/left_bucket_len, multiplier)) + 1
batch_ordering: ascending

Expand Down
15 changes: 2 additions & 13 deletions recipes/AISHELL-1/ASR/transformer/train.py
Expand Up @@ -362,24 +362,13 @@ def text_pipeline(wrd):
from speechbrain.dataio.sampler import DynamicBatchSampler # noqa

dynamic_hparams = hparams["dynamic_batch_sampler"]
num_buckets = dynamic_hparams["num_buckets"]

train_batch_sampler = DynamicBatchSampler(
train_data,
dynamic_hparams["max_batch_len"],
num_buckets=num_buckets,
length_func=lambda x: x["duration"],
shuffle=dynamic_hparams["shuffle_ex"],
batch_ordering=dynamic_hparams["batch_ordering"],
train_data, **dynamic_hparams, length_func=lambda x: x["duration"],
)

valid_batch_sampler = DynamicBatchSampler(
valid_data,
dynamic_hparams["max_batch_len"],
num_buckets=num_buckets,
length_func=lambda x: x["duration"],
shuffle=dynamic_hparams["shuffle_ex"],
batch_ordering=dynamic_hparams["batch_ordering"],
valid_data, **dynamic_hparams, length_func=lambda x: x["duration"],
)

return (
Expand Down
15 changes: 2 additions & 13 deletions recipes/AISHELL-1/ASR/transformer/train_with_wav2vect.py
Expand Up @@ -380,24 +380,13 @@ def text_pipeline(wrd):
from speechbrain.dataio.sampler import DynamicBatchSampler # noqa

dynamic_hparams = hparams["dynamic_batch_sampler"]
num_buckets = dynamic_hparams["num_buckets"]

train_batch_sampler = DynamicBatchSampler(
train_data,
dynamic_hparams["max_batch_len"],
num_buckets=num_buckets,
length_func=lambda x: x["duration"],
shuffle=dynamic_hparams["shuffle_ex"],
batch_ordering=dynamic_hparams["batch_ordering"],
train_data, **dynamic_hparams, length_func=lambda x: x["duration"],
)

valid_batch_sampler = DynamicBatchSampler(
valid_data,
dynamic_hparams["max_batch_len"],
num_buckets=num_buckets,
length_func=lambda x: x["duration"],
shuffle=dynamic_hparams["shuffle_ex"],
batch_ordering=dynamic_hparams["batch_ordering"],
valid_data, **dynamic_hparams, length_func=lambda x: x["duration"],
)

return (
Expand Down
Expand Up @@ -77,12 +77,12 @@ test_dataloader_options:
# This setup will work with 32GB GPUs.
# Dynamic Batching parameters, if used are:
dynamic_batching: False
dyn_batch_len: 120 # Cumulative length of each batch, per gpu.
max_batch_length: 120 # Cumulative length of each batch, per gpu.
max_batch_size: 64 # Max number of samples per batch, per gpu.
dynamic_batch_sampler:
max_batch_len: !ref <dyn_batch_len>
max_batch_length: !ref <max_batch_length>
max_batch_ex: !ref <max_batch_size>
shuffle_ex: True
shuffle: True
batch_ordering: !ref <sorting>
num_buckets: 30

Expand Down
@@ -1,13 +1,4 @@
#!/usr/bin/env python3

import sys
import torch
import logging
import speechbrain as sb
import torchaudio
from hyperpyyaml import load_hyperpyyaml
from speechbrain.utils.distributed import run_on_main

"""Recipe for pretraining a wav2vec 2.0 model on CommonVoice EN. Note that it can be
trained with ANY dataset as long as you provide the correct JSON or CSV file.
Expand Down Expand Up @@ -35,6 +26,13 @@
* Titouan Parcollet 2021
* Yan Gao 2021
"""
import sys
import torch
import logging
import speechbrain as sb
import torchaudio
from hyperpyyaml import load_hyperpyyaml
from speechbrain.utils.distributed import run_on_main

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -259,24 +257,13 @@ def audio_pipeline(wav):
from speechbrain.dataio.sampler import DynamicBatchSampler # noqa

dynamic_hparams = hparams["dynamic_batch_sampler"]
num_buckets = dynamic_hparams["num_buckets"]

train_batch_sampler = DynamicBatchSampler(
train_data,
dynamic_hparams["max_batch_len"],
num_buckets=num_buckets,
length_func=lambda x: x["duration"],
shuffle=dynamic_hparams["shuffle_ex"],
batch_ordering=dynamic_hparams["batch_ordering"],
train_data, **dynamic_hparams, length_func=lambda x: x["duration"],
)

valid_batch_sampler = DynamicBatchSampler(
valid_data,
dynamic_hparams["max_batch_len"],
num_buckets=num_buckets,
length_func=lambda x: x["duration"],
shuffle=dynamic_hparams["shuffle_ex"],
batch_ordering=dynamic_hparams["batch_ordering"],
valid_data, **dynamic_hparams, length_func=lambda x: x["duration"],
)

return (
Expand Down
6 changes: 3 additions & 3 deletions recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_1000.yaml
Expand Up @@ -51,10 +51,10 @@ sorting: ascending
dynamic_batching: False

# dynamic batching parameters, if used
feats_hop_size: 0.01
dynamic_batch_sampler:
feats_hop_size: 0.01
max_batch_len: 20000 # in terms of frames
shuffle_ex: True
max_batch_length: 20000 # in terms of frames
shuffle: True
batch_ordering: random
num_buckets: 20

Expand Down
Expand Up @@ -51,10 +51,10 @@ sorting: ascending
dynamic_batching: False

# dynamic batching parameters, if used
feats_hop_size: 0.01
dynamic_batch_sampler:
feats_hop_size: 0.01
max_batch_len: 20000 # in terms of frames
shuffle_ex: True
max_batch_length: 20000 # in terms of frames
shuffle: True
batch_ordering: random
num_buckets: 20

Expand Down
6 changes: 3 additions & 3 deletions recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_5000.yaml
Expand Up @@ -51,10 +51,10 @@ sorting: ascending
dynamic_batching: False

# dynamic batching parameters, if used
feats_hop_size: 0.01
dynamic_batch_sampler:
feats_hop_size: 0.01
max_batch_len: 20000 # in terms of frames
shuffle_ex: True
max_batch_length: 20000 # in terms of frames
shuffle: True
batch_ordering: random
num_buckets: 20

Expand Down
14 changes: 3 additions & 11 deletions recipes/LibriSpeech/ASR/seq2seq/train.py
Expand Up @@ -286,26 +286,18 @@ def text_pipeline(wrd):
from speechbrain.dataio.batch import PaddedBatch # noqa

dynamic_hparams = hparams["dynamic_batch_sampler"]
hop_size = dynamic_hparams["feats_hop_size"]

num_buckets = dynamic_hparams["num_buckets"]
hop_size = hparams["feats_hop_size"]

train_batch_sampler = DynamicBatchSampler(
train_data,
dynamic_hparams["max_batch_len"],
num_buckets=num_buckets,
length_func=lambda x: x["duration"] * (1 / hop_size),
shuffle=dynamic_hparams["shuffle_ex"],
batch_ordering=dynamic_hparams["batch_ordering"],
**dynamic_hparams,
)

valid_batch_sampler = DynamicBatchSampler(
valid_data,
dynamic_hparams["max_batch_len"],
num_buckets=num_buckets,
length_func=lambda x: x["duration"] * (1 / hop_size),
shuffle=dynamic_hparams["shuffle_ex"],
batch_ordering=dynamic_hparams["batch_ordering"],
**dynamic_hparams,
)

return (
Expand Down
Expand Up @@ -69,15 +69,21 @@ win_length: 32
# This setup works well for an A100 80GB GPU; adapt it to your needs.
# Or turn it off (but training speed will decrease).
dynamic_batching: True
max_batch_len: 500
max_batch_len_val: 100 # we reduce it as the beam is much wider (VRAM)
max_batch_length_train: 500
max_batch_length_val: 100 # we reduce it as the beam is much wider (VRAM)
num_bucket: 200

dynamic_batch_sampler:
max_batch_len: !ref <max_batch_len>
max_batch_len_val: !ref <max_batch_len_val>
dynamic_batch_sampler_train:
max_batch_length: !ref <max_batch_length_train>
num_buckets: !ref <num_bucket>
shuffle_ex: True # if true re-creates batches at each epoch shuffling examples.
shuffle: True # if true re-creates batches at each epoch shuffling examples.
batch_ordering: random
max_batch_ex: 128

dynamic_batch_sampler_valid:
max_batch_length: !ref <max_batch_length_val>
num_buckets: !ref <num_bucket>
shuffle: True # if true re-creates batches at each epoch shuffling examples.
batch_ordering: random
max_batch_ex: 128

Expand Down
18 changes: 12 additions & 6 deletions recipes/LibriSpeech/ASR/transformer/hparams/conformer_large.yaml
Expand Up @@ -67,15 +67,21 @@ win_length: 32
# This setup works well for an A100 80GB GPU; adapt it to your needs.
# Or turn it off (but training speed will decrease).
dynamic_batching: True
max_batch_len: 500
max_batch_len_val: 100 # we reduce it as the beam is much wider (VRAM)
max_batch_length_train: 500
max_batch_length_val: 100 # we reduce it as the beam is much wider (VRAM)
num_bucket: 200

dynamic_batch_sampler:
max_batch_len: !ref <max_batch_len>
max_batch_len_val: !ref <max_batch_len_val>
dynamic_batch_sampler_train:
max_batch_length: !ref <max_batch_length_train>
num_buckets: !ref <num_bucket>
shuffle_ex: True # if true re-creates batches at each epoch shuffling examples.
shuffle: True # if true re-creates batches at each epoch shuffling examples.
batch_ordering: random
max_batch_ex: 256

dynamic_batch_sampler_valid:
max_batch_length: !ref <max_batch_length_val>
num_buckets: !ref <num_bucket>
shuffle: True # if true re-creates batches at each epoch shuffling examples.
batch_ordering: random
max_batch_ex: 256

Expand Down
18 changes: 12 additions & 6 deletions recipes/LibriSpeech/ASR/transformer/hparams/conformer_small.yaml
Expand Up @@ -67,15 +67,21 @@ n_mels: 80
# This setup works well for a V100 32GB GPU; adapt it to your needs.
# Or turn it off (but training speed will decrease).
dynamic_batching: True
max_batch_len: 900
max_batch_len_val: 100 # we reduce it as the beam is much wider (VRAM)
max_batch_length_train: 900
max_batch_length_val: 100 # we reduce it as the beam is much wider (VRAM)
num_bucket: 200

dynamic_batch_sampler:
max_batch_len: !ref <max_batch_len>
max_batch_len_val: !ref <max_batch_len_val>
dynamic_batch_sampler_train:
max_batch_length: !ref <max_batch_length_train>
num_buckets: !ref <num_bucket>
shuffle_ex: True # if true re-creates batches at each epoch shuffling examples.
shuffle: True # if true re-creates batches at each epoch shuffling examples.
batch_ordering: random
max_batch_ex: 128

dynamic_batch_sampler_valid:
max_batch_length: !ref <max_batch_length_val>
num_buckets: !ref <num_bucket>
shuffle: True # if true re-creates batches at each epoch shuffling examples.
batch_ordering: random
max_batch_ex: 128

Expand Down

0 comments on commit c719ece

Please sign in to comment.