From a66413401b87a1379a29e6c7e0abbe8c72a90a71 Mon Sep 17 00:00:00 2001
From: priyakasimbeg
Date: Thu, 5 Oct 2023 12:56:04 -0700
Subject: [PATCH] Change padding for Deepspeech LSTM layer

Remove the global_batch_size arg from the call to shard_and_maybe_pad_np.
As a result, the final batch of the librispeech validation and test sets is
padded just enough to be split equally amongst the devices, so we will not
have device batches containing only padding.

Workaround for https://github.com/mlcommons/algorithmic-efficiency/issues/523.
---
 .../workloads/librispeech_conformer/librispeech_jax/workload.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py
index 45d77ede4..bc7eae3b8 100644
--- a/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py
+++ b/algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py
@@ -144,7 +144,7 @@ def _build_input_queue(
       }
 
       padded_batch = data_utils.shard_and_maybe_pad_np(
-          numpy_batch, padding_value=1.0, global_batch_size=global_batch_size)
+          numpy_batch, padding_value=1.0)
       yield padded_batch
 
   # Does NOT apply regularization, which is left to the submitter to do in
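
For context, the sketch below illustrates the padding behavior this change relies on. It uses a hypothetical pad_and_shard helper with plain NumPy, not the actual data_utils.shard_and_maybe_pad_np implementation; the device count and batch sizes are illustrative only.

import numpy as np

def pad_and_shard(batch, num_devices, padding_value=1.0, global_batch_size=None):
  # Simplified sketch (not the real data_utils code): pad the leading axis and
  # reshape so the first axis indexes devices.
  n = batch.shape[0]
  if global_batch_size is not None:
    # Old behavior: pad all the way up to the global batch size.
    target = global_batch_size
  else:
    # New behavior: pad only to the next multiple of the device count.
    target = int(np.ceil(n / num_devices)) * num_devices
  pad_amount = target - n
  if pad_amount > 0:
    pad_widths = [(0, pad_amount)] + [(0, 0)] * (batch.ndim - 1)
    batch = np.pad(batch, pad_widths, constant_values=padding_value)
  # Result shape: [num_devices, per_device_batch, ...].
  return batch.reshape((num_devices, -1) + batch.shape[1:])

# Final partial eval batch: 40 real examples, 8 devices, global batch size 256.
final_batch = np.zeros((40, 5))
with_gbs = pad_and_shard(final_batch, num_devices=8, global_batch_size=256)
without_gbs = pad_and_shard(final_batch, num_devices=8)
print(with_gbs.shape)     # (8, 32, 5) -> some device batches are pure padding
print(without_gbs.shape)  # (8, 5, 5)  -> every device batch has real examples

Under the old call, six of the eight device batches in this example would contain nothing but padding; dropping global_batch_size keeps real examples on every device, which is the workaround described above.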