speechbrain · Moumeneb1 · Dec 16, 2022 · Dec 16, 2022
diff --git a/recipes/LibriSpeech/ASR/transformer/train_with_whisper.py b/recipes/LibriSpeech/ASR/transformer/train_with_whisper.py
@@ -213,6 +213,9 @@ def audio_pipeline(wav):
         "wrd", "tokens_list", "tokens_bos", "tokens_eos", "tokens"
     )
     def text_pipeline(wrd):
+        # Carry over the Whisper text normalization for fine-tuning.
+        if hasattr(hparams, "normalized_transcripts"):
+            wrd =tokenizer._normalize(wrd)
         yield wrd
         tokens_list = tokenizer.encode(wrd)
         # avoid bos and eos tokens.