Skip to content

Commit

Permalink
Fix blank index ctc (#2266)
Browse files Browse the repository at this point in the history
* update blank_index

* whisper

* revert change

* mistake
  • Loading branch information
Adel-Moumen committed Nov 24, 2023
1 parent 2520723 commit c5f83d0
Show file tree
Hide file tree
Showing 14 changed files with 40 additions and 34 deletions.
Expand Up @@ -80,6 +80,7 @@ eos_index: 2

# Decoding parameters
test_beam_search:
blank_index: !ref <blank_index>
beam_size: 100
beam_prune_logp: -12.0
token_prune_min_logp: -1.2
Expand Down
Expand Up @@ -78,6 +78,7 @@ eos_index: 2

# Decoding parameters
test_beam_search:
blank_index: !ref <blank_index>
beam_size: 100
beam_prune_logp: -12.0
token_prune_min_logp: -1.2
Expand Down
Expand Up @@ -78,6 +78,7 @@ eos_index: 2

# Decoding parameters
test_beam_search:
blank_index: !ref <blank_index>
beam_size: 100
beam_prune_logp: -12.0
token_prune_min_logp: -1.2
Expand Down
Expand Up @@ -79,6 +79,7 @@ eos_index: 2

# Decoding parameters
test_beam_search:
blank_index: !ref <blank_index>
beam_size: 100
beam_prune_logp: -12.0
token_prune_min_logp: -1.2
Expand Down
Expand Up @@ -79,6 +79,7 @@ eos_index: 2

# Decoding parameters
test_beam_search:
blank_index: !ref <blank_index>
beam_size: 100
beam_prune_logp: -12.0
token_prune_min_logp: -1.2
Expand Down
Expand Up @@ -71,12 +71,13 @@ freeze_wav2vec: True
# Outputs
ctc_neurons: 29
output_neurons: 29 # Characters size, index(blank/eos/bos) = 0
blank_index: 0

# Decoding parameters
test_beam_search:
beam_size: 200
topk: 1
blank_index: 0
blank_index: !ref <blank_index>
space_token: ' ' # make sure this is the same as the one used in the tokenizer
beam_prune_logp: -10.0
token_prune_min_logp: -5
Expand Down
Expand Up @@ -72,12 +72,13 @@ freeze_wav2vec: True
# Outputs
ctc_neurons: 29
output_neurons: 29 # Characters size, index(blank/eos/bos) = 0
blank_index: 0

# Decoding parameters
test_beam_search:
beam_size: 200
topk: 1
blank_index: 0
blank_index: !ref <blank_index>
space_token: ' ' # make sure this is the same as the one used in the tokenizer
beam_prune_logp: -10.0
token_prune_min_logp: -5
Expand Down
Expand Up @@ -70,12 +70,13 @@ freeze_wav2vec: True
# Outputs
ctc_neurons: 58 # Twice bigger than the number of characters for upsampling
output_neurons: 29 # Characters size, index(blank/eos/bos) = 0
blank_index: 0

# Decoding parameters
test_beam_search:
beam_size: 200
topk: 1
blank_index: 0
blank_index: !ref <blank_index>
space_token: ' ' # make sure this is the same as the one used in the tokenizer
beam_prune_logp: -10.0
token_prune_min_logp: -5
Expand Down
3 changes: 2 additions & 1 deletion recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec.yaml
Expand Up @@ -64,12 +64,13 @@ freeze_wav2vec: True

# Outputs
output_neurons: 29 # BPE size, index(blank/eos/bos) = 0
blank_index: 0

# Decoding parameters
test_beam_search:
beam_size: 143
topk: 1
blank_index: 0
blank_index: !ref <blank_index>
space_token: ' ' # make sure this is the same as the one used in the tokenizer
beam_prune_logp: -12.0
token_prune_min_logp: -1.2
Expand Down
Expand Up @@ -85,12 +85,14 @@ tokenizer: !new:sentencepiece.SentencePieceProcessor

# Decoding parameters
lm_weight: 0.5
blank_index: 0
# topk is the number of hypotheses that will be rescored in the rescorer
# lowering this value might degrade (i.e. raise) the WER, but will increase decoding speed.

test_beam_search:
beam_size: 20
topk: 20
blank_index: 0
blank_index: !ref <blank_index>
space_token: ' ' # make sure this is the same as the one used in the tokenizer
beam_prune_logp: -12.0
token_prune_min_logp: -12.0
Expand Down
Expand Up @@ -87,12 +87,13 @@ tokenizer: !new:sentencepiece.SentencePieceProcessor

# Decoding parameters
lm_weight: 0.5
blank_index: 0
# topk is the number of hypotheses that will be rescored in the rescorer
# lowering this value might degrade (i.e. raise) the WER, but will increase decoding speed.
test_beam_search:
beam_size: 20
topk: 20
blank_index: 0
blank_index: !ref <blank_index>
space_token: ' ' # make sure this is the same as the one used in the tokenizer
beam_prune_logp: -12.0
token_prune_min_logp: -12.0
Expand Down
30 changes: 15 additions & 15 deletions recipes/LibriSpeech/ASR/CTC/hparams/train_hf_whisper_encoder.yaml
Expand Up @@ -69,23 +69,23 @@ whisper_output_dim: 512

# Outputs
output_neurons: 29 # BPE size, index(blank/eos/bos) = 0

# Decoding parameters
test_searcher: !name:speechbrain.decoders.CTCBeamSearcher
blank_index: 0
space_token: ' ' # make sure this is the same as the one used in the tokenizer
beam_size: 143
beam_prune_logp: -12.0
token_prune_min_logp: -1.2
prune_history: True
topk: 1
alpha: 0.8
beta: 1.2
# can be downloaded from here https://www.openslr.org/11/ or trained with kenLM
# It can be either a .bin or an .arpa file; note: .arpa is much slower to load
# If you don't want to use an LM, comment it out or set it to null
kenlm_model_path: null

# Decoding parameters
test_beam_search:
beam_size: 143
topk: 1
blank_index: !ref <blank_index>
space_token: ' ' # make sure this is the same as the one used in the tokenizer
beam_prune_logp: -12.0
token_prune_min_logp: -1.2
prune_history: True
alpha: 0.8
beta: 1.2
# can be downloaded from here https://www.openslr.org/11/ or trained with kenLM
# It can be either a .bin or an .arpa file; note: .arpa is much slower to load
# If you don't want to use an LM, comment it out or set it to null
kenlm_model_path: null
#
# Functions and classes
#
Expand Down
3 changes: 2 additions & 1 deletion recipes/LibriSpeech/ASR/CTC/hparams/train_sb_wav2vec.yaml
Expand Up @@ -66,12 +66,13 @@ freeze_wav2vec: False

# Outputs
output_neurons: 29 # BPE size, index(blank/eos/bos) = 0
blank_index: 0

# Decoding parameters
test_beam_search:
beam_size: 200
topk: 1
blank_index: 0
blank_index: !ref <blank_index>
space_token: ' ' # make sure this is the same as the one used in the tokenizer
beam_prune_logp: -10.0
token_prune_min_logp: -5.0
Expand Down
17 changes: 5 additions & 12 deletions recipes/LibriSpeech/ASR/CTC/train_with_whisper.py
Expand Up @@ -340,18 +340,11 @@ def text_pipeline(wrd):
vocab_list = [
tokenizer.sp.id_to_piece(i) for i in range(tokenizer.sp.vocab_size())
]
test_searcher = hparams["test_searcher"](
blank_index=hparams["blank_index"],
vocab_list=vocab_list,
space_token=hparams["space_token"],
alpha=hparams["alpha"],
beta=hparams["beta"],
beam_size=hparams["beam_size"],
beam_prune_logp=hparams["beam_prune_logp"],
token_prune_min_logp=hparams["token_prune_min_logp"],
prune_history=hparams["prune_history"],
topk=hparams["topk"],
kenlm_model_path=hparams.get("kenlm_model_path"),

from speechbrain.decoders.ctc import CTCBeamSearcher

test_searcher = CTCBeamSearcher(
**hparams["test_beam_search"], vocab_list=vocab_list,
)

# Training
Expand Down

0 comments on commit c5f83d0

Please sign in to comment.