Commit

Merge branch 'speechbrain:develop' into RescueSpeech
sangeet2020 committed Sep 19, 2023
2 parents 284e347 + 7201866 commit fa25f82
Showing 13 changed files with 35 additions and 49 deletions.
1 change: 0 additions & 1 deletion recipes/AudioMNIST/diffusion/hparams/train.yaml
@@ -93,7 +93,6 @@ pad_level_db: -50.

# Model Parameters
model_channels: 128
model_norm_num_groups: 32
model_num_res_blocks: 4
diffusion_channels: 1

1 change: 0 additions & 1 deletion recipes/AudioMNIST/diffusion/hparams/train_latent.yaml
@@ -115,7 +115,6 @@ autoencoder_channels: 32
autoencoder_norm_num_groups: 32
autoencoder_num_res_blocks: 1
autoencoder_encoder_out_channels: 32
autoencoder_nom_num_groups: 32
autoencoder_latent_channels: 2
autoencoder_dropout: 0.1
latent_mask_value: -3.
7 changes: 6 additions & 1 deletion recipes/AudioMNIST/diffusion/train.py
@@ -1463,7 +1463,12 @@ def labels_pipeline(digit, speaker_id):

train_split = dataset_splits["train"]
data_count = None
train_split = apply_overfit_test(hparams, train_split)
train_split = apply_overfit_test(
hparams["overfit_test"],
hparams["overfit_test_sample_count"],
hparams["overfit_test_epoch_data_count"],
train_split,
)

if hparams["train_data_count"] is not None:
data_count = hparams["train_data_count"]
2 changes: 1 addition & 1 deletion recipes/BinauralWSJ0Mix/extra_requirements.txt
@@ -1,4 +1,4 @@
gitpython==3.1.34
gitpython==3.1.35
mir-eval==0.6
pyroomacoustics>=0.7.3

3 changes: 0 additions & 3 deletions recipes/LJSpeech/TTS/vocoder/diffwave/hparams/train.yaml
@@ -66,9 +66,6 @@ test_dataloader_opts:
batch_size: 1
num_workers: !ref <num_workers>

dataloader_options:
batch_size: !ref <batch_size>

use_tensorboard: False
tensorboard_logs: !ref <output_folder>/logs/

2 changes: 1 addition & 1 deletion recipes/LJSpeech/TTS/vocoder/diffwave/train.py
@@ -364,5 +364,5 @@ def check_tensorboard(hparams):
test_stats = diffusion_brain.evaluate(
test_set=datasets["test"],
min_key="error",
test_loader_kwargs=hparams["dataloader_options"],
test_loader_kwargs=hparams["test_dataloader_opts"],
)
@@ -29,7 +29,6 @@ data_folder: !PLACEHOLDER # e.g, /localscratch/LibriSpeech
# If RIRS_NOISES dir exists in /localscratch/xxx_corpus/RIRS_NOISES
# then data_folder_rirs should be /localscratch/xxx_corpus
# otherwise the dataset will automatically be downloaded
data_folder_rirs: !ref <data_folder>
train_splits: ["train-clean-100", "train-clean-360", "train-other-500"]
dev_splits: ["dev-clean"]
test_splits: ["test-clean", "test-other"]
@@ -158,6 +157,7 @@ speed_perturb: !new:speechbrain.processing.speech_augmentation.SpeedPerturb
speeds: [95, 100, 105]

# Uncomment if interested in env corruption
# data_folder_rirs: !ref <data_folder>
# env_corrupt: !new:speechbrain.lobes.augment.EnvCorrupt
# openrir_folder: !ref <data_folder_rirs>
# babble_prob: 0.0
13 changes: 0 additions & 13 deletions recipes/RescueSpeech/ASR/noise-robust/hparams/robust_asr_16k.yaml
@@ -44,9 +44,7 @@ avoid_if_longer_than: 10.0
## Model parameters- Enhance model
dereverberate: False
save_audio: True
resample: False
enhance_sample_rate: 16000
lr_enhance: 0.00015
limit_training_signal_len: False
training_signal_len: 64000
use_wavedrop: False
@@ -71,8 +69,6 @@ checkpoint_avg: 5
# Must be 6 per GPU to fit 16GB of VRAM
batch_size: 2
test_batch_size: 2
dataloader_num_workers: 4
test_num_workers: 4


# These values are only used for the searchers.
@@ -107,15 +103,6 @@ test_loader_kwargs:
sepformer_weight: 0.1
asr_weight: 1

# loss thresholding -- this thresholds the training loss
threshold_byloss: True
threshold: -30
clip_grad_norm: 5
loss_upper_lim: 999999 # this is the upper limit for an acceptable loss
optimizer: !name:torch.optim.Adam
lr: !ref <lr_enhance>
weight_decay: 0

# Functions and classes
speedperturb: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
perturb_prob: 1.0
11 changes: 8 additions & 3 deletions recipes/RescueSpeech/ASR/noise-robust/train.py
@@ -537,9 +537,14 @@ def save_results(self, test_data):
)

# Write enhanced wavs for sanity check
self.save_audio(
snt_id[0], batch.noisy_sig, clean, predictions[0], batch
)
if self.hparams.save_audio:
self.save_audio(
snt_id[0],
batch.noisy_sig,
clean,
predictions[0],
batch,
)

psq_mode = (
"wb"
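The enhanced-audio dump is now gated on the save_audio flag from the hyperparameters (set to True in robust_asr_16k.yaml above). Below is a simplified sketch of the same guard in isolation; the output folder, the 16 kHz rate, and the torchaudio writer are illustrative stand-ins for the recipe's own save_audio() method, not the actual implementation.

# Illustrative stand-in for the recipe's guarded call to self.save_audio().
import os
import torch
import torchaudio

def maybe_save_enhanced(save_audio: bool, snt_id: str, enhanced: torch.Tensor,
                        out_dir: str = "enhanced_wavs", sample_rate: int = 16000):
    if not save_audio:  # the new guard: skip disk I/O when the flag is off
        return
    os.makedirs(out_dir, exist_ok=True)
    # torchaudio expects (channels, time); the sketch assumes a 1-D waveform.
    torchaudio.save(os.path.join(out_dir, f"{snt_id}.wav"),
                    enhanced.unsqueeze(0).cpu(), sample_rate)
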
1 change: 0 additions & 1 deletion recipes/Voicebank/enhance/MetricGAN/hparams/train.yaml
@@ -12,7 +12,6 @@ seed: 4234
__set_seed: !!python/object/apply:torch.manual_seed [!ref <seed>]

data_folder: !PLACEHOLDER # e.g, /data/member1/user_jasonfu/noisy-vctk-16k
train_clean_folder: !ref <data_folder>/clean_trainset_28spk_wav_16k/

MetricGAN_folder: !ref <output_folder>/enhanced_wavs
output_folder: !ref ./results/MetricGAN/<seed>
@@ -34,10 +34,9 @@ test_annotation: !ref <output_folder>/test.json
number_of_epochs: 15
lr: 0.0001
lr_wav2vec: 0.00001
sorting: ascending
# auto_mix_prec: False
# do_resample: False
sample_rate: 16000
# sample_rate: 16000

# With data_parallel batch_size is split into N jobs
# With DDP batch_size is multiplied by N jobs
@@ -69,20 +68,13 @@ dataloader_options:
test_dataloader_opts:
batch_size: !ref <test_batch_size>

# Model parameters
activation: !name:torch.nn.LeakyReLU
# dnn_layers: 2

# # DER evaluation parameters
# ignore_overlap: True
# forgiveness_collar: 0.25

epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
limit: !ref <number_of_epochs>

augmentation: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
sample_rate: !ref <sample_rate>
speeds: [95, 100, 105]

input_norm: !new:speechbrain.processing.features.InputNormalization
norm_type: sentence
@@ -120,11 +112,6 @@ modules:
model: !new:torch.nn.ModuleList
- [!ref <output_mlp>]

model_opt_class: !name:torch.optim.Adadelta
lr: !ref <lr>
rho: 0.95
eps: 1.e-8

opt_class: !name:torch.optim.Adam
lr: !ref <lr>

@@ -157,8 +144,6 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
save_file: !ref <train_log>

error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats

error_stats: !name:speechbrain.utils.metric_stats.MetricStats
metric: !name:speechbrain.nnet.losses.classification_error
reduction: batch
22 changes: 16 additions & 6 deletions speechbrain/dataio/dataset.py
@@ -460,14 +460,24 @@ def set_output_keys(datasets, output_keys):
dataset.set_output_keys(output_keys)


def apply_overfit_test(hparams, dataset):
def apply_overfit_test(
overfit_test,
overfit_test_sample_count,
overfit_test_epoch_data_count,
dataset,
):
"""Applies the overfit test to the specified dataset,
as configured in the hyperparameters file
Arguments
---------
hparams: dict
the hyperparameters dictionary
overfit_test: bool
when True the overfitting test is performed
overfit_test_sample_count: int
number of samples for the overfitting test
overfit_test_epoch_data_count: int
number of data items per epoch for the overfitting test
dataset: DynamicItemDataset
the dataset
@@ -477,8 +487,8 @@ def apply_overfit_test(hparams, dataset):
dataset: DynamicItemDataset
the dataset, with the overfit test applied
"""
if hparams["overfit_test"]:
sample_count = hparams["overfit_test_sample_count"]
epoch_data_count = hparams["overfit_test_epoch_data_count"]
if overfit_test:
sample_count = overfit_test_sample_count
epoch_data_count = overfit_test_epoch_data_count
dataset = dataset.overfit_test(sample_count, epoch_data_count)
return dataset
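
For context, here is a minimal, self-contained sketch of the refactored helper in use; the toy dataset and the literal flag values are illustrative and not taken from any recipe. The behaviour is unchanged: only the call site now names each hyperparameter explicitly instead of receiving the whole hparams dictionary.

# Illustrative only: a tiny in-memory dataset standing in for a real recipe split.
from speechbrain.dataio.dataset import DynamicItemDataset, apply_overfit_test

data = {f"utt{i}": {"digit": i % 10} for i in range(20)}
train_data = DynamicItemDataset(data)

train_data = apply_overfit_test(
    True,  # overfit_test: enable the sanity check
    4,     # overfit_test_sample_count: repeat only the first 4 samples
    100,   # overfit_test_epoch_data_count: items the epoch should contain
    train_data,
)
print(len(train_data))  # ~100 repeated items instead of the original 20
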
2 changes: 1 addition & 1 deletion tests/consistency/test_yaml.py
@@ -22,6 +22,7 @@ def test_yaml_script_consistency(recipe_folder="tests/recipes"):

# Use this list to itemize special yaml for which we do not have to test
avoid_check = []
check = True

# Loop over all recipe CSVs
for recipe_csvfile in os.listdir(recipe_folder):
@@ -30,7 +31,6 @@ def test_yaml_script_consistency(recipe_folder="tests/recipes"):
with open(
os.path.join(recipe_folder, recipe_csvfile), newline=""
) as csvfile:
check = True
reader = csv.DictReader(
csvfile, delimiter=",", skipinitialspace=True
)
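The point of hoisting check = True above the loop is that a consistency failure found in one recipe CSV is no longer reset when the next file is opened; the flag can only move from True to False. A schematic sketch of that pattern follows; the validate() helper is a placeholder, not the real test code.

# Schematic only: validate() stands in for the real per-recipe checks,
# which read each CSV with csv.DictReader and compare yaml/script fields.
def validate(path: str) -> bool:
    return path.endswith(".csv")  # placeholder criterion

def check_all(files):
    check = True                  # initialised once, before the loop
    for path in files:
        if not validate(path):
            check = False         # a single failure now sticks
    return check

assert check_all(["ASR.csv", "TTS.csv"]) is True
assert check_all(["ASR.csv", "broken.txt"]) is False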
