diff --git a/egs/ami/s5b/conf/mfcc_hires80.conf b/egs/ami/s5b/conf/mfcc_hires80.conf
new file mode 100644
index 00000000000..5fb03de59c4
--- /dev/null
+++ b/egs/ami/s5b/conf/mfcc_hires80.conf
@@ -0,0 +1,10 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated), which is why
+# we prefer this method.
+--use-energy=false   # use average of log energy, not energy.
+--num-mel-bins=80    # similar to Google's setup.
+--num-ceps=80        # there is no dimensionality reduction.
+--low-freq=20        # low cutoff frequency for mel bins... this is high-bandwidth data, so
+                     # there might be some information at the low end.
+--high-freq=-400     # high cutoff frequency, relative to the Nyquist of 8000 (= 7600)
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1j.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1j.sh
index 3ff6162f9be..80b2aee60e9 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1j.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1j.sh
@@ -5,17 +5,17 @@
 # local/chain/tuning/run_tdnn_1j.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned

-# local/chain/compare_wer_general.sh sdm1 tdnn1h_sp_bi_ihmali tdnn1i_sp_bi_ihmali
+# local/chain/compare_wer_general.sh sdm1 tdnn1i_sp_bi_ihmali tdnn1j_sp_bi_ihmali
 # System                       tdnn1i_sp_bi_ihmali tdnn1i_sp_bi_ihmali
-# WER on dev                   36.6       32.8
-# WER on eval                  40.6       36.3
-# Final train prob             -0.196231  -0.131658
-# Final valid prob             -0.265572  -0.216094
-# Final train prob (xent)      -2.48061   -1.53325
-# Final valid prob (xent)      -2.71794   -1.96188
+# WER on dev                   36.6       31.7
+# WER on eval                  40.6       35.1
+# Final train prob             -0.196231  -0.114088
+# Final valid prob             -0.265572  -0.214282
+# Final train prob (xent)      -2.48061   -1.37987
+# Final valid prob (xent)      -2.71794   -1.8639

 # steps/info/chain_dir_info.pl exp/sdm1/chain_cleaned/tdnn1j_sp_bi_ihmali
-# exp/sdm1/chain_cleaned/tdnn1j_sp_bi_ihmali: num-iters=196 nj=2..12 num-params=17.7M dim=80+100->3728 combine=-0.145->-0.143 (over 5) xent:train/valid[129,195,final]=(-1.81,-1.56,-1.53/-2.13,-2.02,-1.96) logprob:train/valid[129,195,final]=(-0.164,-0.136,-0.132/-0.226,-0.222,-0.216)
+# exp/sdm1/chain_cleaned/tdnn1j_sp_bi_ihmali: num-iters=327 nj=2..12 num-params=34.3M dim=80+100->3728 combine=-0.126->-0.124 (over 4) xent:train/valid[217,326,final]=(-1.69,-1.43,-1.38/-2.06,-1.93,-1.86) logprob:train/valid[217,326,final]=(-0.143,-0.120,-0.114/-0.226,-0.218,-0.214)

 set -e -o pipefail

 # First the options that are passed through to run_ivector_common.sh
@@ -31,7 +31,7 @@ ihm_gmm=tri3  # the gmm for the IHM system (if --use-ihm-ali true).
 num_threads_ubm=32
 ivector_transform_type=pca
 nnet3_affix=_cleaned  # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
-num_epochs=9
+num_epochs=15
 remove_egs=true

 # The rest are configs specific to this script.  Most of the parameters
@@ -40,7 +40,7 @@ train_stage=-10
 tree_affix=  # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
 tdnn_affix=1j  #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration.
 common_egs_dir=  # you can set this to use previously dumped egs.
-
+dropout_schedule='0,0@0.20,0.5@0.50,0'
 # End configuration section.
 echo "$0 $@"  # Print the command line for logging

@@ -61,6 +61,7 @@ fi
 local/nnet3/run_ivector_common.sh --stage $stage \
                                   --mic $mic \
                                   --nj $nj \
+                                  --hires_suffix 80 \
                                   --min-seg-len $min_seg_len \
                                   --train-set $train_set \
                                   --gmm $gmm \
@@ -171,8 +172,11 @@ if [ $stage -le 15 ]; then
   num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
   learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python)

-  opts="l2-regularize=0.02"
-  output_opts="l2-regularize=0.004"
+  affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
+  tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
+  linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
+  prefinal_opts="l2-regularize=0.01"
+  output_opts="l2-regularize=0.002"

   mkdir -p $dir/configs
   cat <<EOF > $dir/configs/network.xconfig
@@ -185,25 +189,27 @@
   fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat

   # the first splicing is moved before the lda layer, so no splicing here
-  relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1536
-  tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
-  tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
-  tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
-  tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=0
-  tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
-  tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
-  tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
-  tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
-  tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
-  tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
-  tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
-  tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
-  tdnnf-layer name=tdnnf14 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
-  tdnnf-layer name=tdnnf15 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
-  linear-component name=prefinal-l dim=256 $linear_opts
-  prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
+  relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=2136
+  tdnnf-layer name=tdnnf2 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=1
+  tdnnf-layer name=tdnnf3 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=1
+  tdnnf-layer name=tdnnf4 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=1
+  tdnnf-layer name=tdnnf5 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=0
+  tdnnf-layer name=tdnnf6 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
+  tdnnf-layer name=tdnnf7 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
+  tdnnf-layer name=tdnnf8 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
+  tdnnf-layer name=tdnnf9 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
+  tdnnf-layer name=tdnnf10 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
+  tdnnf-layer name=tdnnf11 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
+  tdnnf-layer name=tdnnf12 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
+  tdnnf-layer name=tdnnf13 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
+  tdnnf-layer name=tdnnf14 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
+  tdnnf-layer name=tdnnf15 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
+  linear-component name=prefinal-l dim=512 $linear_opts
+
+  prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=2136 small-dim=512
   output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
-  prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
+
+  prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=2136 small-dim=512
   output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
 EOF
@@ -226,10 +232,11 @@ if [ $stage -le 16 ]; then
     --chain.l2-regularize 0.00005 \
     --chain.apply-deriv-weights false \
     --chain.lm-opts="--num-extra-lm-states=2000" \
+    --trainer.dropout-schedule $dropout_schedule \
     --egs.dir "$common_egs_dir" \
     --egs.opts "--frames-overlap-per-eg 0" \
     --egs.chunk-width 150 \
-    --trainer.num-chunk-per-minibatch 128 \
+    --trainer.num-chunk-per-minibatch 32 \
     --trainer.frames-per-iter 1500000 \
     --trainer.num-epochs $num_epochs \
     --trainer.optimization.num-jobs-initial 2 \
@@ -238,6 +245,7 @@
     --trainer.optimization.final-effective-lrate 0.0001 \
     --trainer.max-param-change 2.0 \
     --cleanup.remove-egs $remove_egs \
+    --cleanup.preserve-model-interval 50 \
     --feat-dir $train_data_dir \
     --tree-dir $tree_dir \
     --lat-dir $lat_dir \
diff --git a/egs/ami/s5b/local/nnet3/run_ivector_common.sh b/egs/ami/s5b/local/nnet3/run_ivector_common.sh
index 4317f375769..e67d1039c40 100755
--- a/egs/ami/s5b/local/nnet3/run_ivector_common.sh
+++ b/egs/ami/s5b/local/nnet3/run_ivector_common.sh
@@ -21,7 +21,7 @@ num_threads_ubm=32
 ivector_transform_type=lda
 nnet3_affix=_cleaned  # affix for exp/$mic/nnet3 directory to put iVector stuff in, so it
                       # becomes exp/$mic/nnet3_cleaned or whatever.
-
+hires_suffix=
 . ./cmd.sh
 . ./path.sh
 . ./utils/parse_options.sh
@@ -72,7 +72,7 @@ if [ $stage -le 2 ]; then
   utils/data/perturb_data_dir_volume.sh data/$mic/${train_set}_sp_hires

   for datadir in ${train_set}_sp dev eval; do
-    steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \
+    steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires$hires_suffix.conf \
       --cmd "$train_cmd" data/$mic/${datadir}_hires
     steps/compute_cmvn_stats.sh data/$mic/${datadir}_hires
     utils/fix_data_dir.sh data/$mic/${datadir}_hires
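
A note on the dropout_schedule value introduced above: Kaldi's nnet3 training scripts read a --trainer.dropout-schedule string such as '0,0@0.20,0.5@0.50,0' as a list of dropout proportions at fractions of training progress, interpolated linearly in between, so here dropout stays at 0 for the first 20% of training, ramps up to 0.5 by the halfway point, and decays back to 0 by the end. The sketch below only illustrates that piecewise-linear reading; it is not the parsing code Kaldi itself uses, and the helper name dropout_at is invented for the example.

#!/usr/bin/env python3
# Illustrative sketch only: interprets a Kaldi-style dropout-schedule string
# as a piecewise-linear function of training progress (a fraction in [0, 1]).
# NOT the implementation Kaldi uses internally; dropout_at is a made-up helper.

def dropout_at(schedule, progress):
    """Return the dropout proportion at the given training progress (0.0 to 1.0)."""
    entries = schedule.split(',')
    points = []
    for i, entry in enumerate(entries):
        if '@' in entry:
            value, frac = entry.split('@')
        else:
            # entries without '@' are assumed to sit at the start or end of training
            value, frac = entry, '0.0' if i == 0 else '1.0'
        points.append((float(frac), float(value)))
    points.sort()
    for (f0, v0), (f1, v1) in zip(points, points[1:]):
        if f0 <= progress <= f1:
            # linear interpolation between the two surrounding schedule points
            return v0 if f1 == f0 else v0 + (v1 - v0) * (progress - f0) / (f1 - f0)
    return points[-1][1]

if __name__ == '__main__':
    sched = '0,0@0.20,0.5@0.50,0'
    for p in (0.0, 0.20, 0.35, 0.50, 0.75, 1.0):
        print(p, dropout_at(sched, p))   # 0.0, 0.0, 0.25, 0.5, 0.25, 0.0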