[src,scripts,egs] online-cmvn for online2 with chain models, (#3560)

* Add OnlineCMVN to Online NNET2 pipeline. This is used in some models (e.g. CVTE). This is optional and off by default. It applies CMVN before applying pitch. This code is essentially copied out of "online2/online-feature-pipeline.cc/h". Patch set provided by Levi Barnes. * online2: bugfix of config script, include ivector_period into the iextractor config file. * online-cmvn in online-nnet2-feature-pipeline, - update the C++ code from @luitjens, - introduced `OnlineFeatureInterface *nnet3_feature_` to explicitly mark features that are passed to the nnet3 model - added the transfer of OnlineCmvnState across utterances from the same speaker - update the 'prepare_online_decoding.sh' to support online-cmvn - enabled OnlineCmvnStats transfer in training/decoding * OnlineNnet2FeaturePipeline, removing unused constructor, updating results
kaldi-asr · Sep 7, 2019 · 09abdda · 09abdda
1 parent 7c8e66e
commit 09abdda
Show file tree

Hide file tree

Showing 17 changed files with 380 additions and 171 deletions.
diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1k.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1k.sh
@@ -5,20 +5,21 @@
 
 # local/chain/compare_wer.sh --online exp/chain/tdnn1j_sp exp/chain_online_cmn/tdnn1k_sp
 # System                tdnn1j_sp tdnn1k_sp
-#WER dev_clean_2 (tgsmall)      11.25     10.99
-#WER dev_clean_2 (tglarge)       7.72      7.54
-# Final train prob        -0.0632   -0.0623
-# Final valid prob        -0.0792   -0.0803
-# Final train prob (xent)   -1.4499   -1.4396
-# Final valid prob (xent)   -1.5643   -1.5628
+#WER dev_clean_2 (tgsmall)      10.97     10.64
+#             [online:]         10.97     10.62
+#WER dev_clean_2 (tglarge)       7.57      7.17
+#             [online:]          7.65      7.16
+# Final train prob        -0.0623   -0.0618
+# Final valid prob        -0.0793   -0.0793
+# Final train prob (xent)   -1.4448   -1.4376
+# Final valid prob (xent)   -1.5605   -1.5461
 # Num-params                 5210944   5210944
 
-
 # steps/info/chain_dir_info.pl exp/chain/tdnn1j_sp
-# exp/chain/tdnn1j_sp: num-iters=34 nj=2..5 num-params=5.2M dim=40+100->2336 combine=-0.069->-0.064 (over 4) xent:train/valid[21,33,final]=(-1.66,-1.48,-1.45/-1.78,-1.59,-1.56) logprob:train/valid[21,33,final]=(-0.075,-0.069,-0.063/-0.093,-0.085,-0.079)
+# exp/chain/tdnn1j_sp: num-iters=34 nj=2..5 num-params=5.2M dim=40+100->2336 combine=-0.068->-0.064 (over 4) xent:train/valid[21,33,final]=(-1.65,-1.48,-1.44/-1.77,-1.58,-1.56) logprob:train/valid[21,33,final]=(-0.076,-0.068,-0.062/-0.091,-0.084,-0.079)
 
 # steps/info/chain_dir_info.pl exp/chain_online_cmn/tdnn1k_sp
-# exp/chain_online_cmn/tdnn1k_sp: num-iters=34 nj=2..5 num-params=5.2M dim=40+100->2336 combine=-0.067->-0.062 (over 5) xent:train/valid[21,33,final]=(-1.64,-1.46,-1.44/-1.75,-1.58,-1.56) logprob:train/valid[21,33,final]=(-0.075,-0.068,-0.062/-0.093,-0.085,-0.080)
+# exp/chain_online_cmn/tdnn1k_sp: num-iters=34 nj=2..5 num-params=5.2M dim=40+100->2336 combine=-0.067->-0.062 (over 5) xent:train/valid[21,33,final]=(-1.63,-1.47,-1.44/-1.73,-1.57,-1.55) logprob:train/valid[21,33,final]=(-0.074,-0.067,-0.062/-0.093,-0.085,-0.079)
 
 # Set -e here so that we catch if any executable fails immediately
 set -euo pipefail
@@ -60,7 +61,7 @@ remove_egs=true
 reporting_email=
 
 #decode options
-test_online_decoding=false  # if true, it will run the last decoding stage.
+test_online_decoding=true  # if true, it will run the last decoding stage.
 
 
 # End configuration section.
@@ -284,6 +285,7 @@ if $test_online_decoding && [ $stage -le 17 ]; then
   # change the options of the following command line.
   steps/online/nnet3/prepare_online_decoding.sh \
     --mfcc-config conf/mfcc_hires.conf \
+    --online-cmvn-config conf/online_cmvn.conf \
     $lang exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online
 
   rm $dir/.error 2>/dev/null || true
@@ -299,7 +301,7 @@ if $test_online_decoding && [ $stage -le 17 ]; then
         $tree_dir/graph_tgsmall data/${data} ${dir}_online/decode_tgsmall_${data} || exit 1
       steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
         data/lang_test_{tgsmall,tglarge} \
-       data/${data}_hires ${dir}_online/decode_{tgsmall,tglarge}_${data} || exit 1
+        data/${data}_hires ${dir}_online/decode_{tgsmall,tglarge}_${data} || exit 1
     ) || touch $dir/.error &
   done
   wait

diff --git a/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1d.sh b/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1d.sh
@@ -5,20 +5,21 @@
 
 # local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn1c_sp exp/chain_cleaned_1d/tdnn1d_sp
 # System                tdnn1c_sp tdnn1d_sp
-# WER on dev(orig)           8.32      8.22
-# WER on dev(rescored)       7.63      7.60
-# WER on test(orig)          8.44      8.45
-# WER on test(rescored)      7.84      7.73
-# Final train prob        -0.0688   -0.0677
-# Final valid prob        -0.0826   -0.0847
-# Final train prob (xent)   -0.9842   -0.9814
-# Final valid prob (xent)   -1.0976   -1.0930
+# WER on dev(orig)           8.32      8.50
+# WER on dev(rescored)       7.63      7.91
+# WER on test(orig)          8.44      8.39
+# WER on test(rescored)      7.84      7.88
+# Final train prob        -0.0688   -0.0698
+# Final valid prob        -0.0826   -0.0850
+# Final train prob (xent)   -0.9842   -0.9898
+# Final valid prob (xent)   -1.0976   -1.1018
 # Num-params                 9476304   9476304
 
 # steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn1c_sp
 # exp/chain_cleaned/tdnn1c_sp: num-iters=228 nj=3..12 num-params=9.5M dim=40+100->3688 combine=-0.070->-0.070 (over 5) xent:train/valid[151,227,final]=(-1.19,-0.993,-0.984/-1.28,-1.10,-1.10) logprob:train/valid[151,227,final]=(-0.090,-0.070,-0.069/-0.107,-0.083,-0.083)
+
 # steps/info/chain_dir_info.pl exp/chain_cleaned_1d/tdnn1d_sp
-# exp/chain_cleaned_1d/tdnn1d_sp: num-iters=228 nj=3..12 num-params=9.5M dim=40+100->3688 combine=-0.070->-0.069 (over 4) xent:train/valid[151,227,final]=(-1.18,-0.986,-0.981/-1.28,-1.10,-1.09) logprob:train/valid[151,227,final]=(-0.088,-0.069,-0.068/-0.109,-0.086,-0.085)
+# exp/chain_cleaned_1d/tdnn1d_sp: num-iters=228 nj=3..12 num-params=9.5M dim=40+100->3688 combine=-0.072->-0.072 (over 5) xent:train/valid[151,227,final]=(-1.19,-0.997,-0.990/-1.29,-1.11,-1.10) logprob:train/valid[151,227,final]=(-0.090,-0.071,-0.070/-0.110,-0.085,-0.085)
 
 ## how you run this (note: this assumes that the run_tdnn.sh soft link points here;
 ## otherwise call it directly in its location).

diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1i.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1i.sh
@@ -5,23 +5,22 @@
 
 # local/chain/compare_wer.sh exp/chain/tdnn1h_sp exp/chain_online_cmn/tdnn1i_sp
 # System                tdnn1h_sp tdnn1i_sp
-#WER dev93 (tgpr)                6.89      6.66
-#WER dev93 (tg)                  6.63      6.56
-#WER dev93 (big-dict,tgpr)       4.96      4.74
+#WER dev93 (tgpr)                6.89      6.90
+#WER dev93 (tg)                  6.63      6.73
+#WER dev93 (big-dict,tgpr)       4.96      4.91
 #WER dev93 (big-dict,fg)         4.53      4.44
-#WER eval92 (tgpr)               4.68      4.89
-#WER eval92 (tg)                 4.32      4.43
-#WER eval92 (big-dict,tgpr)      2.69      2.91
-#WER eval92 (big-dict,fg)        2.34      2.37
-# Final train prob        -0.0442   -0.0439
-# Final valid prob        -0.0537   -0.0529
-# Final train prob (xent)   -0.6548   -0.6581
-# Final valid prob (xent)   -0.7324   -0.7300
+#WER eval92 (tgpr)               4.68      4.77
+#WER eval92 (tg)                 4.32      4.36
+#WER eval92 (big-dict,tgpr)      2.69      2.85
+#WER eval92 (big-dict,fg)        2.34      2.36
+# Final train prob        -0.0442   -0.0436
+# Final valid prob        -0.0537   -0.0540
+# Final train prob (xent)   -0.6548   -0.6592
+# Final valid prob (xent)   -0.7324   -0.7326
 # Num-params                 8349232   8349232
 
 # steps/info/chain_dir_info.pl exp/chain_online_cmn/tdnn1i_sp
-# exp/chain_online_cmn/tdnn1i_sp: num-iters=108 nj=2..8 num-params=8.3M dim=40+100->2840 combine=-0.044->-0.044 (over 1) xent:train/valid[71,107,final]=(-0.873,-0.648,-0.658/-0.914,-0.712,-0.730) logprob:train/valid[71,107,final]=(-0.065,-0.044,-0.044/-0.068,-0.054,-0.053)
-
+# exp/chain_online_cmn/tdnn1i_sp: num-iters=108 nj=2..8 num-params=8.3M dim=40+100->2840 combine=-0.045->-0.045 (over 1) xent:train/valid[71,107,final]=(-0.873,-0.653,-0.659/-0.922,-0.713,-0.733) logprob:train/valid[71,107,final]=(-0.064,-0.044,-0.044/-0.068,-0.054,-0.054)
 
 set -e -o pipefail
 
@@ -72,7 +71,7 @@ srand=0
 remove_egs=true
 
 #decode options
-test_online_decoding=false  # if true, it will run the last decoding stage.
+test_online_decoding=true  # if true, it will run the last decoding stage.
 
 # End configuration section.
 echo "$0 $@"  # Print the command line for logging

diff --git a/egs/wsj/s5/steps/nnet3/chain/e2e/get_egs_e2e.sh b/egs/wsj/s5/steps/nnet3/chain/e2e/get_egs_e2e.sh
@@ -14,7 +14,6 @@
 # Begin configuration section.
 cmd=run.pl
 normalize_egs=true
-feat_type=raw     # set it to 'lda' to use LDA features.
 frame_subsampling_factor=3 # frames-per-second of features we train on divided
                            # by frames-per-second at output of chain model
 left_context=4    # amount of left-context per eg (i.e. extra frames of input features
@@ -47,6 +46,13 @@ online_ivector_dir=  # can be used if we are including speaker information as iV
 cmvn_opts=  # can be used for specifying CMVN options, if feature type is not lda (if lda,
             # it doesn't make sense to use different options than were used as input to the
             # LDA transform).  This is used to turn off CMVN in the online-nnet experiments.
+online_cmvn=false # Set to 'true' to replace 'apply-cmvn' by 'apply-cmvn-online' in the nnet3 input.
+                  # The configuration is passed externally via '$cmvn_opts' given to train.py,
+                  # typically as: --cmvn-opts="--config conf/online_cmvn.conf".
+                  # The global_cmvn.stats are computed by this script from the features.
+                  # Note: the online cmvn for the ivector extractor is controlled separately in
+                  #       steps/online/nnet2/train_ivector_extractor.sh by --online-cmvn-iextractor
+
 
 echo "$0 $@"  # Print the command line for logging
 
@@ -147,18 +153,39 @@ if [ $len_uttlist -lt $num_utts_subset ]; then
   echo "Number of utterances which have length at least $frames_per_eg is really low. Please check your data." && exit 1;
 fi
 
-
 ## Set up features.
-echo "$0: feature type is $feat_type"
-
-case $feat_type in
-  raw) feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:- ark:- |"
-    valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn $cmvn_opts --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- |"
-    train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn $cmvn_opts --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- |"
-    echo $cmvn_opts >$dir/cmvn_opts # caution: the top-level nnet training script should copy this to its own dir now.
-   ;;
-  *) echo "$0: invalid feature type --feat-type '$feat_type'" && exit 1;
-esac
+
+# get the global_cmvn stats for online-cmvn,
+if $online_cmvn; then
+  # create global_cmvn.stats,
+  #
+  # caution: the top-level nnet training script should copy
+  # 'global_cmvn.stats' and 'online_cmvn' to its own dir.
+  if ! matrix-sum --binary=false scp:$data/cmvn.scp - >$dir/global_cmvn.stats 2>/dev/null; then
+    echo "$0: Error summing cmvn stats"
+    exit 1
+  fi
+  touch $dir/online_cmvn
+else
+  [ -f $dir/online_cmvn ] && rm $dir/online_cmvn
+fi
+
+# create the feature pipelines,
+if ! $online_cmvn; then
+  # the original front-end with 'apply-cmvn',
+  echo "$0: feature type is raw, with 'apply-cmvn'"
+  feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:- ark:- |"
+  valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn $cmvn_opts --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- |"
+  train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn $cmvn_opts --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- |"
+else
+  # the alternative front-end with 'apply-cmvn-online',
+  # - the $cmvn_opts can be set to '--config=conf/online_cmvn.conf' which is the setup of ivector-extractor,
+  echo "$0: feature type is raw, with 'apply-cmvn-online'"
+  feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn-online $cmvn_opts --spk2utt=ark:$sdata/JOB/spk2utt $dir/global_cmvn.stats scp:- ark:- |"
+  valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn-online $cmvn_opts --spk2utt=ark:$data/spk2utt $dir/global_cmvn.stats scp:- ark:- |"
+  train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn-online $cmvn_opts --spk2utt=ark:$data/spk2utt $dir/global_cmvn.stats scp:- ark:- |"
+fi
+echo $cmvn_opts >$dir/cmvn_opts # caution: the top-level nnet training script should copy this to its own dir now.
 
 if [ ! -z "$online_ivector_dir" ]; then
   ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1;

diff --git a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh
@@ -68,7 +68,7 @@ cmvn_opts=  # can be used for specifying CMVN options, if feature type is not ld
 online_cmvn=false # Set to 'true' to replace 'apply-cmvn' by 'apply-cmvn-online' in the nnet3 input.
                   # The configuration is passed externally via '$cmvn_opts' given to train.py,
                   # typically as: --cmvn-opts="--config conf/online_cmvn.conf".
-                  # The global_cmvn.stats are computed by this script from the featutres.
+                  # The global_cmvn.stats are computed by this script from the features.
                   # Note: the online cmvn for ivector extractor it is controlled separately in
                   #       steps/online/nnet2/train_ivector_extractor.sh by --online-cmvn-iextractor
 lattice_lm_scale=     # If supplied, the graph/lm weight of the lattices will be
@@ -221,9 +221,9 @@ else
   # the alternative front-end with 'apply-cmvn-online',
   # - the $cmvn_opts can be set to '--config=conf/online_cmvn.conf' which is the setup of ivector-extractor,
   echo "$0: feature type is raw, with 'apply-cmvn-online'"
-  feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn-online $cmvn_opts $dir/global_cmvn.stats scp:- ark:- |"
-  valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn-online $cmvn_opts $dir/global_cmvn.stats scp:- ark:- |"
-  train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn-online $cmvn_opts $dir/global_cmvn.stats scp:- ark:- |"
+  feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn-online $cmvn_opts --spk2utt=ark:$sdata/JOB/spk2utt $dir/global_cmvn.stats scp:- ark:- |"
+  valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn-online $cmvn_opts --spk2utt=ark:$data/spk2utt $dir/global_cmvn.stats scp:- ark:- |"
+  train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn-online $cmvn_opts --spk2utt=ark:$data/spk2utt $dir/global_cmvn.stats scp:- ark:- |"
 fi
 echo $cmvn_opts >$dir/cmvn_opts # caution: the top-level nnet training script should copy this to its own dir now.
 

diff --git a/egs/wsj/s5/steps/nnet3/decode.sh b/egs/wsj/s5/steps/nnet3/decode.sh
@@ -104,7 +104,7 @@ if ! $online_cmvn; then
 echo "$0: feature type is raw"
   feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |"
 else
-  feats="ark,s,cs:apply-cmvn-online $cmvn_opts $srcdir/global_cmvn.stats scp:$sdata/JOB/feats.scp ark:- |"
+  feats="ark,s,cs:apply-cmvn-online $cmvn_opts --spk2utt=ark:$sdata/JOB/spk2utt $srcdir/global_cmvn.stats scp:$sdata/JOB/feats.scp ark:- |"
 fi
 
 if [ ! -z "$online_ivector_dir" ]; then

diff --git a/egs/wsj/s5/steps/nnet3/get_egs.sh b/egs/wsj/s5/steps/nnet3/get_egs.sh
@@ -54,7 +54,7 @@ cmvn_opts=  # can be used for specifying CMVN options, if feature type is not ld
 online_cmvn=false # Set to 'true' to replace 'apply-cmvn' by 'apply-cmvn-online' in the nnet3 input.
                   # The configuration is passed externally via '$cmvn_opts' given to train.py,
                   # typically as: --cmvn-opts="--config conf/online_cmvn.conf".
-                  # The global_cmvn.stats are computed by this script from the featutres.
+                  # The global_cmvn.stats are computed by this script from the features.
                   # Note: the online cmvn for ivector extractor it is controlled separately in
                   #       steps/online/nnet2/train_ivector_extractor.sh by --online-cmvn-iextractor
 
@@ -171,9 +171,9 @@ else
   # the alternative front-end with 'apply-cmvn-online',
   # - the $cmvn_opts can be set to '--config=conf/online_cmvn.conf' which is the setup of ivector-extractor,
   echo "$0: feature type is raw, with 'apply-cmvn-online'"
-  feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn-online $cmvn_opts $dir/global_cmvn.stats scp:- ark:- |"
-  valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn-online $cmvn_opts $dir/global_cmvn.stats scp:- ark:- |"
-  train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn-online $cmvn_opts $dir/global_cmvn.stats scp:- ark:- |"
+  feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn-online $cmvn_opts --spk2utt=ark:$sdata/JOB/spk2utt  $dir/global_cmvn.stats scp:- ark:- |"
+  valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn-online $cmvn_opts --spk2utt=ark:$data/spk2utt $dir/global_cmvn.stats scp:- ark:- |"
+  train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn-online $cmvn_opts --spk2utt=ark:$data/spk2utt $dir/global_cmvn.stats scp:- ark:- |"
 fi
 echo $cmvn_opts >$dir/cmvn_opts # caution: the top-level nnet training script should copy this to its own dir now.