Write utt2num_frames while extracting features instead of recomputing it.

prepare_feats_for_egs.sh: copy-feats now writes per-job utt2num_frames files,
which are concatenated into the output data dir and then removed. On CLSP
hosts the feature archives are spread over a split storage dir; the guard
tests $featdir/storage, the same directory create_split_dir.pl populates.

run.sh: make_mfcc.sh is asked to write utt2num_frames, combine_data.sh carries
it along as an extra file, and the separate get_utt2num_frames.sh calls are
dropped.

diff --git a/egs/sre16/v1/local/nnet3/xvector/prepare_feats_for_egs.sh b/egs/sre16/v1/local/nnet3/xvector/prepare_feats_for_egs.sh
index 9f132bdbda1..029070422a8 100755
--- a/egs/sre16/v1/local/nnet3/xvector/prepare_feats_for_egs.sh
+++ b/egs/sre16/v1/local/nnet3/xvector/prepare_feats_for_egs.sh
@@ -41,18 +41,25 @@ done
 # Set various variables.
 mkdir -p $dir/log
 mkdir -p $data_out
-featdir=${PWD}/$dir
+featdir=$(utils/make_absolute.sh $dir)
 
-cp $data_in/utt2spk $data_out/utt2spk
-cp $data_in/spk2utt $data_out/spk2utt
-cp $data_in/wav.scp $data_out/wav.scp
+if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $featdir/storage ]; then
+  utils/create_split_dir.pl \
+    /export/b{14,15,16,17}/$USER/kaldi-data/egs/sre16/v2/xvector-$(date +'%m_%d_%H_%M')/xvector_feats/storage $featdir/storage
+fi
 
 for n in $(seq $nj); do
   # the next command does nothing unless $featdir/storage/ exists, see
   # utils/create_data_link.pl for more info.
-  utils/create_data_link.pl $featdir/xvector_feats_${name}.$n.ark
+  utils/create_data_link.pl $featdir/xvector_feats_${name}.${n}.ark
 done
 
+cp $data_in/utt2spk $data_out/utt2spk
+cp $data_in/spk2utt $data_out/spk2utt
+cp $data_in/wav.scp $data_out/wav.scp
+
+write_num_frames_opt="--write-num-frames=ark,t:$featdir/log/utt2num_frames.JOB"
+
 sdata_in=$data_in/split$nj;
 utils/split_data.sh $data_in $nj || exit 1;
 
@@ -60,11 +67,16 @@ $cmd JOB=1:$nj $dir/log/create_xvector_feats_${name}.JOB.log \
   apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=$cmn_window \
   scp:${sdata_in}/JOB/feats.scp ark:- \| \
   select-voiced-frames ark:- scp,s,cs:${sdata_in}/JOB/vad.scp ark:- \| \
-  copy-feats --compress=$compress ark:- \
+  copy-feats --compress=$compress $write_num_frames_opt ark:- \
   ark,scp:$featdir/xvector_feats_${name}.JOB.ark,$featdir/xvector_feats_${name}.JOB.scp || exit 1;
 
 for n in $(seq $nj); do
   cat $featdir/xvector_feats_${name}.$n.scp || exit 1;
 done > ${data_out}/feats.scp || exit 1
 
+for n in $(seq $nj); do
+  cat $featdir/log/utt2num_frames.$n || exit 1;
+done > $data_out/utt2num_frames || exit 1
+rm $featdir/log/utt2num_frames.*
+
 echo "$0: Succeeded creating xvector features for $name"
diff --git a/egs/sre16/v2/run.sh b/egs/sre16/v2/run.sh
index 3675823fdf9..375b07c0e53 100755
--- a/egs/sre16/v2/run.sh
+++ b/egs/sre16/v2/run.sh
@@ -82,15 +82,19 @@ fi
 
 if [ $stage -le 1 ]; then
   # Make filterbanks and compute the energy-based VAD for each dataset
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+    utils/create_split_dir.pl \
+      /export/b{14,15,16,17}/$USER/kaldi-data/egs/sre16/v2/xvector-$(date +'%m_%d_%H_%M')/mfccs/storage $mfccdir/storage
+  fi
   for name in sre swbd sre16_eval_enroll sre16_eval_test sre16_major; do
-    steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \
+    steps/make_mfcc.sh --write-utt2num-frames true --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \
       data/${name} exp/make_mfcc $mfccdir
     utils/fix_data_dir.sh data/${name}
     sid/compute_vad_decision.sh --nj 40 --cmd "$train_cmd" \
       data/${name} exp/make_vad $vaddir
     utils/fix_data_dir.sh data/${name}
   done
-  utils/combine_data.sh data/swbd_sre data/swbd data/sre
+  utils/combine_data.sh --extra-files "utt2num_frames" data/swbd_sre data/swbd data/sre
   utils/fix_data_dir.sh data/swbd_sre
 fi
 
@@ -99,7 +103,6 @@ fi
 # The combined list will be used to train the xvector DNN. The SRE
 # subset will be used to train the PLDA model.
 if [ $stage -le 2 ]; then
-  utils/data/get_utt2num_frames.sh --nj 40 --cmd "$train_cmd" data/swbd_sre
   frame_shift=0.01
   awk -v frame_shift=$frame_shift '{print $1, $2*frame_shift;}' data/swbd_sre/utt2num_frames > data/swbd_sre/reco2dur
 
@@ -180,8 +183,6 @@ if [ $stage -le 3 ]; then
   local/nnet3/xvector/prepare_feats_for_egs.sh --nj 40 --cmd "$train_cmd" \
     data/swbd_sre_combined data/swbd_sre_combined_no_sil exp/swbd_sre_combined_no_sil
   utils/fix_data_dir.sh data/swbd_sre_combined_no_sil
-  utils/data/get_utt2num_frames.sh --nj 40 --cmd "$train_cmd" data/swbd_sre_combined_no_sil
-  utils/fix_data_dir.sh data/swbd_sre_combined_no_sil
 
   # Now, we need to remove features that are too short after removing silence
   # frames. We want atleast 5s (500 frames) per utterance.
@@ -203,7 +204,7 @@ if [ $stage -le 3 ]; then
   utils/filter_scp.pl data/swbd_sre_combined_no_sil/utt2spk data/swbd_sre_combined_no_sil/utt2num_frames > data/swbd_sre_combined_no_sil/utt2num_frames.new
   mv data/swbd_sre_combined_no_sil/utt2num_frames.new data/swbd_sre_combined_no_sil/utt2num_frames
 
-  # Now we're reaady to create training examples.
+  # Now we're ready to create training examples.
   utils/fix_data_dir.sh data/swbd_sre_combined_no_sil
 fi
 