From 10f2fcba95cf8297ee610b30f0b2e7c1203b2e26 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Wed, 19 Jun 2019 14:46:57 -0400 Subject: [PATCH] [build,scripts] Dependency fix; add cross-references to scripts (#3400) --- egs/wsj/s5/utils/ctm/resolve_ctm_overlaps.py | 3 ++- egs/wsj/s5/utils/data/subsegment_data_dir.sh | 16 ++++++++-------- src/Makefile | 3 ++- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/egs/wsj/s5/utils/ctm/resolve_ctm_overlaps.py b/egs/wsj/s5/utils/ctm/resolve_ctm_overlaps.py index 61c9a3014aa..4463bc9fcf0 100755 --- a/egs/wsj/s5/utils/ctm/resolve_ctm_overlaps.py +++ b/egs/wsj/s5/utils/ctm/resolve_ctm_overlaps.py @@ -38,7 +38,8 @@ def get_args(): """gets command line arguments""" - usage = """ Python script to resolve overlaps in ctms """ + usage = """ Python script to resolve overlaps in ctms. May be used with + utils/data/subsegment_data_dir.sh. """ parser = argparse.ArgumentParser(usage) parser.add_argument('segments', type=argparse.FileType('r'), help='use segments to resolve overlaps') diff --git a/egs/wsj/s5/utils/data/subsegment_data_dir.sh b/egs/wsj/s5/utils/data/subsegment_data_dir.sh index 1b399ba730a..d077b851d23 100755 --- a/egs/wsj/s5/utils/data/subsegment_data_dir.sh +++ b/egs/wsj/s5/utils/data/subsegment_data_dir.sh @@ -44,6 +44,7 @@ if [ $# != 4 ] && [ $# != 3 ]; then echo " # not just applied to the input segments file, is that" echo " # for purposes of computing the num-frames of the parts of" echo " # matrices in feats.scp, the padding should not be done." + echo " See also: resolve_ctm_overlaps.py" exit 1; fi @@ -147,7 +148,7 @@ if [ -f $srcdir/feats.scp ]; then frame_shift=$(cat $srcdir/frame_shift) fi echo "$0: note: frame shift is $frame_shift [affects feats.scp]" - + # The subsegments format is . # e.g. 'utt_foo-1 utt_foo 7.21 8.93' # The first awk command replaces this with the format: @@ -167,31 +168,31 @@ if [ -f $srcdir/feats.scp ]; then # like pipes that might contain spaces, so it has to be able to produce output like the # following: # utt_foo-1 some command|[721:892] - # The 'end' frame is ensured to not exceed the feature archive size of + # The 'end' frame is ensured to not exceed the feature archive size of # . This is done using the script fix_subsegment_feats.pl. - # e.g if the number of frames in foo-bar.ark is 891, then the features are + # e.g if the number of frames in foo-bar.ark is 891, then the features are # truncated to that many frames. # utt_foo-1 foo-bar.ark:514231[721:890] # Lastly, utils/data/normalize_data_range.pl will only do something nontrivial if # the original data-dir already had data-ranges in square brackets. - + # Here, we computes the maximum 'end' frame allowed for each . # This is equal to the number of frames in the feature archive for . if [ ! -f $srcdir/utt2num_frames ]; then echo "$0: WARNING: Could not find $srcdir/utt2num_frames. It might take a long time to run get_utt2num_frames.sh." - echo "Increase the number of jobs or write this file while extracting features by passing --write-utt2num-frames true to steps/make_mfcc.sh etc." + echo "Increase the number of jobs or write this file while extracting features by passing --write-utt2num-frames true to steps/make_mfcc.sh etc." fi utils/data/get_utt2num_frames.sh --cmd "$cmd" --nj $nj $srcdir awk '{print $1" "$2}' $subsegments | \ utils/apply_map.pl -f 2 $srcdir/utt2num_frames > \ $dir/utt2max_frames - + awk -v s=$frame_shift '{print $1, $2, int(($3/s)+0.5), int(($4/s)-0.5);}' <$subsegments| \ utils/apply_map.pl -f 2 $srcdir/feats.scp | \ awk '{p=NF-1; for (n=1;n$dir/feats.scp || { echo "Failed to create $dir/feats.scp" && exit; } - + # Parse the frame ranges from feats.scp, which is in the form of [first-frame:last-frame] # and write the number-of-frames = last-frame - first-frame + 1 for the utterance. cat $dir/feats.scp | perl -ne 'm/^(\S+) .+\[(\d+):(\d+)\]$/; print "$1 " . ($3-$2+1) . "\n"' > \ @@ -236,4 +237,3 @@ done rm $dir/new2old_utt echo "$0: subsegmented data from $srcdir to $dir" - diff --git a/src/Makefile b/src/Makefile index 63d9f228f12..07b7947f3b1 100644 --- a/src/Makefile +++ b/src/Makefile @@ -145,7 +145,8 @@ $(EXT_SUBDIRS) : checkversion kaldi.mk mklibdir ext_depend #1)The tools depend on all the libraries bin fstbin gmmbin fgmmbin sgmm2bin featbin cudafeatbin nnetbin nnet2bin nnet3bin chainbin latbin ivectorbin lmbin kwsbin online2bin rnnlmbin cudadecoderbin: \ base matrix util feat cudafeat tree gmm transform sgmm2 fstext hmm \ - lm decoder lat cudamatrix nnet nnet2 nnet3 ivector chain kws online2 rnnlm + lm decoder lat cudamatrix nnet nnet2 nnet3 ivector chain kws online2 rnnlm \ + cudadecoder #2)The libraries have inter-dependencies base: base/.depend.mk