updating fisher_swbd nnet3/chain scripts: removing non-chain training… #2136

Merged
merged 2 commits into from Jan 10, 2018
6 changes: 5 additions & 1 deletion egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh
@@ -112,8 +112,12 @@ srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text
# assume swbd_p1_data_prep.sh was done already.
[ ! -f "$srcdict" ] && echo "No such file $srcdict" && exit 1;

rm $dir/lexicon0.txt 2>/dev/null
cp $srcdict $dir/lexicon0.txt || exit 1;
patch <local/dict.patch $dir/lexicon0.txt || exit 1;
chmod +w $srcdict $dir/lexicon0.txt

# Use absolute path in case patch reports the "Invalid file name" error (a bug with patch)
patch <local/dict.patch `pwd`/$dir/lexicon0.txt || exit 1;

#(2a) Dictionary preparation:
# Pre-processing (remove comments)
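The absolute-path call to patch above works around the "Invalid file name" error mentioned in the comment. As a purely illustrative hardening of that step (not part of this PR, and assuming GNU patch with its --dry-run option), the patch could be verified before the lexicon is modified:

# Hypothetical guard around the patch step; $dir is the dict directory as in the script.
abs_lex=`pwd`/$dir/lexicon0.txt
if patch --dry-run <local/dict.patch "$abs_lex" >/dev/null; then
  patch <local/dict.patch "$abs_lex" || exit 1;
else
  echo "$0: local/dict.patch does not apply cleanly to $abs_lex"; exit 1;
fi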
97 changes: 47 additions & 50 deletions egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh 100644 → 100755
@@ -4,7 +4,7 @@
set -e
stage=1
train_stage=-10
generate_alignments=true # false if doing chain training
generate_alignments=false # false if doing chain training
speed_perturb=true

. ./path.sh
@@ -16,60 +16,42 @@ if [ "$speed_perturb" == "true" ]; then
if [ $stage -le 1 ]; then
#Although the nnet will be trained by high resolution data, we still have to perturbe the normal data to get the alignment
# _sp stands for speed-perturbed

for datadir in train_nodup; do
utils/perturb_data_dir_speed.sh 0.9 data/${datadir} data/temp1
utils/perturb_data_dir_speed.sh 1.1 data/${datadir} data/temp2
utils/combine_data.sh data/${datadir}_tmp data/temp1 data/temp2
utils/validate_data_dir.sh --no-feats data/${datadir}_tmp
rm -r data/temp1 data/temp2

mfccdir=mfcc_perturbed
steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 \
data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1;
steps/compute_cmvn_stats.sh data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1;
utils/fix_data_dir.sh data/${datadir}_tmp

utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- data/${datadir} data/temp0
utils/combine_data.sh data/${datadir}_sp data/${datadir}_tmp data/temp0
utils/fix_data_dir.sh data/${datadir}_sp
rm -r data/temp0 data/${datadir}_tmp
done
echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)"
utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp
echo "$0: making MFCC features for low-resolution speed-perturbed data"
steps/make_mfcc.sh --nj 70 --cmd "$train_cmd" \
data/${train_set}_sp || exit 1
steps/compute_cmvn_stats.sh data/${train_set}_sp || exit 1
utils/fix_data_dir.sh data/${train_set}_sp || exit 1
fi
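
The perturb_data_dir_speed_3way.sh wrapper used above replaces the hand-rolled perturbation in the deleted loop; a rough sketch of the equivalent manual steps, assuming the conventional 0.9 and 1.1 factors plus an unperturbed sp1.0 copy (the wrapper also takes care of utterance/speaker prefixes), would be:

# Sketch only: roughly what the 3-way speed perturbation amounts to.
for factor in 0.9 1.1; do
  utils/perturb_data_dir_speed.sh $factor data/${train_set} data/temp_sp${factor}
done
utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- data/${train_set} data/temp_sp1.0
utils/combine_data.sh data/${train_set}_sp data/temp_sp0.9 data/temp_sp1.1 data/temp_sp1.0
utils/fix_data_dir.sh data/${train_set}_sp
rm -r data/temp_sp0.9 data/temp_sp1.1 data/temp_sp1.0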

if [ $stage -le 2 ] && [ "$generate_alignments" == "true" ]; then
#obtain the alignment of the perturbed data
steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \
data/train_nodup_sp data/lang exp/tri5a exp/tri5a_ali_nodup_sp || exit 1
data/${train_set}_sp data/lang exp/tri5a exp/tri5a_ali_nodup_sp || exit 1
fi
train_set=train_nodup_sp
train_set=${train_set}_sp
fi

if [ $stage -le 3 ]; then
# Create high-resolution MFCC features (with 40 cepstra instead of 13).
# this shows how you can split across multiple file-systems.
echo "$0: creating high-resolution MFCC features"
mfccdir=mfcc_hires
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
date=$(date +'%m_%d_%H_%M')
utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/mfcc/fisher_swbd-$date/s5b/$mfccdir/storage $mfccdir/storage
fi

# the 100k_nodup directory is copied seperately, as
# we want to use exp/tri1b_ali_100k_nodup for lda_mllt training
# we want to use exp/tri1b_ali_100k_nodup for ivector extractor training
# the main train directory might be speed_perturbed
for dataset in $train_set train_100k_nodup; do
utils/copy_data_dir.sh data/$dataset data/${dataset}_hires

# scale the waveforms, this is useful as we don't use CMVN
data_dir=data/${dataset}_hires
cat $data_dir/wav.scp | python -c "
import sys, os, subprocess, re, random
scale_low = 1.0/8
scale_high = 2.0
for line in sys.stdin.readlines():
if len(line.strip()) == 0:
continue
print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high))
"| sort -k1,1 -u > $data_dir/wav.scp_scaled || exit 1;
mv $data_dir/wav.scp_scaled $data_dir/wav.scp
# do volume-perturbation on the training data prior to extracting hires
# features; this helps make trained nnets more invariant to test data volume.
utils/data/perturb_data_dir_volume.sh data/${dataset}_hires

steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
--cmd "$train_cmd" data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
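
The perturb_data_dir_volume.sh call above does essentially what the deleted inline Python did: append a random sox volume factor to each wav.scp command. A condensed sketch of that effect (illustrative only; the wrapper also handles non-piped wav.scp entries) is:

# Sketch: scale each piped wav.scp entry by a random factor in [1/8, 2.0].
awk 'BEGIN{srand()} NF>0 {v=0.125+rand()*1.875;
  printf("%s sox --vol %.3f -t wav - -t wav - |\n", $0, v)}' \
  data/${dataset}_hires/wav.scp | sort -k1,1 -u > data/${dataset}_hires/wav.scp_scaled
mv data/${dataset}_hires/wav.scp_scaled data/${dataset}_hires/wav.scp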
@@ -95,46 +77,61 @@ for line in sys.stdin.readlines():
utils/data/remove_dup_utts.sh 200 data/${train_set}_30k_hires data/${train_set}_30k_nodup_hires # 33hr
fi
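
The _30k subset capped by remove_dup_utts.sh above comes from context that is collapsed in this diff; a typical construction (an assumption based on the usual recipe pattern, not something shown in this PR) would be:

# Assumed construction of the 30k subset later used for PCA/UBM training.
utils/subset_data_dir.sh data/${train_set}_hires 30000 data/${train_set}_30k_hires
utils/data/remove_dup_utts.sh 200 data/${train_set}_30k_hires data/${train_set}_30k_nodup_hires  # 33hr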

# ivector extractor training
if [ $stage -le 5 ]; then
# We need to build a small system just because we need the LDA+MLLT transform
# to train the diag-UBM on top of. We use --num-iters 13 because after we get
# the transform (12th iter is the last), any further training is pointless.
# this decision is based on fisher_english
steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \
echo "$0: computing a PCA transform from the hires data."
steps/online/nnet2/get_pca_transform.sh --cmd "$train_cmd" \
--splice-opts "--left-context=3 --right-context=3" \
5500 90000 data/train_100k_nodup_hires \
data/lang exp/tri1b_ali exp/nnet3/tri2b
--max-utts 10000 --subsample 2 \
data/${train_set}_30k_nodup_hires exp/nnet3/pca
fi
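
If the PCA step needs a sanity check, the estimated transform can be dumped as text with standard Kaldi tools; a small sketch, assuming the script writes its transform to exp/nnet3/pca/final.mat like other transform-estimation scripts:

# Sketch: print the transform in text form to verify its dimensions.
copy-matrix --binary=false exp/nnet3/pca/final.mat - | head -n 3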

if [ $stage -le 6 ]; then
# To train a diagonal UBM we don't need very much data, so use the smallest subset.
steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 200000 \
data/${train_set}_30k_nodup_hires 512 exp/nnet3/tri2b exp/nnet3/diag_ubm
echo "$0: training the diagonal UBM."
steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 200000 \
data/${train_set}_30k_nodup_hires 512 exp/nnet3/pca exp/nnet3/diag_ubm
fi

if [ $stage -le 7 ]; then
# iVector extractors can be sensitive to the amount of data, but this one has a
# fairly small dim (defaults to 100) so we don't use all of it, we use just the
# 100k subset (just under half the data).
echo "$0: training the iVector extractor"
steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
data/train_100k_nodup_hires exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1;
fi
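
The comment above notes that the iVector dimension defaults to 100; if a different dimension is wanted, it can be set at training time. A sketch, assuming --ivector-dim is the relevant option of this script:

# Sketch: setting the iVector dimension explicitly (100 is the default noted above).
steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
  --ivector-dim 100 \
  data/train_100k_nodup_hires exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1;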

if [ $stage -le 8 ]; then
# We extract iVectors on all the train_nodup data, which will be what we
# train the system on.
# We extract iVectors on the speed-perturbed training data after combining
# short segments, which will be what we train the system on. With
# --utts-per-spk-max 2, the script pairs the utterances into twos, and treats
# each of these pairs as one speaker; this gives more diversity in iVectors..
# Note that these are extracted 'online'.

# note, we don't encode the 'max2' in the name of the ivectordir even though
# that's the data we extract the ivectors from, as it's still going to be
# valid for the non-'max2' data, the utterance list is the same.

ivectordir=exp/nnet3/ivectors_${train_set}
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $ivectordir/storage ]; then
utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/ivectors/fisher_swbd-$(date +'%m_%d_%H_%M')/s5/$ivectordir/storage $ivectordir/storage
fi


# having a larger number of speakers is helpful for generalization, and to
# handle per-utterance decoding well (iVector starts at zero).
steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_max2_hires
temp_data_root=${ivectordir}
utils/data/modify_speaker_info.sh --utts-per-spk-max 2 \
data/${train_set}_hires ${temp_data_root}/${train_set}_hires_max2

steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \
data/${train_set}_max2_hires exp/nnet3/extractor exp/nnet3/ivectors_$train_set || exit 1;
${temp_data_root}/${train_set}_hires_max2 \
exp/nnet3/extractor $ivectordir

# Also extract iVectors for the test data
for data_set in eval2000 rt03; do
steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \
data/${data_set}_hires exp/nnet3/extractor exp/nnet3/ivectors_$data_set || exit 1;
data/${data_set}_hires exp/nnet3/extractor exp/nnet3/ivectors_${data_set} || exit 1;
done
fi
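
Downstream, these iVector directories are handed to the nnet3/chain training and decoding scripts; as a sketch of how the test-set iVectors get used at decode time (the chain-model directory names are placeholders, and --online-ivector-dir is assumed from the standard nnet3 decode script):

# Sketch: decoding eval2000 with the iVectors extracted above;
# exp/chain/tdnn and its graph directory are placeholder names.
steps/nnet3/decode.sh --nj 30 --cmd "$decode_cmd" \
  --online-ivector-dir exp/nnet3/ivectors_eval2000 \
  exp/chain/tdnn/graph data/eval2000_hires exp/chain/tdnn/decode_eval2000 || exit 1;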

158 changes: 0 additions & 158 deletions egs/fisher_swbd/s5/local/nnet3/run_lstm.sh

This file was deleted.