Skip to content

Commit

Permalink
[src]: Minor updates to sequence training and adjusting priors. (#1345)
Browse files Browse the repository at this point in the history
  • Loading branch information
vimalmanohar authored and danpovey committed Jan 17, 2017
1 parent d498cdb commit d3787c1
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 57 deletions.
17 changes: 7 additions & 10 deletions egs/wsj/s5/steps/nnet3/adjust_priors.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ egs_type=egs # Compute from $egs_type.*.ark in $egs_dir
use_raw_nnet=false # If raw nnet, the averaged posterior is computed
# and stored in post.$iter.vec; but there is no
# adjusting of priors
minibatch_size=256
iter=final

. utils/parse_options.sh
Expand Down Expand Up @@ -59,28 +60,24 @@ fi

rm -f $dir/post.$iter.*.vec 2>/dev/null

left_context=`cat $egs_dir/info/left_context` || exit 1
right_context=`cat $egs_dir/info/right_context` || exit 1

context_opts="--left-context=$left_context --right-context=$right_context"

# Read the archive count recorded in $egs_dir/info/num_archives; abort if the
# file cannot be read. The message names the file that is actually read.
num_archives=$(cat "$egs_dir/info/num_archives") || { echo "error: no such file $egs_dir/info/num_archives"; exit 1; }
if [ $num_jobs_compute_prior -gt $num_archives ]; then egs_part=1;
else egs_part=JOB; fi
if [ $num_jobs_compute_prior -gt $num_archives ]; then
num_jobs_compute_prior=$num_archives
fi

if [ $egs_type != "degs" ]; then
$cmd JOB=1:$num_jobs_compute_prior $prior_queue_opt $dir/log/get_post.$iter.JOB.log \
nnet3-copy-egs ark:$egs_dir/$egs_type.$egs_part.ark ark:- \| \
nnet3-subset-egs --srand=JOB --n=$prior_subset_size ark:- ark:- \| \
nnet3-merge-egs ark:- ark:- \| \
nnet3-merge-egs --minibatch-size=$minibatch_size ark:- ark:- \| \
nnet3-compute-from-egs $prior_gpu_opt --apply-exp=true \
"$model" ark:- ark:- \| \
matrix-sum-rows ark:- ark:- \| vector-sum ark:- $dir/post.$iter.JOB.vec || exit 1;
else
$cmd JOB=1:$num_jobs_compute_prior $prior_queue_opt $dir/log/get_post.$iter.JOB.log \
nnet3-discriminative-copy-egs ark:$egs_dir/$egs_type.$egs_part.ark ark:- \| \
nnet3-discriminative-subset-egs --srand=JOB --n=$prior_subset_size ark:- ark:- \| \
nnet3-discriminative-merge-egs ark:- ark:- \| \
nnet3-discriminative-merge-egs --minibatch-size=$minibatch_size ark:- ark:- \| \
nnet3-compute-from-degs $prior_gpu_opt --apply-exp=true \
"$model" ark:- ark:- \| \
matrix-sum-rows ark:- ark:- \| vector-sum ark:- $dir/post.$iter.JOB.vec || exit 1;
Expand All @@ -94,7 +91,7 @@ $cmd $dir/log/vector_sum.$iter.log \

if ! $use_raw_nnet; then
run.pl $dir/log/adjust_priors.$iter.log \
nnet3-am-adjust-priors $dir/$iter.mdl $dir/post.$iter.vec $dir/$iter.adj.mdl
nnet3-am-adjust-priors $dir/$iter.mdl $dir/post.$iter.vec $dir/${iter}_adj.mdl
fi

rm -f $dir/post.$iter.*.vec;
Expand Down
77 changes: 30 additions & 47 deletions egs/wsj/s5/steps/nnet3/train_discriminative.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,15 @@ shuffle_buffer_size=1000 # This "buffer_size" variable controls randomization of

stage=-3

adjust_priors=true # If true, steps/nnet3/adjust_priors.sh is run on each epoch's model (epoch$e.mdl).

num_threads=16 # this is the default but you may want to change it, e.g. to 1 if
# using GPUs.

cleanup=true
keep_model_iters=1
keep_model_iters=100
remove_egs=false
src_model= # will default to $degs_dir/final.mdl

num_jobs_compute_prior=10

min_deriv_time=0
max_deriv_time_relative=0
Expand Down Expand Up @@ -129,11 +128,6 @@ done

silphonelist=`cat $degs_dir/info/silence.csl` || exit 1;

num_archives_priors=0
if $adjust_priors; then
num_archives_priors=`cat $degs_dir/info/num_archives_priors` || exit 1
fi

num_archives=$(cat $degs_dir/info/num_archives) || exit 1;
frame_subsampling_factor=$(cat $degs_dir/info/frame_subsampling_factor)

Expand Down Expand Up @@ -200,6 +194,8 @@ if [ $stage -le -1 ]; then

$cmd $dir/log/convert.log \
nnet3-am-copy --learning-rate=$learning_rate "$src_model" $dir/0.mdl || exit 1;

ln -sf 0.mdl $dir/epoch0.mdl
fi


Expand Down Expand Up @@ -307,28 +303,11 @@ while [ $x -lt $num_iters ]; do
nnet3-am-copy --set-raw-nnet=- $dir/$x.mdl $dir/$[$x+1].mdl || exit 1;

rm $nnets_list

if [ ! -z "${iter_to_epoch[$x]}" ]; then
e=${iter_to_epoch[$x]}
ln -sf $x.mdl $dir/epoch$e.mdl
fi

if $adjust_priors && [ ! -z "${iter_to_epoch[$x]}" ]; then
if [ ! -f $degs_dir/priors_egs.1.ark ]; then
echo "$0: Expecting $degs_dir/priors_egs.1.ark to exist since --adjust-priors was true."
echo "$0: Run this script with --adjust-priors false to not adjust priors"
exit 1
fi
(
e=${iter_to_epoch[$x]}
rm $dir/.error 2> /dev/null

steps/nnet3/adjust_priors.sh --egs-type priors_egs \
--num-jobs-compute-prior $num_archives_priors \
--cmd "$cmd" --use-gpu false \
--use-raw-nnet false --iter epoch$e $dir $degs_dir \
|| { touch $dir/.error; echo "Error in adjusting priors. See $dir/log/adjust_priors.epoch$e.log"; exit 1; }
) &
[ ! -f $dir/$[$x+1].mdl ] && echo "$0: Did not create $dir/$[$x+1].mdl" && exit 1;
if [ -f $dir/$[$x-1].mdl ] && $cleanup && \
[ $[($x-1)%$keep_model_iters] -ne 0 ] && \
[ -z "${iter_to_epoch[$[$x-1]]}" ]; then
rm $dir/$[$x-1].mdl
fi

[ -f $dir/.error ] && { echo "Found $dir/.error. Error on iteration $x"; exit 1; }
Expand All @@ -337,28 +316,27 @@ while [ $x -lt $num_iters ]; do
rm $dir/cache.$x 2>/dev/null || true
x=$[$x+1]
num_archives_processed=$[num_archives_processed+num_jobs_nnet]
done

rm $dir/final.mdl 2>/dev/null
cp $dir/$x.mdl $dir/final.mdl
ln -sf final.mdl $dir/epoch$num_epochs_expanded.mdl
if [ $stage -le $x ] && [ ! -z "${iter_to_epoch[$x]}" ]; then
e=${iter_to_epoch[$x]}
ln -sf $x.mdl $dir/epoch$e.mdl

if $adjust_priors && [ $stage -le $num_iters ]; then
if [ ! -f $degs_dir/priors_egs.1.ark ]; then
echo "$0: Expecting $degs_dir/priors_egs.1.ark to exist since --adjust-priors was true."
echo "$0: Run this script with --adjust-priors false to not adjust priors"
exit 1
fi
(
rm $dir/.error 2> /dev/null

steps/nnet3/adjust_priors.sh --egs-type priors_egs \
--num-jobs-compute-prior $num_archives_priors \
--cmd "$cmd $prior_queue_opt" --use-gpu false \
--use-raw-nnet false --iter epoch$num_epochs_expanded \
$dir $degs_dir || exit 1
fi
steps/nnet3/adjust_priors.sh --egs-type degs \
--num-jobs-compute-prior $num_jobs_compute_prior \
--cmd "$cmd" --use-gpu false \
--minibatch-size $minibatch_size \
--use-raw-nnet false --iter epoch$e $dir $degs_dir \
|| { touch $dir/.error; echo "Error in adjusting priors. See $dir/log/adjust_priors.epoch$e.log"; exit 1; }
) &
fi

echo Done
done

rm $dir/final.mdl 2>/dev/null
cp $dir/$x.mdl $dir/final.mdl

# Delete every path given as an argument (used for egs that might be soft
# links). When a path is a symbolic link, the file it resolves to
# (readlink -f) is deleted first, then the link itself.
# Arguments: zero or more paths.
# Returns:   status of the last rm executed (0 when called with no arguments).
remove () {
  # Keep the loop variable local so it cannot clobber a caller's variable.
  local path
  # "$@" keeps each argument intact, so paths containing whitespace or glob
  # characters are not split or re-expanded.
  for path in "$@"; do
    if [ -L "$path" ]; then
      rm -- "$(readlink -f -- "$path")"
    fi
    rm -- "$path"
  done
}
Expand All @@ -379,3 +357,8 @@ if $cleanup; then
fi
done
fi

wait
[ -f $dir/.error ] && { echo "Found $dir/.error."; exit 1; }

echo Done && exit 0

0 comments on commit d3787c1

Please sign in to comment.