Skip to content

Commit

Permalink
Merge pull request kaldi-asr#2 from kaldi-asr/master
Browse files Browse the repository at this point in the history
merge from kaldi-asr
  • Loading branch information
meixu song committed Nov 19, 2015
2 parents 4c8c072 + 892c7a2 commit 1bfb6c4
Show file tree
Hide file tree
Showing 11 changed files with 52 additions and 15 deletions.
10 changes: 6 additions & 4 deletions egs/swbd/s5c/local/nnet3/run_ivector_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,23 @@ if [ "$speed_perturb" == "true" ]; then
for datadir in train_nodup; do
utils/perturb_data_dir_speed.sh 0.9 data/${datadir} data/temp1
utils/perturb_data_dir_speed.sh 1.1 data/${datadir} data/temp2
utils/combine_data.sh --extra-files utt2uniq data/${datadir}_tmp data/temp1 data/temp2
utils/combine_data.sh data/${datadir}_tmp data/temp1 data/temp2
utils/validate_data_dir.sh --no-feats data/${datadir}_tmp
rm -r data/temp1 data/temp2

mfccdir=mfcc_perturbed
steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 \
data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1;
steps/compute_cmvn_stats.sh data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1;
utils/fix_data_dir.sh data/${datadir}_tmp

utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- data/${datadir} data/temp0
utils/combine_data.sh data/${datadir}_sp data/${datadir}_tmp data/temp0
utils/fix_data_dir.sh data/${datadir}_sp
rm -r data/temp0 data/${datadir}_tmp
done
fi

if [ $stage -le 2 ] && [ "$generate_alignments" == "true" ]; then
#obtain the alignment of the perturbed data
steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \
Expand Down
2 changes: 1 addition & 1 deletion egs/swbd/s5c/path.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
export KALDI_ROOT=`pwd`/../../..
export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/:$PWD:$PATH
export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/:$PWD:$PATH
#$KALDI_ROOT/tools/srilm/bin:$KALDI_ROOT/tools/srilm/bin/i686-m64:$KALDI_ROOT/tools/srilm/bin/i686:$PATH
export LC_ALL=C
26 changes: 26 additions & 0 deletions egs/wsj/s5/utils/combine_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,32 @@ for dir in $*; do
fi
done

# The utt2uniq file is handled differently from the other files: it is not
# compulsory for it to exist in the src directories, but if it exists in
# even one of them it should exist in all. We will create the files where necessary.
has_utt2uniq=false
for in_dir in $*; do
if [ -f $in_dir/utt2uniq ]; then
has_utt2uniq=true
break
fi
done

if $has_utt2uniq; then
# we are going to create an utt2uniq file in the destdir
for in_dir in $*; do
if [ ! -f $in_dir/utt2uniq ]; then
# we assume that utt2uniq is a one to one mapping
cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}'
else
cat $in_dir/utt2uniq
fi
done | sort -k1 > $dest/utt2uniq
echo "$0: combined utt2uniq"
fi
# Some of the older scripts may still pass utt2uniq via --extra-files; remove it from the list so it is not combined a second time.
extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g")

for file in utt2spk utt2lang feats.scp text cmvn.scp segments reco2file_and_channel wav.scp spk2gender $extra_files; do
if [ -f $first_src/$file ]; then
( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1;
Expand Down
8 changes: 8 additions & 0 deletions egs/wsj/s5/utils/copy_data_dir.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@ mkdir -p $destdir
cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map
cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map

if [ ! -f $srcdir/utt2uniq ]; then
if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then
cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq
fi
else
cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq
fi

cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \
utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk

Expand Down
9 changes: 5 additions & 4 deletions egs/wsj/s5/utils/fix_data_dir.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ function check_sorted {
fi
}

for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp reco2file_and_channel spk2gender utt2lang; do
for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp reco2file_and_channel spk2gender utt2lang utt2uniq; do
if [ -f $data/$x ]; then
cp $data/$x $data/.backup/$x
check_sorted $data/$x
Expand Down Expand Up @@ -121,6 +121,9 @@ function filter_utts {
! cat $data/utt2spk | sort | cmp - $data/utt2spk && \
echo "utt2spk is not in sorted order (fix this yourself)" && exit 1;

# utt2uniq is an optional file, so only verify its sort order when it exists.
# Without this guard, cmp fails on the missing file, the negated pipeline
# succeeds, and the script aborts with a misleading "not in sorted order" error.
if [ -f $data/utt2uniq ]; then
  ! cat $data/utt2uniq | sort | cmp - $data/utt2uniq && \
    echo "utt2uniq is not in sorted order (fix this yourself)" && exit 1;
fi

! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \
echo "utt2spk is not in sorted order when sorted first on speaker-id " && \
echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1;
Expand Down Expand Up @@ -151,7 +154,7 @@ function filter_utts {
fi
fi

for x in utt2spk feats.scp vad.scp text segments utt2lang $maybe_wav; do
for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang $maybe_wav; do
if [ -f $data/$x ]; then
cp $data/$x $data/.backup/$x
if ! cmp -s $data/$x <( utils/filter_scp.pl $tmpdir/utts $data/$x ) ; then
Expand All @@ -168,8 +171,6 @@ filter_utts
filter_speakers
filter_recordings



utils/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt

echo "fix_data_dir.sh: old files are kept in $data/.backup"
2 changes: 1 addition & 1 deletion egs/wsj/s5/utils/validate_data_dir.sh
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ if [ -f $data/utt2warp ]; then
fi

# check some optionally-required things
for f in vad.scp utt2lang; do
for f in vad.scp utt2lang utt2uniq; do
if [ -f $data/$f ]; then
check_sorted_and_uniq $data/$f
if ! cmp -s <( awk '{print $1}' $data/utt2spk ) \
Expand Down
2 changes: 1 addition & 1 deletion src/hmm/posterior-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ void TestPosteriorIo() {
KALDI_ASSERT(post[i].size() == post2[i].size());
for (int32 j = 0; j < post[i].size(); j++) {
KALDI_ASSERT(post[i][j].first == post2[i][j].first &&
fabs(post[i][j].second - post2[i][j].second < 0.01));
fabs(post[i][j].second - post2[i][j].second) < 0.01);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/nnet/nnet-component-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ int main() {
using namespace kaldi;
using namespace kaldi::nnet1;

for (int32 loop = 0; loop < 2; loop++) {
for (kaldi::int32 loop = 0; loop < 2; loop++) {
#if HAVE_CUDA == 1
if (loop == 0)
CuDevice::Instantiate().SelectGpuId("no"); // use no GPU
Expand Down
2 changes: 1 addition & 1 deletion src/nnet3/nnet-component-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ int main() {
using namespace kaldi;
using namespace kaldi::nnet3;

for (int32 loop = 0; loop < 2; loop++) {
for (kaldi::int32 loop = 0; loop < 2; loop++) {
#if HAVE_CUDA == 1
if (loop == 0)
CuDevice::Instantiate().SelectGpuId("no");
Expand Down
2 changes: 1 addition & 1 deletion src/nnet3/nnet-compute-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ int main() {
//SetVerboseLevel(2);


for (int32 loop = 0; loop < 2; loop++) {
for (kaldi::int32 loop = 0; loop < 2; loop++) {
#if HAVE_CUDA == 1
if (loop == 0)
CuDevice::Instantiate().SelectGpuId("no");
Expand Down
2 changes: 1 addition & 1 deletion src/nnet3/nnet-derivative-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ int main() {
//SetVerboseLevel(2);


for (int32 loop = 0; loop < 2; loop++) {
for (kaldi::int32 loop = 0; loop < 2; loop++) {
#if HAVE_CUDA == 1
if (loop == 0)
CuDevice::Instantiate().SelectGpuId("no");
Expand Down

0 comments on commit 1bfb6c4

Please sign in to comment.