From 246b9825340fdd5968bd81cd9d514fcf4d799064 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 3 Jan 2019 18:17:12 -0500 Subject: [PATCH 01/18] adding extra corpus for LM, minor cleaning --- .../v1/local/chain/tuning/run_e2e_cnn_1a.sh | 50 ++++--------------- egs/madcat_ar/v1/local/train_lm.sh | 18 ++++--- egs/madcat_ar/v1/run_end2end.sh | 41 ++++++++------- 3 files changed, 45 insertions(+), 64 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh index 2891e50da9e..dc6ed4b7c2b 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh @@ -3,24 +3,24 @@ # This script does end2end chain training (i.e. from scratch) -# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a +# ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ # System e2e_cnn_1a -# WER 7.81 -# CER 2.05 -# Final train prob -0.0812 -# Final valid prob -0.0708 +# WER 5.73 +# WER (rescored) 5.67 +# CER 1.45 +# CER (rescored) 1.42 +# Final train prob -0.0934 +# Final valid prob -0.0746 # Final train prob (xent) # Final valid prob (xent) # Parameters 2.94M # steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a/ -# exp/chain/e2e_cnn_1a/: num-iters=98 nj=6..16 num-params=2.9M dim=40->330 combine=-0.073->-0.073 (over 2) logprob:train/valid[64,97,final]=(-0.084,-0.080,-0.081/-0.073,-0.070,-0.071) - +# exp/chain/e2e_cnn_1a/: num-iters=98 nj=6..16 num-params=2.9M dim=40->330 combine=-0.071->-0.070 (over 5) logprob:train/valid[64,97,final]=(-0.089,-0.084,-0.093/-0.075,-0.073,-0.075) set -e # configs for 'chain' stage=0 -nj=70 train_stage=-10 get_egs_stage=-10 affix=1a @@ -31,9 +31,6 @@ minibatch_size=150=128,64/300=128,64/600=64,32/1200=32,16 common_egs_dir= cmvn_opts="--norm-means=false --norm-vars=false" train_set=train -lang_decode=data/lang -lang_rescore=data/lang_rescore_6g - # End configuration section. echo "$0 $@" # Print the command line for logging @@ -67,7 +64,7 @@ if [ $stage -le 0 ]; then fi if [ $stage -le 1 ]; then - steps/nnet3/chain/e2e/prepare_e2e.sh --nj $nj --cmd "$cmd" \ + steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$cmd" \ --shared-phones true \ --type mono \ data/$train_set $lang $treedir @@ -107,9 +104,6 @@ EOF fi if [ $stage -le 3 ]; then - # no need to store the egs in a shared storage because we always - # remove them. Anyway, it takes only 5 minutes to generate them. - steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ --cmd "$cmd" \ --feat.cmvn-opts "$cmvn_opts" \ @@ -138,29 +132,3 @@ if [ $stage -le 3 ]; then --tree-dir $treedir \ --dir $dir || exit 1; fi - -if [ $stage -le 4 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. - - utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ - $dir $dir/graph || exit 1; -fi - -if [ $stage -le 5 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ - data/test $dir/decode_test{,_rescored} || exit 1 -fi - -echo "Done. Date: $(date). 
Results:" -local/chain/compare_wer.sh $dir diff --git a/egs/madcat_ar/v1/local/train_lm.sh b/egs/madcat_ar/v1/local/train_lm.sh index b7fc0b09a46..903b288a834 100755 --- a/egs/madcat_ar/v1/local/train_lm.sh +++ b/egs/madcat_ar/v1/local/train_lm.sh @@ -64,6 +64,12 @@ if [ $stage -le 0 ]; then # we can later fold the dev data into this. cat data/train/text | cut -d " " -f 2- > ${dir}/data/text/train.txt + if [ -d "data/local/gigawordcorpus/arb_gw_5/data" ]; then + cat data/local/gigawordcorpus/arb_gw_5/data/nhr_arb_combined.txt | \ + utils/lang/bpe/prepend_words.py | utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt \ + | sed 's/@@//g' > ${dir}/data/text/corpus_text.txt + fi + # for reporting perplexities, we'll use the "real" dev set. # (the validation data is used as ${dir}/data/text/dev.txt to work # out interpolation weights.) @@ -72,7 +78,7 @@ if [ $stage -le 0 ]; then cut -d " " -f 2- < data/test/text > ${dir}/data/real_dev_set.txt # get the wordlist from MADCAT text - cat ${dir}/data/text/train.txt | tr '[:space:]' '[\n*]' | grep -v "^\s*$" | sort | uniq -c | sort -bnr > ${dir}/data/word_count + cat ${dir}/data/text/{train,corpus_text}.txt | tr '[:space:]' '[\n*]' | grep -v "^\s*$" | sort | uniq -c | sort -bnr > ${dir}/data/word_count cat ${dir}/data/word_count | awk '{print $2}' > ${dir}/data/wordlist fi @@ -83,7 +89,7 @@ if [ $stage -le 1 ]; then # Note: if you have more than one order, use a certain amount of words as the # vocab and want to restrict max memory for 'sort', echo "$0: training the unpruned LM" - min_counts='train=1' + min_counts='corpus_text=2 train=1' wordlist=${dir}/data/wordlist lm_name="`basename ${wordlist}`_${order}" @@ -103,8 +109,8 @@ fi if [ $stage -le 2 ]; then echo "$0: pruning the LM (to larger size)" - # Using 1 million n-grams for a big LM for rescoring purposes. - size=1000000 + # Using 20 million n-grams for a big LM for rescoring purposes. + size=20000000 prune_lm_dir.py --target-num-ngrams=$size --initial-threshold=0.02 ${unpruned_lm_dir} ${dir}/data/lm_${order}_prune_big get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_big 2>&1 | grep -F '[perplexity' @@ -114,9 +120,9 @@ fi if [ $stage -le 3 ]; then echo "$0: pruning the LM (to smaller size)" - # Using 500k n-grams for a smaller LM for graph building. Prune from the + # Using 10 million n-grams for a smaller LM for graph building. Prune from the # bigger-pruned LM, it'll be faster. - size=500000 + size=10000000 prune_lm_dir.py --target-num-ngrams=$size ${dir}/data/lm_${order}_prune_big ${dir}/data/lm_${order}_prune_small get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_small 2>&1 | grep -F '[perplexity' diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index bb2b4f86db1..62f4eeb7c71 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -36,6 +36,8 @@ if [ $stage -le 0 ]; then echo "Exiting with status 1 to avoid data corruption" exit 1; fi + + echo "$0: preparing data...$(date)" local/prepare_data.sh --data_splits $data_splits_dir --download_dir1 $download_dir1 \ --download_dir2 $download_dir2 --download_dir3 $download_dir3 \ --use_extra_corpus_text $use_extra_corpus_text @@ -64,7 +66,7 @@ if [ $stage -le 1 ]; then image/get_image2num_frames.py data/train image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train - for set in test train; do + for set in test dev train; do echo "$0: Extracting features and calling compute_cmvn_stats for dataset: $set. 
$(date)" local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$set steps/compute_cmvn_stats.sh data/$set || exit 1; @@ -99,28 +101,33 @@ if [ $stage -le 2 ]; then fi if [ $stage -le 3 ]; then + echo "$0: Calling the flat-start chain recipe... $(date)." + local/chain/run_e2e_cnn.sh +fi + +lang_decode=data/lang +lang_rescore=data/lang_rescore_6g +decode_e2e=true +if [ $stage -le 4 ]; then echo "$0: Estimating a language model for decoding..." local/train_lm.sh utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_big.arpa.gz \ - data/local/dict/lexicon.txt data/lang + data/local/dict/lexicon.txt $lang_decode utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \ - data/lang data/lang_rescore_6g + data/lang $lang_rescore fi -if [ $stage -le 4 ]; then - echo "$0: Calling the flat-start chain recipe... $(date)." - local/chain/run_e2e_cnn.sh --nj $nj -fi +if [ $stage -le 5 ] && $decode_e2e; then + echo "$0: $(date) stage 5: decoding end2end setup..." + utils/mkgraph.sh --self-loop-scale 1.0 $lang_decode \ + exp/chain/e2e_cnn_1a/ exp/chain/e2e_cnn_1a/graph || exit 1; -if [ $stage -le 5 ]; then - echo "$0: Aligning the training data using the e2e chain model...$(date)." - steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ - --use-gpu false \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ - data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train -fi + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 --nj $nj --cmd "$cmd" \ + exp/chain/e2e_cnn_1a/graph data/test exp/chain/e2e_cnn_1a/decode_test || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$cmd" $lang_decode $lang_rescore \ + data/test exp/chain/e2e_cnn_1a/decode_test{,_rescored} || exit 1 -if [ $stage -le 6 ]; then - echo "$0: Building a tree and training a regular chain model using the e2e alignments...$(date)" - local/chain/run_cnn_e2eali.sh --nj $nj + echo "$0: Done. Date: $(date). 
Results:" + local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ fi From ff212a05a82c168cb40e8e3a60b356acda50d44c Mon Sep 17 00:00:00 2001 From: aarora8 Date: Tue, 8 Jan 2019 23:23:43 -0500 Subject: [PATCH 02/18] making BPE and word-based setups similar --- egs/iam/v1/local/augment_data.sh | 34 +++ egs/iam/v1/local/chain/run_cnn.sh | 1 + egs/iam/v1/local/chain/run_cnn_chainali.sh | 1 + egs/iam/v1/local/chain/run_cnn_e2eali.sh | 1 + egs/iam/v1/local/chain/run_cnn_e2eali_1b.sh | 2 +- egs/iam/v1/local/chain/run_e2e_cnn.sh | 1 + .../v1/local/chain/{ => tuning}/run_cnn_1a.sh | 0 .../chain/{ => tuning}/run_cnn_chainali_1a.sh | 0 .../chain/{ => tuning}/run_cnn_chainali_1b.sh | 0 .../chain/{ => tuning}/run_cnn_chainali_1c.sh | 0 .../chain/{ => tuning}/run_cnn_chainali_1d.sh | 0 .../chain/{ => tuning}/run_cnn_e2eali_1a.sh | 0 .../local/chain/tuning/run_cnn_e2eali_1b.sh | 244 +++++++++++++++++ .../local/chain/tuning/run_cnn_e2eali_1c.sh | 251 ++++++++++++++++++ .../run_e2e_cnn_1a.sh} | 81 +++--- egs/iam/v1/local/extract_features.sh | 48 ++++ egs/iam/v1/local/gen_topo.py | 92 +++++++ egs/iam/v1/local/make_features.py | 193 ++++++++++++-- egs/iam/v1/local/prepare_data.sh | 5 +- egs/iam/v1/run_end2end.sh | 53 ++-- 20 files changed, 911 insertions(+), 96 deletions(-) create mode 100755 egs/iam/v1/local/augment_data.sh create mode 120000 egs/iam/v1/local/chain/run_cnn.sh create mode 120000 egs/iam/v1/local/chain/run_cnn_chainali.sh create mode 120000 egs/iam/v1/local/chain/run_cnn_e2eali.sh create mode 120000 egs/iam/v1/local/chain/run_e2e_cnn.sh rename egs/iam/v1/local/chain/{ => tuning}/run_cnn_1a.sh (100%) rename egs/iam/v1/local/chain/{ => tuning}/run_cnn_chainali_1a.sh (100%) rename egs/iam/v1/local/chain/{ => tuning}/run_cnn_chainali_1b.sh (100%) rename egs/iam/v1/local/chain/{ => tuning}/run_cnn_chainali_1c.sh (100%) rename egs/iam/v1/local/chain/{ => tuning}/run_cnn_chainali_1d.sh (100%) rename egs/iam/v1/local/chain/{ => tuning}/run_cnn_e2eali_1a.sh (100%) create mode 100755 egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh create mode 100755 egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh rename egs/iam/v1/local/chain/{run_flatstart_cnn1a.sh => tuning/run_e2e_cnn_1a.sh} (68%) create mode 100755 egs/iam/v1/local/extract_features.sh create mode 100755 egs/iam/v1/local/gen_topo.py diff --git a/egs/iam/v1/local/augment_data.sh b/egs/iam/v1/local/augment_data.sh new file mode 100755 index 00000000000..31e4a8217ca --- /dev/null +++ b/egs/iam/v1/local/augment_data.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright 2018 Hossein Hadian +# 2018 Ashish Arora + +# Apache 2.0 +# This script performs data augmentation. + +nj=4 +cmd=run.pl +feat_dim=40 +echo "$0 $@" + +. ./cmd.sh +. ./path.sh +. 
./utils/parse_options.sh || exit 1; + +srcdir=$1 +outdir=$2 +datadir=$3 +aug_set=aug1 +mkdir -p $datadir/augmentations +echo "copying $srcdir to $datadir/augmentations/$aug_set, allowed length, creating feats.scp" + +for set in $aug_set; do + image/copy_data_dir.sh --spk-prefix $set- --utt-prefix $set- \ + $srcdir $datadir/augmentations/$set + cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt + local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \ + --fliplr false --augment true $datadir/augmentations/$set +done + +echo " combine original data and data from different augmentations" +utils/combine_data.sh --extra-files images.scp $outdir $srcdir $datadir/augmentations/$aug_set +cat $srcdir/allowed_lengths.txt > $outdir/allowed_lengths.txt diff --git a/egs/iam/v1/local/chain/run_cnn.sh b/egs/iam/v1/local/chain/run_cnn.sh new file mode 120000 index 00000000000..df6f0a468c1 --- /dev/null +++ b/egs/iam/v1/local/chain/run_cnn.sh @@ -0,0 +1 @@ +tuning/run_cnn_1a.sh \ No newline at end of file diff --git a/egs/iam/v1/local/chain/run_cnn_chainali.sh b/egs/iam/v1/local/chain/run_cnn_chainali.sh new file mode 120000 index 00000000000..41b712609c2 --- /dev/null +++ b/egs/iam/v1/local/chain/run_cnn_chainali.sh @@ -0,0 +1 @@ +tuning/run_cnn_chainali_1d.sh \ No newline at end of file diff --git a/egs/iam/v1/local/chain/run_cnn_e2eali.sh b/egs/iam/v1/local/chain/run_cnn_e2eali.sh new file mode 120000 index 00000000000..ad51803ab0e --- /dev/null +++ b/egs/iam/v1/local/chain/run_cnn_e2eali.sh @@ -0,0 +1 @@ +tuning/run_cnn_e2eali_1c.sh \ No newline at end of file diff --git a/egs/iam/v1/local/chain/run_cnn_e2eali_1b.sh b/egs/iam/v1/local/chain/run_cnn_e2eali_1b.sh index 6d8cca876bf..1e3aed66c6b 100755 --- a/egs/iam/v1/local/chain/run_cnn_e2eali_1b.sh +++ b/egs/iam/v1/local/chain/run_cnn_e2eali_1b.sh @@ -23,7 +23,7 @@ nj=30 train_set=train nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. affix=_1b #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. 
-e2echain_model_dir=exp/chain/e2e_cnn_1a +e2echain_model_dir=exp/chain/e2e_cnn_1b common_egs_dir= reporting_email= diff --git a/egs/iam/v1/local/chain/run_e2e_cnn.sh b/egs/iam/v1/local/chain/run_e2e_cnn.sh new file mode 120000 index 00000000000..d26ba0182ce --- /dev/null +++ b/egs/iam/v1/local/chain/run_e2e_cnn.sh @@ -0,0 +1 @@ +tuning/run_e2e_cnn_1a.sh \ No newline at end of file diff --git a/egs/iam/v1/local/chain/run_cnn_1a.sh b/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh similarity index 100% rename from egs/iam/v1/local/chain/run_cnn_1a.sh rename to egs/iam/v1/local/chain/tuning/run_cnn_1a.sh diff --git a/egs/iam/v1/local/chain/run_cnn_chainali_1a.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh similarity index 100% rename from egs/iam/v1/local/chain/run_cnn_chainali_1a.sh rename to egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh diff --git a/egs/iam/v1/local/chain/run_cnn_chainali_1b.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh similarity index 100% rename from egs/iam/v1/local/chain/run_cnn_chainali_1b.sh rename to egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh diff --git a/egs/iam/v1/local/chain/run_cnn_chainali_1c.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh similarity index 100% rename from egs/iam/v1/local/chain/run_cnn_chainali_1c.sh rename to egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh diff --git a/egs/iam/v1/local/chain/run_cnn_chainali_1d.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh similarity index 100% rename from egs/iam/v1/local/chain/run_cnn_chainali_1d.sh rename to egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh diff --git a/egs/iam/v1/local/chain/run_cnn_e2eali_1a.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh similarity index 100% rename from egs/iam/v1/local/chain/run_cnn_e2eali_1a.sh rename to egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh new file mode 100755 index 00000000000..6d8cca876bf --- /dev/null +++ b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -0,0 +1,244 @@ +#!/bin/bash + +# e2eali_1b is the same as e2eali_1a but uses unconstrained egs + +# local/chain/compare_wer.sh /home/hhadian/kaldi-rnnlm/egs/iam/v1/exp/chain/cnn_e2eali_1a exp/chain/cnn_e2eali_1b +# System cnn_e2eali_1a cnn_e2eali_1b +# WER 12.79 12.23 +# CER 5.73 5.48 +# Final train prob -0.0556 -0.0367 +# Final valid prob -0.0795 -0.0592 +# Final train prob (xent) -0.9178 -0.8382 +# Final valid prob (xent) -1.0604 -0.9853 +# Parameters 3.95M 3.95M + +# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1b +# exp/chain/cnn_e2eali_1b: num-iters=21 nj=2..4 num-params=4.0M dim=40->360 combine=-0.038->-0.038 (over 1) xent:train/valid[13,20,final]=(-1.34,-0.967,-0.838/-1.40,-1.07,-0.985) logprob:train/valid[13,20,final]=(-0.075,-0.054,-0.037/-0.083,-0.072,-0.059) + +set -e -o pipefail + +stage=0 + +nj=30 +train_set=train +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. +affix=_1b #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +e2echain_model_dir=exp/chain/e2e_cnn_1a +common_egs_dir= +reporting_email= + +# chain options +train_stage=-10 +xent_regularize=0.1 +frame_subsampling_factor=4 +# training chunk-options +chunk_width=340,300,200,100 +num_leaves=500 +# we don't need extra left/right context for TDNN systems. 
+chunk_left_context=0 +chunk_right_context=0 +tdnn_dim=450 +# training options +srand=0 +remove_egs=true +lang_test=lang_unk +# End configuration section. +echo "$0 $@" # Print the command line for logging + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 2 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ + --acoustic-scale 1.0 \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + ${train_data_dir} data/lang $e2echain_model_dir $lat_dir + echo "" >$lat_dir/splice_opts +fi + +if [ $stage -le 3 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 1 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$cmd" $num_leaves ${train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 4 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + cnn_opts="l2-regularize=0.075" + tdnn_opts="l2-regularize=0.075" + output_opts="l2-regularize=0.1" + common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + + conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 + conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-1,0,1 $common3 + conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-1,0,1 $common3 + relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' mod?els... this + # has the effect of regularizing the hidden parts of the model. 
we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 5 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=false \ + --chain.lm-opts="--num-extra-lm-states=500" \ + --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.alignment-subsampling-factor=1 \ + --chain.left-tolerance 3 \ + --chain.right-tolerance 3 \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=4 \ + --trainer.frames-per-iter=1000000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=4 \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=64,32 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 6 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. 
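# Illustrative sketch (not part of this patch; paths assumed for illustration):
# if you do point mkgraph.sh at a different lang directory, a quick way to
# confirm its phone inventory matches the one the model was trained with is to
# compare the phones.txt files directly, e.g.:
#   diff data/lang/phones.txt data/$lang_test/phones.txt \
#     && echo "phones.txt compatible, safe to build the graph"
# Kaldi also provides utils/lang/check_phones_compatible.sh for the same check.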
+ + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/$lang_test \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 7 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/test $dir/decode_test || exit 1; +fi diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh new file mode 100755 index 00000000000..cf90193f7eb --- /dev/null +++ b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh @@ -0,0 +1,251 @@ +#!/bin/bash + +# e2eali_1c is the same as e2eali_1b but has more CNN layers, different filter size +# smaller lm-opts, minibatch, frams-per-iter, less epochs and more initial/finaljobs. + +# local/chain/compare_wer.sh exp/chain/cnn_e2eali_1c +# System cnn_e2eali_1c (dict_50k) cnn_e2eali_1c (dict_500k) +# WER 12.20 9.62 +# CER 5.29 4.33 +# Final train prob -0.0494 -0.0494 +# Final valid prob -0.0644 -0.0644 +# Final train prob (xent) -0.4852 -0.4852 +# Final valid prob (xent) -0.5437 -0.5437 +# Parameters 4.33M 4.33M + +# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1c +# exp/chain/cnn_e2eali_1c: num-iters=30 nj=3..5 num-params=4.3M dim=40->376 combine=-0.052->-0.052 (over 1) xent:train/valid[19,29,final]=(-0.715,-0.508,-0.485/-0.717,-0.562,-0.544) logprob:train/valid[19,29,final]=(-0.089,-0.054,-0.049/-0.100,-0.070,-0.064) + + +set -e -o pipefail + +stage=0 + +nj=30 +train_set=train +decode_val=true +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. +affix=_1c #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +e2echain_model_dir=exp/chain/e2e_cnn_1b +common_egs_dir= +reporting_email= + +# chain options +train_stage=-10 +xent_regularize=0.1 +frame_subsampling_factor=4 +# training chunk-options +chunk_width=340,300,200,100 +num_leaves=500 +# we don't need extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 +tdnn_dim=550 +# training options +srand=0 +remove_egs=true +lang_decode=data/lang_test +if $decode_val; then maybe_val=val; else maybe_val= ; fi +dropout_schedule='0,0@0.20,0.2@0.50,0' +# End configuration section. +echo "$0 $@" # Print the command line for logging + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 2 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ + --acoustic-scale 1.0 \ + --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + ${train_data_dir} data/lang $e2echain_model_dir $lat_dir + echo "" >$lat_dir/splice_opts +fi + +if [ $stage -le 3 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor 1 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$cmd" $num_leaves ${train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 4 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" + tdnn_opts="l2-regularize=0.03" + output_opts="l2-regularize=0.04" + common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" + common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=40 name=input + + conv-relu-batchnorm-dropout-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 + conv-relu-batchnorm-dropout-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 + conv-relu-batchnorm-dropout-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common3 height-subsample-out=2 + conv-relu-batchnorm-dropout-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3 + relu-batchnorm-dropout-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + relu-batchnorm-dropout-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts dropout-proportion=0.0 + + ## adding the layers for chain branch + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' mod?els... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 5 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$cmd" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=true \ + --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ + --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.alignment-subsampling-factor=1 \ + --chain.left-tolerance 3 \ + --chain.right-tolerance 3 \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=5 \ + --trainer.frames-per-iter=1500000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=5 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=32,16 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 6 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/mkgraph.sh \ + --self-loop-scale 1.0 $lang_decode \ + $dir $dir/graph || exit 1; +fi + +if [ $stage -le 7 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + done +fi + + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v1/local/chain/run_flatstart_cnn1a.sh b/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh similarity index 68% rename from egs/iam/v1/local/chain/run_flatstart_cnn1a.sh rename to egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh index 56c897137f4..4836d76fa6e 100755 --- a/egs/iam/v1/local/chain/run_flatstart_cnn1a.sh +++ b/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh @@ -2,20 +2,20 @@ # Copyright 2017 Hossein Hadian # This script does end2end chain training (i.e. 
from scratch) - -# local/chain/compare_wer.sh exp/chain/cnn_1a exp/chain/cnn_chainali_1c exp/chain/e2e_cnn_1a -# System cnn_1a cnn_chainali_1c e2e_cnn_1a -# WER 18.52 12.72 13.87 -# CER 10.07 5.99 6.54 -# Final train prob -0.0077 -0.0291 -0.0371 -# Final valid prob -0.0970 -0.0359 -0.0636 -# Final train prob (xent) -0.5484 -0.9781 -# Final valid prob (xent) -0.9643 -1.1544 -# Parameters 4.36M 3.96M 9.13M +# ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ +# System e2e_cnn_1a +# WER 13.59 +# WER (rescored) 13.27 +# CER 6.92 +# CER (rescored) 6.71 +# Final train prob 0.0345 +# Final valid prob 0.0269 +# Final train prob (xent) +# Final valid prob (xent) +# Parameters 9.52M # steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a -# exp/chain/e2e_cnn_1a: num-iters=21 nj=2..4 num-params=9.1M dim=40->12640 combine=-0.033->-0.033 (over 1) logprob:train/valid[13,20,final]=(-0.058,-0.042,-0.035/-0.070,-0.064,-0.059) - +# exp/chain/e2e_cnn_1b: num-iters=42 nj=2..4 num-params=9.5M dim=40->12640 combine=0.041->0.041 (over 2) logprob:train/valid[27,41,final]=(0.032,0.035,0.035/0.025,0.026,0.027) set -e # configs for 'chain' @@ -23,20 +23,16 @@ stage=0 train_stage=-10 get_egs_stage=-10 affix=1a +nj=30 # training options tdnn_dim=450 -num_epochs=4 -num_jobs_initial=2 -num_jobs_final=4 minibatch_size=150=100,64/300=50,32/600=25,16/1200=16,8 common_egs_dir= -l2_regularize=0.00005 -frames_per_iter=1000000 -cmvn_opts="--norm-means=true --norm-vars=true" train_set=train -lang_test=lang_unk - +decode_val=true +lang_decode=data/lang_test +if $decode_val; then maybe_val=val; else maybe_val= ; fi # End configuration section. echo "$0 $@" # Print the command line for logging @@ -85,13 +81,8 @@ fi if [ $stage -le 2 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') - - cnn_opts="l2-regularize=0.075" - tdnn_opts="l2-regularize=0.075" - output_opts="l2-regularize=0.1" - common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" - common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" - common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" + common1="height-offsets=-2,-1,0,1,2 num-filters-out=36" + common2="height-offsets=-2,-1,0,1,2 num-filters-out=70" mkdir -p $dir/configs cat < $dir/configs/network.xconfig input dim=40 name=input @@ -99,14 +90,11 @@ if [ $stage -le 2 ]; then conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-1,0,1 $common3 - conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-1,0,1 $common3 - relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts - relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts - relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts - + conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 
height-subsample-out=2 + relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn4 input=Append(-4,0,4) dim=$tdnn_dim ## adding the layers for chain branch relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts @@ -121,9 +109,9 @@ if [ $stage -le 3 ]; then steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ --cmd "$cmd" \ - --feat.cmvn-opts "$cmvn_opts" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize $l2_regularize \ + --chain.l2-regularize 0.00005 \ --chain.apply-deriv-weights false \ --egs.dir "$common_egs_dir" \ --egs.stage $get_egs_stage \ @@ -131,11 +119,11 @@ if [ $stage -le 3 ]; then --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 4 \ --trainer.num-chunk-per-minibatch $minibatch_size \ - --trainer.frames-per-iter $frames_per_iter \ - --trainer.num-epochs $num_epochs \ + --trainer.frames-per-iter 1000000 \ + --trainer.num-epochs 4 \ --trainer.optimization.momentum 0 \ - --trainer.optimization.num-jobs-initial $num_jobs_initial \ - --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 4 \ --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.shrink-value 1.0 \ @@ -155,15 +143,16 @@ if [ $stage -le 4 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + --self-loop-scale 1.0 $lang_decode \ $dir $dir/graph || exit 1; fi if [ $stage -le 5 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj 30 --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + done fi echo "Done. Date: $(date). Results:" diff --git a/egs/iam/v1/local/extract_features.sh b/egs/iam/v1/local/extract_features.sh new file mode 100755 index 00000000000..1741ad3f9b2 --- /dev/null +++ b/egs/iam/v1/local/extract_features.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright 2017 Yiwen Shao +# 2018 Ashish Arora + +# Apache 2.0 +# This script runs the make features script in parallel. + +nj=4 +cmd=run.pl +feat_dim=40 +augment=false +fliplr=false +echo "$0 $@" + +. ./cmd.sh +. ./path.sh +. 
./utils/parse_options.sh || exit 1; + +data=$1 +featdir=$data/data +scp=$data/images.scp +logdir=$data/log + +mkdir -p $logdir +mkdir -p $featdir + +# make $featdir an absolute pathname +featdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $featdir ${PWD}` + +for n in $(seq $nj); do + split_scps="$split_scps $logdir/images.$n.scp" +done + +# split images.scp +utils/split_scp.pl $scp $split_scps || exit 1; + +$cmd JOB=1:$nj $logdir/extract_features.JOB.log \ + local/make_features.py $logdir/images.JOB.scp \ + --allowed_len_file_path $data/allowed_lengths.txt \ + --feat-dim $feat_dim --fliplr $fliplr --augment $augment \| \ + copy-feats --compress=true --compression-method=7 \ + ark:- ark,scp:$featdir/images.JOB.ark,$featdir/images.JOB.scp + +## aggregates the output scp's to get feats.scp +for n in $(seq $nj); do + cat $featdir/images.$n.scp || exit 1; +done > $data/feats.scp || exit 1 diff --git a/egs/iam/v1/local/gen_topo.py b/egs/iam/v1/local/gen_topo.py new file mode 100755 index 00000000000..540bfbcf270 --- /dev/null +++ b/egs/iam/v1/local/gen_topo.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python + +# Copyright 2017 (author: Chun-Chieh Chang) + +# Generate a topology file. This allows control of the number of states in the +# non-silence HMMs, and in the silence HMMs. This is a modified version of +# 'utils/gen_topo.pl'. The difference is that this creates two topologies for +# the non-silence HMMs. The number of states for punctuations is different than +# the number of states for other characters. + +from __future__ import print_function +import argparse +import string + +parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo.py " + " " + "e.g.: steps/nnet3/chain/gen_topo.pl 4:5:6:7:8:9:10 1:2:3\n", + epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage."); +parser.add_argument("num_nonsil_states", type=int, help="number of states for nonsilence phones"); +parser.add_argument("num_sil_states", type=int, help="number of states for silence phones"); +parser.add_argument("num_punctuation_states", type=int, help="number of states for punctuation"); +parser.add_argument("nonsilence_phones", type=str, + help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9"); +parser.add_argument("silence_phones", type=str, + help="List of silence phones as integers, separated by colons, e.g. 
1:2:3"); +parser.add_argument("phone_list", type=str, help="file containing all phones and their corresponding number."); + +args = parser.parse_args() + +silence_phones = [ int(x) for x in args.silence_phones.split(":") ] +nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ] +all_phones = silence_phones + nonsilence_phones + +punctuation_phones = [] +exclude = set("!(),.?;:'-\"") +with open(args.phone_list) as f: + for line in f: + line = line.strip() + phone = line.split(' ')[0] + if len(phone) == 1 and phone in exclude: + punctuation_phones.append(int(line.split(' ')[1])) +# For nonsilence phones that are not punctuations +print("") +print("") +print("") +print(" ".join([str(x) for x in nonsilence_phones if x not in punctuation_phones])) +print("") +for x in range(0, args.num_nonsil_states): + xp1 = x + 1 + print(" " + str(x) + " " + str(x) + " " + str(x) + " 0.75 " + str(xp1) + " 0.25 ") +print(" " + str(args.num_nonsil_states) + " ") +print("") + +# For nonsilence phones that ar punctuations +print("") +print("") +print(" ".join([str(x) for x in nonsilence_phones if x in punctuation_phones])) +print("") +for x in range(0, args.num_punctuation_states): + xp1 = x + 1 + print(" " + str(x) + " " + str(x) + " " + str(x) + " 0.75 " + str(xp1) + " 0.25 ") +print(" " + str(args.num_punctuation_states) + " ") +print("") + +# For silence phones +print("") +print("") +print(" ".join([str(x) for x in silence_phones])) +print("") +if(args.num_sil_states > 1): + transp = 1.0 / (args.num_sil_states - 1) + + state_str = " 0 0 " + for x in range(0, (args.num_sil_states - 1)): + state_str = state_str + " " + str(x) + " " + str(transp) + " " + state_str = state_str + "" + print(state_str) + + for x in range(1, (args.num_sil_states - 1)): + state_str = " " + str(x) + " " + str(x) + " " + for y in range(1, args.num_sil_states): + state_str = state_str + " " + str(y) + " " + str(transp) + " " + state_str = state_str + "" + print(state_str) + second_last = args.num_sil_states - 1 + print(" " + str(second_last) + " " + str(second_last) + " " + str(second_last) + " 0.75 " + str(args.num_sil_states) + " 0.25 ") + print(" " + str(args.num_sil_states) + " ") +else: + print(" 0 0 0 0.75 1 0.25 ") + print(" " + str(args.num_sil_states) + " ") +print("") +print("") diff --git a/egs/iam/v1/local/make_features.py b/egs/iam/v1/local/make_features.py index 84e012daedb..3ce501732cf 100755 --- a/egs/iam/v1/local/make_features.py +++ b/egs/iam/v1/local/make_features.py @@ -2,6 +2,7 @@ # Copyright 2017 Chun Chieh Chang # 2017 Ashish Arora +# 2017 Yiwen Shao # 2018 Hossein Hadian """ This script converts images to Kaldi-format feature matrices. The input to @@ -14,20 +15,27 @@ to enforce the images to have the specified length in that file by padding white pixels (the --padding option will be ignored in this case). This relates to end2end chain training. - eg. 
local/make_features.py data/train --feat-dim 40 """ - +import random import argparse import os import sys +import scipy.io as sio import numpy as np from scipy import misc +from scipy.ndimage.interpolation import affine_transform +import math +from signal import signal, SIGPIPE, SIG_DFL +signal(SIGPIPE, SIG_DFL) parser = argparse.ArgumentParser(description="""Converts images (in 'dir'/images.scp) to features and writes them to standard output in text format.""") -parser.add_argument('dir', type=str, - help='Source data directory (containing images.scp)') +parser.add_argument('images_scp_path', type=str, + help='Path of images.scp file') +parser.add_argument('--allowed_len_file_path', type=str, default=None, + help='If supplied, each images will be padded to reach the ' + 'target length (this overrides --padding).') parser.add_argument('--out-ark', type=str, default='-', help='Where to write the output feature file') parser.add_argument('--feat-dim', type=int, default=40, @@ -35,8 +43,10 @@ parser.add_argument('--padding', type=int, default=5, help='Number of white pixels to pad on the left' 'and right side of the image.') - - +parser.add_argument('--fliplr', type=lambda x: (str(x).lower()=='true'), default=False, + help="Flip the image left-right for right to left languages") +parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False, + help="performs image augmentation") args = parser.parse_args() @@ -56,18 +66,12 @@ def write_kaldi_matrix(file_handle, matrix, key): file_handle.write("\n") file_handle.write(" ]\n") -def get_scaled_image(im, allowed_lengths = None): - scale_size = args.feat_dim - sx = im.shape[1] - sy = im.shape[0] - scale = (1.0 * scale_size) / sy - nx = int(scale_size) - ny = int(scale * sx) - im = misc.imresize(im, (nx, ny)) + +def horizontal_pad(im, allowed_lengths = None): if allowed_lengths is None: left_padding = right_padding = args.padding else: # Find an allowed length for the image - imlen = im.shape[1] + imlen = im.shape[1] # width allowed_len = 0 for l in allowed_lengths: if l > imlen: @@ -77,28 +81,153 @@ def get_scaled_image(im, allowed_lengths = None): # No allowed length was found for the image (the image is too long) return None padding = allowed_len - imlen - left_padding = padding // 2 + left_padding = int(padding // 2) right_padding = padding - left_padding - dim_y = im.shape[0] + dim_y = im.shape[0] # height im_pad = np.concatenate((255 * np.ones((dim_y, left_padding), dtype=int), im), axis=1) im_pad1 = np.concatenate((im_pad, 255 * np.ones((dim_y, right_padding), dtype=int)), axis=1) return im_pad1 -### main ### -data_list_path = os.path.join(args.dir, 'images.scp') +def get_scaled_image_aug(im, mode='normal'): + scale_size = args.feat_dim + sx = im.shape[1] + sy = im.shape[0] + scale = (1.0 * scale_size) / sy + nx = int(scale_size) + ny = int(scale * sx) + scale_size = random.randint(10, 30) + scale = (1.0 * scale_size) / sy + down_nx = int(scale_size) + down_ny = int(scale * sx) + if mode == 'normal': + im = misc.imresize(im, (nx, ny)) + return im + else: + im_scaled_down = misc.imresize(im, (down_nx, down_ny)) + im_scaled_up = misc.imresize(im_scaled_down, (nx, ny)) + return im_scaled_up + return im + +def contrast_normalization(im, low_pct, high_pct): + element_number = im.size + rows = im.shape[0] + cols = im.shape[1] + im_contrast = np.zeros(shape=im.shape) + low_index = int(low_pct * element_number) + high_index = int(high_pct * element_number) + sorted_im = np.sort(im, axis=None) + low_thred = 
sorted_im[low_index] + high_thred = sorted_im[high_index] + for i in range(rows): + for j in range(cols): + if im[i, j] > high_thred: + im_contrast[i, j] = 255 # lightest to white + elif im[i, j] < low_thred: + im_contrast[i, j] = 0 # darkest to black + else: + # linear normalization + im_contrast[i, j] = (im[i, j] - low_thred) * \ + 255 / (high_thred - low_thred) + return im_contrast + + +def geometric_moment(frame, p, q): + m = 0 + for i in range(frame.shape[1]): + for j in range(frame.shape[0]): + m += (i ** p) * (j ** q) * frame[i][i] + return m + + +def central_moment(frame, p, q): + u = 0 + x_bar = geometric_moment(frame, 1, 0) / \ + geometric_moment(frame, 0, 0) # m10/m00 + y_bar = geometric_moment(frame, 0, 1) / \ + geometric_moment(frame, 0, 0) # m01/m00 + for i in range(frame.shape[1]): + for j in range(frame.shape[0]): + u += ((i - x_bar)**p) * ((j - y_bar)**q) * frame[i][j] + return u + + +def height_normalization(frame, w, h): + frame_normalized = np.zeros(shape=(h, w)) + alpha = 4 + x_bar = geometric_moment(frame, 1, 0) / \ + geometric_moment(frame, 0, 0) # m10/m00 + y_bar = geometric_moment(frame, 0, 1) / \ + geometric_moment(frame, 0, 0) # m01/m00 + sigma_x = (alpha * ((central_moment(frame, 2, 0) / + geometric_moment(frame, 0, 0)) ** .5)) # alpha * sqrt(u20/m00) + sigma_y = (alpha * ((central_moment(frame, 0, 2) / + geometric_moment(frame, 0, 0)) ** .5)) # alpha * sqrt(u02/m00) + for x in range(w): + for y in range(h): + i = int((x / w - 0.5) * sigma_x + x_bar) + j = int((y / h - 0.5) * sigma_y + y_bar) + frame_normalized[x][y] = frame[i][j] + return frame_normalized + +def find_slant_project(im): + rows = im.shape[0] + cols = im.shape[1] + std_max = 0 + alpha_max = 0 + col_disp = np.zeros(90, int) + proj = np.zeros(shape=(90, cols + 2 * rows), dtype=int) + for r in range(rows): + for alpha in range(-45, 45, 1): + col_disp[alpha] = int(r * math.tan(alpha / 180.0 * math.pi)) + for c in range(cols): + if im[r, c] < 100: + for alpha in range(-45, 45, 1): + proj[alpha + 45, c + col_disp[alpha] + rows] += 1 + for alpha in range(-45, 45, 1): + proj_histogram, bin_array = np.histogram(proj[alpha + 45, :], bins=10) + proj_std = np.std(proj_histogram) + if proj_std > std_max: + std_max = proj_std + alpha_max = alpha + proj_std = np.std(proj, axis=1) + return -alpha_max + + +def horizontal_shear(im, degree): + rad = degree / 180.0 * math.pi + padding_x = int(abs(np.tan(rad)) * im.shape[0]) + padding_y = im.shape[0] + if rad > 0: + im_pad = np.concatenate( + (255 * np.ones((padding_y, padding_x), dtype=int), im), axis=1) + elif rad < 0: + im_pad = np.concatenate( + (im, 255 * np.ones((padding_y, padding_x), dtype=int)), axis=1) + else: + im_pad = im + shear_matrix = np.array([[1, 0], + [np.tan(rad), 1]]) + sheared_im = affine_transform(im_pad, shear_matrix, cval=255.0) + return sheared_im + + +### main ### +random.seed(1) +data_list_path = args.images_scp_path if args.out_ark == '-': out_fh = sys.stdout else: - out_fh = open(args.out_ark,'wb') + out_fh = open(args.out_ark,'w') allowed_lengths = None -if os.path.isfile(os.path.join(args.dir, 'allowed_lengths.txt')): +allowed_len_handle = args.allowed_len_file_path +if os.path.isfile(allowed_len_handle): print("Found 'allowed_lengths.txt' file...", file=sys.stderr) allowed_lengths = [] - with open(os.path.join(args.dir,'allowed_lengths.txt')) as f: + with open(allowed_len_handle) as f: for line in f: allowed_lengths.append(int(line.strip())) print("Read {} allowed lengths and will apply them to the " @@ -106,6 +235,7 @@ def 
get_scaled_image(im, allowed_lengths = None): num_fail = 0 num_ok = 0 +aug_setting = ['normal', 'scaled'] with open(data_list_path) as f: for line in f: line = line.strip() @@ -113,15 +243,24 @@ def get_scaled_image(im, allowed_lengths = None): image_id = line_vect[0] image_path = line_vect[1] im = misc.imread(image_path) - im_scaled = get_scaled_image(im, allowed_lengths) - - if im_scaled is None: + if args.fliplr: + im = np.fliplr(im) + if args.augment: + im_aug = get_scaled_image_aug(im, aug_setting[0]) + im_contrast = contrast_normalization(im_aug, 0.05, 0.2) + slant_degree = find_slant_project(im_contrast) + im_sheared = horizontal_shear(im_contrast, slant_degree) + im_aug = im_sheared + else: + im_aug = get_scaled_image_aug(im, aug_setting[0]) + im_horizontal_padded = horizontal_pad(im_aug, allowed_lengths) + if im_horizontal_padded is None: num_fail += 1 continue - data = np.transpose(im_scaled, (1, 0)) + data = np.transpose(im_horizontal_padded, (1, 0)) data = np.divide(data, 255.0) num_ok += 1 write_kaldi_matrix(out_fh, data, image_id) -print('Generated features for {} images. Failed for {} (iamge too ' +print('Generated features for {} images. Failed for {} (image too ' 'long).'.format(num_ok, num_fail), file=sys.stderr) diff --git a/egs/iam/v1/local/prepare_data.sh b/egs/iam/v1/local/prepare_data.sh index 73d711c73f0..9c01ac90f28 100755 --- a/egs/iam/v1/local/prepare_data.sh +++ b/egs/iam/v1/local/prepare_data.sh @@ -165,6 +165,7 @@ if [ $stage -le 0 ]; then local/process_data.py data/local data/test --dataset test || exit 1 local/process_data.py data/local data/val --dataset validation || exit 1 - utils/utt2spk_to_spk2utt.pl data/train/utt2spk > data/train/spk2utt - utils/utt2spk_to_spk2utt.pl data/test/utt2spk > data/test/spk2utt + image/fix_data_dir.sh data/train + image/fix_data_dir.sh data/test + image/fix_data_dir.sh data/val fi diff --git a/egs/iam/v1/run_end2end.sh b/egs/iam/v1/run_end2end.sh index 6df93e739f4..22f5c8f11af 100755 --- a/egs/iam/v1/run_end2end.sh +++ b/egs/iam/v1/run_end2end.sh @@ -16,7 +16,7 @@ iam_database=/export/corpora5/handwriting_ocr/IAM # This corpus is of written NZ English that can be purchased here: # "https://www.victoria.ac.nz/lals/resources/corpora-default" wellington_database=/export/corpora5/Wellington/WWC/ - +train_set=train_aug . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. ## This relates to the queue. . ./path.sh @@ -32,45 +32,58 @@ if [ $stage -le 0 ]; then --wellington-dir "$wellington_database" \ --username "$username" --password "$password" fi -mkdir -p data/{train,test}/data +mkdir -p data/{train,test,val}/data if [ $stage -le 1 ]; then - image/get_image2num_frames.py data/train # This will be needed for the next command + echo "$(date) stage 1: getting allowed image widths for e2e training..." + image/get_image2num_frames.py --feat-dim 40 data/train # This will be needed for the next command # The next command creates a "allowed_lengths.txt" file in data/train # which will be used by local/make_features.py to enforce the images to # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train - echo "$0: Preparing the test and train feature files..." 
- for dataset in train test; do - local/make_features.py data/$dataset --feat-dim 40 | \ - copy-feats --compress=true --compression-method=7 \ - ark:- ark,scp:data/$dataset/data/images.ark,data/$dataset/feats.scp - steps/compute_cmvn_stats.sh data/$dataset + echo "$(date) Extracting features, creating feats.scp file" + local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/train + steps/compute_cmvn_stats.sh data/train || exit 1; + for set in val test; do + local/extract_features.sh --nj $nj --cmd "$cmd" --augment true \ + --feat-dim 40 data/${set} + steps/compute_cmvn_stats.sh data/${set} || exit 1; done utils/fix_data_dir.sh data/train fi if [ $stage -le 2 ]; then + for set in train; do + echo "$(date) stage 2: Performing augmentation, it will double training data" + local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data + steps/compute_cmvn_stats.sh data/${set}_aug || exit 1; + done +fi + +if [ $stage -le 3 ]; then echo "$0: Estimating a language model for decoding..." # We do this stage before dict preparation because prepare_dict.sh # generates the lexicon from pocolm's wordlist local/train_lm.sh --vocab-size 50k fi -if [ $stage -le 3 ]; then +if [ $stage -le 4 ]; then echo "$0: Preparing dictionary and lang..." - # This is for training. Use a large vocab size, e.g. 500k to include all the # training words: local/prepare_dict.sh --vocab-size 500k --dir data/local/dict - utils/prepare_lang.sh --sil-prob 0.95 \ + utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.95 \ data/local/dict "" data/lang/temp data/lang + silphonelist=`cat data/lang/phones/silence.csl` + nonsilphonelist=`cat data/lang/phones/nonsilence.csl` + local/gen_topo.py 8 4 4 $nonsilphonelist $silphonelist data/lang/phones.txt >data/lang/topo + # This is for decoding. We use a 50k lexicon to be consistent with the papers # reporting WERs on IAM. local/prepare_dict.sh --vocab-size 50k --dir data/local/dict_50k - utils/prepare_lang.sh --sil-prob 0.95 data/local/dict_50k \ - "" data/lang_test/temp data/lang_test + utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.95 \ + data/local/dict_50k "" data/lang_test/temp data/lang_test utils/format_lm.sh data/lang_test data/local/local_lm/data/arpa/3gram_big.arpa.gz \ data/local/dict_50k/lexicon.txt data/lang_test @@ -82,20 +95,20 @@ if [ $stage -le 3 ]; then cp data/lang_test/G.fst data/lang_unk/G.fst fi -if [ $stage -le 4 ]; then +if [ $stage -le 5 ]; then echo "$0: Calling the flat-start chain recipe..." - local/chain/run_flatstart_cnn1a.sh + local/chain/run_e2e_cnn_1b.sh --train_set $train_set fi -if [ $stage -le 5 ]; then +if [ $stage -le 6 ]; then echo "$0: Aligning the training data using the e2e chain model..." steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ --use-gpu false \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ - data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train + data/$train_set data/lang exp/chain/e2e_cnn_1b exp/chain/e2e_ali_train fi -if [ $stage -le 6 ]; then +if [ $stage -le 7 ]; then echo "$0: Building a tree and training a regular chain model using the e2e alignments..." 
- local/chain/run_cnn_e2eali_1a.sh + local/chain/run_cnn_e2eali_1d.sh --train_set $train_set fi From 3c3185b9e36f790ff9e49951e88732b8053f28da Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 9 Jan 2019 00:06:26 -0500 Subject: [PATCH 03/18] adding changes in run.sh --- egs/iam/v1/run.sh | 60 ++++++++++++++++++++++++++------------- egs/iam/v1/run_end2end.sh | 16 ++++------- 2 files changed, 47 insertions(+), 29 deletions(-) diff --git a/egs/iam/v1/run.sh b/egs/iam/v1/run.sh index b943870f530..7907f018c04 100755 --- a/egs/iam/v1/run.sh +++ b/egs/iam/v1/run.sh @@ -20,6 +20,7 @@ iam_database=/export/corpora5/handwriting_ocr/IAM # This corpus is of written NZ English that can be purchased here: # "https://www.victoria.ac.nz/lals/resources/corpora-default" wellington_database=/export/corpora5/Wellington/WWC/ +train_set=train_aug . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. ## This relates to the queue. @@ -30,33 +31,54 @@ wellington_database=/export/corpora5/Wellington/WWC/ ./local/check_tools.sh if [ $stage -le 0 ]; then + if [ -f data/train/text ] && ! $overwrite; then + echo "$0: Not processing, probably script have run from wrong stage" + echo "Exiting with status 1 to avoid data corruption" + exit 1; + fi + echo "$0: Preparing data..." local/prepare_data.sh --download-dir "$iam_database" \ --wellington-dir "$wellington_database" \ --username "$username" --password "$password" fi -mkdir -p data/{train,test}/data +mkdir -p data/{train,test,val}/data if [ $stage -le 1 ]; then - echo "$0: Preparing the test and train feature files..." - for dataset in train test; do - local/make_features.py data/$dataset --feat-dim 40 | \ - copy-feats --compress=true --compression-method=7 \ - ark:- ark,scp:data/$dataset/data/images.ark,data/$dataset/feats.scp - steps/compute_cmvn_stats.sh data/$dataset + echo "$0: $(date) stage 1: getting allowed image widths for e2e training..." + image/get_image2num_frames.py --feat-dim 40 data/train # This will be needed for the next command + # The next command creates a "allowed_lengths.txt" file in data/train + # which will be used by local/make_features.py to enforce the images to + # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. + image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train + echo "$0: $(date) Extracting features, creating feats.scp file" + local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/train + steps/compute_cmvn_stats.sh data/train || exit 1; + for set in val test; do + local/extract_features.sh --nj $nj --cmd "$cmd" --augment true \ + --feat-dim 40 data/${set} + steps/compute_cmvn_stats.sh data/${set} || exit 1; done + utils/fix_data_dir.sh data/train fi if [ $stage -le 2 ]; then + for set in train; do + echo "$0: $(date) stage 2: Performing augmentation, it will double training data" + local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data + steps/compute_cmvn_stats.sh data/${set}_aug || exit 1; + done +fi + +if [ $stage -le 3 ]; then echo "$0: Estimating a language model for decoding..." # We do this stage before dict preparation because prepare_dict.sh # generates the lexicon from pocolm's wordlist local/train_lm.sh --vocab-size 50k fi -if [ $stage -le 3 ]; then +if [ $stage -le 4 ]; then echo "$0: Preparing dictionary and lang..." - # This is for training. Use a large vocab size, e.g. 
500k to include all the # training words: local/prepare_dict.sh --vocab-size 500k --dir data/local/dict # this is for training @@ -81,7 +103,7 @@ if [ $stage -le 3 ]; then fi if [ $stage -le 4 ]; then - steps/train_mono.sh --nj $nj --cmd $cmd --totgauss 10000 data/train \ + steps/train_mono.sh --nj $nj --cmd $cmd --totgauss 10000 data/$train_set \ data/lang exp/mono fi @@ -93,10 +115,10 @@ if [ $stage -le 5 ] && $decode_gmm; then fi if [ $stage -le 6 ]; then - steps/align_si.sh --nj $nj --cmd $cmd data/train data/lang \ + steps/align_si.sh --nj $nj --cmd $cmd data/$train_set data/lang \ exp/mono exp/mono_ali - steps/train_deltas.sh --cmd $cmd 500 20000 data/train data/lang \ + steps/train_deltas.sh --cmd $cmd 500 20000 data/$train_set data/lang \ exp/mono_ali exp/tri fi @@ -108,12 +130,12 @@ if [ $stage -le 7 ] && $decode_gmm; then fi if [ $stage -le 8 ]; then - steps/align_si.sh --nj $nj --cmd $cmd data/train data/lang \ + steps/align_si.sh --nj $nj --cmd $cmd data/$train_set data/lang \ exp/tri exp/tri_ali steps/train_lda_mllt.sh --cmd $cmd \ --splice-opts "--left-context=3 --right-context=3" 500 20000 \ - data/train data/lang exp/tri_ali exp/tri2 + data/$train_set data/lang exp/tri_ali exp/tri2 fi if [ $stage -le 9 ] && $decode_gmm; then @@ -125,10 +147,10 @@ fi if [ $stage -le 10 ]; then steps/align_fmllr.sh --nj $nj --cmd $cmd --use-graphs true \ - data/train data/lang exp/tri2 exp/tri2_ali + data/$train_set data/lang exp/tri2 exp/tri2_ali steps/train_sat.sh --cmd $cmd 500 20000 \ - data/train data/lang exp/tri2_ali exp/tri3 + data/$train_set data/lang exp/tri2_ali exp/tri3 fi if [ $stage -le 11 ] && $decode_gmm; then @@ -140,13 +162,13 @@ fi if [ $stage -le 12 ]; then steps/align_fmllr.sh --nj $nj --cmd $cmd --use-graphs true \ - data/train data/lang exp/tri3 exp/tri3_ali + data/$train_set data/lang exp/tri3 exp/tri3_ali fi if [ $stage -le 13 ]; then - local/chain/run_cnn_1a.sh --lang-test lang_unk + local/chain/run_cnn.sh --lang-test lang_unk --train_set $train_set fi if [ $stage -le 14 ]; then - local/chain/run_cnn_chainali_1c.sh --chain-model-dir exp/chain/cnn_1a --stage 2 + local/chain/run_cnn_chainali.sh --chain-model-dir exp/chain/cnn_1a --stage 2 --train_set $train_set fi diff --git a/egs/iam/v1/run_end2end.sh b/egs/iam/v1/run_end2end.sh index 22f5c8f11af..58461c740b6 100755 --- a/egs/iam/v1/run_end2end.sh +++ b/egs/iam/v1/run_end2end.sh @@ -22,10 +22,7 @@ train_set=train_aug . ./path.sh . ./utils/parse_options.sh # e.g. this parses the above options # if supplied. - - ./local/check_tools.sh - if [ $stage -le 0 ]; then echo "$0: Preparing data..." local/prepare_data.sh --download-dir "$iam_database" \ @@ -35,13 +32,13 @@ fi mkdir -p data/{train,test,val}/data if [ $stage -le 1 ]; then - echo "$(date) stage 1: getting allowed image widths for e2e training..." + echo "$0: $(date) stage 1: getting allowed image widths for e2e training..." image/get_image2num_frames.py --feat-dim 40 data/train # This will be needed for the next command # The next command creates a "allowed_lengths.txt" file in data/train # which will be used by local/make_features.py to enforce the images to # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. 
image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train - echo "$(date) Extracting features, creating feats.scp file" + echo "$0: $(date) Extracting features, creating feats.scp file" local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/train steps/compute_cmvn_stats.sh data/train || exit 1; for set in val test; do @@ -54,7 +51,7 @@ fi if [ $stage -le 2 ]; then for set in train; do - echo "$(date) stage 2: Performing augmentation, it will double training data" + echo "$0: $(date) stage 2: Performing augmentation, it will double training data" local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data steps/compute_cmvn_stats.sh data/${set}_aug || exit 1; done @@ -74,7 +71,6 @@ if [ $stage -le 4 ]; then local/prepare_dict.sh --vocab-size 500k --dir data/local/dict utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.95 \ data/local/dict "" data/lang/temp data/lang - silphonelist=`cat data/lang/phones/silence.csl` nonsilphonelist=`cat data/lang/phones/nonsilence.csl` local/gen_topo.py 8 4 4 $nonsilphonelist $silphonelist data/lang/phones.txt >data/lang/topo @@ -97,7 +93,7 @@ fi if [ $stage -le 5 ]; then echo "$0: Calling the flat-start chain recipe..." - local/chain/run_e2e_cnn_1b.sh --train_set $train_set + local/chain/run_e2e_cnn.sh --train_set $train_set fi if [ $stage -le 6 ]; then @@ -105,10 +101,10 @@ if [ $stage -le 6 ]; then steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ --use-gpu false \ --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ - data/$train_set data/lang exp/chain/e2e_cnn_1b exp/chain/e2e_ali_train + data/$train_set data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi if [ $stage -le 7 ]; then echo "$0: Building a tree and training a regular chain model using the e2e alignments..." - local/chain/run_cnn_e2eali_1d.sh --train_set $train_set + local/chain/run_cnn_e2eali.sh --train_set $train_set fi From 0862c616eaf9ca584ec9877fff1c6ea23b3ca721 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 9 Jan 2019 00:53:40 -0500 Subject: [PATCH 04/18] updating result in 1b --- egs/iam/v1/local/chain/run_cnn_e2eali_1b.sh | 244 ------------------ .../local/chain/tuning/run_cnn_e2eali_1b.sh | 27 +- 2 files changed, 11 insertions(+), 260 deletions(-) delete mode 100755 egs/iam/v1/local/chain/run_cnn_e2eali_1b.sh diff --git a/egs/iam/v1/local/chain/run_cnn_e2eali_1b.sh b/egs/iam/v1/local/chain/run_cnn_e2eali_1b.sh deleted file mode 100755 index 1e3aed66c6b..00000000000 --- a/egs/iam/v1/local/chain/run_cnn_e2eali_1b.sh +++ /dev/null @@ -1,244 +0,0 @@ -#!/bin/bash - -# e2eali_1b is the same as e2eali_1a but uses unconstrained egs - -# local/chain/compare_wer.sh /home/hhadian/kaldi-rnnlm/egs/iam/v1/exp/chain/cnn_e2eali_1a exp/chain/cnn_e2eali_1b -# System cnn_e2eali_1a cnn_e2eali_1b -# WER 12.79 12.23 -# CER 5.73 5.48 -# Final train prob -0.0556 -0.0367 -# Final valid prob -0.0795 -0.0592 -# Final train prob (xent) -0.9178 -0.8382 -# Final valid prob (xent) -1.0604 -0.9853 -# Parameters 3.95M 3.95M - -# steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1b -# exp/chain/cnn_e2eali_1b: num-iters=21 nj=2..4 num-params=4.0M dim=40->360 combine=-0.038->-0.038 (over 1) xent:train/valid[13,20,final]=(-1.34,-0.967,-0.838/-1.40,-1.07,-0.985) logprob:train/valid[13,20,final]=(-0.075,-0.054,-0.037/-0.083,-0.072,-0.059) - -set -e -o pipefail - -stage=0 - -nj=30 -train_set=train -nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. 
-affix=_1b #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. -e2echain_model_dir=exp/chain/e2e_cnn_1b -common_egs_dir= -reporting_email= - -# chain options -train_stage=-10 -xent_regularize=0.1 -frame_subsampling_factor=4 -# training chunk-options -chunk_width=340,300,200,100 -num_leaves=500 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 -tdnn_dim=450 -# training options -srand=0 -remove_egs=true -lang_test=lang_unk -# End configuration section. -echo "$0 $@" # Print the command line for logging - - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - - -if ! cuda-compiled; then - cat <$lang/topo - fi -fi - -if [ $stage -le 2 ]; then - # Get the alignments as lattices (gives the chain training more freedom). - # use the same num-jobs as the alignments - steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ - --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ - ${train_data_dir} data/lang $e2echain_model_dir $lat_dir - echo "" >$lat_dir/splice_opts -fi - -if [ $stage -le 3 ]; then - # Build a tree using our new topology. We know we have alignments for the - # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use - # those. The num-leaves is always somewhat less than the num-leaves from - # the GMM baseline. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." - exit 1; - fi - - steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 1 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$cmd" $num_leaves ${train_data_dir} \ - $lang $ali_dir $tree_dir -fi - - -if [ $stage -le 4 ]; then - mkdir -p $dir - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - cnn_opts="l2-regularize=0.075" - tdnn_opts="l2-regularize=0.075" - output_opts="l2-regularize=0.1" - common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36" - common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70" - common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70" - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=40 name=input - - conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 - conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 - conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2 - conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-1,0,1 $common3 - conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-1,0,1 $common3 - relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts - relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts - relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts - - ## adding the layers for chain branch - relu-batchnorm-layer 
name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' mod?els... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts - output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - - -if [ $stage -le 5 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage - fi - - steps/nnet3/chain/train.py --stage=$train_stage \ - --cmd="$cmd" \ - --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient=0.1 \ - --chain.l2-regularize=0.00005 \ - --chain.apply-deriv-weights=false \ - --chain.lm-opts="--num-extra-lm-states=500" \ - --chain.frame-subsampling-factor=$frame_subsampling_factor \ - --chain.alignment-subsampling-factor=1 \ - --chain.left-tolerance 3 \ - --chain.right-tolerance 3 \ - --trainer.srand=$srand \ - --trainer.max-param-change=2.0 \ - --trainer.num-epochs=4 \ - --trainer.frames-per-iter=1000000 \ - --trainer.optimization.num-jobs-initial=2 \ - --trainer.optimization.num-jobs-final=4 \ - --trainer.optimization.initial-effective-lrate=0.001 \ - --trainer.optimization.final-effective-lrate=0.0001 \ - --trainer.optimization.shrink-value=1.0 \ - --trainer.num-chunk-per-minibatch=64,32 \ - --trainer.optimization.momentum=0.0 \ - --egs.chunk-width=$chunk_width \ - --egs.chunk-left-context=$chunk_left_context \ - --egs.chunk-right-context=$chunk_right_context \ - --egs.chunk-left-context-initial=0 \ - --egs.chunk-right-context-final=0 \ - --egs.dir="$common_egs_dir" \ - --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ - --cleanup.remove-egs=$remove_egs \ - --use-gpu=true \ - --reporting.email="$reporting_email" \ - --feat-dir=$train_data_dir \ - --tree-dir=$tree_dir \ - --lat-dir=$lat_dir \ - --dir=$dir || exit 1; -fi - -if [ $stage -le 6 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. 
- - utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ - $dir $dir/graph || exit 1; -fi - -if [ $stage -le 7 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; -fi diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh index 6d8cca876bf..0a19e0fb20d 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -1,29 +1,27 @@ #!/bin/bash # e2eali_1b is the same as e2eali_1a but uses unconstrained egs - -# local/chain/compare_wer.sh /home/hhadian/kaldi-rnnlm/egs/iam/v1/exp/chain/cnn_e2eali_1a exp/chain/cnn_e2eali_1b -# System cnn_e2eali_1a cnn_e2eali_1b -# WER 12.79 12.23 -# CER 5.73 5.48 -# Final train prob -0.0556 -0.0367 -# Final valid prob -0.0795 -0.0592 -# Final train prob (xent) -0.9178 -0.8382 -# Final valid prob (xent) -1.0604 -0.9853 -# Parameters 3.95M 3.95M +# local/chain/compare_wer.sh exp/chain/cnn_e2eali_1b +# System cnn_e2eali_1b (dict_50k) cnn_e2eali_1b (dict_500k) +# WER 11.41 10.25 +# CER 4.87 4.60 +# Final train prob -0.0384 -0.0384 +# Final valid prob -0.0444 -0.0444 +# Final train prob (xent) -0.8084 -0.8084 +# Final valid prob (xent) -0.8470 -0.8470 +# Parameters 3.97M 3.97M # steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1b -# exp/chain/cnn_e2eali_1b: num-iters=21 nj=2..4 num-params=4.0M dim=40->360 combine=-0.038->-0.038 (over 1) xent:train/valid[13,20,final]=(-1.34,-0.967,-0.838/-1.40,-1.07,-0.985) logprob:train/valid[13,20,final]=(-0.075,-0.054,-0.037/-0.083,-0.072,-0.059) +# exp/chain/cnn_e2eali_1b: num-iters=42 nj=2..4 num-params=4.0M dim=40->376 combine=-0.039->-0.039 (over 1) xent:train/valid[27,41,final]=(-1.28,-0.846,-0.808/-1.27,-0.871,-0.847) logprob:train/valid[27,41,final]=(-0.064,-0.043,-0.038/-0.065,-0.051,-0.044) set -e -o pipefail - stage=0 nj=30 train_set=train nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. affix=_1b #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. 
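
The xent-branch comment in these chain scripts (learning_rate_factor = 0.5 / xent_regularize) is easy to gloss over. Numerically the factor just cancels the weight placed on the cross-entropy objective, so the xent output layer trains at the same effective rate no matter what xent_regularize is set to; a tiny illustration:

for xent_regularize in (0.05, 0.1, 0.2):
    learning_rate_factor = 0.5 / xent_regularize
    # the xent objective is weighted by xent_regularize, so the product below
    # is what effectively sets how fast the xent output layer learns
    print(xent_regularize, learning_rate_factor, xent_regularize * learning_rate_factor)
# the last column is always 0.5, independent of the regularization constant
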
-e2echain_model_dir=exp/chain/e2e_cnn_1a +e2echain_model_dir=exp/chain/e2e_cnn_1b common_egs_dir= reporting_email= @@ -141,7 +139,6 @@ if [ $stage -le 4 ]; then mkdir -p $dir/configs cat < $dir/configs/network.xconfig input dim=40 name=input - conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 @@ -152,11 +149,9 @@ if [ $stage -le 4 ]; then relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts - ## adding the layers for chain branch relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts - # adding the layers for xent branch # This block prints the configs for a separate output that will be # trained with a cross-entropy objective in the 'chain' mod?els... this From 214bd1232de03714051ec396d1e9a2a06860b44b Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 9 Jan 2019 01:01:13 -0500 Subject: [PATCH 05/18] updating results --- .../v1/local/chain/tuning/run_cnn_e2eali_1b.sh | 2 +- egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh | 15 +++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh index 0a19e0fb20d..dc830319f69 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -21,7 +21,7 @@ nj=30 train_set=train nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. affix=_1b #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. -e2echain_model_dir=exp/chain/e2e_cnn_1b +e2echain_model_dir=exp/chain/e2e_cnn_1a common_egs_dir= reporting_email= diff --git a/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh index 4836d76fa6e..cc27c8e55c4 100755 --- a/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh +++ b/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh @@ -2,20 +2,19 @@ # Copyright 2017 Hossein Hadian # This script does end2end chain training (i.e. 
from scratch) -# ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ +# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a # System e2e_cnn_1a -# WER 13.59 -# WER (rescored) 13.27 -# CER 6.92 -# CER (rescored) 6.71 -# Final train prob 0.0345 -# Final valid prob 0.0269 +# WER 15.24 +# CER 7.27 +# Final train prob -0.0209 +# Final valid prob -0.0417 # Final train prob (xent) # Final valid prob (xent) # Parameters 9.52M # steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a -# exp/chain/e2e_cnn_1b: num-iters=42 nj=2..4 num-params=9.5M dim=40->12640 combine=0.041->0.041 (over 2) logprob:train/valid[27,41,final]=(0.032,0.035,0.035/0.025,0.026,0.027) +# exp/chain/e2e_cnn_1a: num-iters=42 nj=2..4 num-params=9.5M dim=40->12640 combine=-0.021->-0.021 (over 1) logprob:train/valid[27,41,final]=(-0.025,-0.021,-0.021/-0.044,-0.043,-0.042) + set -e # configs for 'chain' From f0b87b302265492267e1a184a889ec0cef946b51 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 9 Jan 2019 17:18:36 -0500 Subject: [PATCH 06/18] adding tuning directory --- egs/madcat_zh/v1/local/chain/run_cnn.sh | 1 + egs/madcat_zh/v1/local/chain/run_cnn_chainali.sh | 1 + egs/madcat_zh/v1/local/chain/run_e2e_cnn.sh | 1 + egs/madcat_zh/v1/local/chain/{ => tuning}/run_cnn_1a.sh | 0 egs/madcat_zh/v1/local/chain/{ => tuning}/run_cnn_chainali_1a.sh | 0 egs/madcat_zh/v1/local/chain/{ => tuning}/run_cnn_chainali_1b.sh | 0 .../chain/{run_flatstart_cnn1a.sh => tuning/run_e2e_cnn_1a.sh} | 0 7 files changed, 3 insertions(+) create mode 120000 egs/madcat_zh/v1/local/chain/run_cnn.sh create mode 120000 egs/madcat_zh/v1/local/chain/run_cnn_chainali.sh create mode 120000 egs/madcat_zh/v1/local/chain/run_e2e_cnn.sh rename egs/madcat_zh/v1/local/chain/{ => tuning}/run_cnn_1a.sh (100%) rename egs/madcat_zh/v1/local/chain/{ => tuning}/run_cnn_chainali_1a.sh (100%) rename egs/madcat_zh/v1/local/chain/{ => tuning}/run_cnn_chainali_1b.sh (100%) rename egs/madcat_zh/v1/local/chain/{run_flatstart_cnn1a.sh => tuning/run_e2e_cnn_1a.sh} (100%) diff --git a/egs/madcat_zh/v1/local/chain/run_cnn.sh b/egs/madcat_zh/v1/local/chain/run_cnn.sh new file mode 120000 index 00000000000..df6f0a468c1 --- /dev/null +++ b/egs/madcat_zh/v1/local/chain/run_cnn.sh @@ -0,0 +1 @@ +tuning/run_cnn_1a.sh \ No newline at end of file diff --git a/egs/madcat_zh/v1/local/chain/run_cnn_chainali.sh b/egs/madcat_zh/v1/local/chain/run_cnn_chainali.sh new file mode 120000 index 00000000000..86568421fe1 --- /dev/null +++ b/egs/madcat_zh/v1/local/chain/run_cnn_chainali.sh @@ -0,0 +1 @@ +tuning/run_cnn_chainali_1b.sh \ No newline at end of file diff --git a/egs/madcat_zh/v1/local/chain/run_e2e_cnn.sh b/egs/madcat_zh/v1/local/chain/run_e2e_cnn.sh new file mode 120000 index 00000000000..d26ba0182ce --- /dev/null +++ b/egs/madcat_zh/v1/local/chain/run_e2e_cnn.sh @@ -0,0 +1 @@ +tuning/run_e2e_cnn_1a.sh \ No newline at end of file diff --git a/egs/madcat_zh/v1/local/chain/run_cnn_1a.sh b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh similarity index 100% rename from egs/madcat_zh/v1/local/chain/run_cnn_1a.sh rename to egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh diff --git a/egs/madcat_zh/v1/local/chain/run_cnn_chainali_1a.sh b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh similarity index 100% rename from egs/madcat_zh/v1/local/chain/run_cnn_chainali_1a.sh rename to egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh diff --git a/egs/madcat_zh/v1/local/chain/run_cnn_chainali_1b.sh b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh similarity index 100% rename from 
egs/madcat_zh/v1/local/chain/run_cnn_chainali_1b.sh rename to egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh diff --git a/egs/madcat_zh/v1/local/chain/run_flatstart_cnn1a.sh b/egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh similarity index 100% rename from egs/madcat_zh/v1/local/chain/run_flatstart_cnn1a.sh rename to egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh From ff073765ea630ab00b5ca8d1785008337305df86 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 9 Jan 2019 20:02:59 -0500 Subject: [PATCH 07/18] making it similar to madcat chinese --- egs/madcat_zh/v1/local/download_data.sh | 38 ------ egs/madcat_zh/v1/local/extract_features.sh | 12 +- egs/madcat_zh/v1/local/make_features.py | 138 --------------------- egs/madcat_zh/v1/local/prepare_data.sh | 53 ++++---- egs/madcat_zh/v1/run_end2end.sh | 31 ++++- 5 files changed, 58 insertions(+), 214 deletions(-) delete mode 100755 egs/madcat_zh/v1/local/download_data.sh delete mode 100755 egs/madcat_zh/v1/local/make_features.py diff --git a/egs/madcat_zh/v1/local/download_data.sh b/egs/madcat_zh/v1/local/download_data.sh deleted file mode 100755 index 6b4055f7205..00000000000 --- a/egs/madcat_zh/v1/local/download_data.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -# Copyright 2018 Ashish Arora -# Apache 2.0 - -# This script downloads data splits for MADCAT Chinese dataset. -# It also check if madcat chinese data is present or not. - -download_dir1=/export/corpora/LDC/LDC2014T13/data -train_split_url=http://www.openslr.org/resources/50/madcat.train.raw.lineid -test_split_url=http://www.openslr.org/resources/50/madcat.test.raw.lineid -dev_split_url=http://www.openslr.org/resources/50/madcat.dev.raw.lineid -data_split_dir=data/download/datasplits - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh || exit 1; - -if [ -d $data_split_dir ]; then - echo "$0: Not downloading the data splits as it is already there." -else - if [ ! -f $data_split_dir/madcat.train.raw.lineid ]; then - mkdir -p $data_split_dir - echo "$0: Downloading the data splits..." - wget -P $data_split_dir $train_split_url || exit 1; - wget -P $data_split_dir $test_split_url || exit 1; - wget -P $data_split_dir $dev_split_url || exit 1; - fi - echo "$0: Done downloading the data splits" -fi - -if [ -d $download_dir1 ]; then - echo "$0: madcat chinese data directory is present." -else - if [ ! -f $download_dir1/madcat/*.madcat.xml ]; then - echo "$0: please download madcat data..." - fi -fi diff --git a/egs/madcat_zh/v1/local/extract_features.sh b/egs/madcat_zh/v1/local/extract_features.sh index 0660ae4b412..9fe588f31b8 100755 --- a/egs/madcat_zh/v1/local/extract_features.sh +++ b/egs/madcat_zh/v1/local/extract_features.sh @@ -1,10 +1,16 @@ #!/bin/bash + # Copyright 2017 Yiwen Shao # 2018 Ashish Arora +# Apache 2.0 +# This script runs the make features script in parallel. + nj=4 cmd=run.pl feat_dim=40 +augment='no_aug' +verticle_shift=0 echo "$0 $@" . ./cmd.sh @@ -29,11 +35,11 @@ done # split images.scp utils/split_scp.pl $scp $split_scps || exit 1; -echo "$0: Preparing the test and train feature files..." 
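
extract_features.sh above parallelises feature extraction by splitting images.scp into images.1.scp through images.N.scp with utils/split_scp.pl and then running one make_features.py | copy-feats pipeline per piece under $cmd. split_scp.pl balances by line count (and can also respect utt2spk groupings), so the following is only the basic idea:

def split_scp(lines, num_jobs):
    # Split scp lines into num_jobs contiguous, near-equal pieces, which is
    # the basic idea behind utils/split_scp.pl (the real script handles
    # speaker grouping and many corner cases).
    n = len(lines)
    bounds = [round(i * n / num_jobs) for i in range(num_jobs + 1)]
    return [lines[bounds[i]:bounds[i + 1]] for i in range(num_jobs)]

chunks = split_scp(['utt%d path%d.png' % (i, i) for i in range(10)], 4)
print([len(c) for c in chunks])   # [2, 3, 3, 2]
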
$cmd JOB=1:$nj $logdir/extract_features.JOB.log \ - local/make_features.py $logdir/images.JOB.scp \ + image/ocr/make_features.py $logdir/images.JOB.scp \ --allowed_len_file_path $data/allowed_lengths.txt \ - --feat-dim $feat_dim \| \ + --feat-dim $feat_dim --augment_type $augment \ + --vertical-shift $verticle_shift \| \ copy-feats --compress=true --compression-method=7 \ ark:- ark,scp:$featdir/images.JOB.ark,$featdir/images.JOB.scp diff --git a/egs/madcat_zh/v1/local/make_features.py b/egs/madcat_zh/v1/local/make_features.py deleted file mode 100755 index a21276d32c2..00000000000 --- a/egs/madcat_zh/v1/local/make_features.py +++ /dev/null @@ -1,138 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright 2017 Chun Chieh Chang -# 2017 Ashish Arora -# 2018 Hossein Hadian - -""" This script converts images to Kaldi-format feature matrices. The input to - this script is the path to a data directory, e.g. "data/train". This script - reads the images listed in images.scp and writes them to standard output - (by default) as Kaldi-formatted matrices (in text form). It also scales the - images so they have the same height (via --feat-dim). It can optionally pad - the images (on left/right sides) with white pixels. - If an 'image2num_frames' file is found in the data dir, it will be used - to enforce the images to have the specified length in that file by padding - white pixels (the --padding option will be ignored in this case). This relates - to end2end chain training. - - eg. local/make_features.py data/train --feat-dim 40 -""" - -import argparse -import os -import sys -import numpy as np -from scipy import misc - -parser = argparse.ArgumentParser(description="""Converts images (in 'dir'/images.scp) to features and - writes them to standard output in text format.""") -parser.add_argument('images_scp_path', type=str, - help='Path of images.scp file') -parser.add_argument('--allowed_len_file_path', type=str, default=None, - help='If supplied, each images will be padded to reach the ' - 'target length (this overrides --padding).') -parser.add_argument('--out-ark', type=str, default='-', - help='Where to write the output feature file') -parser.add_argument('--feat-dim', type=int, default=40, - help='Size to scale the height of all images') -parser.add_argument('--padding', type=int, default=5, - help='Number of white pixels to pad on the left' - 'and right side of the image.') - - -args = parser.parse_args() - - -def write_kaldi_matrix(file_handle, matrix, key): - file_handle.write(key + " [ ") - num_rows = len(matrix) - if num_rows == 0: - raise Exception("Matrix is empty") - num_cols = len(matrix[0]) - - for row_index in range(len(matrix)): - if num_cols != len(matrix[row_index]): - raise Exception("All the rows of a matrix are expected to " - "have the same length") - file_handle.write(" ".join(map(lambda x: str(x), matrix[row_index]))) - if row_index != num_rows - 1: - file_handle.write("\n") - file_handle.write(" ]\n") - - -def get_scaled_image(im): - scale_size = args.feat_dim - sx = im.shape[1] # width - sy = im.shape[0] # height - scale = (1.0 * scale_size) / sy - nx = int(scale_size) - ny = int(scale * sx) - im = misc.imresize(im, (nx, ny)) - return im - - -def horizontal_pad(im, allowed_lengths = None): - if allowed_lengths is None: - left_padding = right_padding = args.padding - else: # Find an allowed length for the image - imlen = im.shape[1] # width - allowed_len = 0 - for l in allowed_lengths: - if l > imlen: - allowed_len = l - break - if allowed_len == 0: - # No allowed length was found for 
the image (the image is too long) - return None - padding = allowed_len - imlen - left_padding = int(padding // 2) - right_padding = padding - left_padding - dim_y = im.shape[0] # height - im_pad = np.concatenate((255 * np.ones((dim_y, left_padding), - dtype=int), im), axis=1) - im_pad1 = np.concatenate((im_pad, 255 * np.ones((dim_y, right_padding), - dtype=int)), axis=1) - return im_pad1 - - -### main ### - -data_list_path = args.images_scp_path - -if args.out_ark == '-': - out_fh = sys.stdout -else: - out_fh = open(args.out_ark,'wb') - -allowed_lengths = None -allowed_len_handle = args.allowed_len_file_path -if os.path.isfile(allowed_len_handle): - print("Found 'allowed_lengths.txt' file...", file=sys.stderr) - allowed_lengths = [] - with open(allowed_len_handle) as f: - for line in f: - allowed_lengths.append(int(line.strip())) - print("Read {} allowed lengths and will apply them to the " - "features.".format(len(allowed_lengths)), file=sys.stderr) - -num_fail = 0 -num_ok = 0 -with open(data_list_path) as f: - for line in f: - line = line.strip() - line_vect = line.split(' ') - image_id = line_vect[0] - image_path = line_vect[1] - im = misc.imread(image_path) - im_scaled = get_scaled_image(im) - im_horizontal_padded = horizontal_pad(im_scaled, allowed_lengths) - if im_horizontal_padded is None: - num_fail += 1 - continue - data = np.transpose(im_horizontal_padded, (1, 0)) - data = np.divide(data, 255.0) - num_ok += 1 - write_kaldi_matrix(out_fh, data, image_id) - -print('Generated features for {} images. Failed for {} (image too ' - 'long).'.format(num_ok, num_fail), file=sys.stderr) diff --git a/egs/madcat_zh/v1/local/prepare_data.sh b/egs/madcat_zh/v1/local/prepare_data.sh index c1accfb5e6c..6b4055f7205 100755 --- a/egs/madcat_zh/v1/local/prepare_data.sh +++ b/egs/madcat_zh/v1/local/prepare_data.sh @@ -1,43 +1,38 @@ #!/bin/bash -# Copyright 2017 Chun Chieh Chang -# 2017 Ashish Arora -# 2017 Hossein Hadian +# Copyright 2018 Ashish Arora # Apache 2.0 -# This script downloads the Madcat Chinese handwriting database and prepares the training -# and test data (i.e text, images.scp, utt2spk and spk2utt) by calling process_data.py. -# It also downloads the LOB and Brown text corpora. It downloads the database files -# only if they do not already exist in download directory. +# This script downloads data splits for MADCAT Chinese dataset. +# It also check if madcat chinese data is present or not. -# Eg. local/prepare_data.sh -# Eg. text file: 000_a01-000u-00 A MOVE to stop Mr. Gaitskell from -# utt2spk file: 000_a01-000u-00 000 -# images.scp file: 000_a01-000u-00 data/local/lines/a01/a01-000u/a01-000u-00.png -# spk2utt file: 000 000_a01-000u-00 000_a01-000u-01 000_a01-000u-02 000_a01-000u-03 - -stage=0 -download_dir=/export/corpora/LDC/LDC2014T13 +download_dir1=/export/corpora/LDC/LDC2014T13/data +train_split_url=http://www.openslr.org/resources/50/madcat.train.raw.lineid +test_split_url=http://www.openslr.org/resources/50/madcat.test.raw.lineid +dev_split_url=http://www.openslr.org/resources/50/madcat.dev.raw.lineid data_split_dir=data/download/datasplits . ./cmd.sh . ./path.sh . ./utils/parse_options.sh || exit 1; -if [[ ! -d $download_dir ]]; then - echo "$0: Warning: Couldn't find $download_dir." - echo "" +if [ -d $data_split_dir ]; then + echo "$0: Not downloading the data splits as it is already there." +else + if [ ! -f $data_split_dir/madcat.train.raw.lineid ]; then + mkdir -p $data_split_dir + echo "$0: Downloading the data splits..." 
+ wget -P $data_split_dir $train_split_url || exit 1; + wget -P $data_split_dir $test_split_url || exit 1; + wget -P $data_split_dir $dev_split_url || exit 1; + fi + echo "$0: Done downloading the data splits" fi -mkdir -p data/{train,test,dev}/lines -if [ $stage -le 1 ]; then - local/process_data.py $download_dir $data_split_dir/madcat.train.raw.lineid data/train || exit 1 - local/process_data.py $download_dir $data_split_dir/madcat.test.raw.lineid data/test || exit 1 - local/process_data.py $download_dir $data_split_dir/madcat.dev.raw.lineid data/dev || exit 1 - - for dataset in train test dev; do - echo "$0: Fixing data directory for dataset: $dataset" - echo "Date: $(date)." - image/fix_data_dir.sh data/$dataset - done +if [ -d $download_dir1 ]; then + echo "$0: madcat chinese data directory is present." +else + if [ ! -f $download_dir1/madcat/*.madcat.xml ]; then + echo "$0: please download madcat data..." + fi fi diff --git a/egs/madcat_zh/v1/run_end2end.sh b/egs/madcat_zh/v1/run_end2end.sh index 7f759e54b57..fbfda3e6c76 100755 --- a/egs/madcat_zh/v1/run_end2end.sh +++ b/egs/madcat_zh/v1/run_end2end.sh @@ -12,18 +12,35 @@ password= # in "local/prepare_data.sh" to download the database: madcat_database=/export/corpora/LDC/LDC2014T13 data_split_dir=data/download/datasplits +overwrite=false +corpus_dir=/export/corpora5/handwriting_ocr/corpus_data/zh/ . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. ## This relates to the queue. . ./path.sh . ./utils/parse_options.sh # e.g. this parses the above options # if supplied. +./local/check_tools.sh -#./local/check_tools.sh +# Start from stage=-1 for using extra corpus text +if [ $stage -le -1 ]; then + echo "$(date): getting corpus text for language modelling..." + mkdir -p data/local/text/cleaned + cat $corpus_dir/* > data/local/text/zh.txt + head -20000 data/local/text/zh.txt > data/local/text/cleaned/val.txt + tail -n +20000 data/local/text/zh.txt > data/local/text/cleaned/corpus.txt +fi if [ $stage -le 0 ]; then - local/download_data.sh --download-dir1 $madcat_database/data --data-split-dir $data_split_dir + if [ -f data/train/text ] && ! $overwrite; then + echo "$0: Not processing, probably script have run from wrong stage" + echo "Exiting with status 1 to avoid data corruption" + exit 1; + fi + + echo "$0: Preparing data..." + local/prepare_data.sh --download-dir1 $madcat_database/data --data-split-dir $data_split_dir for dataset in train test dev; do local/extract_lines.sh --nj $nj --cmd $cmd \ @@ -32,18 +49,20 @@ if [ $stage -le 0 ]; then data/${dataset}/lines done - echo "$0: Preparing data..." - local/prepare_data.sh --download-dir "$madcat_database" + echo "$0: Processing data..." + for set in dev train test; do + local/process_data.py $madcat_database $data_split_dir/madcat.$set.raw.lineid data/$set + image/fix_data_dir.sh data/$set + done + fi mkdir -p data/{train,test}/data if [ $stage -le 1 ]; then image/get_image2num_frames.py --feat-dim 80 data/train # This will be needed for the next command - # The next command creates a "allowed_lengths.txt" file in data/train # which will be used by local/make_features.py to enforce the images to # have allowed lengths. The allowed lengths will be spaced by 10% difference in length. - image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train echo "$0: Preparing the test and train feature files..." 
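
image/get_image2num_frames.py, called in stage 1 above, records how many feature frames each line image will produce once its height is rescaled to --feat-dim (80 in this recipe); get_allowed_lengths.py then builds the length ladder from those counts. Assuming the frame count is simply the rescaled width, as in the get_scaled_image helper removed earlier in this patch, a minimal sketch is:

def num_frames(height, width, feat_dim=80):
    # When the image height is rescaled to feat_dim pixels, the width scales
    # by the same factor and every remaining pixel column becomes one frame.
    return int(width * feat_dim / height)

print(num_frames(120, 900))   # 600 frames for a 120x900 line image
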
for dataset in train test; do From b86beb2d878532bc1565ac9e5c9968a25ba10965 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 9 Jan 2019 23:43:14 -0500 Subject: [PATCH 08/18] updating parameters --- .../v1/local/chain/tuning/run_cnn_1a.sh | 46 ++++++----------- .../local/chain/tuning/run_cnn_chainali_1a.sh | 41 ++++----------- .../local/chain/tuning/run_cnn_chainali_1b.sh | 50 ++++++------------- .../v1/local/chain/tuning/run_e2e_cnn_1a.sh | 43 ++++++---------- egs/madcat_zh/v1/local/prepare_data.sh | 16 ++++-- 5 files changed, 69 insertions(+), 127 deletions(-) diff --git a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh index 43d083099a3..d17b3e3c9c5 100755 --- a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh +++ b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh @@ -20,7 +20,7 @@ set -e -o pipefail stage=0 nj=50 -train_set=train_60 +train_set=train gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it # should have alignments for the specified training data. nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. @@ -32,28 +32,16 @@ reporting_email= # chain options train_stage=-10 xent_regularize=0.1 -frame_subsampling_factor=4 -alignment_subsampling_factor=1 -# training chunk-options chunk_width=340,300,200,100 num_leaves=500 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 tdnn_dim=450 -# training options -srand=0 -remove_egs=false -lang_test=lang_test # End configuration section. echo "$0 $@" # Print the command line for logging - . ./cmd.sh . ./path.sh . ./utils/parse_options.sh - if ! cuda-compiled; then cat < $dir/configs/network.xconfig input dim=40 name=input - conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 @@ -181,12 +169,12 @@ if [ $stage -le 5 ]; then --chain.l2-regularize=0.00005 \ --chain.apply-deriv-weights=false \ --chain.lm-opts="--num-extra-lm-states=500" \ - --chain.frame-subsampling-factor=$frame_subsampling_factor \ - --chain.alignment-subsampling-factor=$alignment_subsampling_factor \ - --trainer.srand=$srand \ + --chain.frame-subsampling-factor=4 \ + --chain.alignment-subsampling-factor=1 \ + --trainer.srand=0 \ --trainer.max-param-change=2.0 \ - --trainer.num-epochs=4 \ - --trainer.frames-per-iter=1000000 \ + --trainer.num-epochs=2 \ + --trainer.frames-per-iter=2000000 \ --trainer.optimization.num-jobs-initial=8 \ --trainer.optimization.num-jobs-final=16 \ --trainer.optimization.initial-effective-lrate=0.001 \ @@ -195,13 +183,9 @@ if [ $stage -le 5 ]; then --trainer.num-chunk-per-minibatch=64,32 \ --trainer.optimization.momentum=0.0 \ --egs.chunk-width=$chunk_width \ - --egs.chunk-left-context=$chunk_left_context \ - --egs.chunk-right-context=$chunk_right_context \ - --egs.chunk-left-context-initial=0 \ - --egs.chunk-right-context-final=0 \ --egs.dir="$common_egs_dir" \ --egs.opts="--frames-overlap-per-eg 0" \ - --cleanup.remove-egs=$remove_egs \ + --cleanup.remove-egs=false \ --use-gpu=true \ --reporting.email="$reporting_email" \ --feat-dir=$train_data_dir \ @@ -217,20 +201,15 @@ if [ $stage -le 6 ]; then # topology file from the model). 
So you could give it a different # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. - utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + --self-loop-scale 1.0 data/lang_test \ $dir $dir/graph || exit 1; fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ --frames-per-chunk $frames_per_chunk \ --nj $nj --cmd "$cmd" \ - $dir/graph data/test_60 $dir/decode_test || exit 1; + $dir/graph data/test $dir/decode_test || exit 1; fi diff --git a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh index bdad37f0c1d..5a3b85422f6 100755 --- a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh +++ b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh @@ -17,9 +17,8 @@ set -e -o pipefail stage=0 - nj=30 -train_set=train_60 +train_set=train gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it # should have alignments for the specified training data. nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. @@ -28,32 +27,20 @@ ali=tri3_ali chain_model_dir=exp/chain${nnet3_affix}/cnn${affix} common_egs_dir= reporting_email= - # chain options train_stage=-10 xent_regularize=0.1 -frame_subsampling_factor=4 -alignment_subsampling_factor=1 # training chunk-options chunk_width=340,300,200,100 num_leaves=500 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 tdnn_dim=450 -# training options -srand=0 -remove_egs=false -lang_test=lang_test # End configuration section. echo "$0 $@" # Print the command line for logging - . ./cmd.sh . ./path.sh . ./utils/parse_options.sh - if ! 
cuda-compiled; then cat < $dir/configs/network.xconfig input dim=80 name=input - conv-relu-batchnorm-layer name=cnn1 height-in=80 height-out=80 time-offsets=-3,-2,-1,0,1,2,3 $common1 conv-relu-batchnorm-layer name=cnn2 height-in=80 height-out=40 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 conv-relu-batchnorm-layer name=cnn3 height-in=40 height-out=40 time-offsets=-4,-2,0,2,4 $common2 @@ -97,13 +85,12 @@ if [ $stage -le 2 ]; then conv-relu-batchnorm-layer name=cnn6 height-in=20 height-out=20 time-offsets=-1,0,1 $common3 conv-relu-batchnorm-layer name=cnn7 height-in=20 height-out=20 time-offsets=-1,0,1 $common3 conv-relu-batchnorm-layer name=cnn8 height-in=20 height-out=10 time-offsets=-1,0,1 $common3 height-subsample-out=2 - relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $opts_2 - relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $opts_2 - relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $opts_2 - + relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim + relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim ## adding the layers for chain branch - relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $opts_2 - output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $opts_3 + relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs @@ -115,9 +102,9 @@ if [ $stage -le 3 ]; then steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ --cmd "$cmd" \ - --feat.cmvn-opts "$cmvn_opts" \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize $l2_regularize \ + --chain.l2-regularize 0.00005 \ --chain.apply-deriv-weights false \ --egs.dir "$common_egs_dir" \ --egs.stage $get_egs_stage \ @@ -125,11 +112,11 @@ if [ $stage -le 3 ]; then --chain.frame-subsampling-factor 4 \ --chain.alignment-subsampling-factor 4 \ --trainer.num-chunk-per-minibatch $minibatch_size \ - --trainer.frames-per-iter $frames_per_iter \ - --trainer.num-epochs $num_epochs \ + --trainer.frames-per-iter 2000000 \ + --trainer.num-epochs 2 \ --trainer.optimization.momentum 0 \ - --trainer.optimization.num-jobs-initial $num_jobs_initial \ - --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.num-jobs-initial 6 \ + --trainer.optimization.num-jobs-final 12 \ --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.shrink-value 1.0 \ @@ -149,7 +136,7 @@ if [ $stage -le 4 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + --self-loop-scale 1.0 data/lang_test \ $dir $dir/graph || exit 1; fi diff --git a/egs/madcat_zh/v1/local/prepare_data.sh b/egs/madcat_zh/v1/local/prepare_data.sh index 6b4055f7205..ba35b90b173 100755 --- a/egs/madcat_zh/v1/local/prepare_data.sh +++ b/egs/madcat_zh/v1/local/prepare_data.sh @@ -1,10 +1,20 @@ #!/bin/bash -# Copyright 2018 Ashish Arora +# Copyright 2017 Chun Chieh Chang +# 2017 Ashish Arora +# 2017 Hossein Hadian # Apache 2.0 -# This script downloads data splits for MADCAT Chinese dataset. -# It also check if madcat chinese data is present or not. 
+# This script downloads the Madcat Chinese handwriting database and prepares the training +# and test data (i.e text, images.scp, utt2spk and spk2utt) by calling process_data.py. +# It also downloads the LOB and Brown text corpora. It downloads the database files +# only if they do not already exist in download directory. + +# Eg. local/prepare_data.sh +# Eg. text file: 000_a01-000u-00 A MOVE to stop Mr. Gaitskell from +# utt2spk file: 000_a01-000u-00 000 +# images.scp file: 000_a01-000u-00 data/local/lines/a01/a01-000u/a01-000u-00.png +# spk2utt file: 000 000_a01-000u-00 000_a01-000u-01 000_a01-000u-02 000_a01-000u-03 download_dir1=/export/corpora/LDC/LDC2014T13/data train_split_url=http://www.openslr.org/resources/50/madcat.train.raw.lineid From 360710c5f92b044ea4bad9bebde2367503da6947 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 9 Jan 2019 23:50:57 -0500 Subject: [PATCH 09/18] updating run.sh --- egs/madcat_zh/v1/run.sh | 61 ++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/egs/madcat_zh/v1/run.sh b/egs/madcat_zh/v1/run.sh index f591dcccb35..62bfa0ab05e 100755 --- a/egs/madcat_zh/v1/run.sh +++ b/egs/madcat_zh/v1/run.sh @@ -16,16 +16,36 @@ decode_gmm=true # The datasplits can be found on http://www.openslr.org/51/ madcat_database=/export/corpora/LDC/LDC2014T13 data_split_dir=data/download/datasplits +overwrite=false +corpus_dir=/export/corpora5/handwriting_ocr/corpus_data/zh/ . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. ## This relates to the queue. . ./path.sh . ./utils/parse_options.sh # e.g. this parses the above options # if supplied. +./local/check_tools.sh + +# Start from stage=-1 for using extra corpus text +if [ $stage -le -1 ]; then + echo "$(date): getting corpus text for language modelling..." + mkdir -p data/local/text/cleaned + cat $corpus_dir/* > data/local/text/zh.txt + head -20000 data/local/text/zh.txt > data/local/text/cleaned/val.txt + tail -n +20000 data/local/text/zh.txt > data/local/text/cleaned/corpus.txt +fi mkdir -p data/{train,test,dev}/lines if [ $stage -le 0 ]; then - local/download_data.sh --download-dir1 $madcat_database/data --data-split-dir $data_split_dir + + if [ -f data/train/text ] && ! $overwrite; then + echo "$0: Not processing, probably script have run from wrong stage" + echo "Exiting with status 1 to avoid data corruption" + exit 1; + fi + + echo "$0: Preparing data..." + local/prepare_data.sh --download-dir1 $madcat_database/data --data-split-dir $data_split_dir for dataset in train test dev; do local/extract_lines.sh --nj $nj --cmd $cmd \ @@ -34,20 +54,23 @@ if [ $stage -le 0 ]; then data/${dataset}/lines done - echo "$0: Preparing data..." - local/prepare_data.sh --download-dir $madcat_database + echo "$0: Processing data..." 
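
One detail worth flagging in the stage -1 corpus preparation added to run.sh above (and the same block in run_end2end.sh): head -20000 keeps lines 1 through 20000 for val.txt, while tail -n +20000 starts corpus.txt at line 20000, so the two files share one line; tail -n +20001 would make the split disjoint. A small Python equivalent of the intended split, with illustrative file names:

# Split a large corpus file into a held-out set and LM training text.
# Paths are illustrative; the recipe writes data/local/text/cleaned/{val,corpus}.txt.
heldout_lines = 20000
with open('zh.txt', encoding='utf-8') as src, \
     open('val.txt', 'w', encoding='utf-8') as val, \
     open('corpus.txt', 'w', encoding='utf-8') as corpus:
    for lineno, line in enumerate(src, start=1):
        # first 20000 lines are held out for perplexity tuning, the rest is LM text
        (val if lineno <= heldout_lines else corpus).write(line)
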
+ for set in dev train test; do + local/process_data.py $madcat_database $data_split_dir/madcat.$set.raw.lineid data/$set + image/fix_data_dir.sh data/$set + done fi # This script uses feat-dim of 60 while the end2end version uses a feat-dim of 80 -mkdir -p data/{train_60,test_60,dev_60}/data +mkdir -p data/{train,test,dev}/data if [ $stage -le 1 ]; then for dataset in train test dev; do for prepared in utt2spk text images.scp spk2utt; do - cp data/$dataset/$prepared data/${dataset}_60/$prepared + cp data/$dataset/$prepared data/${dataset}/$prepared done - local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 60 data/${dataset}_60 - steps/compute_cmvn_stats.sh data/${dataset}_60 + local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 60 data/${dataset} + steps/compute_cmvn_stats.sh data/${dataset} done fi @@ -67,56 +90,56 @@ if [ $stage -le 3 ]; then fi if [ $stage -le 4 ]; then - steps/train_mono.sh --nj $nj --cmd $cmd --totgauss 10000 data/train_60 \ + steps/train_mono.sh --nj $nj --cmd $cmd --totgauss 10000 data/train \ data/lang exp/mono fi if [ $stage -le 5 ] && $decode_gmm; then utils/mkgraph.sh --mono data/lang_test exp/mono exp/mono/graph - steps/decode.sh --nj $nj --cmd $cmd exp/mono/graph data/test_60 \ + steps/decode.sh --nj $nj --cmd $cmd exp/mono/graph data/test \ exp/mono/decode_test fi if [ $stage -le 6 ]; then - steps/align_si.sh --nj $nj --cmd $cmd data/train_60 data/lang \ + steps/align_si.sh --nj $nj --cmd $cmd data/train data/lang \ exp/mono exp/mono_ali steps/train_deltas.sh --cmd $cmd --context-opts "--context-width=2 --central-position=1" \ - 50000 20000 data/train_60 data/lang \ + 50000 20000 data/train data/lang \ exp/mono_ali exp/tri fi if [ $stage -le 7 ] && $decode_gmm; then utils/mkgraph.sh data/lang_test exp/tri exp/tri/graph - steps/decode.sh --nj $nj --cmd $cmd exp/tri/graph data/test_60 \ + steps/decode.sh --nj $nj --cmd $cmd exp/tri/graph data/test \ exp/tri/decode_test fi if [ $stage -le 8 ]; then - steps/align_si.sh --nj $nj --cmd $cmd data/train_60 data/lang \ + steps/align_si.sh --nj $nj --cmd $cmd data/train data/lang \ exp/tri exp/tri_ali steps/train_lda_mllt.sh --cmd $cmd \ --splice-opts "--left-context=3 --right-context=3" \ --context-opts "--context-width=2 --central-position=1" 50000 20000 \ - data/train_60 data/lang exp/tri_ali exp/tri2 + data/train data/lang exp/tri_ali exp/tri2 fi if [ $stage -le 9 ] && $decode_gmm; then utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph steps/decode.sh --nj $nj --cmd $cmd exp/tri2/graph \ - data/test_60 exp/tri2/decode_test + data/test exp/tri2/decode_test fi if [ $stage -le 10 ]; then steps/align_fmllr.sh --nj $nj --cmd $cmd --use-graphs true \ - data/train_60 data/lang exp/tri2 exp/tri2_ali + data/train data/lang exp/tri2 exp/tri2_ali steps/train_sat.sh --cmd $cmd --context-opts "--context-width=2 --central-position=1" \ - 50000 20000 data/train_60 data/lang \ + 50000 20000 data/train data/lang \ exp/tri2_ali exp/tri3 fi @@ -124,12 +147,12 @@ if [ $stage -le 11 ] && $decode_gmm; then utils/mkgraph.sh data/lang_test exp/tri3 exp/tri3/graph steps/decode_fmllr.sh --nj $nj --cmd $cmd exp/tri3/graph \ - data/test_60 exp/tri3/decode_test + data/test exp/tri3/decode_test fi if [ $stage -le 12 ]; then steps/align_fmllr.sh --nj $nj --cmd $cmd --use-graphs true \ - data/train_60 data/lang exp/tri3 exp/tri3_ali + data/train data/lang exp/tri3 exp/tri3_ali fi if [ $stage -le 13 ]; then From 051f3fea4dd5a1967fcd6475015ad28f5a17f2b0 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 10 Jan 2019 00:03:45 -0500 
Subject: [PATCH 10/18] updating run.sh --- egs/madcat_zh/v1/run.sh | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/egs/madcat_zh/v1/run.sh b/egs/madcat_zh/v1/run.sh index 62bfa0ab05e..b3ef370c830 100755 --- a/egs/madcat_zh/v1/run.sh +++ b/egs/madcat_zh/v1/run.sh @@ -61,16 +61,11 @@ if [ $stage -le 0 ]; then done fi -# This script uses feat-dim of 60 while the end2end version uses a feat-dim of 80 mkdir -p data/{train,test,dev}/data if [ $stage -le 1 ]; then for dataset in train test dev; do - for prepared in utt2spk text images.scp spk2utt; do - cp data/$dataset/$prepared data/${dataset}/$prepared - done - - local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 60 data/${dataset} - steps/compute_cmvn_stats.sh data/${dataset} + local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 60 data/$dataset + steps/compute_cmvn_stats.sh data/$dataset done fi From 0d492b141fbdcd71217019adb2ec4f6c62937005 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 10 Jan 2019 01:01:02 -0500 Subject: [PATCH 11/18] adding aachen splits --- egs/iam/v1/local/chain/compare_wer.sh | 46 ++++++++++++ egs/iam/v1/local/prepare_data.sh | 36 +++++++--- egs/iam/v1/local/process_aachen_splits.py | 88 +++++++++++++++++++++++ egs/iam/v1/run.sh | 12 +++- egs/iam/v1/run_end2end.sh | 12 +++- 5 files changed, 183 insertions(+), 11 deletions(-) create mode 100755 egs/iam/v1/local/process_aachen_splits.py diff --git a/egs/iam/v1/local/chain/compare_wer.sh b/egs/iam/v1/local/chain/compare_wer.sh index ad90710b13f..2ce14e13694 100755 --- a/egs/iam/v1/local/chain/compare_wer.sh +++ b/egs/iam/v1/local/chain/compare_wer.sh @@ -27,6 +27,14 @@ for x in $*; do done echo +echo -n "# WER (rescored) " +for x in $*; do + wer="--" + [ -d $x/decode_test_rescored ] && wer=$(cat $x/decode_test_rescored/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + echo -n "# CER " for x in $*; do cer=$(cat $x/decode_test/scoring_kaldi/best_cer | awk '{print $2}') @@ -34,6 +42,44 @@ for x in $*; do done echo +echo -n "# CER (rescored) " +for x in $*; do + cer="--" + [ -d $x/decode_test_rescored ] && cer=$(cat $x/decode_test_rescored/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + +echo -n "# WER val " +for x in $*; do + wer=$(cat $x/decode_val/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# WER (rescored) val " +for x in $*; do + wer="--" + [ -d $x/decode_val_rescored ] && wer=$(cat $x/decode_val_rescored/scoring_kaldi/best_wer | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# CER val " +for x in $*; do + cer=$(cat $x/decode_val/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + +echo -n "# CER (rescored) val " +for x in $*; do + cer="--" + [ -d $x/decode_val_rescored ] && cer=$(cat $x/decode_val_rescored/scoring_kaldi/best_cer | awk '{print $2}') + printf "% 10s" $cer +done +echo + if $used_epochs; then exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. 
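
The compare_wer.sh additions above read scoring_kaldi/best_wer and best_cer and print the second whitespace-separated field (the figure after %WER or %CER), falling back to "--" when a rescored or val decode directory is missing. An equivalent helper in Python, assuming the usual exp/chain/<model>/decode_<set>[_rescored] layout:

import os

def best_metric(exp_dir, decode_dir, metric='wer'):
    # Read the best_wer/best_cer file and return its second field, the number
    # printed after %WER or %CER; return '--' when the decode directory is
    # absent (for example when no rescoring was run).
    path = os.path.join(exp_dir, decode_dir, 'scoring_kaldi', 'best_' + metric)
    if not os.path.isfile(path):
        return '--'
    with open(path) as f:
        return f.read().split()[1]

print(best_metric('exp/chain/cnn_e2eali_1b', 'decode_test'))
print(best_metric('exp/chain/cnn_e2eali_1b', 'decode_val_rescored', 'cer'))
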
fi diff --git a/egs/iam/v1/local/prepare_data.sh b/egs/iam/v1/local/prepare_data.sh index 9c01ac90f28..dc07f07e318 100755 --- a/egs/iam/v1/local/prepare_data.sh +++ b/egs/iam/v1/local/prepare_data.sh @@ -18,6 +18,7 @@ stage=0 download_dir=data/download +process_aachen_split=false wellington_dir= username= password= # username and password for downloading the IAM database @@ -53,6 +54,8 @@ ascii_url=http://www.fki.inf.unibe.ch/DBs/iamDB/data/ascii/ascii.tgz brown_corpus_url=http://www.sls.hawaii.edu/bley-vroman/brown.txt lob_corpus_url=http://ota.ox.ac.uk/text/0167.zip wellington_corpus_loc=/export/corpora5/Wellington/WWC/ +aachen_split_url=http://www.openslr.org/resources/56/splits.zip +aachen_splits=data/local/aachensplits mkdir -p $download_dir data/local # download and extact images and transcription @@ -144,6 +147,18 @@ else echo "$0: Wellington Corpus not included because wellington_dir not provided" fi +if [ -d $aachen_splits ]; then + echo "$0: Not downloading the Aachen splits as it is already there." +else + if [ ! -f $aachen_splits/splits.zip ]; then + echo "$0: Downloading Aachen splits ..." + mkdir -p $aachen_splits + wget -P $aachen_splits/ $aachen_split_url || exit 1; + fi + unzip $aachen_splits/splits.zip -d $aachen_splits || exit 1; + echo "$0: Done downloading and extracting Aachen splits" +fi + mkdir -p data/{train,test,val} file_name=largeWriterIndependentTextLineRecognitionTask @@ -160,12 +175,17 @@ cat $train_old > $train_new cat $test_old > $test_new cat $val1_old $val2_old > $val_new -if [ $stage -le 0 ]; then - local/process_data.py data/local data/train --dataset train || exit 1 - local/process_data.py data/local data/test --dataset test || exit 1 - local/process_data.py data/local data/val --dataset validation || exit 1 - - image/fix_data_dir.sh data/train - image/fix_data_dir.sh data/test - image/fix_data_dir.sh data/val +if $process_aachen_split; then + local/process_aachen_splits.py data/local $aachen_splits/splits data/train --dataset train || exit 1 + local/process_aachen_splits.py data/local $aachen_splits/splits data/test --dataset test || exit 1 + local/process_aachen_splits.py data/local $aachen_splits/splits data/val --dataset validation || exit 1 +else + local/process_data.py data/local data/train --dataset train || exit 1 + local/process_data.py data/local data/test --dataset test || exit 1 + local/process_data.py data/local data/val --dataset validation || exit 1 fi + +image/fix_data_dir.sh data/train +image/fix_data_dir.sh data/test +image/fix_data_dir.sh data/val + diff --git a/egs/iam/v1/local/process_aachen_splits.py b/egs/iam/v1/local/process_aachen_splits.py new file mode 100755 index 00000000000..cb6a6d4f0d8 --- /dev/null +++ b/egs/iam/v1/local/process_aachen_splits.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 + +# Copyright 2017 Chun Chieh Chang +# 2017 Ashish Arora + +""" This script reads the extracted IAM database files and creates + the following files (for the data subset selected via --dataset): + text, utt2spk, images.scp. + + Eg. local/process_aachen_splits.py data/local data/train data --dataset train + Eg. text file: 000_a01-000u-00 A MOVE to stop Mr. 
Gaitskell from + utt2spk file: 000_a01-000u-00 000 + images.scp file: 000_a01-000u-00 data/local/lines/a01/a01-000u/a01-000u-00.png +""" + +import argparse +import os +import sys +import xml.dom.minidom as minidom + +parser = argparse.ArgumentParser(description="""Creates text, utt2spk + and images.scp files.""") +parser.add_argument('database_path', type=str, + help='Path to the downloaded (and extracted) IAM data') +parser.add_argument('split_path', type=str, + help='location of the train/test/val set') +parser.add_argument('out_dir', type=str, + help='location to write output files.') +parser.add_argument('--dataset', type=str, default='train', + choices=['train', 'test','validation'], + help='Subset of data to process.') +args = parser.parse_args() + +text_file = os.path.join(args.out_dir + '/', 'text') +text_fh = open(text_file, 'w') + +utt2spk_file = os.path.join(args.out_dir + '/', 'utt2spk') +utt2spk_fh = open(utt2spk_file, 'w') + +image_file = os.path.join(args.out_dir + '/', 'images.scp') +image_fh = open(image_file, 'w') + +dataset_path = os.path.join(args.split_path, + args.dataset + '.uttlist') + +text_file_path = os.path.join(args.database_path, + 'ascii','lines.txt') +text_dict = {} +def process_text_file_for_word_model(): + with open (text_file_path, 'rt') as in_file: + for line in in_file: + if line[0]=='#': + continue + line = line.strip() + utt_id = line.split(' ')[0] + text_vect = line.split(' ')[8:] + text = "".join(text_vect) + text = text.replace("|", " ") + text_dict[utt_id] = text + + +### main ### + +print("Processing '{}' data...".format(args.dataset)) +process_text_file_for_word_model() + +with open(dataset_path) as f: + for line in f: + line = line.strip() + line_vect = line.split('-') + xml_file = line_vect[0] + '-' + line_vect[1] + xml_path = os.path.join(args.database_path, 'xml', xml_file + '.xml') + doc = minidom.parse(xml_path) + form_elements = doc.getElementsByTagName('form')[0] + writer_id = form_elements.getAttribute('writer-id') + outerfolder = form_elements.getAttribute('id')[0:3] + innerfolder = form_elements.getAttribute('id') + lines_path = os.path.join(args.database_path, 'lines', + outerfolder, innerfolder) + for file in os.listdir(lines_path): + if file.endswith(".png"): + image_file_path = os.path.join(lines_path, file) + base_name = os.path.splitext(os.path.basename(image_file_path))[0] + text = text_dict[base_name] + utt_id = writer_id + '_' + base_name + text_fh.write(utt_id + ' ' + text + '\n') + utt2spk_fh.write(utt_id + ' ' + writer_id + '\n') + image_fh.write(utt_id + ' ' + image_file_path + '\n') diff --git a/egs/iam/v1/run.sh b/egs/iam/v1/run.sh index 7907f018c04..049223aba07 100755 --- a/egs/iam/v1/run.sh +++ b/egs/iam/v1/run.sh @@ -21,6 +21,8 @@ iam_database=/export/corpora5/handwriting_ocr/IAM # "https://www.victoria.ac.nz/lals/resources/corpora-default" wellington_database=/export/corpora5/Wellington/WWC/ train_set=train_aug +process_aachen_split=false +overwrite=false . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. ## This relates to the queue. @@ -40,7 +42,8 @@ if [ $stage -le 0 ]; then echo "$0: Preparing data..." 
local/prepare_data.sh --download-dir "$iam_database" \
 --wellington-dir "$wellington_database" \
- --username "$username" --password "$password"
+ --username "$username" --password "$password" \
+ --process_aachen_split $process_aachen_split
 fi
 
 mkdir -p data/{train,test,val}/data
@@ -84,7 +87,9 @@ if [ $stage -le 4 ]; then
 local/prepare_dict.sh --vocab-size 500k --dir data/local/dict # this is for training
 utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.95 \
 data/local/dict "<unk>" data/lang/temp data/lang
-
+ silphonelist=`cat data/lang/phones/silence.csl`
+ nonsilphonelist=`cat data/lang/phones/nonsilence.csl`
+ local/gen_topo.py 8 4 4 $nonsilphonelist $silphonelist data/lang/phones.txt >data/lang/topo
 # This is for decoding. We use a 50k lexicon to be consistent with the papers
 # reporting WERs on IAM:
 local/prepare_dict.sh --vocab-size 50k --dir data/local/dict_50k # this is for decoding
@@ -99,6 +104,9 @@ if [ $stage -le 4 ]; then
 utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 \
 --unk-fst exp/unk_lang_model/unk_fst.txt \
 data/local/dict_50k "<unk>" data/lang_unk/temp data/lang_unk
+ silphonelist=`cat data/lang/phones/silence.csl`
+ nonsilphonelist=`cat data/lang/phones/nonsilence.csl`
+ local/gen_topo.py 8 4 4 $nonsilphonelist $silphonelist data/lang/phones.txt >data/lang_unk/topo
 cp data/lang_test/G.fst data/lang_unk/G.fst
 fi
diff --git a/egs/iam/v1/run_end2end.sh b/egs/iam/v1/run_end2end.sh
index 58461c740b6..fa64ea944a1 100755
--- a/egs/iam/v1/run_end2end.sh
+++ b/egs/iam/v1/run_end2end.sh
@@ -6,6 +6,8 @@ stage=0
 nj=20
 username=
 password=
+process_aachen_split=false
+overwrite=false
 # iam_database points to the database path on the JHU grid. If you have not
 # already downloaded the database you can set it to a local directory
 # like "data/download" and follow the instructions
@@ -24,10 +26,18 @@ train_set=train_aug
 # if supplied.
 ./local/check_tools.sh
 if [ $stage -le 0 ]; then
+
+  if [ -f data/train/text ] && ! $overwrite; then
+    echo "$0: Not processing; the script has probably been run from the wrong stage"
+    echo "Exiting with status 1 to avoid data corruption"
+    exit 1;
+  fi
+
 echo "$0: Preparing data..."
local/prepare_data.sh --download-dir "$iam_database" \ --wellington-dir "$wellington_database" \ - --username "$username" --password "$password" + --username "$username" --password "$password" \ + --process_aachen_split $process_aachen_split fi mkdir -p data/{train,test,val}/data From 3775cfe46d6482330c99abe77260c02547490a30 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 10 Jan 2019 01:13:26 -0500 Subject: [PATCH 12/18] minor change --- egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh index 0f6de7ba563..9b5106a892d 100755 --- a/egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh +++ b/egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh @@ -56,7 +56,7 @@ if [ $stage -le 0 ]; then fi if [ $stage -le 1 ]; then - steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --cmd "$cmd" \ + steps/nnet3/chain/e2e/prepare_e2e.sh --nj 70 --cmd "$cmd" \ --shared-phones true \ --type mono \ data/$train_set $lang $treedir @@ -116,7 +116,7 @@ if [ $stage -le 3 ]; then --trainer.num-epochs 2 \ --trainer.optimization.momentum 0 \ --trainer.optimization.num-jobs-initial 6 \ - --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.num-jobs-final 16 \ --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.shrink-value 1.0 \ @@ -143,7 +143,7 @@ fi if [ $stage -le 5 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj 30 --cmd "$cmd" \ + --nj 70 --cmd "$cmd" \ $dir/graph data/test $dir/decode_test || exit 1; fi From 6f77b74d297a1907ea3559a4ac55fe3f9f1b411e Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 10 Jan 2019 01:23:04 -0500 Subject: [PATCH 13/18] adding val decoding --- .../local/chain/tuning/run_cnn_chainali_1a.sh | 21 +++++++++++-------- .../local/chain/tuning/run_cnn_chainali_1b.sh | 19 ++++++++++------- .../local/chain/tuning/run_cnn_chainali_1c.sh | 19 ++++++++++------- 3 files changed, 34 insertions(+), 25 deletions(-) diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh index ee3a1a3d92c..82304351533 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh @@ -8,6 +8,7 @@ stage=0 nj=30 train_set=train +decode_val=true gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it # should have alignments for the specified training data. nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. @@ -32,7 +33,8 @@ tdnn_dim=450 # training options srand=0 remove_egs=false -lang_test=lang_test +lang_test=lang_unk +if $decode_val; then maybe_val=val; else maybe_val= ; fi # End configuration section. 
echo "$0 $@" # Print the command line for logging @@ -219,12 +221,13 @@ fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + done fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh index c6876fbafcb..c7446d7283e 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh @@ -22,6 +22,7 @@ stage=0 nj=30 train_set=train +decode_val=true gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it # should have alignments for the specified training data. nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. @@ -47,6 +48,7 @@ tdnn_dim=450 srand=0 remove_egs=false lang_test=lang_unk +if $decode_val; then maybe_val=val; else maybe_val= ; fi # End configuration section. echo "$0 $@" # Print the command line for logging @@ -235,12 +237,13 @@ fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + done fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh index 54c52d913de..cff2dd7862f 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh @@ -20,6 +20,7 @@ stage=0 nj=30 train_set=train +decode_val=true gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it # should have alignments for the specified training data. nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. @@ -44,6 +45,7 @@ tdnn_dim=450 srand=0 remove_egs=false lang_test=lang_unk +if $decode_val; then maybe_val=val; else maybe_val= ; fi # End configuration section. 
echo "$0 $@" # Print the command line for logging @@ -236,12 +238,13 @@ fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + done fi + +echo "Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir From 51b2043ca3472201ceef1e1b05322d8052b851b8 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 10 Jan 2019 01:25:30 -0500 Subject: [PATCH 14/18] minor change --- egs/iam/v1/run.sh | 2 +- egs/iam/v1/run_end2end.sh | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/egs/iam/v1/run.sh b/egs/iam/v1/run.sh index 049223aba07..85811b6cb3d 100755 --- a/egs/iam/v1/run.sh +++ b/egs/iam/v1/run.sh @@ -106,7 +106,7 @@ if [ $stage -le 4 ]; then data/local/dict_50k "" data/lang_unk/temp data/lang_unk silphonelist=`cat data/lang/phones/silence.csl` nonsilphonelist=`cat data/lang/phones/nonsilence.csl` - local/gen_topo.py 8 4 4 $nonsilphonelist $silphonelist data/lang/phones.txt >data/lang_unk/topo + local/gen_topo.py 8 4 4 $nonsilphonelist $silphonelist data/lang_unk/phones.txt >data/lang_unk/topo cp data/lang_test/G.fst data/lang_unk/G.fst fi diff --git a/egs/iam/v1/run_end2end.sh b/egs/iam/v1/run_end2end.sh index fa64ea944a1..0a8b014715f 100755 --- a/egs/iam/v1/run_end2end.sh +++ b/egs/iam/v1/run_end2end.sh @@ -98,6 +98,10 @@ if [ $stage -le 4 ]; then data/local/dict_50k exp/unk_lang_model utils/prepare_lang.sh --unk-fst exp/unk_lang_model/unk_fst.txt \ data/local/dict_50k "" data/lang_unk/temp data/lang_unk + + silphonelist=`cat data/lang/phones/silence.csl` + nonsilphonelist=`cat data/lang/phones/nonsilence.csl` + local/gen_topo.py 8 4 4 $nonsilphonelist $silphonelist data/lang_unk/phones.txt >data/lang_unk/topo cp data/lang_test/G.fst data/lang_unk/G.fst fi From 38b85c6ef362b40c3246f164967fd68a767b3872 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 10 Jan 2019 01:26:50 -0500 Subject: [PATCH 15/18] minor change --- egs/iam/v1/local/chain/compare_wer.sh | 32 --------------------------- 1 file changed, 32 deletions(-) diff --git a/egs/iam/v1/local/chain/compare_wer.sh b/egs/iam/v1/local/chain/compare_wer.sh index 2ce14e13694..4a2cc29481c 100755 --- a/egs/iam/v1/local/chain/compare_wer.sh +++ b/egs/iam/v1/local/chain/compare_wer.sh @@ -27,14 +27,6 @@ for x in $*; do done echo -echo -n "# WER (rescored) " -for x in $*; do - wer="--" - [ -d $x/decode_test_rescored ] && wer=$(cat $x/decode_test_rescored/scoring_kaldi/best_wer | awk '{print $2}') - printf "% 10s" $wer -done -echo - echo -n "# CER " for x in $*; do cer=$(cat $x/decode_test/scoring_kaldi/best_cer | awk '{print $2}') @@ -42,14 +34,6 @@ for x in $*; do done echo -echo -n "# CER (rescored) " -for x in $*; do - cer="--" - [ -d $x/decode_test_rescored ] && cer=$(cat $x/decode_test_rescored/scoring_kaldi/best_cer | awk '{print $2}') - printf "% 10s" $cer -done -echo - echo -n "# WER val " for x in $*; do wer=$(cat $x/decode_val/scoring_kaldi/best_wer | awk '{print $2}') @@ -57,14 +41,6 @@ for x in $*; do done echo -echo -n "# WER 
(rescored) val " -for x in $*; do - wer="--" - [ -d $x/decode_val_rescored ] && wer=$(cat $x/decode_val_rescored/scoring_kaldi/best_wer | awk '{print $2}') - printf "% 10s" $wer -done -echo - echo -n "# CER val " for x in $*; do cer=$(cat $x/decode_val/scoring_kaldi/best_cer | awk '{print $2}') @@ -72,14 +48,6 @@ for x in $*; do done echo -echo -n "# CER (rescored) val " -for x in $*; do - cer="--" - [ -d $x/decode_val_rescored ] && cer=$(cat $x/decode_val_rescored/scoring_kaldi/best_cer | awk '{print $2}') - printf "% 10s" $cer -done -echo - if $used_epochs; then exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. fi From a04aa19e40ab8134d53789da08d281252c04300a Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 10 Jan 2019 17:05:08 -0500 Subject: [PATCH 16/18] adding result --- .../v1/local/chain/tuning/run_e2e_cnn_1a.sh | 41 +++++-------------- egs/madcat_zh/v1/run_end2end.sh | 22 ++++++++-- 2 files changed, 28 insertions(+), 35 deletions(-) diff --git a/egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh index 9b5106a892d..ffc9a4c8a14 100755 --- a/egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh +++ b/egs/madcat_zh/v1/local/chain/tuning/run_e2e_cnn_1a.sh @@ -1,16 +1,18 @@ #!/bin/bash # Copyright 2017 Hossein Hadian -# local/chain/compare_wer.sh exp/chain/cnn_1a/ exp/chain/cnn_chainali_1b/ exp/chain/e2e_cnn_1a/ -# System cnn_1a cnn_chainali_1b e2e_cnn_1a -# WER 13.51 6.76 10.55 -# Final train prob -0.0291 -0.0138 -0.0702 -# Final valid prob -0.0712 -0.0171 -0.0578 -# Final train prob (xent) -0.3847 -0.4169 -# Final valid prob (xent) -0.4962 -0.5040 +# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a +# System e2e_cnn_1a +# WER 10.41 +# Final train prob -0.0536 +# Final valid prob -0.0489 +# Final train prob (xent) +# Final valid prob (xent) -set -e +# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a/ +# exp/chain/e2e_cnn_1a/: num-iters=63 nj=6..12 num-params=6.1M dim=80->5760 combine=-0.048->-0.048 (over 5) logprob:train/valid[41,62,final]=(-0.062,-0.065,-0.054/-0.058,-0.062,-0.049) +set -e # configs for 'chain' stage=0 train_stage=-10 @@ -126,26 +128,3 @@ if [ $stage -le 3 ]; then --tree-dir $treedir \ --dir $dir || exit 1; fi - -if [ $stage -le 4 ]; then - # The reason we are using data/lang here, instead of $lang, is just to - # emphasize that it's not actually important to give mkgraph.sh the - # lang directory with the matched topology (since it gets the - # topology file from the model). So you could give it a different - # lang directory, one that contained a wordlist and LM of your choice, - # as long as phones.txt was compatible. - - utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test \ - $dir $dir/graph || exit 1; -fi - -if [ $stage -le 5 ]; then - frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj 70 --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; -fi - -echo "Done. Date: $(date). Results:" -local/chain/compare_wer.sh $dir diff --git a/egs/madcat_zh/v1/run_end2end.sh b/egs/madcat_zh/v1/run_end2end.sh index fbfda3e6c76..7e0fc1e25d1 100755 --- a/egs/madcat_zh/v1/run_end2end.sh +++ b/egs/madcat_zh/v1/run_end2end.sh @@ -81,13 +81,27 @@ if [ $stage -le 2 ]; then fi if [ $stage -le 3 ]; then + echo "$0: calling the flat-start chain recipe..." 
+ local/chain/run_e2e_cnn.sh +fi + +lang_decode=data/lang_test +decode_e2e=true +if [ $stage -le 4 ]; then echo "$0: Estimating a language model for decoding..." local/train_lm.sh utils/format_lm.sh data/lang data/local/local_lm/data/arpa/3gram_unpruned.arpa.gz \ - data/local/dict/lexicon.txt data/lang_test + data/local/dict/lexicon.txt $lang_decode fi -if [ $stage -le 4 ]; then - echo "$0: calling the flat-start chain recipe..." - local/chain/run_flatstart_cnn1a.sh +if [ $stage -le 5 ] && $decode_e2e; then + echo "$0: $(date) stage 5: decoding end2end setup..." + utils/mkgraph.sh --self-loop-scale 1.0 $lang_decode \ + exp/chain/e2e_cnn_1a/ exp/chain/e2e_cnn_1a/graph || exit 1; + + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 --nj $nj --cmd "$cmd" \ + exp/chain/e2e_cnn_1a/graph data/test exp/chain/e2e_cnn_1a/decode_test || exit 1; + + echo "$0: Done. Date: $(date). Results:" + local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ fi From 0589950959eb6eabeb21b7de92f8e226dcc44463 Mon Sep 17 00:00:00 2001 From: aarora8 Date: Thu, 10 Jan 2019 18:04:25 -0500 Subject: [PATCH 17/18] updating results --- .../v1/local/chain/tuning/run_e2e_cnn_1a.sh | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh index dc6ed4b7c2b..3caf8ae4494 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_e2e_cnn_1a.sh @@ -2,18 +2,17 @@ # Copyright 2017 Hossein Hadian # This script does end2end chain training (i.e. from scratch) - # ./local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ -# System e2e_cnn_1a -# WER 5.73 -# WER (rescored) 5.67 -# CER 1.45 -# CER (rescored) 1.42 -# Final train prob -0.0934 -# Final valid prob -0.0746 +# System e2e_cnn_1a e2e_cnn_1a (with extra corpus text) +# WER 9.47 5.73 +# WER (rescored) 8.05 5.67 +# CER 2.45 1.45 +# CER (rescored) 2.10 1.42 +# Final train prob -0.0934 -0.0934 +# Final valid prob -0.0746 -0.0746 # Final train prob (xent) # Final valid prob (xent) -# Parameters 2.94M +# Parameters 2.94M 2.94M # steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a/ # exp/chain/e2e_cnn_1a/: num-iters=98 nj=6..16 num-params=2.9M dim=40->330 combine=-0.071->-0.070 (over 5) logprob:train/valid[64,97,final]=(-0.089,-0.084,-0.093/-0.075,-0.073,-0.075) From 3be4e4135a6d6b2581e4a7c34003442486eb863d Mon Sep 17 00:00:00 2001 From: aarora8 Date: Wed, 16 Jan 2019 00:35:05 -0500 Subject: [PATCH 18/18] bug fixes, updating results --- egs/iam/v1/local/chain/tuning/run_cnn_1a.sh | 80 ++++++++---------- .../local/chain/tuning/run_cnn_chainali_1a.sh | 57 ++++++------- .../local/chain/tuning/run_cnn_chainali_1b.sh | 68 +++++---------- .../local/chain/tuning/run_cnn_chainali_1c.sh | 61 +++++--------- .../local/chain/tuning/run_cnn_chainali_1d.sh | 84 ++++++++----------- .../local/chain/tuning/run_cnn_e2eali_1a.sh | 74 +++++++--------- .../local/chain/tuning/run_cnn_e2eali_1b.sh | 78 +++++++---------- .../local/chain/tuning/run_cnn_e2eali_1c.sh | 67 +++++---------- .../v1/local/chain/tuning/run_e2e_cnn_1a.sh | 28 +++---- egs/iam/v1/local/gen_topo.py | 2 +- egs/iam/v1/local/train_lm.sh | 9 +- .../v1/local/chain/tuning/run_cnn_1a.sh | 15 +--- .../local/chain/tuning/run_cnn_chainali_1a.sh | 16 +--- egs/madcat_ar/v1/local/process_data.py | 2 +- egs/madcat_ar/v1/run.sh | 17 ++-- 15 files changed, 250 insertions(+), 408 deletions(-) diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh 
b/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh index 41a76920e37..1253bbe5aa3 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh @@ -4,23 +4,23 @@ # 2017 Chun Chieh Chang # 2017 Ashish Arora -# steps/info/chain_dir_info.pl exp/chain/cnn_1a/ -# exp/chain/cnn_1a/: num-iters=21 nj=2..4 num-params=4.4M dim=40->364 combine=-0.021->-0.015 xent:train/valid[13,20,final]=(-1.05,-0.701,-0.591/-1.30,-1.08,-1.00) logprob:train/valid[13,20,final]=(-0.061,-0.034,-0.030/-0.107,-0.101,-0.098) - # local/chain/compare_wer.sh exp/chain/cnn_1a/ -# System cnn_1a -# WER 18.52 -# CER 10.07 -# Final train prob -0.0077 -# Final valid prob -0.0970 -# Final train prob (xent) -0.5484 -# Final valid prob (xent) -0.9643 -# Parameters 4.36M +# System cnn_1a(dict_50k) cnn_1a(dict_50k + unk model) +# WER 16.88 15.18 +# CER 8.52 7.58 +# WER val 16.17 13.53 +# CER val 7.15 5.89 +# Final train prob -0.0299 +# Final valid prob -0.0574 +# Final train prob (xent) -0.3912 +# Final valid prob (xent) -0.6439 +# Parameters 4.36M -set -e -o pipefail +# steps/info/chain_dir_info.pl exp/chain/cnn_1a/ +# exp/chain/cnn_1a/: num-iters=42 nj=2..4 num-params=4.4M dim=40->368 combine=-0.029->-0.029 (over 2) xent:train/valid[27,41,final]=(-0.522,-0.394,-0.391/-0.695,-0.644,-0.644) logprob:train/valid[27,41,final]=(-0.035,-0.030,-0.030/-0.056,-0.057,-0.057) +set -e -o pipefail stage=0 - nj=30 train_set=train gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it @@ -34,28 +34,21 @@ reporting_email= # chain options train_stage=-10 xent_regularize=0.1 -frame_subsampling_factor=4 -alignment_subsampling_factor=1 # training chunk-options chunk_width=340,300,200,100 num_leaves=500 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 tdnn_dim=450 # training options -srand=0 -remove_egs=false -lang_test=lang_unk +lang_decode=lang_unk +decode_val=true +if $decode_val; then maybe_val=val; else maybe_val= ; fi # End configuration section. echo "$0 $@" # Print the command line for logging - . ./cmd.sh . ./path.sh . ./utils/parse_options.sh - if ! cuda-compiled; then cat <368 combine=-0.020->-0.020 (over 2) xent:train/valid[27,41,final]=(-0.534,-0.425,-0.424/-0.659,-0.612,-0.612) logprob:train/valid[27,41,final]=(-0.026,-0.022,-0.022/-0.017,-0.016,-0.016) set -e -o pipefail stage=0 - nj=30 train_set=train decode_val=true @@ -21,29 +33,18 @@ reporting_email= # chain options train_stage=-10 xent_regularize=0.1 -frame_subsampling_factor=4 -alignment_subsampling_factor=1 -# training chunk-options chunk_width=340,300,200,100 num_leaves=500 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 tdnn_dim=450 -# training options -srand=0 -remove_egs=false -lang_test=lang_unk +lang_decode=lang_unk if $decode_val; then maybe_val=val; else maybe_val= ; fi # End configuration section. echo "$0 $@" # Print the command line for logging - . ./cmd.sh . ./path.sh . ./utils/parse_options.sh - if ! 
cuda-compiled; then cat < $dir/configs/network.xconfig input dim=40 name=input - conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 @@ -162,7 +160,6 @@ EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ fi - if [ $stage -le 5 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ @@ -177,9 +174,9 @@ if [ $stage -le 5 ]; then --chain.l2-regularize=0.00005 \ --chain.apply-deriv-weights=false \ --chain.lm-opts="--num-extra-lm-states=500" \ - --chain.frame-subsampling-factor=$frame_subsampling_factor \ - --chain.alignment-subsampling-factor=$alignment_subsampling_factor \ - --trainer.srand=$srand \ + --chain.frame-subsampling-factor=4 \ + --chain.alignment-subsampling-factor=1 \ + --trainer.srand=0 \ --trainer.max-param-change=2.0 \ --trainer.num-epochs=4 \ --trainer.frames-per-iter=1000000 \ @@ -189,15 +186,10 @@ if [ $stage -le 5 ]; then --trainer.optimization.final-effective-lrate=0.0001 \ --trainer.optimization.shrink-value=1.0 \ --trainer.num-chunk-per-minibatch=64,32 \ - --trainer.optimization.momentum=0.0 \ --egs.chunk-width=$chunk_width \ - --egs.chunk-left-context=$chunk_left_context \ - --egs.chunk-right-context=$chunk_right_context \ - --egs.chunk-left-context-initial=0 \ - --egs.chunk-right-context-final=0 \ --egs.dir="$common_egs_dir" \ --egs.opts="--frames-overlap-per-eg 0" \ - --cleanup.remove-egs=$remove_egs \ + --cleanup.remove-egs=false \ --use-gpu=true \ --reporting.email="$reporting_email" \ --feat-dir=$train_data_dir \ @@ -213,9 +205,8 @@ if [ $stage -le 6 ]; then # topology file from the model). So you could give it a different # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. - utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + --self-loop-scale 1.0 data/$lang_decode \ $dir $dir/graph || exit 1; fi @@ -229,5 +220,5 @@ if [ $stage -le 7 ]; then done fi -echo "Done. Date: $(date). Results:" +echo "$0 Done. Date: $(date). Results:" local/chain/compare_wer.sh $dir diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh index c7446d7283e..f5dbb93e7b7 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh @@ -1,25 +1,23 @@ #!/bin/bash # chainali_1b is as chainali_1a except it has 3 more cnn layers and 1 less tdnn layer. 
- -# local/chain/compare_wer.sh exp/chain/cnn_1a/ exp/chain/cnn_chainali_1b/ -# System cnn_1a cnn_chainali_1b -# WER 18.52 14.38 -# CER 10.07 7.14 -# Final train prob -0.0077 -0.0113 -# Final valid prob -0.0970 -0.0400 -# Final train prob (xent) -0.5484 -0.6043 -# Final valid prob (xent) -0.9643 -0.9030 -# Parameters 4.36M 3.96M +# local/chain/compare_wer.sh exp/chain/cnn_chainali_1b +# System cnn_chainali_1b(dict_50k) cnn_chainali_1b(dict_50k + unk_model) +# WER 15.09 13.29 +# CER 7.13 6.08 +# WER val 14.80 11.98 +# CER val 6.16 4.87 +# Final train prob -0.0225 +# Final valid prob -0.0132 +# Final train prob (xent) -0.4466 +# Final valid prob (xent) -0.6048 +# Parameters 3.96M # steps/info/chain_dir_info.pl exp/chain/chainali_cnn_1b/ -# exp/chain/chainali_cnn_1b/: num-iters=21 nj=2..4 num-params=4.0M dim=40->364 combine=-0.009->-0.005 xent:train/valid[13,20,final]=(-1.47,-0.728,-0.623/-1.69,-1.02,-0.940) logprob:train/valid[13,20,final]=(-0.068,-0.030,-0.011/-0.086,-0.056,-0.038) - +# exp/chain/cnn_chainali_1b: num-iters=42 nj=2..4 num-params=4.0M dim=40->368 combine=-0.019->-0.019 (over 2) xent:train/valid[27,41,final]=(-0.545,-0.448,-0.447/-0.645,-0.605,-0.605) logprob:train/valid[27,41,final]=(-0.026,-0.023,-0.023/-0.014,-0.013,-0.013) set -e -o pipefail - stage=0 - nj=30 train_set=train decode_val=true @@ -32,32 +30,20 @@ chain_model_dir=exp/chain${nnet3_affix}/cnn_1a common_egs_dir= reporting_email= -# chain options train_stage=-10 xent_regularize=0.1 -frame_subsampling_factor=4 -alignment_subsampling_factor=1 -# training chunk-options chunk_width=340,300,200,100 num_leaves=500 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 tdnn_dim=450 -# training options -srand=0 -remove_egs=false -lang_test=lang_unk +lang_decode=lang_unk if $decode_val; then maybe_val=val; else maybe_val= ; fi # End configuration section. echo "$0 $@" # Print the command line for logging - . ./cmd.sh . ./path.sh . ./utils/parse_options.sh - if ! cuda-compiled; then cat < $dir/configs/network.xconfig input dim=40 name=input - conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 @@ -162,7 +145,6 @@ if [ $stage -le 4 ]; then ## adding the layers for chain branch relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 - # adding the layers for xent branch # This block prints the configs for a separate output that will be # trained with a cross-entropy objective in the 'chain' mod?els... 
this @@ -193,9 +175,9 @@ if [ $stage -le 5 ]; then --chain.l2-regularize=0.00005 \ --chain.apply-deriv-weights=false \ --chain.lm-opts="--num-extra-lm-states=500" \ - --chain.frame-subsampling-factor=$frame_subsampling_factor \ - --chain.alignment-subsampling-factor=$alignment_subsampling_factor \ - --trainer.srand=$srand \ + --chain.frame-subsampling-factor=4 \ + --chain.alignment-subsampling-factor=1 \ + --trainer.srand=0 \ --trainer.max-param-change=2.0 \ --trainer.num-epochs=4 \ --trainer.frames-per-iter=1000000 \ @@ -205,15 +187,10 @@ if [ $stage -le 5 ]; then --trainer.optimization.final-effective-lrate=0.0001 \ --trainer.optimization.shrink-value=1.0 \ --trainer.num-chunk-per-minibatch=64,32 \ - --trainer.optimization.momentum=0.0 \ --egs.chunk-width=$chunk_width \ - --egs.chunk-left-context=$chunk_left_context \ - --egs.chunk-right-context=$chunk_right_context \ - --egs.chunk-left-context-initial=0 \ - --egs.chunk-right-context-final=0 \ --egs.dir="$common_egs_dir" \ --egs.opts="--frames-overlap-per-eg 0" \ - --cleanup.remove-egs=$remove_egs \ + --cleanup.remove-egs=false \ --use-gpu=true \ --reporting.email="$reporting_email" \ --feat-dir=$train_data_dir \ @@ -229,9 +206,8 @@ if [ $stage -le 6 ]; then # topology file from the model). So you could give it a different # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. - utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + --self-loop-scale 1.0 data/$lang_decode \ $dir $dir/graph || exit 1; fi @@ -245,5 +221,5 @@ if [ $stage -le 7 ]; then done fi -echo "Done. Date: $(date). Results:" +echo "$0 Done. Date: $(date). Results:" local/chain/compare_wer.sh $dir diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh index cff2dd7862f..1dd83c5078f 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh @@ -1,23 +1,22 @@ #!/bin/bash # chainali_1c is as chainali_1b except it uses l2-regularize -# local/chain/compare_wer.sh exp/chain/cnn_chainali_1b exp/chain/cnn_chainali_1c -# System cnn_chainali_1b cnn_chainali_1c -# WER 14.38 12.72 -# CER 7.14 5.99 -# Final train prob -0.0113 -0.0291 -# Final valid prob -0.0400 -0.0359 -# Final train prob (xent) -0.6043 -0.9781 -# Final valid prob (xent) -0.9030 -1.1544 -# Parameters 3.96M 3.96M +# local/chain/compare_wer.sh exp/chain/cnn_chainali_1c +# System cnn_chainali_1c (dict_50k) cnn_chainali_1c(dict_50k + unk_model) +# WER 12.95 11.07 +# CER 6.04 4.91 +# WER val 12.75 9.78 +# CER val 5.15 3.74 +# Final train prob -0.0217 +# Final valid prob -0.0060 +# Final train prob (xent) -0.8303 +# Final valid prob (xent) -0.8665 +# Parameters 3.96M # steps/info/chain_dir_info.pl exp/chain/cnn_chainali_1c -# exp/chain/cnn_chainali_1c: num-iters=21 nj=2..4 num-params=4.0M dim=40->369 combine=-0.007->-0.007 (over 1) xent:train/valid[13,20,final]=(-1.44,-1.05,-0.997/-1.53,-1.19,-1.15) logprob:train/valid[13,20,final]=(-0.056,-0.020,-0.012/-0.056,-0.025,-0.020) - +# exp/chain/cnn_chainali_1c/: num-iters=42 nj=2..4 num-params=4.0M dim=40->368 combine=-0.018->-0.018 (over 1) xent:train/valid[27,41,final]=(-1.22,-0.847,-0.830/-1.19,-0.880,-0.867) logprob:train/valid[27,41,final]=(-0.045,-0.025,-0.022/-0.026,-0.010,-0.006) set -e -o pipefail - stage=0 - nj=30 train_set=train decode_val=true @@ -30,31 +29,20 @@ chain_model_dir=exp/chain${nnet3_affix}/cnn_1a common_egs_dir= reporting_email= -# chain options 
train_stage=-10 xent_regularize=0.1 -frame_subsampling_factor=4 -# training chunk-options chunk_width=340,300,200,100 num_leaves=500 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 tdnn_dim=450 -# training options -srand=0 -remove_egs=false -lang_test=lang_unk +lang_decode=lang_unk if $decode_val; then maybe_val=val; else maybe_val= ; fi # End configuration section. echo "$0 $@" # Print the command line for logging - . ./cmd.sh . ./path.sh . ./utils/parse_options.sh - if ! cuda-compiled; then cat <376 combine=-0.002->-0.002 (over 1) xent:train/valid[13,20,final]=(-1.66,-1.01,-0.865/-1.72,-1.12,-1.01) logprob:train/valid[13,20,final]=(-0.058,-0.019,-0.004/-0.055,-0.027,-0.013) - +# exp/chain/cnn_chainali_1d/: num-iters=42 nj=2..4 num-params=4.0M dim=40->368 combine=-0.018->-0.018 (over 1) xent:train/valid[27,41,final]=(-1.22,-0.847,-0.830/-1.19,-0.880,-0.867) logprob:train/valid[27,41,final]=(-0.045,-0.025,-0.022/-0.026,-0.010,-0.006) set -e -o pipefail stage=0 - nj=30 train_set=train gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it # should have alignments for the specified training data. nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1c_uc #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +affix=_1d #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. ali=tri3_ali -chain_model_dir=exp/chain${nnet3_affix}/cnn_1a_uc +chain_model_dir=exp/chain${nnet3_affix}/cnn_1a common_egs_dir= reporting_email= # chain options train_stage=-10 xent_regularize=0.1 -frame_subsampling_factor=4 # training chunk-options chunk_width=340,300,200,100 num_leaves=500 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 tdnn_dim=450 -# training options -srand=0 -remove_egs=false -lang_test=lang_unk +lang_decode=lang_unk +decode_val=true +if $decode_val; then maybe_val=val; else maybe_val= ; fi + # End configuration section. echo "$0 $@" # Print the command line for logging - . ./cmd.sh . ./path.sh . ./utils/parse_options.sh - if ! 
cuda-compiled; then cat < $dir/configs/network.xconfig input dim=40 name=input - conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 @@ -157,7 +147,6 @@ if [ $stage -le 4 ]; then relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts - ## adding the layers for chain branch relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts @@ -192,11 +181,11 @@ if [ $stage -le 5 ]; then --chain.l2-regularize=0.00005 \ --chain.apply-deriv-weights=false \ --chain.lm-opts="--num-extra-lm-states=500" \ - --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.frame-subsampling-factor=4 \ --chain.alignment-subsampling-factor=1 \ --chain.left-tolerance 3 \ --chain.right-tolerance 3 \ - --trainer.srand=$srand \ + --trainer.srand=0 \ --trainer.max-param-change=2.0 \ --trainer.num-epochs=4 \ --trainer.frames-per-iter=1000000 \ @@ -206,15 +195,10 @@ if [ $stage -le 5 ]; then --trainer.optimization.final-effective-lrate=0.0001 \ --trainer.optimization.shrink-value=1.0 \ --trainer.num-chunk-per-minibatch=64,32 \ - --trainer.optimization.momentum=0.0 \ --egs.chunk-width=$chunk_width \ - --egs.chunk-left-context=$chunk_left_context \ - --egs.chunk-right-context=$chunk_right_context \ - --egs.chunk-left-context-initial=0 \ - --egs.chunk-right-context-final=0 \ --egs.dir="$common_egs_dir" \ --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ - --cleanup.remove-egs=$remove_egs \ + --cleanup.remove-egs=false \ --use-gpu=true \ --reporting.email="$reporting_email" \ --feat-dir=$train_data_dir \ @@ -230,20 +214,20 @@ if [ $stage -le 6 ]; then # topology file from the model). So you could give it a different # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. - utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + --self-loop-scale 1.0 data/$lang_decode \ $dir $dir/graph || exit 1; fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + done fi + +echo "$0 Done. Date: $(date). 
Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh index ba28f681708..f95f6a90ca1 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -1,27 +1,26 @@ #!/bin/bash -# e2eali_1a is the same as chainali_1c but uses the e2e chain model to get the -# lattice alignments and to build a tree - -# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a exp/chain/cnn_chainali_1c exp/chain/cnn_e2eali_1a -# System e2e_cnn_1a cnn_chainali_1c cnn_e2eali_1a -# WER 13.87 12.72 12.70 -# CER 6.54 5.99 5.75 -# Final train prob -0.0371 -0.0291 -0.0557 -# Final valid prob -0.0636 -0.0359 -0.0770 -# Final train prob (xent) -0.9781 -0.8847 -# Final valid prob (xent) -1.1544 -1.0370 -# Parameters 9.13M 3.96M 3.95M +# local/chain/compare_wer.sh exp/chain/cnn_e2eali_1a +# System cnn_e2eali_1a_(dict_50k) cnn_e2eali_1a_(dict_50k + unk model) +# WER 13.30 11.94 +# CER 5.95 5.15 +# WER val 12.85 10.71 +# CER val 5.09 4.03 +# Final train prob -0.0562 +# Final valid prob -0.0634 +# Final train prob (xent) -0.8196 +# Final valid prob (xent) -0.8816 +# Parameters 3.96M # steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1a -# exp/chain/cnn_e2eali_1a: num-iters=21 nj=2..4 num-params=4.0M dim=40->360 combine=-0.056->-0.056 (over 1) xent:train/valid[13,20,final]=(-1.47,-0.978,-0.918/-1.54,-1.10,-1.06) logprob:train/valid[13,20,final]=(-0.106,-0.065,-0.056/-0.113,-0.086,-0.079) +# exp/chain/cnn_e2eali_1a: num-iters=42 nj=2..4 num-params=4.0M dim=40->368 combine=-0.058->-0.058 (over 1) xent:train/valid[27,41,final]=(-2.67,-0.841,-0.820/-2.71,-0.892,-0.882) logprob:train/valid[27,41,final]=(-0.240,-0.060,-0.056/-0.245,-0.068,-0.063) set -e -o pipefail stage=0 - nj=30 train_set=train +decode_val=true nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. e2echain_model_dir=exp/chain/e2e_cnn_1a @@ -32,26 +31,19 @@ reporting_email= train_stage=-10 xent_regularize=0.1 frame_subsampling_factor=4 -# training chunk-options chunk_width=340,300,200,100 num_leaves=500 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 tdnn_dim=450 -# training options -srand=0 remove_egs=true -lang_test=lang_unk +lang_decode=lang_unk +if $decode_val; then maybe_val=val; else maybe_val= ; fi # End configuration section. echo "$0 $@" # Print the command line for logging - . ./cmd.sh . ./path.sh . ./utils/parse_options.sh - if ! cuda-compiled; then cat <376 combine=-0.039->-0.039 (over 1) xent:train/valid[27,41,final]=(-1.28,-0.846,-0.808/-1.27,-0.871,-0.847) logprob:train/valid[27,41,final]=(-0.064,-0.043,-0.038/-0.065,-0.051,-0.044) +# exp/chain/cnn_e2eali_1b: num-iters=42 nj=2..4 num-params=4.0M dim=40->368 combine=-0.039->-0.039 (over 2) xent:train/valid[27,41,final]=(-1.19,-0.805,-0.786/-1.19,-0.846,-0.829) logprob:train/valid[27,41,final]=(-0.060,-0.041,-0.038/-0.062,-0.048,-0.044) set -e -o pipefail stage=0 - nj=30 train_set=train +decode_val=true nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. affix=_1b #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. 
e2echain_model_dir=exp/chain/e2e_cnn_1a @@ -28,27 +30,17 @@ reporting_email= # chain options train_stage=-10 xent_regularize=0.1 -frame_subsampling_factor=4 -# training chunk-options chunk_width=340,300,200,100 num_leaves=500 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 tdnn_dim=450 -# training options -srand=0 -remove_egs=true -lang_test=lang_unk +lang_decode=lang_unk +if $decode_val; then maybe_val=val; else maybe_val= ; fi # End configuration section. echo "$0 $@" # Print the command line for logging - . ./cmd.sh . ./path.sh . ./utils/parse_options.sh - - if ! cuda-compiled; then cat <$lat_dir/splice_opts fi @@ -114,20 +105,17 @@ if [ $stage -le 3 ]; then echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." exit 1; fi - steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor $frame_subsampling_factor \ + --frame-subsampling-factor 4 \ --alignment-subsampling-factor 1 \ --context-opts "--context-width=2 --central-position=1" \ - --cmd "$cmd" $num_leaves ${train_data_dir} \ + --cmd "$cmd" $num_leaves $train_data_dir \ $lang $ali_dir $tree_dir fi - if [ $stage -le 4 ]; then mkdir -p $dir echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) cnn_opts="l2-regularize=0.075" @@ -167,7 +155,6 @@ EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ fi - if [ $stage -le 5 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ @@ -182,11 +169,11 @@ if [ $stage -le 5 ]; then --chain.l2-regularize=0.00005 \ --chain.apply-deriv-weights=false \ --chain.lm-opts="--num-extra-lm-states=500" \ - --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.frame-subsampling-factor=4 \ --chain.alignment-subsampling-factor=1 \ --chain.left-tolerance 3 \ --chain.right-tolerance 3 \ - --trainer.srand=$srand \ + --trainer.srand=0 \ --trainer.max-param-change=2.0 \ --trainer.num-epochs=4 \ --trainer.frames-per-iter=1000000 \ @@ -196,15 +183,10 @@ if [ $stage -le 5 ]; then --trainer.optimization.final-effective-lrate=0.0001 \ --trainer.optimization.shrink-value=1.0 \ --trainer.num-chunk-per-minibatch=64,32 \ - --trainer.optimization.momentum=0.0 \ --egs.chunk-width=$chunk_width \ - --egs.chunk-left-context=$chunk_left_context \ - --egs.chunk-right-context=$chunk_right_context \ - --egs.chunk-left-context-initial=0 \ - --egs.chunk-right-context-final=0 \ --egs.dir="$common_egs_dir" \ --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ - --cleanup.remove-egs=$remove_egs \ + --cleanup.remove-egs=true \ --use-gpu=true \ --reporting.email="$reporting_email" \ --feat-dir=$train_data_dir \ @@ -220,20 +202,20 @@ if [ $stage -le 6 ]; then # topology file from the model). So you could give it a different # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. 
- utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + --self-loop-scale 1.0 data/$lang_decode \ $dir $dir/graph || exit 1; fi if [ $stage -le 7 ]; then frames_per_chunk=$(echo $chunk_width | cut -d, -f1) - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --extra-left-context $chunk_left_context \ - --extra-right-context $chunk_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk $frames_per_chunk \ - --nj $nj --cmd "$cmd" \ - $dir/graph data/test $dir/decode_test || exit 1; + for decode_set in test $maybe_val; do + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$cmd" \ + $dir/graph data/$decode_set $dir/decode_$decode_set || exit 1; + done fi + +echo "$0 Done. Date: $(date). Results:" +local/chain/compare_wer.sh $dir diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh index cf90193f7eb..047d673db17 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh @@ -2,60 +2,47 @@ # e2eali_1c is the same as e2eali_1b but has more CNN layers, different filter size # smaller lm-opts, minibatch, frams-per-iter, less epochs and more initial/finaljobs. - # local/chain/compare_wer.sh exp/chain/cnn_e2eali_1c -# System cnn_e2eali_1c (dict_50k) cnn_e2eali_1c (dict_500k) -# WER 12.20 9.62 -# CER 5.29 4.33 -# Final train prob -0.0494 -0.0494 -# Final valid prob -0.0644 -0.0644 -# Final train prob (xent) -0.4852 -0.4852 -# Final valid prob (xent) -0.5437 -0.5437 -# Parameters 4.33M 4.33M +# System cnn_e2eali_1c (dict_50k) cnn_e2eali_1c(dict_50k + unk_model) +# WER 12.10 9.90 +# CER 5.23 4.16 +# WER val 12.15 9.60 +# CER val 4.78 3.56 +# Final train prob -0.0470 +# Final valid prob -0.0657 +# Final train prob (xent) -0.4713 +# Final valid prob (xent) -0.5437 +# Parameters 4.32M # steps/info/chain_dir_info.pl exp/chain/cnn_e2eali_1c -# exp/chain/cnn_e2eali_1c: num-iters=30 nj=3..5 num-params=4.3M dim=40->376 combine=-0.052->-0.052 (over 1) xent:train/valid[19,29,final]=(-0.715,-0.508,-0.485/-0.717,-0.562,-0.544) logprob:train/valid[19,29,final]=(-0.089,-0.054,-0.049/-0.100,-0.070,-0.064) - - +# exp/chain/cnn_e2eali_1c: num-iters=30 nj=3..5 num-params=4.3M dim=40->368 combine=-0.051->-0.051 (over 1) xent:train/valid[19,29,final]=(-0.722,-0.500,-0.471/-0.748,-0.568,-0.544) logprob:train/valid[19,29,final]=(-0.090,-0.053,-0.047/-0.106,-0.071,-0.066) set -e -o pipefail stage=0 - nj=30 train_set=train decode_val=true nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. affix=_1c #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. -e2echain_model_dir=exp/chain/e2e_cnn_1b +e2echain_model_dir=exp/chain/e2e_cnn_1a common_egs_dir= reporting_email= # chain options train_stage=-10 xent_regularize=0.1 -frame_subsampling_factor=4 -# training chunk-options chunk_width=340,300,200,100 num_leaves=500 -# we don't need extra left/right context for TDNN systems. -chunk_left_context=0 -chunk_right_context=0 tdnn_dim=550 -# training options -srand=0 -remove_egs=true -lang_decode=data/lang_test +lang_decode=data/lang_unk if $decode_val; then maybe_val=val; else maybe_val= ; fi dropout_schedule='0,0@0.20,0.2@0.50,0' # End configuration section. echo "$0 $@" # Print the command line for logging - . ./cmd.sh . ./path.sh . ./utils/parse_options.sh - - if ! 
cuda-compiled; then cat <$lat_dir/splice_opts fi @@ -121,20 +107,17 @@ if [ $stage -le 3 ]; then echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." exit 1; fi - steps/nnet3/chain/build_tree.sh \ - --frame-subsampling-factor $frame_subsampling_factor \ + --frame-subsampling-factor 4 \ --alignment-subsampling-factor 1 \ --context-opts "--context-width=2 --central-position=1" \ - --cmd "$cmd" $num_leaves ${train_data_dir} \ + --cmd "$cmd" $num_leaves $train_data_dir \ $lang $ali_dir $tree_dir fi - if [ $stage -le 4 ]; then mkdir -p $dir echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) cnn_opts="l2-regularize=0.03 dropout-proportion=0.0" @@ -146,7 +129,6 @@ if [ $stage -le 4 ]; then mkdir -p $dir/configs cat < $dir/configs/network.xconfig input dim=40 name=input - conv-relu-batchnorm-dropout-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 conv-relu-batchnorm-dropout-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 conv-relu-batchnorm-dropout-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 @@ -160,7 +142,6 @@ if [ $stage -le 4 ]; then ## adding the layers for chain branch relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts - # adding the layers for xent branch # This block prints the configs for a separate output that will be # trained with a cross-entropy objective in the 'chain' mod?els... this @@ -176,7 +157,6 @@ EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ fi - if [ $stage -le 5 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ @@ -191,11 +171,11 @@ if [ $stage -le 5 ]; then --chain.l2-regularize=0.00005 \ --chain.apply-deriv-weights=true \ --chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1000" \ - --chain.frame-subsampling-factor=$frame_subsampling_factor \ + --chain.frame-subsampling-factor=4 \ --chain.alignment-subsampling-factor=1 \ --chain.left-tolerance 3 \ --chain.right-tolerance 3 \ - --trainer.srand=$srand \ + --trainer.srand=0 \ --trainer.max-param-change=2.0 \ --trainer.num-epochs=5 \ --trainer.frames-per-iter=1500000 \ @@ -206,15 +186,10 @@ if [ $stage -le 5 ]; then --trainer.optimization.final-effective-lrate=0.0001 \ --trainer.optimization.shrink-value=1.0 \ --trainer.num-chunk-per-minibatch=32,16 \ - --trainer.optimization.momentum=0.0 \ --egs.chunk-width=$chunk_width \ - --egs.chunk-left-context=$chunk_left_context \ - --egs.chunk-right-context=$chunk_right_context \ - --egs.chunk-left-context-initial=0 \ - --egs.chunk-right-context-final=0 \ --egs.dir="$common_egs_dir" \ --egs.opts="--frames-overlap-per-eg 0 --constrained false" \ - --cleanup.remove-egs=$remove_egs \ + --cleanup.remove-egs=true \ --use-gpu=true \ --reporting.email="$reporting_email" \ --feat-dir=$train_data_dir \ @@ -230,7 +205,6 @@ if [ $stage -le 6 ]; then # topology file from the model). So you could give it a different # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. 
- utils/mkgraph.sh \ --self-loop-scale 1.0 $lang_decode \ $dir $dir/graph || exit 1; @@ -246,6 +220,5 @@ if [ $stage -le 7 ]; then done fi - -echo "Done. Date: $(date). Results:" +echo "$0 Done. Date: $(date). Results:" local/chain/compare_wer.sh $dir diff --git a/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh index cc27c8e55c4..462ad0522de 100755 --- a/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh +++ b/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh @@ -2,22 +2,22 @@ # Copyright 2017 Hossein Hadian # This script does end2end chain training (i.e. from scratch) -# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a -# System e2e_cnn_1a -# WER 15.24 -# CER 7.27 -# Final train prob -0.0209 -# Final valid prob -0.0417 +# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/ +# System e2e_cnn_1a (dict_50k) e2e_cnn_1a (dict_50k + unk_model) +# WER 15.21 14.41 +# CER 7.43 6.82 +# WER val 14.84 13.51 +# CER val 6.41 5.60 +# Final train prob -0.0206 +# Final valid prob -0.0393 # Final train prob (xent) # Final valid prob (xent) # Parameters 9.52M # steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a -# exp/chain/e2e_cnn_1a: num-iters=42 nj=2..4 num-params=9.5M dim=40->12640 combine=-0.021->-0.021 (over 1) logprob:train/valid[27,41,final]=(-0.025,-0.021,-0.021/-0.044,-0.043,-0.042) +# exp/chain/e2e_cnn_1a: num-iters=42 nj=2..4 num-params=9.5M dim=40->12640 combine=-0.020->-0.020 (over 1) logprob:train/valid[27,41,final]=(-0.025,-0.021,-0.021/-0.044,-0.040,-0.039) set -e - -# configs for 'chain' stage=0 train_stage=-10 get_egs_stage=-10 @@ -30,7 +30,7 @@ minibatch_size=150=100,64/300=50,32/600=25,16/1200=16,8 common_egs_dir= train_set=train decode_val=true -lang_decode=data/lang_test +lang_decode=data/lang_unk if $decode_val; then maybe_val=val; else maybe_val= ; fi # End configuration section. echo "$0 $@" # Print the command line for logging @@ -85,7 +85,6 @@ if [ $stage -le 2 ]; then mkdir -p $dir/configs cat < $dir/configs/network.xconfig input dim=40 name=input - conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1 conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2 conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2 @@ -98,14 +97,12 @@ if [ $stage -le 2 ]; then relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs fi if [ $stage -le 3 ]; then # no need to store the egs in a shared storage because we always # remove them. Anyway, it takes only 5 minutes to generate them. - steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \ --cmd "$cmd" \ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ @@ -128,7 +125,7 @@ if [ $stage -le 3 ]; then --trainer.optimization.shrink-value 1.0 \ --trainer.max-param-change 2.0 \ --cleanup.remove-egs true \ - --feat-dir data/${train_set} \ + --feat-dir data/$train_set \ --tree-dir $treedir \ --dir $dir || exit 1; fi @@ -140,7 +137,6 @@ if [ $stage -le 4 ]; then # topology file from the model). So you could give it a different # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. 
- utils/mkgraph.sh \ --self-loop-scale 1.0 $lang_decode \ $dir $dir/graph || exit 1; @@ -154,5 +150,5 @@ if [ $stage -le 5 ]; then done fi -echo "Done. Date: $(date). Results:" +echo "$0 Done. Date: $(date). Results:" local/chain/compare_wer.sh $dir diff --git a/egs/iam/v1/local/gen_topo.py b/egs/iam/v1/local/gen_topo.py index 540bfbcf270..6fae276d542 100755 --- a/egs/iam/v1/local/gen_topo.py +++ b/egs/iam/v1/local/gen_topo.py @@ -36,7 +36,7 @@ with open(args.phone_list) as f: for line in f: line = line.strip() - phone = line.split(' ')[0] + phone = line.split('_')[0] if len(phone) == 1 and phone in exclude: punctuation_phones.append(int(line.split(' ')[1])) # For nonsilence phones that are not punctuations diff --git a/egs/iam/v1/local/train_lm.sh b/egs/iam/v1/local/train_lm.sh index a15fbea2af3..911f54c5439 100755 --- a/egs/iam/v1/local/train_lm.sh +++ b/egs/iam/v1/local/train_lm.sh @@ -58,9 +58,12 @@ if [ $stage -le 0 ]; then rm ${dir}/data/text/* 2>/dev/null || true # Using LOB and brown corpus. - cat data/local/lobcorpus/0167/download/LOB_COCOA/lob.txt | \ - local/remove_test_utterances_from_lob.py data/test/text data/val/text \ - > ${dir}/data/text/lob.txt + if [ ! -f data/local/lob-train-only.txt ]; then + cat data/local/lobcorpus/0167/download/LOB_COCOA/lob.txt | \ + local/remove_test_utterances_from_lob.py data/test/text.old data/val/text.old \ + > data/local/lob-train-only.txt + fi + cat data/local/lob-train-only.txt > ${dir}/data/text/lob.txt cat data/local/browncorpus/brown.txt > ${dir}/data/text/brown.txt if [ -d "data/local/wellingtoncorpus" ]; then cat data/local/wellingtoncorpus/Wellington_annotation_removed.txt > ${dir}/data/text/wellington.txt diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh index eb140e900e1..d449805be1d 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh @@ -24,22 +24,17 @@ xent_regularize=0.1 # training chunk-options chunk_width=340,300,200,100 num_leaves=500 -# we don't need extra left/right context for TDNN systems. tdnn_dim=450 -# training options -srand=0 remove_egs=false lang_decode=data/lang lang_rescore=data/lang_rescore_6g # End configuration section. echo "$0 $@" # Print the command line for logging - . ./cmd.sh . ./path.sh . ./utils/parse_options.sh - if ! cuda-compiled; then cat <