From 817ce77d40c976bfa92cc8af962b75d954c0396c Mon Sep 17 00:00:00 2001 From: Ondrej Platek Date: Tue, 18 Jun 2013 10:56:26 +0000 Subject: [PATCH] Merge branch 'master' into sandbox-oplatek Conflicts: .gitignore INSTALL README.txt egs/babel/s5/local/generate_proxy_keywords.sh egs/wsj/s5/steps/train_nnet_cpu.sh egs/wsj/s5/utils/nnet-cpu/make_nnet_config_preconditioned.pl src/Makefile src/configure src/lat/Makefile src/makefiles/cygwin.mk src/makefiles/darwin_10_5.mk src/makefiles/darwin_10_6.mk src/makefiles/darwin_10_7.mk src/makefiles/darwin_10_8.mk src/makefiles/linux_atlas.mk src/makefiles/linux_atlas_64bit.mk src/makefiles/linux_clapack.mk src/makefiles/linux_openblas.mk src/nnet-cpu/mixup-nnet.cc src/nnet-cpu/nnet-component-test.cc src/nnet-cpu/nnet-component.cc src/nnet-cpu/nnet-component.h src/nnet-cpu/nnet-nnet.cc src/nnet-cpu/nnet-nnet.h src/nnet-cpu/nnet-update-parallel.cc src/nnet-cpu/nnet-update-parallel.h src/nnet-cpubin/nnet-train-parallel.cc src/nnet/nnet-pdf-prior.h src/nnetbin/nnet-forward.cc tools/Makefile tools/extras/install_portaudio.sh git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/oplatek@2520 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8 --- INSTALL | 2 +- INSTALL.md | 168 ++++++ README.md | 25 + README.txt | 3 +- egs/babel/s5/local/annotatedKwlist2KWs.pl | 124 +++++ egs/babel/s5/local/buildEditDistanceFst.pl | 127 +++++ egs/babel/s5/local/count2logprob.pl | 94 ++++ egs/babel/s5/local/subsetATWV.pl | 120 +++++ egs/kaldi-vystadial-recipe/.gitingore | 2 + egs/kaldi-vystadial-recipe/README.md | 54 ++ egs/kaldi-vystadial-recipe/s5/.gitignore | 7 + egs/kaldi-vystadial-recipe/s5/cmd.sh | 13 + .../s5/conf/decode.config | 3 + egs/kaldi-vystadial-recipe/s5/conf/mfcc.conf | 7 + .../s5/conf/train_conf.sh | 20 + .../s5/decode/decode-lattice.sh | 112 ++++ .../s5/decode/decode-online.sh | 97 ++++ .../s5/local/make_trans.py | 50 ++ .../s5/local/results.py | 164 ++++++ .../s5/local/save_check_conf.sh | 55 ++ egs/kaldi-vystadial-recipe/s5/local/score.sh | 53 ++ .../s5/local/vystadial_data_prep.sh | 83 +++ .../s5/local/vystadial_format_data.sh | 76 +++ .../s5/local/vystadial_prepare_dict.sh | 91 ++++ egs/kaldi-vystadial-recipe/s5/logs/README | 1 + egs/kaldi-vystadial-recipe/s5/path.sh | 16 + egs/kaldi-vystadial-recipe/s5/run.sh | 201 +++++++ .../s5/steps/align_fmllr.sh | 147 +++++ .../s5/steps/align_sgmm.sh | 193 +++++++ .../s5/steps/align_sgmm2.sh | 193 +++++++ .../s5/steps/align_si.sh | 89 ++++ .../s5/steps/compute_cmvn_stats.sh | 65 +++ egs/kaldi-vystadial-recipe/s5/steps/decode.sh | 97 ++++ .../s5/steps/decode_basis_fmllr.sh | 206 +++++++ .../s5/steps/decode_biglm.sh | 84 +++ .../s5/steps/decode_combine.sh | 59 +++ .../s5/steps/decode_fmllr.sh | 198 +++++++ .../s5/steps/decode_fmmi.sh | 95 ++++ .../s5/steps/decode_fromlats.sh | 90 ++++ .../s5/steps/decode_nnet.sh | 125 +++++ .../s5/steps/decode_sgmm.sh | 254 +++++++++ .../s5/steps/decode_sgmm2.sh | 190 +++++++ .../s5/steps/decode_sgmm2_rescore.sh | 107 ++++ .../s5/steps/decode_sgmm2_rescore_project.sh | 172 ++++++ .../s5/steps/decode_sgmm_rescore.sh | 107 ++++ .../s5/steps/decode_si.sh | 97 ++++ .../s5/steps/get_fmllr_basis.sh | 95 ++++ .../s5/steps/lmrescore.sh | 117 ++++ .../s5/steps/make_bn_feats.sh | 141 +++++ .../s5/steps/make_denlats.sh | 139 +++++ .../s5/steps/make_denlats_sgmm.sh | 157 ++++++ .../s5/steps/make_denlats_sgmm2.sh | 157 ++++++ .../s5/steps/make_fbank.sh | 111 ++++ .../s5/steps/make_mfcc.sh | 111 ++++ .../s5/steps/make_plp.sh | 111 ++++ egs/kaldi-vystadial-recipe/s5/steps/mixup.sh | 146 +++++ 
.../s5/steps/rnnlmrescore.sh | 176 ++++++ .../s5/steps/train_deltas.sh | 142 +++++ .../s5/steps/train_diag_ubm.sh | 125 +++++ .../s5/steps/train_lda_mllt.sh | 191 +++++++ .../s5/steps/train_mmi.sh | 144 +++++ .../s5/steps/train_mmi_fmmi.sh | 221 ++++++++ .../s5/steps/train_mmi_fmmi_indirect.sh | 244 +++++++++ .../s5/steps/train_mmi_sgmm.sh | 153 ++++++ .../s5/steps/train_mmi_sgmm2.sh | 152 ++++++ .../s5/steps/train_mono.sh | 135 +++++ .../s5/steps/train_mpe.sh | 158 ++++++ .../s5/steps/train_nnet.sh | 284 ++++++++++ .../s5/steps/train_quick.sh | 191 +++++++ .../s5/steps/train_sat.sh | 238 +++++++++ .../s5/steps/train_sgmm.sh | 273 ++++++++++ .../s5/steps/train_sgmm2.sh | 292 ++++++++++ .../s5/steps/train_ubm.sh | 128 +++++ .../s5/steps/word_align_lattices.sh | 48 ++ .../s5/utils/add_disambig.pl | 58 ++ .../s5/utils/add_lex_disambig.pl | 101 ++++ .../s5/utils/apply_map.pl | 54 ++ .../s5/utils/best_wer.sh | 25 + .../s5/utils/combine_data.sh | 32 ++ .../s5/utils/convert_ctm.pl | 83 +++ .../s5/utils/eps2disambig.pl | 23 + .../s5/utils/filter_scp.pl | 41 ++ .../s5/utils/find_arpa_oovs.pl | 64 +++ .../s5/utils/fix_data_dir.sh | 80 +++ .../s5/utils/format_lm.sh | 84 +++ .../s5/utils/format_lm_sri.sh | 110 ++++ .../s5/utils/gen_topo.pl | 63 +++ .../s5/utils/int2sym.pl | 71 +++ egs/kaldi-vystadial-recipe/s5/utils/ln.pl | 58 ++ .../s5/utils/make_lexicon_fst.pl | 122 +++++ .../s5/utils/make_unigram_grammar.pl | 54 ++ .../s5/utils/mkgraph.sh | 122 +++++ .../s5/utils/nnet/analyze_alignments.sh | 71 +++ .../s5/utils/nnet/gen_dct_mat.py | 53 ++ .../s5/utils/nnet/gen_hamm_mat.py | 45 ++ .../s5/utils/nnet/gen_mlp_init.py | 83 +++ .../s5/utils/nnet/train_nnet_scheduler.sh | 119 +++++ .../s5/utils/parse_options.sh | 84 +++ .../s5/utils/prepare_lang.sh | 275 ++++++++++ egs/kaldi-vystadial-recipe/s5/utils/queue.pl | 263 +++++++++ .../s5/utils/remove_oovs.pl | 43 ++ .../s5/utils/rnnlm_compute_scores.sh | 69 +++ egs/kaldi-vystadial-recipe/s5/utils/run.pl | 123 +++++ egs/kaldi-vystadial-recipe/s5/utils/s2eps.pl | 27 + .../s5/utils/shuffle_list.pl | 31 ++ .../s5/utils/spk2utt_to_utt2spk.pl | 27 + .../s5/utils/split_data.sh | 97 ++++ .../s5/utils/split_scp.pl | 221 ++++++++ .../s5/utils/subset_data_dir.sh | 119 +++++ .../s5/utils/subset_scp.pl | 84 +++ .../s5/utils/summarize_warnings.pl | 46 ++ .../s5/utils/sym2int.pl | 99 ++++ .../s5/utils/utt2spk_to_spk2utt.pl | 39 ++ .../s5/utils/validate_dict_dir.pl | 142 +++++ .../s5/utils/validate_lang.pl | 501 ++++++++++++++++++ egs/voxforge/online_demo/.gitignore | 9 + src/.gitignore | 115 ++++ src/Makefile | 5 + src/configure | 19 +- src/makefiles/cygwin.mk | 1 + src/makefiles/darwin_10_5.mk | 1 + src/makefiles/darwin_10_6.mk | 1 + src/makefiles/darwin_10_7.mk | 1 + src/makefiles/darwin_10_8.mk | 1 + src/makefiles/linux_atlas.mk | 1 + src/makefiles/linux_atlas_64bit.mk | 1 + src/makefiles/linux_clapack.mk | 1 + src/makefiles/linux_openblas.mk | 4 +- src/python-kaldi-decoding/.gitignore | 17 + src/python-kaldi-decoding/Makefile | 82 +++ src/python-kaldi-decoding/README.md | 65 +++ .../compute-mfcc-feats-test.c | 5 + .../compute-mfcc-feats.cc | 185 +++++++ .../compute-mfcc-feats.h | 16 + src/python-kaldi-decoding/compute-wer-test.c | 5 + src/python-kaldi-decoding/compute-wer.cc | 144 +++++ src/python-kaldi-decoding/compute-wer.h | 16 + .../gmm-latgen-faster-test.c | 5 + .../gmm-latgen-faster.cc | 196 +++++++ src/python-kaldi-decoding/gmm-latgen-faster.h | 16 + .../lattice-best-path-test.c | 5 + .../lattice-best-path.cc | 136 +++++ 
src/python-kaldi-decoding/lattice-best-path.h | 16 + .../little_wavs_data_void_en.scp | 4 + .../online-wav-gmm-decode-faster-test.c | 5 + .../online-wav-gmm-decode-faster.cc | 247 +++++++++ .../online-wav-gmm-decode-faster.h | 17 + .../ordereddefaultdict.py | 46 ++ src/python-kaldi-decoding/run.py | 291 ++++++++++ .../test_cffi_python_dyn.h | 39 ++ src/vystadial-decoder/.ycm_extra_conf.py | 145 +++++ src/vystadial-decoder/README.md | 86 +++ tools/.gitignore | 7 + tools/Makefile | 2 +- tools/extras/install_portaudio.sh | 2 +- 155 files changed, 14774 insertions(+), 8 deletions(-) create mode 100644 INSTALL.md create mode 100644 README.md create mode 100755 egs/babel/s5/local/annotatedKwlist2KWs.pl create mode 100755 egs/babel/s5/local/buildEditDistanceFst.pl create mode 100755 egs/babel/s5/local/count2logprob.pl create mode 100755 egs/babel/s5/local/subsetATWV.pl create mode 100644 egs/kaldi-vystadial-recipe/.gitingore create mode 100644 egs/kaldi-vystadial-recipe/README.md create mode 100644 egs/kaldi-vystadial-recipe/s5/.gitignore create mode 100644 egs/kaldi-vystadial-recipe/s5/cmd.sh create mode 100644 egs/kaldi-vystadial-recipe/s5/conf/decode.config create mode 100644 egs/kaldi-vystadial-recipe/s5/conf/mfcc.conf create mode 100755 egs/kaldi-vystadial-recipe/s5/conf/train_conf.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/decode/decode-lattice.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/decode/decode-online.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/local/make_trans.py create mode 100755 egs/kaldi-vystadial-recipe/s5/local/results.py create mode 100755 egs/kaldi-vystadial-recipe/s5/local/save_check_conf.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/local/score.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/local/vystadial_data_prep.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/local/vystadial_format_data.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/local/vystadial_prepare_dict.sh create mode 100644 egs/kaldi-vystadial-recipe/s5/logs/README create mode 100755 egs/kaldi-vystadial-recipe/s5/path.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/run.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/align_fmllr.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/align_sgmm.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/align_sgmm2.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/align_si.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/compute_cmvn_stats.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/decode.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/decode_basis_fmllr.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/decode_biglm.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/decode_combine.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/decode_fmllr.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/decode_fmmi.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/decode_fromlats.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/decode_nnet.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm2.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm2_rescore.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm2_rescore_project.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm_rescore.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/decode_si.sh create mode 100755 
egs/kaldi-vystadial-recipe/s5/steps/get_fmllr_basis.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/lmrescore.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/make_bn_feats.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/make_denlats.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/make_denlats_sgmm.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/make_denlats_sgmm2.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/make_fbank.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/make_mfcc.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/make_plp.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/mixup.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/rnnlmrescore.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/train_deltas.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/train_diag_ubm.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/train_lda_mllt.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/train_mmi.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/train_mmi_fmmi.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/train_mmi_fmmi_indirect.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/train_mmi_sgmm.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/train_mmi_sgmm2.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/train_mono.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/train_mpe.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/train_nnet.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/train_quick.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/train_sat.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/train_sgmm.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/train_sgmm2.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/train_ubm.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/steps/word_align_lattices.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/add_disambig.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/add_lex_disambig.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/apply_map.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/best_wer.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/combine_data.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/convert_ctm.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/eps2disambig.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/filter_scp.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/find_arpa_oovs.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/fix_data_dir.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/format_lm.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/format_lm_sri.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/gen_topo.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/int2sym.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/ln.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/make_lexicon_fst.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/make_unigram_grammar.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/mkgraph.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/nnet/analyze_alignments.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/nnet/gen_dct_mat.py create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/nnet/gen_hamm_mat.py create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/nnet/gen_mlp_init.py create mode 
100755 egs/kaldi-vystadial-recipe/s5/utils/nnet/train_nnet_scheduler.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/parse_options.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/prepare_lang.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/queue.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/remove_oovs.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/rnnlm_compute_scores.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/run.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/s2eps.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/shuffle_list.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/spk2utt_to_utt2spk.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/split_data.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/split_scp.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/subset_data_dir.sh create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/subset_scp.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/summarize_warnings.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/sym2int.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/utt2spk_to_spk2utt.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/validate_dict_dir.pl create mode 100755 egs/kaldi-vystadial-recipe/s5/utils/validate_lang.pl create mode 100644 egs/voxforge/online_demo/.gitignore create mode 100644 src/.gitignore create mode 100644 src/python-kaldi-decoding/.gitignore create mode 100644 src/python-kaldi-decoding/Makefile create mode 100644 src/python-kaldi-decoding/README.md create mode 100644 src/python-kaldi-decoding/compute-mfcc-feats-test.c create mode 100644 src/python-kaldi-decoding/compute-mfcc-feats.cc create mode 100644 src/python-kaldi-decoding/compute-mfcc-feats.h create mode 100644 src/python-kaldi-decoding/compute-wer-test.c create mode 100644 src/python-kaldi-decoding/compute-wer.cc create mode 100644 src/python-kaldi-decoding/compute-wer.h create mode 100644 src/python-kaldi-decoding/gmm-latgen-faster-test.c create mode 100644 src/python-kaldi-decoding/gmm-latgen-faster.cc create mode 100644 src/python-kaldi-decoding/gmm-latgen-faster.h create mode 100644 src/python-kaldi-decoding/lattice-best-path-test.c create mode 100644 src/python-kaldi-decoding/lattice-best-path.cc create mode 100644 src/python-kaldi-decoding/lattice-best-path.h create mode 100644 src/python-kaldi-decoding/little_wavs_data_void_en.scp create mode 100644 src/python-kaldi-decoding/online-wav-gmm-decode-faster-test.c create mode 100644 src/python-kaldi-decoding/online-wav-gmm-decode-faster.cc create mode 100644 src/python-kaldi-decoding/online-wav-gmm-decode-faster.h create mode 100644 src/python-kaldi-decoding/ordereddefaultdict.py create mode 100755 src/python-kaldi-decoding/run.py create mode 100644 src/python-kaldi-decoding/test_cffi_python_dyn.h create mode 100644 src/vystadial-decoder/.ycm_extra_conf.py create mode 100644 src/vystadial-decoder/README.md create mode 100644 tools/.gitignore diff --git a/INSTALL b/INSTALL index faa8f61829a..2dbf318118c 100644 --- a/INSTALL +++ b/INSTALL @@ -1,4 +1,4 @@ - +This is the official Kaldi INSTALL. Look also at INSTALL.md for the git mirror installation. 
 [for native Windows install, see windows/INSTALL]
 (1)
diff --git a/INSTALL.md b/INSTALL.md
new file mode 100644
index 00000000000..8a7047558d2
--- /dev/null
+++ b/INSTALL.md
@@ -0,0 +1,168 @@
+Installation TIPS for KALDI and installation INSTRUCTIONS for my additional repositories
+=================================================================================
+Intro
+-----
+Kaldi has very good instructions and a tutorial
+for building it from source. It is easy and straightforward.
+However, I also needed to build shared libraries,
+and maybe you will face some of my problems too.
+That is the reason for writing my build procedure down.
+
+Installing external dependencies
+================================
+See `kaldi-trunk/tools/INSTALL` for info.
+Basically it tells you to use `kaldi-trunk/tools/Makefile`, which I also used.
+
+How have I installed OpenBlas?
+----------------------
+Simple enough:
+```bash
+make openblas
+```
+
+How have I installed Openfst?
+----------------------
+In order to install the shared libraries as well,
+I changed line 37 in
+`kaldi-trunk/tools/Makefile`:
+
+```sh
+*** Makefile
+************
+*** 34,38 ****
+
+openfst-1.3.2/Makefile: openfst-1.3.2/.patched
+	cd openfst-1.3.2/; \
+!	./configure --prefix=`pwd` --enable-static --disable-shared --enable-far --enable-ngram-fsts
+
+--- 34,38 ----
+
+openfst-1.3.2/Makefile: openfst-1.3.2/.patched
+	cd openfst-1.3.2/; \
+!	./configure --prefix=`pwd` --enable-static --enable-shared --enable-far --enable-ngram-fsts
+
+```
+Then I ran
+```bash
+make openfst_tgt
+```
+
+How have I installed PortAudio?
+--------------------------
+NOTE: Necessary only for the Kaldi online decoder.
+
+In kaldi-trunk/tools/extras/install_portaudio.sh
+I changed the line
+```
+./configure --prefix=`pwd`/install
+```
+to
+```
+./configure --prefix=`pwd`/install --with-pic
+```
+
+Then I ran
+```bash
+extras/install_portaudio.sh
+```
+
+
+How have I built Kaldi?
+------------------
+```bash
+./configure --openblas-root=`pwd`/../tools/OpenBLAS/install --fst-root=`pwd`/../tools/openfst --static-math=no
+```
+
+Edit `kaldi.mk` and add the `-fPIC` flag.
+TODO It would be nice to do something like
+```bash
+EXTRA_CXXFLAGS=-fPIC make
+EXTRA_CXXFLAGS=-fPIC make ext
+```
+But the local makefiles override `EXTRA_CXXFLAGS`.
+
+If you updated from the svn repository, do not forget to run `make depend`,
+since by *default it is turned off! I always forget about that!*
+```
+# DO NOT FORGET TO CHANGE kaldi.mk TODO SCRIPT IT!
+# make depend and make ext_depend are necessary only if dependencies changed
+make depend && make ext_depend && make && make ext
+```
+
+How have I updated Kaldi src code?
+----------------------------
+I check out the kaldi-trunk version.
+
+[Kaldi install instructions](http://kaldi.sourceforge.net/install.html)
+
+Note: If you checked out Kaldi before March 2013, you need to relocate the svn. See the instructions in the link above!
+
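+
+Before moving on, it is worth checking that the build really produced
+dynamically linked binaries. A minimal sketch of the checks I would run
+(the binary name is only an example; the paths follow the layout above):
+```bash
+cd kaldi-trunk/src
+# A dynamically linked binary should resolve libfst.so from the openfst tree:
+ldd gmmbin/gmm-latgen-faster | grep -i fst
+# The shared OpenFst libraries should exist under the prefix given to ./configure:
+ls ../tools/openfst/lib/libfst.so*
+```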
+
+What setup did I use?
+--------------------
+In order to use Kaldi binaries everywhere, I add them to `PATH`.
+In addition, since I compiled Kaldi dynamically linked against `openfst`, I needed to add the `openfst` library directory to `LD_LIBRARY_PATH`. To conclude, I added the following lines to my `.bashrc`.
+```bash
+############# Kaldi ###########
+kaldisrc=/net/work/people/oplatek/kaldi/src
+export PATH="$PATH":$kaldisrc/bin:$kaldisrc/fgmmbin:$kaldisrc/gmmbin:$kaldisrc/nnetbin:$kaldisrc/sgmm2bin:$kaldisrc/tiedbin:$kaldisrc/featbin:$kaldisrc/fstbin:$kaldisrc/latbin:$kaldisrc/onlinebin:$kaldisrc/sgmmbin
+
+### Openfst ###
+openfst=/ha/home/oplatek/50GBmax/kaldi/tools/openfst
+export PATH="$PATH":$openfst/bin
+export LD_LIBRARY_PATH="$LD_LIBRARY_PATH":$openfst/lib
+```
+
+Which tool for building a Language Model (LM) have I used?
+---------------------------------------------------------
+None. I received an already-built LM in ARPA format.
+
+NOTE: Probably, I should build my own LM.
+
+
+How have I installed Atlas?
+--------------------
+NOTE: I decided NOT to use Atlas, I USE OpenBlas INSTEAD. It is open source and it allows me to compile both shared and static libraries in one run.
+
+Nevertheless, here is how I installed Atlas:
+
+ * I installed version atlas3.10.1.tar.bz2 (available at sourceforge)
+ * I unpacked it under `kaldi-trunk/tools`, which created `kaldi-trunk/tools/ATLAS`
+ * The main problem with building ATLAS for me was disabling CPU throttling.
+ * I solved it by
+
+```bash
+# running the following command under root in my Ubuntu 12.10
+# In fact it does not turn off CPU throttling, but I do not need things optimized for my local machine
+# I ran it for all of my 4 cores
+# for n in 0 1 2 3 ; do echo 'performance' > /sys/devices/system/cpu/cpu${n}/cpufreq/scaling_governor ; done
+```
+
+ * Then I needed to install a Fortran compiler (the error from configure was a little obscured by subsequent errors):
+
+```bash
+sudo apt-get install gfortran
+```
+
+ * On Ubuntu 12.04 I had an issue with
+
+```bash
+/usr/include/features.h:323:26: fatal error: bits/predefs.h
+```
+
+ which I solved by
+
+```bash
+sudo apt-get install --reinstall libc6-dev
+```
+
+ * Finally, in `kaldi-trunk/tools/ATLAS` I ran:
+
+```bash
+mkdir build
+mkdir ../atlas_install
+cd build
+../configure --shared --incdir=`pwd`/../../atlas_install
+make
+make install
+```
diff --git a/README.md b/README.md
new file mode 100644
index 00000000000..b67f5ae3fa9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,25 @@
+ABOUT
+=====
+ * This is a Git mirror of the [Svn trunk of the Kaldi project](http://sourceforge.net/projects/kaldi/)
+   `svn://svn.code.sf.net/p/kaldi/code/trunk`
+ * In the branch `master` I commit my work. In the branch `svn_mirror` I mirror `svn://svn.code.sf.net/p/kaldi/code/trunk`. In the branch `sandbox-oplatek` I am developing changes which I would like to contribute back to Kaldi.
+ * Currently, I mirror the repository manually, as often as needed.
+ * The main purpose of mirroring is that I want to build my own decoder and train my models for decoding based on an up-to-date Kaldi version.
+ * The recipe for training the models can be found at `egs/kaldi-vystadial-recipe`
+ * The source code for the Python wrapper for the online decoder is at `src/python-kaldi-decoding`
+ * Remarks about the new decoder are located at `src/vystadial-decoder`
+ * I use the `Fake submodules` approach to merge the 3 subprojects into this repository. More about `Fake submodules` [at this blog](http://debuggable.com/posts/git-fake-submodules:4b563ee4-f3cc-4061-967e-0e48cbdd56cb).
+ * I mirror the svn via `git svn`. [Nice intro to git svn](http://viget.com/extend/effectively-using-git-with-subversion), [Walk through](http://blog.shinetech.com/2009/02/17/my-git-svn-workflow/) and [Multiple svn-remotes](http://blog.shuningbian.net/2011/05/git-with-multiple-svn-remotes.html)
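+
+For illustration, a manual mirror update along these lines could look as follows; this is just a sketch with the branch names from this README, and it assumes the svn remote is already configured via `git svn`:
+```bash
+git checkout svn_mirror
+git svn rebase        # pull new revisions from svn://svn.code.sf.net/p/kaldi/code/trunk
+git checkout master
+git merge svn_mirror  # resolve conflicts such as the ones listed in this merge commit
+```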
+
+OTHER INFO
+----------
+ * Read `INSTALL.md` and `INSTALL` first!
+ * For training models read `egs/kaldi-vystadial-recipe/s5/README.md`
+ * For building and developing the decoder callable from Python read `src/python-kaldi-decoding/README.md`
+ * For information about the new decoder read `src/vystadial-decoder/README.md`
+ * This work is done under the [Vystadial project](https://sites.google.com/site/filipjurcicek/projects/vystadial).
+
+LICENSE
+--------
+ * We release all the changes at pyKaldi under the `Apache 2.0` license (Kaldi also uses the `Apache 2.0` license).
+ * We also want to publicly release the training data in autumn 2013.
diff --git a/README.txt b/README.txt
index fa0a7a21b93..e482e3ae176 100644
--- a/README.txt
+++ b/README.txt
@@ -1,4 +1,5 @@
-
+This is the official Kaldi readme. You are now in a Kaldi/trunk mirror.
+Read README.md and INSTALL.md first!
 
 See http://kaldi.sourceforge.net/ for documentation
diff --git a/egs/babel/s5/local/annotatedKwlist2KWs.pl b/egs/babel/s5/local/annotatedKwlist2KWs.pl
new file mode 100755
index 00000000000..566005bc89a
--- /dev/null
+++ b/egs/babel/s5/local/annotatedKwlist2KWs.pl
@@ -0,0 +1,124 @@
+#!/usr/bin/perl
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0.
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $Usage = <<EOU;
+Usage: annotatedKwlist2KWs.pl <kwlist_annotated_xml|-> <keyword_list|-> [category]
+ e.g.: annotatedKwlist2KWs.pl kwlist.annot.list keywords.list "NGram Order:2,3,4"
+
+This script reads an annotated kwlist xml file and writes a list of keywords, according
+to the given categories. The "category" is a "key:value" pair in the annotated kwlist xml
+file. For example:
+1. "NGram Order:2,3,4"
+2. "NGram Order:2"
+3. "NGram Order:-"
+where "NGram Order" is the category name. The first line means print keywords that are
+bigram, trigram and 4gram; the second line means print keywords only for bigram; the last
+line means print all possible ngram keywords.
+If no "category" is specified, the script will print out the possible categories.
+
+Allowed options:
+EOU
+
+GetOptions();
+
+@ARGV >= 2 || die $Usage;
+
+# Work out the input/output source
+my $kwlist_filename = shift @ARGV;
+my $kws_filename = shift @ARGV;
+
+my $source = "STDIN";
+if ($kwlist_filename ne "-") {
+  open(KWLIST, "<$kwlist_filename") || die "Fail to open kwlist file: $kwlist_filename\n";
+  $source = "KWLIST";
+}
+
+# Process kwlist.annot.xml
+my %attr;
+my %attr_kws;
+my $kwid="";
+my $name="";
+my $value="";
+while (<$source>) {
+  chomp;
+  if (m/<kw /) {($kwid) = /kwid="(\S+)"/; next;}
+  if (m/<name>/) {($name) = /(.*)<\/name>/; next;}
+  if (m/<value>/) {
+    ($value) = /(.*)<\/value>/;
+    if (defined($attr{$name})) {
+      $attr{"$name"}->{"$value"} = 1;
+    } else {
+      $attr{"$name"} = {"$value", 1};
+    }
+    if (defined($attr_kws{"${name}_$value"})) {
+      $attr_kws{"${name}_$value"}->{"$kwid"} = 1;
+    } else {
+      $attr_kws{"${name}_$value"} = {"$kwid", 1};
+    }
+  }
+}
+
+my $output = "";
+if (@ARGV == 0) {
+  # If no category provided, print out the possible categories
+  $output .= "Possible categories are:\n\n";
+  foreach my $name (keys %attr) {
+    $output .= "$name:";
+    my $count = 0;
+    foreach my $value (keys %{$attr{$name}}) {
+      if ($value eq "") {$value = "\"\"";}
+      if ($count == 0) {
+        $output .= "$value";
+        $count ++; next;
+      }
+      if ($count == 6) {
+        $output .= ", ...";
+        last;
+      }
+      $output .= ",$value"; $count ++;
+    }
+    $output .= "\n";
+  }
+  print STDERR $output;
+  $output = "";
+} else {
+  my %keywords;
+  while (@ARGV > 0) {
+    my $category = shift @ARGV;
+    my @col = split(/:/, $category);
+    @col == 2 || die "Bad category \"$category\"\n";
+    $name = $col[0];
+    if ($col[1] eq "-") {
+      foreach my $value (keys %{$attr{$name}}) {
+        foreach my $kw (keys %{$attr_kws{"${name}_$value"}}) {
+          $keywords{$kw} = 1;
+        }
+      }
+    } else {
+      my @col1 = split(/,/, $col[1]);
+      foreach my $value (@col1) {
+        foreach my $kw (keys %{$attr_kws{"${name}_$value"}}) {
+          $keywords{$kw} = 1;
+        }
+      }
+    }
+  }
+  foreach my $kw (keys %keywords) {
+    $output .= "$kw\n";
+  }
+}
+
+if ($kwlist_filename ne "-") {close(KWLIST);}
+if ($kws_filename eq "-") { print $output;}
+else {
+  open(O, ">$kws_filename") || die "Fail to open file $kws_filename\n";
+  print O $output;
+  close(O);
+}
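Together with subsetATWV.pl, which is added later in this patch, this supports a small select-then-score workflow; a hypothetical invocation (file names are illustrative only):

```bash
# Select all bigram keywords from the annotated kwlist ...
local/annotatedKwlist2KWs.pl kwlist.annot.xml bigram.list "NGram Order:2"
# ... then report the ATWV contribution of just that subset from F4DE's bsum.txt.
local/subsetATWV.pl --subset-name "2gram" bigram.list bsum.txt
```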
(string, default="") + --ins-cost : Insertion cost (double, default=1 ) + --del-cost : Deletion cost (double, default=1 ) + --subs-cost : substitution cost (double, default=1 ) + --boundary-ins-cost : Cost for insertions at work boundary (double, default=0.1) + --boundary-off : No insertions at word boundary (boolean, default=true) +EOU + +my $confusion_matrix = ""; +my $insertion_cost = 1; +my $deletion_cost = 1; +my $substitution_cost = 1; +my $boundary_ins_cost = 0.1; +my $boundary_off="true"; +GetOptions('confusion-matrix=s' => \$confusion_matrix, + 'ins-cost=f' => \$insertion_cost, + 'del-cost=f' => \$deletion_cost, + 'subs-cost=f' => \$substitution_cost, + 'boundary-ins-cost=f' => \$boundary_ins_cost, + 'boundary-off=s' => \$boundary_off); + +@ARGV == 2 || die $Usage; + +$boundary_off eq "true" || $boundary_off eq "false" || die "$0: Bad value for option --boundary-off\n"; + +# Workout the input and output parameters +my $phone_in = shift @ARGV; +my $fst_out = shift @ARGV; + +open(I, "<$phone_in") || die "$0: Fail to open lexicon $phone_in\n"; +open(O, ">$fst_out") || die "$0: Fail to write FST $fst_out\n"; + +# Read confusion matrix +my %confusion; +if ($confusion_matrix ne "") { + open(M, "<$confusion_matrix") || die "$0: Fail to open confusion matrix $confusion_matrix\n"; + while () { + chomp; + my @col = split(); + @col == 3 || die "$0: Bad line in confusion matrix \"$_\"\n"; + $confusion{"$col[0]_$col[1]"} = $col[2]; + } + close(M); +} + +# Start processing +my @phones; +while () { + chomp; + my @col = split(); + @col == 1 || die "$0: Bad number of columns in phone list \"$_\"\n"; + if ($col[0] eq "") {next;} + push(@phones, $col[0]); +} + +# Add insertions, deletions +my $fst = ""; +foreach my $p (@phones) { + if ($confusion_matrix eq "") { + $fst .= "1 1 $p $deletion_cost\n"; # Deletions + $fst .= "1 1 $p $insertion_cost\n"; # Insertions + if ($boundary_off eq "false") { + $fst .= "0 0 $p $boundary_ins_cost\n"; + $fst .= "0 1 $p $boundary_ins_cost\n"; + $fst .= "2 2 $p $boundary_ins_cost\n"; + $fst .= "1 2 $p $boundary_ins_cost\n"; + } + } else { + my $key = "${p}_"; + if (defined($confusion{$key})) { + $fst .= "1 1 $p $confusion{$key}\n"; + } + $key = "_${p}"; + if (defined($confusion{$key})) { + $fst .= "1 1 $p $confusion{$key}\n"; + if ($boundary_off eq "false") { + $fst .= "0 0 $p $confusion{$key}\n"; + $fst .= "0 1 $p $confusion{$key}\n"; + $fst .= "2 2 $p $confusion{$key}\n"; + $fst .= "1 2 $p $confusion{$key}\n"; + } + } + } +} +foreach my $p1 (@phones) { + foreach my $p2 (@phones) { + if ($p1 eq $p2) { + $fst .= "1 1 $p1 $p2 0\n"; + } else { + if ($confusion_matrix eq "") { + $fst .= "1 1 $p1 $p2 $substitution_cost\n"; + } else { + my $key = "${p1}_${p2}"; + if (defined($confusion{$key})) { + $fst .= "1 1 $p1 $p2 $confusion{$key}\n"; + } + } + } + } +} +if ($boundary_off eq "false") { + $fst .= "0 1 0\n"; + $fst .= "1 2 0\n"; + $fst .= "2\n"; +} else { + $fst .= "1\n"; +} + +print O $fst; + +close(I); +close(O); diff --git a/egs/babel/s5/local/count2logprob.pl b/egs/babel/s5/local/count2logprob.pl new file mode 100755 index 00000000000..378a8b8dd97 --- /dev/null +++ b/egs/babel/s5/local/count2logprob.pl @@ -0,0 +1,94 @@ +#!/usr/bin/perl + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) +# Apache 2.0. +# + +use strict; +use warnings; +use Getopt::Long; + +my $Usage = < + This script takes in the confusion phone pair counts and converts + the counts into negated log probabilities. 
diff --git a/egs/babel/s5/local/count2logprob.pl b/egs/babel/s5/local/count2logprob.pl
new file mode 100755
index 00000000000..378a8b8dd97
--- /dev/null
+++ b/egs/babel/s5/local/count2logprob.pl
@@ -0,0 +1,94 @@
+#!/usr/bin/perl
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0.
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $Usage = <<EOU;
+Usage: count2logprob.pl [options] <counts_in> <confusion_matrix_out>
+ This script takes in the confusion phone pair counts and converts
+ the counts into negated log probabilities. The counts should be in
+ the following format:
+   p1 p2 count1        // For substitution
+   p3 <eps> count2     // For deletion
+   <eps> p4 count3     // For insertion
+
+Allowed options:
+  --cutoff : Minimal count to be considered (int, default=1)
+EOU
+
+my $cutoff = 1;
+GetOptions('cutoff=i' => \$cutoff);
+
+@ARGV == 2 || die $Usage;
+
+# Work out the input and output parameters
+my $cm_in = shift @ARGV;
+my $cm_out = shift @ARGV;
+
+open(I, "<$cm_in") || die "$0: Fail to open counts file $cm_in\n";
+open(O, ">$cm_out") || die "$0: Fail to write confusion matrix $cm_out\n";
+
+# Collect counts
+my %ins;
+my %del;
+my %subs;
+my %phone_count;
+my $ins_count = 0;
+my $del_count = 0;
+while (<I>) {
+  chomp;
+  my @col = split();
+  @col == 3 || die "$0: Bad line in confusion matrix file: $_\n";
+  my ($p1, $p2, $count) = ($col[0], $col[1], $col[2]);
+  $count >= $cutoff || next;
+  if ($p1 eq "<eps>" && $p2 ne "<eps>") {
+    $ins{$p2} = $count;
+    $ins_count += $count;
+  } elsif ($p1 ne "<eps>" && $p2 eq "<eps>") {
+    $del{$p1} = $count;
+    $del_count += $count;
+  } elsif ($p1 ne "<eps>" && $p2 ne "<eps>") {
+    $p1 ne $p2 || next;    # Skip identical phone pairs
+    $subs{"${p1}_$p2"} = $count;
+    if (defined($phone_count{$p1})) {
+      $phone_count{$p1} += $count;
+    } else {
+      $phone_count{$p1} = $count;
+    }
+  }
+}
+
+# Compute negated log probability
+foreach my $key (keys %ins) {
+  $ins{$key} = -log($ins{$key}/$ins_count);
+}
+foreach my $key (keys %del) {
+  $del{$key} = -log($del{$key}/$del_count);
+}
+foreach my $key (keys %subs) {
+  my @col = split(/_/, $key);
+  $subs{$key} = -log($subs{$key}/$phone_count{$col[0]});
+}
+
+# Print results
+my $output = "";
+foreach my $key (keys %ins) {
+  $output .= "<eps> $key $ins{$key}\n";
+}
+foreach my $key (keys %del) {
+  $output .= "$key <eps> $del{$key}\n";
+}
+foreach my $key (keys %subs) {
+  my @col = split(/_/, $key);
+  $output .= "$col[0] $col[1] $subs{$key}\n";
+}
+
+print O $output;
+
+close(I);
+close(O);
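As a toy check of the conversion (the counts below are made up): substitution probabilities are normalized by the total count of the source phone before the log is negated, and the output is in exactly the three-column format that `buildEditDistanceFst.pl --confusion-matrix` above consumes:

```bash
cat > counts.txt <<EOF
a b 1
a c 3
EOF
local/count2logprob.pl counts.txt confusion.txt
cat confusion.txt
# a b 1.386...   (i.e. -log(1/4); digits as Perl happens to print them)
# a c 0.2876...  (i.e. -log(3/4))
```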
+my $kws = ""; +while (<$source>) { + chomp; + my @col = split(); + @col == 1 || die "Bad line $_\n"; + if ($kws eq "") { + $kws = $col[0]; + } else { + $kws .= "|$col[0]"; + } +} + +# Process bsum.txt +my $targ_sum = 0; +my $corr_sum = 0; +my $fa_sum = 0; +my $miss_sum = 0; +my $twv_sum = 0; +my $count = 0; +my $subset_count = 0; +my $flag = 0; +if ($kws ne "") { + while () { + chomp; + # Workout the total keywords that have occurrence in the search collection + if (/^Summary Totals/) {$flag = 0;} + if (/^Keyword/) {$flag = 1;} + my @col; + if ($flag == 1) { + # Figure out keywords that don't have occurrences in the search collection + @col = split(/\|/, $_); + $col[2] =~ s/^\s+//; + $col[2] =~ s/\s+$//; + $col[2] ne "" || next; + $count ++; + } else { + next; + } + + # Only collect statistics for given subset + m/$kws/ || next; + + # Keywods that are in the given subset, and have occurrences + $targ_sum += $col[2]; + $corr_sum += $col[3]; + $fa_sum += $col[4]; + $miss_sum += $col[5]; + $twv_sum += $col[6]; + $subset_count ++; + } +} + +# Compute ATWV +my $subset_atwv = ($subset_count == 0) ? 0 : $twv_sum/$subset_count; +my $atwv = ($count == 0) ? 0 : $twv_sum/$count; +my $bp_atwv = ($count == 0) ? 0 : $subset_count/$count; + +# Format the numbers +my $format = "%-${width}d"; +$subset_count = sprintf($format, $subset_count); +$targ_sum = sprintf($format, $targ_sum); +$corr_sum = sprintf($format, $corr_sum); +$fa_sum = sprintf($format, $fa_sum); +$miss_sum = sprintf($format, $miss_sum); +$subset_atwv = sprintf("% .4f", $subset_atwv); +$atwv = sprintf("% .4f", $atwv); +$bp_atwv = sprintf("% .4f", $bp_atwv); + +# Print +if ($subset_name ne "") {print "$subset_name: ";} +print "#Keywords=$subset_count, #Targ=$targ_sum, #Corr=$corr_sum, #FA=$fa_sum, #Miss=$miss_sum, "; +print "Contributed ATWV=$atwv, Best Possible Contributed ATWV=$bp_atwv, ATWV=$subset_atwv\n"; + +if ($kws_filename ne "-") {close(KWS);} +close(BSUM); diff --git a/egs/kaldi-vystadial-recipe/.gitingore b/egs/kaldi-vystadial-recipe/.gitingore new file mode 100644 index 00000000000..bbd86a25b01 --- /dev/null +++ b/egs/kaldi-vystadial-recipe/.gitingore @@ -0,0 +1,2 @@ +data +exp diff --git a/egs/kaldi-vystadial-recipe/README.md b/egs/kaldi-vystadial-recipe/README.md new file mode 100644 index 00000000000..7d4fcfe7d56 --- /dev/null +++ b/egs/kaldi-vystadial-recipe/README.md @@ -0,0 +1,54 @@ +SUMMARY +------- +KALDI recipe based on voxforge KALDI recipe +http://vpanayotov.blogspot.cz/2012/07/voxforge-scripts-for-kaldi.html . +Requires KALDI installation and Linux environment. (Tested on Ubuntu 10.04 and 12.10.) +Written in Bash an Python 2.7.3. + +DESCRIPTION +----------- + * Our scripts prepare the data to expected format in s5/data. + * Stores experiments in s5/exp + * steps/ contains common scripts from wsj/s5/utils + * utils/ cotains common scritps from wsj/s5/utils + * local/ contains scripts for data preparation to prepare s5/data structure + * conf/ contains a few configuration files for KALDI + + +Runnning experiments +-------------------- +Before running the experiments check the following files: + * `conf` directory contains different configuration related for the training + * `path.sh` just set up path for running Kaldi binaries and path to data. + You should also setup `njobs` according your computer capabalities. + * `cmd.sh` set training commands e.g. for SGE grid. + * If you set up everything right, just launch `run.sh` It will create `mfcc`, `data` and `exp` directories. 
+   If any of them exists, it will ask you if you want them to be overwritten.
+   ```bash
+   ./run.sh | tee mylog.log  # I always store the output to the log
+   ```
+ * I wrote a quick-and-dirty script for collecting results. It's really beta software. It may crash, but it works for me.
+   ```bash
+$ local/results.py exp  # specify the experiment directory and wait a while
+exp           RT coef         WER          SER
+_ri3b_fmmi_b  2.42235533333   (19.45, 13)  (44.67, 11)
+tri2b_mpe     0.37968465      (20.83, 20)  (47.2, 14)
+mono          0.9478559       (52.42, 15)  (77.33, 14)
+tri3b_mmi     0.357894733333  (19.77, 16)  (46.0, 11)
+tri1          0.6558491       (27.12, 18)  (57.33, 20)
+...
+... and other results in plaintext
+...
+==================
+\begin{tabular}{cccc}
+exp & RT coef & WER & SER \\
+_ri3b_fmmi_b & 2.42235533333 & (19.45, 13) & (44.67, 11)\\
+tri2b_mpe & 0.37968465 & (20.83, 20) & (47.2, 14) \\
+mono & 0.9478559 & (52.42, 15) & (77.33, 14)\\
+tri3b_mmi & 0.357894733333 & (19.77, 16) & (46.0, 11) \\
+tri1 & 0.6558491 & (27.12, 18) & (57.33, 20)\\
+...
+... and the same results in TeX
+...
+
+   ```
diff --git a/egs/kaldi-vystadial-recipe/s5/.gitignore b/egs/kaldi-vystadial-recipe/s5/.gitignore
new file mode 100644
index 00000000000..35e801d3f8c
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/.gitignore
@@ -0,0 +1,7 @@
+data
+exp*
+mfcc
+tools
+data_voip_en*
+Results
+voip
diff --git a/egs/kaldi-vystadial-recipe/s5/cmd.sh b/egs/kaldi-vystadial-recipe/s5/cmd.sh
new file mode 100644
index 00000000000..fb1d5d951d6
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/cmd.sh
@@ -0,0 +1,13 @@
+# "queue.pl" uses qsub. The options to it are
+# options to qsub. If you have GridEngine installed,
+# change this to a queue you have access to.
+# Otherwise, use "run.pl", which will run jobs locally
+# (make sure your --num-jobs options are no more than
+# the number of cpus on your machine).
+
+#export train_cmd="queue.pl -q all.q@a*.clsp.jhu.edu"
+#export decode_cmd="queue.pl -q all.q@a*.clsp.jhu.edu"
+# export train_cmd="queue.pl -l mf=5g"
+# export decode_cmd="queue.pl -l mf=5g"
+export train_cmd=run.pl
+export decode_cmd=run.pl
diff --git a/egs/kaldi-vystadial-recipe/s5/conf/decode.config b/egs/kaldi-vystadial-recipe/s5/conf/decode.config
new file mode 100644
index 00000000000..332ae89de9d
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/conf/decode.config
@@ -0,0 +1,3 @@
+first_beam=10.0
+beam=13.0
+lat_beam=6.0
diff --git a/egs/kaldi-vystadial-recipe/s5/conf/mfcc.conf b/egs/kaldi-vystadial-recipe/s5/conf/mfcc.conf
new file mode 100644
index 00000000000..cd5dc059b45
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/conf/mfcc.conf
@@ -0,0 +1,7 @@
+--use-energy=false  # non-default option: false -> use C0 instead of energy
+# NUMCEPS in HTK is without C0: there 12, here 13 (the default)
+--low-freq=125
+--high-freq=3800
+--htk-compat
+--remove-dc-offset  # equivalent to ZMEANSOURCE in HTK
+# --subtract-mean  # not recommended to do it this way
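A config like this is consumed by Kaldi's feature extraction binary (the recipe invokes it through steps/make_mfcc.sh); a minimal sketch of the underlying call, with placeholder paths:

```bash
compute-mfcc-feats --config=conf/mfcc.conf \
  scp:data/train/wav.scp ark:mfcc/raw_mfcc_train.ark
```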
diff --git a/egs/kaldi-vystadial-recipe/s5/conf/train_conf.sh b/egs/kaldi-vystadial-recipe/s5/conf/train_conf.sh
new file mode 100755
index 00000000000..a0947dcde96
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/conf/train_conf.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# How big a portion of the available data to use
+# everyN=3 -> we use one third of the data
+everyN=1
+
+# Train monophone models on a subset of the data of this size
+monoTrainData=1000
+
+# Number of states (pdfs) for phoneme training
+pdf=1200
+
+# Maximum number of Gaussians used for training
+gauss=19200
+
+# Test-time language model order
+# We are just copying the ARPA LM (3rd order)
+lm_order=3
+
+train_mmi_boost=0.05
diff --git a/egs/kaldi-vystadial-recipe/s5/decode/decode-lattice.sh b/egs/kaldi-vystadial-recipe/s5/decode/decode-lattice.sh
new file mode 100755
index 00000000000..53e2a8b3284
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/decode/decode-lattice.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+# -*- coding: utf-8 -*-
+# Author: Ondrej Platek, 2013; the code is without any warranty!
+# Created: 10:48:02 09/04/2013
+# Modified: 16:52:59 10/04/2013
+
+# Set the paths to the binaries and scripts needed
+fwd=`dirname $0`
+KALDI_ROOT=$fwd/../../../..
+export PATH="$PATH":$fwd/../steps/:$fwd/../utils/:$KALDI_ROOT/src/onlinebin:$KALDI_ROOT/src/bin
+
+# Change this to "tri2a" if you would like to test using an ML-trained model
+ac_model_type=tri2a
+exp="$fwd/../Results/expc0bcaa8acd2732dce7c25c27b945d566d80ca7a6"
+data="$fwd/../data_voip_en1/test"
+
+# Alignments and decoding results are saved in this directory (simulated decoding only)
+decode_dir="$fwd/../exp-decode-lat"
+
+# Change this to "live" either here or using a command line switch like:
+# --test-mode live  # NOT SUPPORTED YET
+test_mode="simulated"
+
+# decoding parameters
+cmd=run.pl
+nj=1  # we do not do data_split as in steps/decode.sh
+max_active=7000
+beam=13.0
+latbeam=6.0
+acwt=0.083333  # note: only really affects pruning (scoring is on lattices).
+lmwt=9  # TODO set up according to experiments
+feat_type='delta'
+
+. $fwd/../path.sh;  # source the path.
+. parse_options.sh || exit 1;
+
+
+ac_model="$exp/$ac_model_type"
+
+if [ ! -d $ac_model ]; then
+  echo "The directory for the AC model does not exist: $ac_model"
+  exit 1
+fi
+
+case $test_mode in
+  live)
+    echo
+    echo "CURRENTLY NOT SUPPORTED!"
+    echo -e "  LIVE DEMO MODE - you can use a microphone and say something\n"
+    echo "Using model in $ac_model directory"
+    echo "CURRENTLY NOT SUPPORTED!"
+    echo
+    exit 1;;
+  simulated)
+    echo
+    echo -e "  SIMULATED ONLINE DECODING - pre-recorded audio is used\n"
+    echo "Test files are from directory $data"
+    echo "Using model in $ac_model directory"
+    echo
+    ;;
+
+  *)
+    echo "Invalid test mode! Should be either \"live\" or \"simulated\"!";
+    exit 1;;
+esac
+
+# Estimate the error rate for the simulated decoding
+if [ $test_mode == "simulated" ]; then
+  mkdir -p $decode_dir
+  # Reset the files - do not append
+  rm -f $decode_dir/wav.scp "$decode_dir/ref.txt" "$decode_dir/utt2spk"
+  for f in "$data"/*.wav; do
+    name=`basename $f`
+    echo "$name $f" >> $decode_dir/wav.scp
+    echo "$name $name" >> $decode_dir/utt2spk
+    # symbols=`sym2int.pl $ac_model/graph/words.txt < "${f}.trn"`
+    symbols=`cat "${f}.trn"`
+    echo "$name $symbols" >> $decode_dir/ref.txt
+  done
+
+  # utt2spk_to_spk2utt.pl (in utils) creates spk2utt from utt2spk
+  utt2spk_to_spk2utt.pl "$decode_dir"/utt2spk > "$decode_dir/spk2utt" || exit 1
+  # make_mfcc.sh (in steps) creates feats.scp  FIXME creates a wrong scp
+  mkdir -p $decode_dir/mfcc
+  time ( make_mfcc.sh --cmd "$cmd" --nj $nj $decode_dir $decode_dir $decode_dir/mfcc || exit 1 )
+  # compute_cmvn_stats.sh (in steps) creates cmvn.scp
+  time ( compute_cmvn_stats.sh $decode_dir $decode_dir $decode_dir/mfcc || exit 1 )
+
+  # Decoding: Based on steps/decode.sh and local/score.sh
+  case $feat_type in
+    delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$decode_dir/utt2spk scp:$decode_dir/cmvn.scp scp:$decode_dir/feats.scp ark:- | add-deltas ark:- ark:- |";;
+    # lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";;
+    *) echo "Invalid feature type $feat_type" && exit 1;
+  esac
+
+  # TODO How is gmm-latgen parallelized? Over data -> bad for us!
+  # TODO $nj == 1: does it depend on the data? IMHO yes (see steps/decode.sh)
+  time ( $cmd JOB=1:$nj $decode_dir/decodeLattice.JOB.log \
+    gmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$latbeam \
+    --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$ac_model/graph/words.txt \
+    $ac_model/final.mdl $ac_model/graph/HCLG.fst "$feats" "ark:|gzip -c > $decode_dir/lat.JOB.gz" || exit 1 )
+
+  time ( lattice-best-path --lm-scale=$lmwt --word-symbol-table=$ac_model/graph/words.txt \
+    "ark:gunzip -c $decode_dir/lat.*.gz|" ark,t:$decode_dir/trans.txt || exit 1 )
+
+  # Finally compute WER
+  cat $decode_dir/trans.txt | \
+    utils/int2sym.pl -f 2- $ac_model/graph/words.txt | sed 's:\::g' | \
+    compute-wer --text --mode=present \
+    ark:$decode_dir/ref.txt ark,p:- >& $decode_dir/wer || exit 1;
+
+fi
diff --git a/egs/kaldi-vystadial-recipe/s5/decode/decode-online.sh b/egs/kaldi-vystadial-recipe/s5/decode/decode-online.sh
new file mode 100755
index 00000000000..0b302d3bf42
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/decode/decode-online.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+# FIXME in general in bad shape
+# 1) WER is wrong
+
+# Copyright 2013 Ondrej Platek, based on a Vassil Panayotov script
+# Apache 2.0
+
+# Set the paths to the binaries and scripts needed
+fwd=`dirname $0`
+KALDI_ROOT=$fwd/../../../..
+export PATH=$fwd/../../s5/utils/:$KALDI_ROOT/src/onlinebin:$KALDI_ROOT/src/bin:$PATH
+
+# Change this to "tri2a" if you would like to test using an ML-trained model
+ac_model_type=tri2a
+exp="$fwd/../Results/expc0bcaa8acd2732dce7c25c27b945d566d80ca7a6"
+data="$fwd/../data_voip_en/test"
+
+# Alignments and decoding results are saved in this directory (simulated decoding only)
+decode_dir="$fwd/../exp-decode"
+
+# Change this to "live" either here or using a command line switch like:
+# --test-mode live
+test_mode="simulated"
+
+. parse_options.sh
+
+ac_model="$exp/$ac_model_type"
+trans_matrix=""
+
+
+if [ ! -d $ac_model ]; then
+  echo "The directory for the AC model does not exist: $ac_model"
+  exit 1
+fi
+
+if [ -s $ac_model/matrix ]; then
+  trans_matrix=$ac_model/matrix  # lda matrix
+fi
+
+case $test_mode in
+  live)
+    echo
+    echo -e "  LIVE DEMO MODE - you can use a microphone and say something\n"
+    echo "Using model in $ac_model directory"
+    echo
+    online-gmm-decode-faster --rt-min=0.5 --rt-max=0.7 --max-active=4000 \
+      --beam=12.0 --acoustic-scale=0.0769 $ac_model/final.mdl $ac_model/graph/HCLG.fst \
+      $ac_model/graph/words.txt '1:2:3:4:5' $trans_matrix;;
+
+  simulated)
+    echo
+    echo -e "  SIMULATED ONLINE DECODING - pre-recorded audio is used\n"
+    echo "Test files are from directory $data"
+    echo "Using model in $ac_model directory"
+    echo
+    ;;
+
+  *)
+    echo "Invalid test mode! Should be either \"live\" or \"simulated\"!";
+    exit 1;;
+esac
+
+# Estimate the error rate for the simulated decoding
+if [ $test_mode == "simulated" ]; then
+  mkdir -p $decode_dir
+  # Create a new input.scp file
+  rm -f $decode_dir/input.scp
+  for f in "$data"/*.wav; do
+    bf=`basename $f`
+    bf=${bf%.wav}
+    echo $bf $f >> $decode_dir/input.scp
+  done
+  # Decode
+  online-wav-gmm-decode-faster --verbose=1 --rt-min=0.8 --rt-max=0.85 \
+    --max-active=4000 --beam=12.0 --acoustic-scale=0.0769 \
+    scp:$decode_dir/input.scp $ac_model/final.mdl $ac_model/graph/HCLG.fst \
+    $ac_model/graph/words.txt '1:2:3:4:5' ark,t:$decode_dir/trans.txt \
+    ark,t:$decode_dir/ali.txt $trans_matrix
+
+  # Create a new ref.txt file
+  rm -f "$decode_dir/ref.txt"
+  cat $decode_dir/input.scp | tr -s ' ' | cut -d ' ' -f 2- |\
+  while read wav_file ; do
+    # Convert the reference transcripts from symbols to word IDs
+    symbols=`sym2int.pl $ac_model/graph/words.txt < "$wav_file.trn"`
+    name=`basename "$wav_file"`
+    name=${name%.wav}
+    echo "$name $symbols" >> $decode_dir/ref.txt
+  done
+
+  # Compact the hypotheses belonging to the same test utterance
+  cat $decode_dir/trans.txt | tr -s ' ' | sed -r 's:_[0-9]+-[0-9]+\>::' |\
+    gawk '{key=$1; $1=""; arr[key]=arr[key] " " $0; } END { for (k in arr) { print k " " arr[k]} }' > $decode_dir/hyp.txt
+
+  # Finally compute WER
+  compute-wer --mode=all --verbose=100 ark,t:$decode_dir/ref.txt ark,t:$decode_dir/hyp.txt
+fi
diff --git a/egs/kaldi-vystadial-recipe/s5/local/make_trans.py b/egs/kaldi-vystadial-recipe/s5/local/make_trans.py
new file mode 100755
index 00000000000..59ced4e3747
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/local/make_trans.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+# Copyright 2012 Vassil Panayotov
+# Apache 2.0
+
+"""
+Takes a "PROMPTS" file with lines like:
+1snoke-20120412-hge/mfc/a0405 IT SEEMED THE ORDAINED ORDER OF THINGS THAT DOGS SHOULD WORK
+
+, an ID prefix and a list of audio file names (e.g. for the above example the list will contain "a0405").
+It checks whether the prompts file has a transcription for all audio files in the list and,
+if this is the case, produces a transcript line for each file in the format:
+prefix_a0405 IT SEEMED THE ORDAINED ORDER OF THINGS THAT DOGS SHOULD WORK
+"""
+
+import sys
+
+def err(msg):
+    print >> sys.stderr, msg
+
+if len(sys.argv) != 4:
+    err("Usage: %s <prompts_file> <id_prefix> <utt_id_list>" % sys.argv[0])
+    sys.exit(1)
+
+#err(str(sys.argv))
+id_prefix = sys.argv[2]
+utt_ids = sys.argv[3].strip().split()
+utt2trans = dict()
+unnorm_utt = set()
+for l in file(sys.argv[1]):
+    u, trans = l.split(None, 1)
+    u = u.strip().split('/')[-1]
+    trans = trans.strip().replace("-", " ")
+    if not trans.isupper() or \
+       not trans.strip().replace(' ', '').replace("'", "").isalpha():
+        err("The transcript for '%s' (user '%s') is not properly normalized - skipped!"
+            % (u, id_prefix))
+        err(trans)
+        unnorm_utt.add(u)
+        continue
+    utt2trans[u] = trans
+
+for uid in utt_ids:
+    if uid in unnorm_utt:
+        continue  # avoid double reporting the same problem
+    if uid not in utt2trans:
+        err("No transcript found for %s_%s" % (id_prefix, uid))
+        continue
+    print "%s_%s %s" % (id_prefix, uid, utt2trans[uid])
+
diff --git a/egs/kaldi-vystadial-recipe/s5/local/results.py b/egs/kaldi-vystadial-recipe/s5/local/results.py
new file mode 100755
index 00000000000..3586197ada8
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/local/results.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python
+# Author: Ondrej Platek, 2013; the code is without any warranty!
+# Created: 14:29:00 07/03/2013
+# Modified: 14:29:00 07/03/2013
+
+import argparse
+import subprocess
+import re
+from numpy import mean
+
+
+def getLog(path):
+    try:
+        rt = subprocess.check_output(['grep', '-r', 'real-time factor', path])
+        wer = subprocess.check_output(['grep', '-r', '%WER', path])
+        ser = subprocess.check_output(['grep', '-r', '%SER', path])
+        return (rt, wer, ser)
+    except subprocess.CalledProcessError as err:
+        print err
+
+
+def readLogs(rtpath, werpath):
+    # let the exception be seen
+    rt = open(rtpath).read()
+    wer = open(werpath).read()
+    ser = wer  # ATTENTION: in our setting the same log
+    return (rt, wer, ser)
+
+
+def extractResults(rt, wer, ser):
+    rt = rt.splitlines()
+    wer = wer.splitlines()
+    ser = ser.splitlines()
+    # expp follows the naming convention for the exp directories:
+    # exp/ expBlabalabla/ exp_asdfasdf/ ...
+    expp = re.compile(r'exp.*?/(.*?)/')
+    rtp = re.compile(r'([0-9\.]+)$')
+    werp = re.compile(r'%WER ([0-9\.]+)')
+    serp = re.compile(r'%SER ([0-9\.]+)')
+    itp = re.compile(r'wer_([0-9][0-9]?):%[SW]ER')
+
+    # for l in ser:  # debugging
+    #     print l
+    #     print expp.search(l).group(1)
+    try:
+        rts = [(expp.search(l).group(1), rtp.search(l).group(1)) for l in rt]
+    except:
+        print rt
+        raise
+    try:
+        wers = [(expp.search(l).group(1), werp.search(l).group(1), itp.search(l).group(1))
+                for l in wer]
+    except:
+        print wer
+        raise
+    try:
+        sers = [(expp.search(l).group(1), serp.search(l).group(1), itp.search(l).group(1))
+                for l in ser]
+    except:
+        print ser
+        raise
+
+    exp_names = list(
+        set([n for (n, _) in rts] + [n for (n, _, _) in wers]))
+    results = {}
+    for e in exp_names:
+        w = [(float(wr), int(it)) for (exp, wr, it) in wers if exp == e]
+        s = [(float(sr), int(it)) for (exp, sr, it) in sers if exp == e]
+        r = [float(r_) for (exp, r_) in rts if exp == e]
+        w.sort()
+        s.sort()
+        r.sort()
+        results[e] = (w, s, r)
+    return results
+
+
+class Table(object):
+    def __init__(self, data=[], colnames=[]):
+        self.data = data
+        self.colnames = colnames
+        self.colSep = '\t'
+        self.lineSep = '\n'
+
+    def data2str(self):
+        strdata = []
+        for r in self.data:
+            strdata.append([str(c) for c in r])
+        return strdata
+
+    def __str__(self):
+        sd = self.data2str()
+        colwidth = [len(c) for c in self.colnames]
+        for j in range(len(colwidth)):
+            for r in sd:
+                colwidth[j] = max(colwidth[j], len(r[j]))
+
+        gaps = [m - len(c) for (m, c) in zip(colwidth, self.colnames)]
+        rows = [self.colSep.join(
+            [c + ' ' * gap for c, gap in zip(self.colnames, gaps)])]
+        for r in sd:
+            gaps = [m - len(c) for (m, c) in zip(colwidth, r)]
+            rows.append(
+                self.colSep.join([c + ' ' * d for c, d in zip(r, gaps)]))
+        return self.lineSep.join(rows)
+
+
+class LatexTable(Table):
+    def __init__(self, data=[], colnames=[]):
+        Table.__init__(self, data, colnames)
+        nc = len(colnames)
+        self.header = '\\begin{tabular}{%s}' % ('c' * nc)
+        self.tail = '\\end{tabular}'
+        self.colSep = ' & '
+        self.lineSep = '\\\\ \n'
+
+    def __str__(self):
+        table_s = super(LatexTable, self).__str__()
+        return '%s\n%s\n%s\n' % (self.header, table_s, self.tail)
+
+
+def Table2LatexTable(table):
+    return LatexTable(table.data, table.colnames)
+
+
+def createSmallTable(r):
+    d = []
+    for k, v in r.iteritems():
+        w, s, r = v
+        if w == []:
+            minw = None
+        else:
+            minw = min(w)  # returns a tuple if w is a list of tuples
+        if s == []:
+            mins = None
+        else:
+            mins = min(s)  # returns a tuple if s is a list of tuples
+        d.append([k, mean(r), minw, mins])
+    t = Table(d, ['exp', 'RT coef', 'WER', 'SER'])
+    return t
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description='Parse WER and real-time ratio logs generated by run.sh')
+    # TODO parser with commands log and exp e.g.:
+    # http://pymotw.com/2/argparse/#mutually-exclusive-options
+
+    exp = True
+    if exp:
+        parser.add_argument('expath', type=str, action='store')
+        p = parser.parse_args()
+        rt, wer, ser = getLog(p.expath)
+    else:
+        parser.add_argument('--werlog', action='store', type=str)
+        parser.add_argument('--rtlog', action='store', type=str)
+        p = parser.parse_args()
+        rt, wer, ser = readLogs(p.rtlog, p.werlog)
+
+    r = extractResults(rt, wer, ser)
+    t = createSmallTable(r)
+    print t
+    print '=================='
+    t2 = Table2LatexTable(t)
+    print t2
00000000000..b1e7565b6c0
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/local/save_check_conf.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Author: Ondrej Platek, 2013, Apache 2.0
+# Created: 09:56:45 13/03/2013
+# Modified: 09:56:45 13/03/2013
+
+if [ ! -d "$DATA_ROOT" ]; then
+  echo "You need to set the \"DATA_ROOT\" variable in path.sh to point to the directory hosting the data"
+  exit 1
+fi
+
+# Ask about REMOVING the exp and data directories
+if [ "$(ls -A exp 2>/dev/null)" ]; then
+  read -p "Directory 'exp' is NON-EMPTY. Do you want it to be OVERWRITTEN (y/n)? "
+  case $REPLY in
+    [Yy]* ) echo 'Deleting exp directory'; rm -rf exp;;
+    [Nn]* ) echo 'Keeping exp directory';;
+    * ) echo 'Keeping exp directory and cancelling...'; exit 1;;
+  esac
+fi
+
+if [ "$(ls -A data 2>/dev/null)" ]; then
+  read -p "Directory 'data' is NON-EMPTY. Do you want it to be OVERWRITTEN (y/n)? "
+  case $REPLY in
+    [Yy]* ) echo 'Deleting data directory'; rm -rf data;;
+    [Nn]* ) echo 'Reusing DATA SPLIT, LM and MFCCs. SEE THE SCRIPT!';
+        mkdir -p exp/conf  # exp may have been deleted above
+        echo 'REUSING DATA from previous experiment!' \
+            'Check that everyN is THE SAME' >> exp/conf/train_conf.sh ;;
+    * ) echo 'Keeping the data directory and cancelling...'
+        exit 1;;
+  esac
+fi
+
+if [ "$(ls -A ${MFCC_DIR} 2>/dev/null)" ]; then
+  read -p "Directory '${MFCC_DIR}' is NON-EMPTY. Do you want it to be OVERWRITTEN (y/n)? "
+  case $REPLY in
+    [Yy]* ) echo "Deleting ${MFCC_DIR}"; rm -rf "${MFCC_DIR}";;
+    [Nn]* ) echo "Reusing MFCCs at ${MFCC_DIR}!";
+        mkdir -p exp/conf  # exp may have been deleted above
+        echo 'REUSING MFCC from previous experiment!' \
+            'Check that the settings are THE SAME!' >> exp/conf/mfcc.conf
+        ;;
+    * ) echo 'Keeping the data directory and cancelling...';
+        exit 1;;
+  esac
+fi
+
+# make sure that the directories exist
+mkdir -p "$MFCC_DIR"
+mkdir -p "exp"
+mkdir -p "data"
+
+# Copy the current settings to the exp directory
+cp -r conf exp
+cp cmd.sh path.sh exp/conf
+git log -1 > exp/conf/git_log_state.log
+git diff > exp/conf/git_diff_state.log
diff --git a/egs/kaldi-vystadial-recipe/s5/local/score.sh b/egs/kaldi-vystadial-recipe/s5/local/score.sh
new file mode 100755
index 00000000000..e5737c01a65
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/local/score.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
+# Apache 2.0
+
+# begin configuration section.
+cmd=run.pl
+min_lmwt=9
+max_lmwt=20
+#end configuration section.
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+  echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
+  echo " Options:"
+  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
+  echo "    --min_lmwt <int>                # minimum LM-weight for lattice rescoring "
+  echo "    --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
+  exit 1;
+fi
+
+data=$1
+lang_or_graph=$2
+dir=$3
+
+symtab=$lang_or_graph/words.txt
+
+for f in $symtab $dir/lat.1.gz $data/text; do
+  [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1;
+done
+
+mkdir -p $dir/scoring/log
+
+cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt
+
+$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \
+  lattice-best-path --lm-scale=LMWT --word-symbol-table=$symtab \
+    "ark:gunzip -c $dir/lat.*.gz|" ark,t:$dir/scoring/LMWT.tra || exit 1;
+
+# Note: the double level of quoting for the sed command
+$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
+  cat $dir/scoring/LMWT.tra \| \
+    utils/int2sym.pl -f 2- $symtab \| sed 's:OOV::g' \| \
+    compute-wer --text --mode=present \
+      ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1;
+
+# Show results
+for f in $dir/wer_*; do echo $f; egrep '(WER)|(SER)' < $f; done
+
+exit 0;
diff --git a/egs/kaldi-vystadial-recipe/s5/local/vystadial_data_prep.sh b/egs/kaldi-vystadial-recipe/s5/local/vystadial_data_prep.sh
new file mode 100755
index 00000000000..4e50619ae14
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/local/vystadial_data_prep.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# Author: Ondrej Platek, Copyright 2012, code is without any warranty!
+# Created: 11:06:13 16/11/2012
+# Modified: 11:06:13 16/11/2012
+#
+#
+# Makes the train/test splits
+# local/voxforge_data_prep.sh --nspk_test ${nspk_test} ${SELECTED} || exit 1
+# creates the files: (TYPE=train|test)
+# a) ${TYPE}_trans.txt: ID transcription; capitalized, no punctuation
+# b) ${TYPE}_wav.scp: ID path2ID.wav
+# c) $TYPE.utt2spk: ID-recording ID-speaker
+# d) $TYPE.spk2utt
+# e) $TYPE.spk2gender  all speakers are male
+# we have ID-recording = ID-speaker
+
+renice 20 $$
+
+
+every_n=1
+[ -f path.sh ] && . ./path.sh # source the path.
+. utils/parse_options.sh || exit 1;
+
+
+msg="Usage: $0 [--every-n 30] <data-directory>";
+if [ $# -ne 1 ] ; then
+  echo "$msg"; exit 1;
+fi
+
+DATA=$1
+
+echo "=== Starting initial Vystadial data preparation ..."
+echo "--- Making test/train data split from $DATA taking every $every_n recording ..."
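+# For illustration only -- a hypothetical corpus layout this script expects
+# (the file names are made up):
+#   $DATA/train/rec_0001.wav       audio recording
+#   $DATA/train/rec_0001.wav.trn   its transcription
+# A produced line of train_wav.scp then looks like:
+#   rec_0001.wav /full/path/train/rec_0001.wav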
+
+locdata=data/local
+loctmp=$locdata/tmp
+rm -rf $loctmp >/dev/null 2>&1
+mkdir -p $locdata
+mkdir -p $loctmp
+
+i=0
+for d in test train ; do
+  ls $DATA/$d/ | sed -n /.*wav$/p |\
+  while read wav ; do
+    # echo "DEBUGGING wav: $wav"
+    ((i++)) # bash specific
+    if [ $i -ge $every_n ] ; then
+      i=0
+      pwav=$DATA/$d/$wav
+      echo "$wav $pwav" >> ${loctmp}/${d}_wav.scp.unsorted
+      echo "$wav $wav" >> ${loctmp}/${d}.utt2spk.unsorted
+      echo "$wav $wav" >> ${loctmp}/${d}.spk2utt.unsorted
+      # transcription of $wav
+      trn=`cat $DATA/$d/$wav.trn`
+      # echo "DEBUGGING trn: $trn"
+      echo "$wav $trn" >> ${loctmp}/${d}_trans.txt.unsorted
+      echo "$wav M" >> ${loctmp}/spk2gender.unsorted
+    fi
+  done # while read wav
+
+  # Sorting
+  for unsorted in _wav.scp.unsorted _trans.txt.unsorted \
+                  .spk2utt.unsorted .utt2spk.unsorted
+  do
+    u="${d}${unsorted}"
+    s=`echo "$u" | sed -e s:.unsorted::`
+    sort "${loctmp}/$u" -k1 > "${locdata}/$s"
+  done # for unsorted
+
+  #### copy to data dir ###
+  mkdir -p data/$d
+  cp $locdata/${d}_wav.scp data/$d/wav.scp || exit 1;
+  cp $locdata/${d}_trans.txt data/$d/text || exit 1;
+  cp $locdata/$d.spk2utt data/$d/spk2utt || exit 1;
+  cp $locdata/$d.utt2spk data/$d/utt2spk || exit 1;
+done # for d in test train
+
+# spk2gender covers speakers from both test and train (OK for the 1:1 utt-to-spk mapping)
+sort "${loctmp}/spk2gender.unsorted" -k1 > "${locdata}/spk2gender"
+for d in test train ; do
+  utils/filter_scp.pl data/$d/spk2utt $locdata/spk2gender > data/$d/spk2gender || exit 1;
+done
diff --git a/egs/kaldi-vystadial-recipe/s5/local/vystadial_format_data.sh b/egs/kaldi-vystadial-recipe/s5/local/vystadial_format_data.sh
new file mode 100755
index 00000000000..08d94d223dd
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/local/vystadial_format_data.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+# Copyright 2012 Vassil Panayotov
+# Apache 2.0
+
+source ./path.sh
+
+echo "=== Formatting train and test data ..."
+srcdir=data/local
+lmdir=data/local/
+tmpdir=data/local/lm_tmp
+lexicon=data/local/dict/lexicon.txt
+mkdir -p $tmpdir
+
+# NOTE: the copying below is already done by local/vystadial_data_prep.sh
+# for x in train test; do
+#   mkdir -p data/$x
+#   cp $srcdir/${x}_wav.scp data/$x/wav.scp || exit 1;
+#   cp $srcdir/${x}_trans.txt data/$x/text || exit 1;
+#   cp $srcdir/$x.spk2utt data/$x/spk2utt || exit 1;
+#   cp $srcdir/$x.utt2spk data/$x/utt2spk || exit 1;
+#   utils/filter_scp.pl data/$x/spk2utt $srcdir/spk2gender > data/$x/spk2gender || exit 1;
+# done
+
+
+# Next, for each type of language model, create the corresponding FST
+# and the corresponding lang_test_* directory.
+
+echo "--- Preparing the grammar transducer (G.fst) for testing ..."
+
+test=data/lang_test
+mkdir -p $test
+for f in phones.txt words.txt L.fst L_disambig.fst phones/; do
+  cp -r data/lang/$f $test
+done
+cat $lmdir/lm.arpa | \
+  utils/find_arpa_oovs.pl $test/words.txt > $tmpdir/oovs.txt
+
+# grep -v '<s> <s>' because the LM seems to have some strange and useless
+# stuff in it with multiple <s>'s in the history.  Encountered some other similar
+# things in a LM from Geoff.  Removing all "illegal" combinations of <s> and </s>,
+# which are supposed to occur only at begin/end of utt.  These can cause
+# determinization failures of CLG [ends up being epsilon cycles].
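+# For illustration, hypothetical ARPA entries that the three greps below drop
+# (the weights are made up):
+#   -2.5361 <s> <s>   -0.4771
+#   -3.0103 </s> <s>
+#   -2.8451 </s> </s>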
+cat $lmdir/lm.arpa | \
+  grep -v '<s> <s>' | \
+  grep -v '</s> <s>' | \
+  grep -v '</s> </s>' | \
+  arpa2fst - | fstprint | \
+  utils/remove_oovs.pl $tmpdir/oovs.txt | \
+  utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$test/words.txt \
+    --osymbols=$test/words.txt --keep_isymbols=false --keep_osymbols=false | \
+  fstrmepsilon > $test/G.fst
+fstisstochastic $test/G.fst
+# The output is like:
+# 9.14233e-05 -0.259833
+# we do expect the first of these 2 numbers to be close to zero (the second is
+# nonzero because the backoff weights make the states sum to >1).
+# Because of the <s> fiasco for these particular LMs, the first number is not
+# as close to zero as it could be.
+
+# Everything below is only for diagnostics.
+# Checking that G has no cycles with empty words on them (e.g. <s>, </s>);
+# this might cause determinization failure of CLG.
+# #0 is treated as an empty word.
+mkdir -p $tmpdir/g
+awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} END{print "0 0 #0 #0"; print "0";}' \
+  < "$lexicon" > $tmpdir/g/select_empty.fst.txt
+fstcompile --isymbols=$test/words.txt --osymbols=$test/words.txt \
+  $tmpdir/g/select_empty.fst.txt | \
+fstarcsort --sort_type=olabel | fstcompose - $test/G.fst > $tmpdir/g/empty_words.fst
+fstinfo $tmpdir/g/empty_words.fst | grep cyclic | grep -w 'y' &&
+  echo "Language model has cycles with empty words" && exit 1
+rm -rf $tmpdir
+
+echo "*** Succeeded in formatting data."
+
diff --git a/egs/kaldi-vystadial-recipe/s5/local/vystadial_prepare_dict.sh b/egs/kaldi-vystadial-recipe/s5/local/vystadial_prepare_dict.sh
new file mode 100755
index 00000000000..bd916013af3
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/local/vystadial_prepare_dict.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+# Copyright 2012 Vassil Panayotov
+# Apache 2.0
+
+renice 20 $$
+
+locdata=data/local
+locdict=$locdata/dict
+
+echo "=== Preparing the dictionary ..."
+
+if [ ! -f $locdict/cmudict/cmudict.0.7a ]; then
+  echo "--- Downloading CMU dictionary ..."
+  mkdir -p $locdict
+  svn co http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict \
+    $locdict/cmudict || exit 1;
+fi
+
+echo "--- Stripping stress and pronunciation variant markers from cmudict ..."
+perl $locdict/cmudict/scripts/make_baseform.pl \
+  $locdict/cmudict/cmudict.0.7a /dev/stdout |\
+  sed -e 's:^\([^\s(]\+\)([0-9]\+)\(\s\+\)\(.*\):\1\2\3:' > $locdict/cmudict-plain.txt
+
+echo "--- Searching for OOV words ..."
+gawk 'NR==FNR{words[$1]; next;} !($1 in words)' \
+  $locdict/cmudict-plain.txt $locdata/vocab-full.txt |\
+  egrep -v '<.?s>' > $locdict/vocab-oov.txt
+
+gawk 'NR==FNR{words[$1]; next;} ($1 in words)' \
+  $locdata/vocab-full.txt $locdict/cmudict-plain.txt |\
+  egrep -v '<.?s>' > $locdict/lexicon-iv.txt
+
+wc -l $locdict/vocab-oov.txt
+wc -l $locdict/lexicon-iv.txt
+
+### BEGIN SKIPPING GENERATION OF PRONUNCIATIONS FOR OOV WORDS ####
+# pyver=`python --version 2>&1 | sed -e 's:.*\([2-3]\.[0-9]\+\).*:\1:g'`
+# if [ ! -f tools/g2p/lib/python${pyver}/site-packages/g2p.py ]; then
+#   echo "--- Downloading Sequitur G2P ..."
+#   echo "NOTE: it assumes that you have Python, NumPy and SWIG installed on your system!"
+#   wget -P tools http://www-i6.informatik.rwth-aachen.de/web/Software/g2p-r1668.tar.gz
+#   tar xf tools/g2p-r1668.tar.gz -C tools
+#   cd tools/g2p
+#   echo '#include ' >> Utility.hh # won't compile on my system w/o this "patch"
+#   python setup.py install --prefix=.
+#   cd ../..
+#   if [ ! -f tools/g2p/lib/python${pyver}/site-packages/g2p.py ]; then
+#     echo "Sequitur G2P is not found - installation failed?"
+#     exit 1
+#   fi
+# fi
+#
+# if [ ! -f conf/g2p_model ]; then
+#   echo "--- Downloading a pre-trained Sequitur G2P model ..."
+#   wget http://sourceforge.net/projects/kaldi/files/sequitur-model4 -O conf/g2p_model
+#   if [ ! -f conf/g2p_model ]; then
+#     echo "Failed to download the g2p model!"
+#     exit 1
+#   fi
+# fi
+#
+# echo "--- Preparing pronunciations for OOV words ..."
+# python tools/g2p/lib/python${pyver}/site-packages/g2p.py \
+#   --model=conf/g2p_model --apply $locdict/vocab-oov.txt > $locdict/lexicon-oov.txt
+
+# HANDLING OOV WORDS: the unknown word 'OOV' gets the pronunciation SPN (SPoken Noise)
+echo "OOV SPN" > $locdict/lexicon-oov.txt
+echo "_INHALE_ SPN" >> $locdict/lexicon-oov.txt
+echo "_LAUGH_ SPN" >> $locdict/lexicon-oov.txt
+echo "_EHM_HMM_ SPN" >> $locdict/lexicon-oov.txt
+echo "_NOISE_ SPN" >> $locdict/lexicon-oov.txt
+
+cat $locdict/lexicon-oov.txt $locdict/lexicon-iv.txt |\
+  sort > $locdict/lexicon.txt
+
+echo "--- Preparing the phone lists ..."
+echo SIL > $locdict/silence_phones.txt
+echo _SIL_ >> $locdict/silence_phones.txt
+echo SIL > $locdict/optional_silence.txt
+grep -v -w sil $locdict/lexicon.txt | \
+  awk '{for(n=2;n<=NF;n++) { p[$n]=1; }} END{for(x in p) {print x}}' |\
+  sort > $locdict/nonsilence_phones.txt
+
+echo "--- Adding SIL to the lexicon ..."
+echo -e "!SIL\tSIL" >> $locdict/lexicon.txt
+
+# Some downstream scripts expect this file to exist, even if empty
+touch $locdict/extra_questions.txt
+
+echo "*** Dictionary preparation finished!"
diff --git a/egs/kaldi-vystadial-recipe/s5/logs/README b/egs/kaldi-vystadial-recipe/s5/logs/README
new file mode 100644
index 00000000000..5eecd9f8e69
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/logs/README
@@ -0,0 +1 @@
+Directory to store logs
diff --git a/egs/kaldi-vystadial-recipe/s5/path.sh b/egs/kaldi-vystadial-recipe/s5/path.sh
new file mode 100755
index 00000000000..4b50cebbb7e
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/path.sh
@@ -0,0 +1,16 @@
+# The number of parallel jobs to be started for some parts of the recipe
+# Make sure you have enough resources (CPUs and RAM) to accommodate this number of jobs
+njobs=10
+
+# Needed for "correct" sorting
+export LC_ALL=C
+
+export KALDI_ROOT=`pwd`/../../..
+export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$PWD:$PATH
+
+
+# Vystadial data: 1 channel, 16000 Hz, 16-bit
+export DATA_ROOT="./data_voip_en"
+
+# Storage dir for MFCCs. Needs a lot of space.
+export MFCC_DIR=./mfcc
diff --git a/egs/kaldi-vystadial-recipe/s5/run.sh b/egs/kaldi-vystadial-recipe/s5/run.sh
new file mode 100755
index 00000000000..9c692fe5f26
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/run.sh
@@ -0,0 +1,201 @@
+#!/bin/bash
+renice 20 $$
+
+# Copyright Ondrej Platek, Apache 2.0
+# based on the copyrighted 2012 Vassil Panayotov recipe
+# at egs/voxforge/s5/run.sh (Apache 2.0)
+
+. ./path.sh
+
+# If you have a cluster of machines running GridEngine you may want to
+# change the train and decode commands in the file below
+. ./cmd.sh
+
+# Load a few variables for changing the parameters of the training
+. ./conf/train_conf.sh
+
+# Copy the configuration files to the exp directory.
+# Writes WARNINGs into exp if reusing settings from another experiment!
+local/save_check_conf.sh || exit 1;
+
+if [ ! "$(ls -A data 2>/dev/null)" ]; then
"$(ls -A data 2>/dev/null)" ]; then + + # local/voxforge_data_prep.sh --nspk_test ${nspk_test} ${SELECTED} || exit 1 + local/vystadial_data_prep.sh --every_n $everyN ${DATA_ROOT} || exit 1 + + # prepare an ARPA LM and wordlist + mkdir -p data/local + # LEAVING it with OOV -> Allow train Kaldi for OOV model + # cp -f ${DATA_ROOT}/arpa_trigram data/local/lm.arpa + # NOT ALLOWING OOV WORDS training & also in decoding + grep -v -w OOV ${DATA_ROOT}/arpa_trigram > data/local/lm.arpa + echo '' > data/local/vocab-full.txt + tail -n +3 ${DATA_ROOT}/classic.v3.dct | cut -d ' ' -f 1 |\ + sort | uniq >> data/local/vocab-full.txt + + # Prepare the lexicon and various phone lists + # DISABLED Sequitor model: Pronunciations for OOV words are obtained using a pre-trained Sequitur model + local/vystadial_prepare_dict.sh || exit 1 + + # Prepare data/lang and data/local/lang directories read it IO param describtion + utils/prepare_lang.sh data/local/dict 'OOV' data/local/lang data/lang || exit 1 + + # Prepare G.fst and data/{train,test} directories + local/vystadial_format_data.sh || exit 1 +fi +# end of generating data directory + + +###### TRAINING SETTINGS ####### + +# if ${MFCC_DIR} is empty then generate the content +if [ ! "$(ls -A ${MFCC_DIR} 2>/dev/null)" ]; then + # Creating MFCC features and storing at ${MFCC_DIR} (Could be large). + for x in train test ; do + steps/make_mfcc.sh --cmd "$train_cmd" --nj $njobs \ + data/$x exp/make_mfcc/$x ${MFCC_DIR} || exit 1; + steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x ${MFCC_DIR} || exit 1; + done +fi + + +# Train monophone models on a subset of the data +utils/subset_data_dir.sh data/train $monoTrainData data/train.1k || exit 1; +steps/train_mono.sh --nj $njobs --cmd "$train_cmd" data/train.1k data/lang exp/mono || exit 1; + +# Monophone decoding +utils/mkgraph.sh --mono data/lang_test exp/mono exp/mono/graph || exit 1 +# note: local/decode.sh calls the command line once for each +# test, and afterwards averages the WERs into (in this case +# exp/mono/decode/ +steps/decode.sh --config conf/decode.config --nj $njobs --cmd "$decode_cmd" \ + exp/mono/graph data/test exp/mono/decode + +# Get alignments from monophone system. 
+steps/align_si.sh --nj $njobs --cmd "$train_cmd" \ + data/train data/lang exp/mono exp/mono_ali || exit 1; + +# train tri1 [first triphone pass] +steps/train_deltas.sh --cmd "$train_cmd" \ + $pdf $gauss data/train data/lang exp/mono_ali exp/tri1 || exit 1; + +# decode tri1 +utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph || exit 1; +steps/decode.sh --config conf/decode.config --nj $njobs --cmd "$decode_cmd" \ + exp/tri1/graph data/test exp/tri1/decode + +# draw-tree data/lang/phones.txt exp/tri1/tree | dot -Tps -Gsize=8,10.5 | ps2pdf - tree.pdf + +#align tri1 +steps/align_si.sh --nj $njobs --cmd "$train_cmd" \ + --use-graphs true data/train data/lang exp/tri1 exp/tri1_ali || exit 1; + +# train tri2a [delta+delta-deltas] +steps/train_deltas.sh --cmd "$train_cmd" $pdf $gauss \ + data/train data/lang exp/tri1_ali exp/tri2a || exit 1; + +# decode tri2a +utils/mkgraph.sh data/lang_test exp/tri2a exp/tri2a/graph +steps/decode.sh --config conf/decode.config --nj $njobs --cmd "$decode_cmd" \ + exp/tri2a/graph data/test exp/tri2a/decode + +# train and decode tri2b [LDA+MLLT] +steps/train_lda_mllt.sh --cmd "$train_cmd" $pdf $gauss \ + data/train data/lang exp/tri1_ali exp/tri2b || exit 1; +utils/mkgraph.sh data/lang_test exp/tri2b exp/tri2b/graph +steps/decode.sh --config conf/decode.config --nj $njobs --cmd "$decode_cmd" \ + exp/tri2b/graph data/test exp/tri2b/decode + +# Align all data with LDA+MLLT system (tri2b) +steps/align_si.sh --nj $njobs --cmd "$train_cmd" \ + --use-graphs true data/train data/lang exp/tri2b exp/tri2b_ali || exit 1; + +# Do MMI on top of LDA+MLLT. +steps/make_denlats.sh --nj $njobs --cmd "$train_cmd" \ + data/train data/lang exp/tri2b exp/tri2b_denlats || exit 1; +steps/train_mmi.sh data/train data/lang exp/tri2b_ali exp/tri2b_denlats exp/tri2b_mmi || exit 1; +steps/decode.sh --config conf/decode.config --iter 4 --nj $njobs --cmd "$decode_cmd" \ + exp/tri2b/graph data/test exp/tri2b_mmi/decode_it4 +steps/decode.sh --config conf/decode.config --iter 3 --nj $njobs --cmd "$decode_cmd" \ + exp/tri2b/graph data/test exp/tri2b_mmi/decode_it3 + +# Do the same with boosting. train_mmi_boost is a number e.g. 0.05 +steps/train_mmi.sh --boost ${train_mmi_boost} data/train data/lang \ + exp/tri2b_ali exp/tri2b_denlats exp/tri2b_mmi_b${train_mmi_boost} || exit 1; +steps/decode.sh --config conf/decode.config --iter 4 --nj $njobs --cmd "$decode_cmd" \ + exp/tri2b/graph data/test exp/tri2b_mmi_b${train_mmi_boost}/decode_it4 || exit 1; +steps/decode.sh --config conf/decode.config --iter 3 --nj $njobs --cmd "$decode_cmd" \ + exp/tri2b/graph data/test exp/tri2b_mmi_b${train_mmi_boost}/decode_it3 || exit 1; + +# Do MPE. +steps/train_mpe.sh data/train data/lang exp/tri2b_ali exp/tri2b_denlats exp/tri2b_mpe || exit 1; +steps/decode.sh --config conf/decode.config --iter 4 --nj $njobs --cmd "$decode_cmd" \ + exp/tri2b/graph data/test exp/tri2b_mpe/decode_it4 || exit 1; +steps/decode.sh --config conf/decode.config --iter 3 --nj $njobs --cmd "$decode_cmd" \ + exp/tri2b/graph data/test exp/tri2b_mpe/decode_it3 || exit 1; + + +# Do LDA+MLLT+SAT, and decode. 
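+# (Note: SAT estimates per-speaker fMLLR transforms during training, so the
+# resulting tri3b models are decoded with the two-pass steps/decode_fmllr.sh
+# below rather than the plain steps/decode.sh.)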
+steps/train_sat.sh $pdf $gauss data/train data/lang exp/tri2b_ali exp/tri3b || exit 1;
+utils/mkgraph.sh data/lang_test exp/tri3b exp/tri3b/graph || exit 1;
+steps/decode_fmllr.sh --config conf/decode.config --nj $njobs --cmd "$decode_cmd" \
+  exp/tri3b/graph data/test exp/tri3b/decode || exit 1;
+
+
+# Align all data with the LDA+MLLT+SAT system (tri3b)
+steps/align_fmllr.sh --nj $njobs --cmd "$train_cmd" --use-graphs true \
+  data/train data/lang exp/tri3b exp/tri3b_ali || exit 1;
+
+# MMI on top of tri3b (i.e. LDA+MLLT+SAT+MMI)
+steps/make_denlats.sh --config conf/decode.config \
+  --nj $njobs --cmd "$train_cmd" --transform-dir exp/tri3b_ali \
+  data/train data/lang exp/tri3b exp/tri3b_denlats || exit 1;
+steps/train_mmi.sh data/train data/lang exp/tri3b_ali exp/tri3b_denlats exp/tri3b_mmi || exit 1;
+
+steps/decode_fmllr.sh --config conf/decode.config --nj $njobs --cmd "$decode_cmd" \
+  --alignment-model exp/tri3b/final.alimdl --adapt-model exp/tri3b/final.mdl \
+  exp/tri3b/graph data/test exp/tri3b_mmi/decode || exit 1;
+
+# Do a decoding that uses the exp/tri3b/decode directory to get transforms from.
+steps/decode.sh --config conf/decode.config --nj $njobs --cmd "$decode_cmd" \
+  --transform-dir exp/tri3b/decode exp/tri3b/graph data/test exp/tri3b_mmi/decode2 || exit 1;
+
+
+# First, train a UBM for the fMMI experiments.
+steps/train_diag_ubm.sh --silence-weight 0.5 --nj $njobs --cmd "$train_cmd" \
+  250 data/train data/lang exp/tri3b_ali exp/dubm3b
+
+# Next, various fMMI+MMI configurations.
+steps/train_mmi_fmmi.sh --learning-rate 0.0025 \
+  --boost 0.1 --cmd "$train_cmd" data/train data/lang exp/tri3b_ali exp/dubm3b exp/tri3b_denlats \
+  exp/tri3b_fmmi_b || exit 1;
+
+for iter in 3 4 5 6 7 8; do
+  steps/decode_fmmi.sh --nj $njobs --config conf/decode.config --cmd "$decode_cmd" --iter $iter \
+    --transform-dir exp/tri3b/decode exp/tri3b/graph data/test exp/tri3b_fmmi_b/decode_it$iter &
+done
+
+steps/train_mmi_fmmi.sh --learning-rate 0.001 \
+  --boost 0.1 --cmd "$train_cmd" data/train data/lang exp/tri3b_ali exp/dubm3b exp/tri3b_denlats \
+  exp/tri3b_fmmi_c || exit 1;
+
+for iter in 3 4 5 6 7 8; do
+  steps/decode_fmmi.sh --nj $njobs --config conf/decode.config --cmd "$decode_cmd" --iter $iter \
+    --transform-dir exp/tri3b/decode exp/tri3b/graph data/test exp/tri3b_fmmi_c/decode_it$iter &
+done
+
+# For the indirect version, use twice the learning rate.
+steps/train_mmi_fmmi_indirect.sh --learning-rate 0.002 --schedule "fmmi fmmi fmmi fmmi mmi mmi mmi mmi" \
+  --boost 0.1 --cmd "$train_cmd" data/train data/lang exp/tri3b_ali exp/dubm3b exp/tri3b_denlats \
+  exp/tri3b_fmmi_d || exit 1;
+
+for iter in 3 4 5 6 7 8; do
+  steps/decode_fmmi.sh --nj $njobs --config conf/decode.config --cmd "$decode_cmd" --iter $iter \
+    --transform-dir exp/tri3b/decode exp/tri3b/graph data/test exp/tri3b_fmmi_d/decode_it$iter &
+done
+
+# SKIPPING the mixture-splitting and speaker-dependent (SGMM) setups.
+# You don't have to run all 3 of the below, e.g. you can just run run_sgmm2x.sh:
+# local/run_sgmm.sh
+# local/run_sgmm2.sh
+# local/run_sgmm2x.sh
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/align_fmllr.sh b/egs/kaldi-vystadial-recipe/s5/steps/align_fmllr.sh
new file mode 100755
index 00000000000..937c61010f1
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/align_fmllr.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
+# Apache 2.0
+
+# Computes training alignments; assumes features are (LDA+MLLT or delta+delta-delta)
+# It first computes an alignment with the final.alimdl (or the final.mdl if final.alimdl
+# is not present), then does 2 iterations of fMLLR estimation.
+
+# If you supply the --use-graphs option, it will use the training
+# graphs from the source directory (where the model is). In this
+# case the number of jobs must match the source directory.
+
+
+# Begin configuration section.
+stage=0
+nj=4
+cmd=run.pl
+use_graphs=false
+# Begin configuration.
+scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
+beam=10
+retry_beam=40
+boost_silence=1.0 # factor by which to boost silence during alignment.
+fmllr_update_type=full
+# End configuration options.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f path.sh ] && . ./path.sh # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# != 4 ]; then
+  echo "usage: steps/align_fmllr.sh <data-dir> <lang-dir> <src-dir> <align-dir>"
+  echo "e.g.:  steps/align_fmllr.sh data/train data/lang exp/tri1 exp/tri1_ali"
+  echo "main options (for others, see top of script file)"
+  echo "  --config <config-file>                           # config containing options"
+  echo "  --nj <nj>                                        # number of parallel jobs"
+  echo "  --use-graphs true                                # use graphs in src-dir"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo "  --fmllr-update-type (full|diag|offset|none)      # default full."
+  exit 1;
+fi
+
+data=$1
+lang=$2
+srcdir=$3
+dir=$4
+
+oov=`cat $lang/oov.int` || exit 1;
+silphonelist=`cat $lang/phones/silence.csl` || exit 1;
+sdata=$data/split$nj
+
+mkdir -p $dir/log
+echo $nj > $dir/num_jobs
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+
+cp $srcdir/{tree,final.mdl} $dir || exit 1;
+cp $srcdir/final.occs $dir;
+splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
+cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options.
+
+
+if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "$0: feature type is $feat_type"
+
+case $feat_type in
+  delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
+  lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
+    cp $srcdir/final.mat $dir
+    ;;
+  *) echo "Invalid feature type $feat_type" && exit 1;
+esac
+
+## Set up model and alignment model.
+mdl=$srcdir/final.mdl
+if [ -f $srcdir/final.alimdl ]; then
+  alimdl=$srcdir/final.alimdl
+else
+  alimdl=$srcdir/final.mdl
+fi
+[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1;
+alimdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $alimdl - |"
+mdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $mdl - |"
+
+
+## Work out where we're getting the graphs from.
+if $use_graphs; then
+  [ "$nj" != "`cat $srcdir/num_jobs`" ] && \
+    echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1;
+  [ !
-f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1; + graphdir=$srcdir +else + graphdir=$dir + if [ $stage -le 0 ]; then + echo "$0: compiling training graphs" + tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; + $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ + compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" \ + "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; + fi +fi + + +if [ $stage -le 1 ]; then + echo "$0: aligning data in $data using $alimdl and speaker-independent features." + $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$alimdl_cmd" \ + "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$sifeats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1; +fi + +if [ $stage -le 2 ]; then + echo "$0: computing fMLLR transforms" + if [ "$alimdl" != "$mdl" ]; then + $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ + gmm-post-to-gpost $alimdl "$sifeats" ark:- ark:- \| \ + gmm-est-fmllr-gpost --fmllr-update-type=$fmllr_update_type \ + --spk2utt=ark:$sdata/JOB/spk2utt $mdl "$sifeats" \ + ark,s,cs:- ark:$dir/trans.JOB || exit 1; + else + $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ + gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \ + --spk2utt=ark:$sdata/JOB/spk2utt $mdl "$sifeats" \ + ark,s,cs:- ark:$dir/trans.JOB || exit 1; + fi +fi + +feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |" + +if [ $stage -le 3 ]; then + echo "$0: doing final alignment." + $cmd JOB=1:$nj $dir/log/align_pass2.JOB.log \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl_cmd" \ + "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; +fi + +rm $dir/pre_ali.*.gz + +echo "$0: done aligning data." + +utils/summarize_warnings.pl $dir/log + +exit 0; diff --git a/egs/kaldi-vystadial-recipe/s5/steps/align_sgmm.sh b/egs/kaldi-vystadial-recipe/s5/steps/align_sgmm.sh new file mode 100755 index 00000000000..6bc58dfa2b0 --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/align_sgmm.sh @@ -0,0 +1,193 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# Computes training alignments and (if needed) speaker-vectors, given an +# SGMM system. If the system is built on top of SAT, you should supply +# transforms with the --transform-dir option. + +# If you supply the --use-graphs option, it will use the training +# graphs from the source directory. + +# Begin configuration section. +stage=0 +nj=4 +cmd=run.pl +use_graphs=false # use graphs from srcdir +use_gselect=false # use gselect info from srcdir [regardless, we use + # Gaussian-selection info, we might have to compute it though.] +gselect=15 # Number of Gaussian-selection indices for SGMMs. +# Begin configuration. +scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +beam=10 +retry_beam=40 +transform_dir= # directory to find fMLLR transforms in. +# End configuration options. + +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh # source the path. +. 
parse_options.sh || exit 1; + +if [ $# != 4 ]; then + echo "usage: steps/align_sgmm.sh " + echo "e.g.: steps/align_sgmm.sh --transform-dir exp/tri3b data/train data/lang \\" + echo " exp/sgmm4a exp/sgmm5a_ali" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --use-graphs true # use graphs in src-dir" + echo " --transform-dir # directory to find fMLLR transforms" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + +data=$1 +lang=$2 +srcdir=$3 +dir=$4 + +oov=`cat $lang/oov.int` || exit 1; +silphonelist=`cat $lang/phones/silence.csl` || exit 1; +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. +sdata=$data/split$nj + +mkdir -p $dir/log +cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options. +echo $nj > $dir/num_jobs +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + +cp $srcdir/{tree,final.mdl} $dir || exit 1; +[ -f $srcdir/final.alimdl ] && cp $srcdir/final.alimdl $dir +cp $srcdir/final.occs $dir; + +## Set up features. +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + cp $srcdir/final.mat $dir + ;; + *) echo "Invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then + echo "$0: using transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; + [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ + && echo "$0: #jobs mismatch with transform-dir." && exit 1; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" +elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then + echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," + echo " but you are not providing the --transform-dir option during alignment." +fi +## + +## Set up model and alignment model. +mdl=$srcdir/final.mdl +if [ -f $srcdir/final.alimdl ]; then + alimdl=$srcdir/final.alimdl +else + alimdl=$srcdir/final.mdl +fi +[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1; + +## Work out where we're getting the graphs from. +if $use_graphs; then + [ "$nj" != "`cat $srcdir/num_jobs`" ] && \ + echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1; + [ ! -f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1; + graphdir=$srcdir + ln.pl $srcdir/fsts.*.gz $dir +else + graphdir=$dir + if [ $stage -le 0 ]; then + echo "$0: compiling training graphs" + tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; + $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ + compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" \ + "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; + fi +fi + +## Work out where we're getting the Gaussian-selection info from +if $use_gselect; then + [ "$nj" != "`cat $srcdir/num_jobs`" ] && \ + echo "$0: you specified --use-gselect true, but #jobs mismatch." && exit 1; + [ ! 
-f $srcdir/gselect.1.gz ] && echo "No gselect info in $srcdir" && exit 1; + graphdir=$srcdir + gselect_opt="--gselect=ark:gunzip -c $srcdir/gselect.JOB.gz|" + ln.pl $srcdir/gselect.*.gz $dir +else + graphdir=$dir + if [ $stage -le 1 ]; then + echo "$0: computing Gaussian-selection info" + # Note: doesn't matter whether we use $alimdl or $mdl, they will + # have the same gselect info. + $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ + sgmm-gselect --full-gmm-nbest=$gselect $alimdl \ + "$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; + fi + gselect_opt="--gselect=ark:gunzip -c $dir/gselect.JOB.gz|" +fi + + +if [ $alimdl == $mdl ]; then + # Speaker-independent decoding-- just one pass. Not normal. + T=`sgmm-info $mdl | grep 'speaker vector space' | awk '{print $NF}'` || exit 1; + [ "$T" -ne 0 ] && echo "No alignment model, yet speaker vector space nonempty" && exit 1; + + if [ $stage -le 2 ]; then + echo "$0: aligning data in $data using model $mdl (no speaker-vectors)" + $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ + sgmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam $alimdl \ + "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; + fi + echo "$0: done aligning data." + exit 0; +fi + +# Continue with system with speaker vectors. +if [ $stage -le 2 ]; then + echo "$0: aligning data in $data using model $alimdl" + $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ + sgmm-align-compiled $scale_opts "$gselect_opt" --beam=$beam --retry-beam=$retry_beam $alimdl \ + "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1; +fi + +if [ $stage -le 3 ]; then + echo "$0: computing speaker vectors (1st pass)" + $cmd JOB=1:$nj $dir/log/spk_vecs1.JOB.log \ + ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ + sgmm-post-to-gpost "$gselect_opt" $alimdl "$feats" ark:- ark:- \| \ + sgmm-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \ + $mdl "$feats" ark,s,cs:- ark:$dir/pre_vecs.JOB || exit 1; +fi + +if [ $stage -le 4 ]; then + echo "$0: computing speaker vectors (2nd pass)" + $cmd JOB=1:$nj $dir/log/spk_vecs2.JOB.log \ + ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ + sgmm-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" \ + --spk-vecs=ark:$dir/pre_vecs.JOB $mdl "$feats" ark,s,cs:- ark:$dir/vecs.JOB || exit 1; + rm $dir/pre_vecs.* +fi + +if [ $stage -le 5 ]; then + echo "$0: doing final alignment." + $cmd JOB=1:$nj $dir/log/align_pass2.JOB.log \ + sgmm-align-compiled $scale_opts "$gselect_opt" --beam=$beam --retry-beam=$retry_beam \ + --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \ + $mdl "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; +fi + +rm $dir/pre_ali.*.gz + +echo "$0: done aligning data." + +utils/summarize_warnings.pl $dir/log + +exit 0; diff --git a/egs/kaldi-vystadial-recipe/s5/steps/align_sgmm2.sh b/egs/kaldi-vystadial-recipe/s5/steps/align_sgmm2.sh new file mode 100755 index 00000000000..58af0677b8c --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/align_sgmm2.sh @@ -0,0 +1,193 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# Computes training alignments and (if needed) speaker-vectors, given an +# SGMM system. If the system is built on top of SAT, you should supply +# transforms with the --transform-dir option. 
+ +# If you supply the --use-graphs option, it will use the training +# graphs from the source directory. + +# Begin configuration section. +stage=0 +nj=4 +cmd=run.pl +use_graphs=false # use graphs from srcdir +use_gselect=false # use gselect info from srcdir [regardless, we use + # Gaussian-selection info, we might have to compute it though.] +gselect=15 # Number of Gaussian-selection indices for SGMMs. +# Begin configuration. +scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +beam=10 +retry_beam=40 +transform_dir= # directory to find fMLLR transforms in. +# End configuration options. + +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh # source the path. +. parse_options.sh || exit 1; + +if [ $# != 4 ]; then + echo "usage: steps/align_sgmm.sh " + echo "e.g.: steps/align_sgmm.sh --transform-dir exp/tri3b data/train data/lang \\" + echo " exp/sgmm4a exp/sgmm5a_ali" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --use-graphs true # use graphs in src-dir" + echo " --transform-dir # directory to find fMLLR transforms" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + +data=$1 +lang=$2 +srcdir=$3 +dir=$4 + +oov=`cat $lang/oov.int` || exit 1; +silphonelist=`cat $lang/phones/silence.csl` || exit 1; +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. +sdata=$data/split$nj + +mkdir -p $dir/log +cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options. +echo $nj > $dir/num_jobs +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + +cp $srcdir/{tree,final.mdl} $dir || exit 1; +[ -f $srcdir/final.alimdl ] && cp $srcdir/final.alimdl $dir +cp $srcdir/final.occs $dir; + +## Set up features. +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + cp $srcdir/final.mat $dir + ;; + *) echo "Invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then + echo "$0: using transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; + [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ + && echo "$0: #jobs mismatch with transform-dir." && exit 1; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" +elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then + echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," + echo " but you are not providing the --transform-dir option during alignment." +fi +## + +## Set up model and alignment model. +mdl=$srcdir/final.mdl +if [ -f $srcdir/final.alimdl ]; then + alimdl=$srcdir/final.alimdl +else + alimdl=$srcdir/final.mdl +fi +[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1; + +## Work out where we're getting the graphs from. 
+if $use_graphs; then + [ "$nj" != "`cat $srcdir/num_jobs`" ] && \ + echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1; + [ ! -f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1; + graphdir=$srcdir + ln.pl $srcdir/fsts.*.gz $dir +else + graphdir=$dir + if [ $stage -le 0 ]; then + echo "$0: compiling training graphs" + tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; + $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ + compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" \ + "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; + fi +fi + +## Work out where we're getting the Gaussian-selection info from +if $use_gselect; then + [ "$nj" != "`cat $srcdir/num_jobs`" ] && \ + echo "$0: you specified --use-gselect true, but #jobs mismatch." && exit 1; + [ ! -f $srcdir/gselect.1.gz ] && echo "No gselect info in $srcdir" && exit 1; + graphdir=$srcdir + gselect_opt="--gselect=ark:gunzip -c $srcdir/gselect.JOB.gz|" + ln.pl $srcdir/gselect.*.gz $dir +else + graphdir=$dir + if [ $stage -le 1 ]; then + echo "$0: computing Gaussian-selection info" + # Note: doesn't matter whether we use $alimdl or $mdl, they will + # have the same gselect info. + $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ + sgmm2-gselect --full-gmm-nbest=$gselect $alimdl \ + "$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; + fi + gselect_opt="--gselect=ark:gunzip -c $dir/gselect.JOB.gz|" +fi + + +if [ $alimdl == $mdl ]; then + # Speaker-independent decoding-- just one pass. Not normal. + T=`sgmm2-info $mdl | grep 'speaker vector space' | awk '{print $NF}'` || exit 1; + [ "$T" -ne 0 ] && echo "No alignment model, yet speaker vector space nonempty" && exit 1; + + if [ $stage -le 2 ]; then + echo "$0: aligning data in $data using model $mdl (no speaker-vectors)" + $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ + sgmm2-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam $alimdl \ + "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; + fi + echo "$0: done aligning data." + exit 0; +fi + +# Continue with system with speaker vectors. +if [ $stage -le 2 ]; then + echo "$0: aligning data in $data using model $alimdl" + $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ + sgmm2-align-compiled $scale_opts "$gselect_opt" --beam=$beam --retry-beam=$retry_beam $alimdl \ + "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1; +fi + +if [ $stage -le 3 ]; then + echo "$0: computing speaker vectors (1st pass)" + $cmd JOB=1:$nj $dir/log/spk_vecs1.JOB.log \ + ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ + sgmm2-post-to-gpost "$gselect_opt" $alimdl "$feats" ark:- ark:- \| \ + sgmm2-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \ + $mdl "$feats" ark,s,cs:- ark:$dir/pre_vecs.JOB || exit 1; +fi + +if [ $stage -le 4 ]; then + echo "$0: computing speaker vectors (2nd pass)" + $cmd JOB=1:$nj $dir/log/spk_vecs2.JOB.log \ + ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ + sgmm2-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" \ + --spk-vecs=ark:$dir/pre_vecs.JOB $mdl "$feats" ark,s,cs:- ark:$dir/vecs.JOB || exit 1; + rm $dir/pre_vecs.* +fi + +if [ $stage -le 5 ]; then + echo "$0: doing final alignment." 
+ $cmd JOB=1:$nj $dir/log/align_pass2.JOB.log \ + sgmm2-align-compiled $scale_opts "$gselect_opt" --beam=$beam --retry-beam=$retry_beam \ + --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \ + $mdl "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; +fi + +rm $dir/pre_ali.*.gz + +echo "$0: done aligning data." + +utils/summarize_warnings.pl $dir/log + +exit 0; diff --git a/egs/kaldi-vystadial-recipe/s5/steps/align_si.sh b/egs/kaldi-vystadial-recipe/s5/steps/align_si.sh new file mode 100755 index 00000000000..d525550f111 --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/align_si.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# Computes training alignments using a model with delta or +# LDA+MLLT features. + +# If you supply the "--use-graphs true" option, it will use the training +# graphs from the source directory (where the model is). In this +# case the number of jobs must match with the source directory. + + +# Begin configuration section. +nj=4 +cmd=run.pl +use_graphs=false +# Begin configuration. +scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +beam=10 +retry_beam=40 +boost_silence=1.0 # Factor by which to boost silence during alignment. +# End configuration options. + +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh # source the path. +. parse_options.sh || exit 1; + +if [ $# != 4 ]; then + echo "usage: steps/align_si.sh " + echo "e.g.: steps/align_si.sh data/train data/lang exp/tri1 exp/tri1_ali" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --use-graphs true # use graphs in src-dir" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + +data=$1 +lang=$2 +srcdir=$3 +dir=$4 + +oov=`cat $lang/oov.int` || exit 1; +mkdir -p $dir/log +echo $nj > $dir/num_jobs +sdata=$data/split$nj +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. +cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options. +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + +cp $srcdir/{tree,final.mdl} $dir || exit 1; +cp $srcdir/final.occs $dir; + + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + cp $srcdir/final.mat $dir + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac + +echo "$0: aligning data in $data using model from $srcdir, putting alignments in $dir" + +mdl="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $dir/final.mdl - |" + +if $use_graphs; then + [ $nj != "`cat $srcdir/num_jobs`" ] && echo "$0: mismatch in num-jobs" && exit 1; + [ ! 
-f $srcdir/fsts.1.gz ] && echo "$0: no such file $srcdir/fsts.1.gz" && exit 1; + + $cmd JOB=1:$nj $dir/log/align.JOB.log \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl" \ + "ark:gunzip -c $srcdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; +else + tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; + # We could just use gmm-align in the next line, but it's less efficient as it compiles the + # training graphs one by one. + $cmd JOB=1:$nj $dir/log/align.JOB.log \ + compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" ark:- \| \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl" ark:- \ + "$feats" "ark,t:|gzip -c >$dir/ali.JOB.gz" || exit 1; +fi + +echo "$0: done aligning data." diff --git a/egs/kaldi-vystadial-recipe/s5/steps/compute_cmvn_stats.sh b/egs/kaldi-vystadial-recipe/s5/steps/compute_cmvn_stats.sh new file mode 100755 index 00000000000..a340a9f54aa --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/compute_cmvn_stats.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 +# To be run from .. (one directory up from here) +# see ../run.sh for example + +# Compute cepstral mean and variance statistics per speaker. +# We do this in just one job; it's fast. +# This script takes no options. +# +# Note: there is no option to do CMVN per utterance. The idea is +# that if you did it per utterance it would not make sense to do +# per-speaker fMLLR on top of that (since you'd be doing fMLLR on +# top of different offsets). Therefore what would be the use +# of the speaker information? In this case you should probably +# make the speaker-ids identical to the utterance-ids. The +# speaker information does not have to correspond to actual +# speakers, it's just the level you want to adapt at. + +echo "$0 $@" # Print the command line for logging + +if [ $# != 3 ]; then + echo "usage: compute_cmvn_stats.sh "; + exit 1; +fi + +if [ -f path.sh ]; then . ./path.sh; fi + +data=$1 +logdir=$2 +cmvndir=$3 + +# make $cmvndir an absolute pathname. +cmvndir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $cmvndir ${PWD}` + +# use "name" as part of name of the archive. +name=`basename $data` + +mkdir -p $cmvndir || exit 1; +mkdir -p $logdir || exit 1; + + +required="$data/feats.scp" + +for f in $required; do + if [ ! -f $f ]; then + echo "make_cmvn.sh: no such file $f" + exit 1; + fi +done + +! compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark,scp:$cmvndir/cmvn_$name.ark,$cmvndir/cmvn_$name.scp \ + 2> $logdir/cmvn_$name.log && echo "Error computing CMVN stats" && exit 1; + +cp $cmvndir/cmvn_$name.scp $data/cmvn.scp || exit 1; + +nc=`cat $data/cmvn.scp | wc -l` +nu=`cat $data/spk2utt | wc -l` +if [ $nc -ne $nu ]; then + echo "Error: it seems not all of the speakers got cmvn stats ($nc != $nu);" + exit 1; +fi + +echo "Succeeded creating CMVN stats for $name" diff --git a/egs/kaldi-vystadial-recipe/s5/steps/decode.sh b/egs/kaldi-vystadial-recipe/s5/steps/decode.sh new file mode 100755 index 00000000000..b4618cb1439 --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/decode.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# Begin configuration section. +transform_dir= +iter= +model= # You can specify the model to use (e.g. 
if you want to use the .alimdl)
+nj=4
+cmd=run.pl
+max_active=7000
+beam=13.0
+latbeam=6.0
+acwt=0.083333 # note: only really affects pruning (scoring is on lattices).
+min_lmwt=9
+max_lmwt=20
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# != 3 ]; then
+  echo "Usage: steps/decode.sh [options] <graph-dir> <data-dir> <decode-dir>"
+  echo "... where <decode-dir> is assumed to be a sub-directory of the directory"
+  echo " where the model is."
+  echo "e.g.: steps/decode.sh exp/mono/graph_tgpr data/test_dev93 exp/mono/decode_dev93_tgpr"
+  echo ""
+  echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out"
+  echo "what type of features you used (assuming it's one of these two)"
+  echo ""
+  echo "main options (for others, see top of script file)"
+  echo "  --config <config-file>                          # config containing options"
+  echo "  --nj <nj>                                       # number of parallel jobs"
+  echo "  --iter <iter>                                   # Iteration of model to test."
+  echo "  --model <model>                                 # which model to use (e.g. to"
+  echo "                                                  # specify the final.alimdl)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo "  --transform-dir <trans-dir>                     # dir to find fMLLR transforms "
+  echo "  --acwt <float>                                  # acoustic scale used for lattice generation "
+  echo "  --min-lmwt <int>                                # minimum LM-weight for lattice rescoring "
+  echo "  --max-lmwt <int>                                # maximum LM-weight for lattice rescoring "
+  echo "                                                  # speaker-adapted decoding"
+  exit 1;
+fi
+
+
+graphdir=$1
+data=$2
+dir=$3
+srcdir=`dirname $dir`; # The model directory is one level up from decoding directory.
+sdata=$data/split$nj;
+
+mkdir -p $dir/log
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+echo $nj > $dir/num_jobs
+
+if [ -z "$model" ]; then # if --model was not specified on the command line...
+  if [ -z $iter ]; then model=$srcdir/final.mdl;
+  else model=$srcdir/$iter.mdl; fi
+fi
+
+for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $model $graphdir/HCLG.fst; do
+  [ ! -f $f ] && echo "decode.sh: no such file $f" && exit 1;
+done
+
+if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "decode.sh: feature type is $feat_type";
+
+splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
+
+case $feat_type in
+  delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
+  lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";;
+  *) echo "Invalid feature type $feat_type" && exit 1;
+esac
+if [ ! -z "$transform_dir" ]; then # add transforms to features...
+  echo "Using fMLLR transforms from $transform_dir"
+  [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist."
+  [ "`cat $transform_dir/num_jobs`" -ne $nj ] && \
+    echo "Mismatch in number of jobs with $transform_dir";
+  feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |"
fi
+
+
+$cmd JOB=1:$nj $dir/log/decode.JOB.log \
+  gmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$latbeam \
+    --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
+  $model $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
+
+[ ! -x local/score.sh ] && \
+  echo "Not scoring because local/score.sh does not exist or not executable."
&& exit 1; +local/score.sh --cmd "$cmd" --min_lmwt $min_lmwt --max_lmwt $max_lmwt $data $graphdir $dir + +exit 0; diff --git a/egs/kaldi-vystadial-recipe/s5/steps/decode_basis_fmllr.sh b/egs/kaldi-vystadial-recipe/s5/steps/decode_basis_fmllr.sh new file mode 100755 index 00000000000..b0521aa59b3 --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/decode_basis_fmllr.sh @@ -0,0 +1,206 @@ +#!/bin/bash + +# Copyright 2012 Carnegie Mellon University (Author: Yajie Miao) +# Johns Hopkins University (Author: Daniel Povey) + +# Decoding script that does basis fMLLR. This can be on top of delta+delta-delta, +# or LDA+MLLT features. + +# There are 3 models involved potentially in this script, +# and for a standard, speaker-independent system they will all be the same. +# The "alignment model" is for the 1st-pass decoding and to get the +# Gaussian-level alignments for the "adaptation model" the first time we +# do fMLLR. The "adaptation model" is used to estimate fMLLR transforms +# and to generate state-level lattices. The lattices are then rescored +# with the "final model". +# +# The following table explains where we get these 3 models from. +# Note: $srcdir is one level up from the decoding directory. +# +# Model Default source: +# +# "alignment model" $srcdir/final.alimdl --alignment-model +# (or $srcdir/final.mdl if alimdl absent) +# "adaptation model" $srcdir/final.mdl --adapt-model +# "final model" $srcdir/final.mdl --final-model + + +# Begin configuration section +first_beam=10.0 # Beam used in initial, speaker-indep. pass +first_max_active=2000 # max-active used in initial pass. +alignment_model= +adapt_model= +final_model= +stage=0 +acwt=0.083333 # Acoustic weight used in getting fMLLR transforms, and also in + # lattice generation. + +# Parameters in alignment of training data +scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +align_beam=10 +retry_beam=40 + +max_active=7000 +beam=13.0 +lattice_beam=6.0 +nj=4 +silence_weight=0.01 +cmd=run.pl +si_dir= +# End configuration section + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Usage: steps/decode_basis_fmllr.sh [options] " + echo " e.g.: steps/decode_basis_fmllr.sh exp/tri2b/graph_tgpr data/train_si84 data/test_dev93 exp/tri2b/decode_dev93_tgpr" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd # Command to run in parallel with" + echo " --adapt-model # Model to compute transforms with" + echo " --alignment-model # Model to get Gaussian-level alignments for" + echo " # 1st pass of transform computation." + echo " --final-model # Model to finally decode with" + echo " --si-dir # use this to skip 1st pass of decoding" + echo " # Caution-- must be with same tree" + echo " --acwt # default 0.08333 ... used to get posteriors" + + exit 1; +fi + + +graphdir=$1 +data=$2 +dir=`echo $3 | sed 's:/$::g'` # remove any trailing slash. + +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. +sdata=$data/split$nj; + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. + +silphonelist=`cat $graphdir/phones/silence.csl` || exit 1; + +# Some checks. 
Note: we don't need $srcdir/tree but we expect +# it should exist, given the current structure of the scripts. +for f in $graphdir/HCLG.fst $data/feats.scp $srcdir/tree $srcdir/fmllr.basis; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +## Work out name of alignment model. ## +if [ -z "$alignment_model" ]; then + if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl; + else alignment_model=$srcdir/final.mdl; fi +fi +[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1; +## + +## Do the speaker-independent decoding, if --si-dir option not present. ## +if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass. + si_dir=${dir}.si # Name it as our decoding dir, but with suffix ".si". + if [ $stage -le 0 ]; then + steps/decode.sh --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam --model $alignment_model --max-active $first_max_active $graphdir $data $si_dir || exit 1; + fi +fi +## + +## Some checks, and setting of defaults for variables. +[ "$nj" -ne "`cat $si_dir/num_jobs`" ] && echo "Mismatch in #jobs with si-dir" && exit 1; +[ ! -f "$si_dir/lat.1.gz" ] && echo "No such file $si_dir/lat.1.gz" && exit 1; +[ -z "$adapt_model" ] && adapt_model=$srcdir/final.mdl +[ -z "$final_model" ] && final_model=$srcdir/final.mdl +for f in $adapt_model $final_model; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done +## + +## Set up the unadapted features "$sifeats" for testing set +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type"; +case $feat_type in + delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac +## + +## Now get the first-pass fMLLR transforms. +## We give all the default parameters in gmm-est-basis-fmllr +if [ $stage -le 1 ]; then + echo "$0: getting first-pass fMLLR transforms." + $cmd JOB=1:$nj $dir/log/fmllr_pass1.JOB.log \ + gunzip -c $si_dir/lat.JOB.gz \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post $silence_weight $silphonelist $alignment_model ark:- ark:- \| \ + gmm-post-to-gpost $alignment_model "$sifeats" ark:- ark:- \| \ + gmm-est-basis-fmllr-gpost --spk2utt=ark:$sdata/JOB/spk2utt \ + --fmllr-min-count=200 --num-iters=10 --size-scale=0.2 \ + --step-size-iters=3 --write-weights=ark:$dir/pre_wgt.JOB \ + $adapt_model $srcdir/fmllr.basis "$sifeats" ark,s,cs:- \ + ark:$dir/pre_trans.JOB || exit 1; +fi +## + +pass1feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/pre_trans.JOB ark:- ark:- |" + +## Do the main lattice generation pass. Note: we don't determinize the lattices at +## this stage, as we're going to use them in acoustic rescoring with the larger +## model, and it's more correct to store the full state-level lattice for this purpose. 
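+# (Illustrative aside, not part of the original script: a state-level
+# lattice written below can later be determinized on its own, e.g. for
+# job 1:
+#   lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam \
+#     "ark:gunzip -c $dir/lat.tmp.1.gz|" "ark:|gzip -c > $dir/lat.det.1.gz"
+# which is exactly what the final rescoring stage does right after
+# gmm-rescore-lattice.)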
+if [ $stage -le 2 ]; then + echo "$0: doing main lattice generation phase" + $cmd JOB=1:$nj $dir/log/decode.JOB.log \ + gmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \ + --acoustic-scale=$acwt \ + --determinize-lattice=false --allow-partial=true --word-symbol-table=$graphdir/words.txt \ + $adapt_model $graphdir/HCLG.fst "$pass1feats" "ark:|gzip -c > $dir/lat.tmp.JOB.gz" \ + || exit 1; +fi +## + +## Do a second pass of estimating the transform-- this time with the lattices +## generated from the alignment model. Compose the transforms to get +## $dir/trans.1, etc. +if [ $stage -le 3 ]; then + echo "$0: estimating fMLLR transforms a second time." + $cmd JOB=1:$nj $dir/log/fmllr_pass2.JOB.log \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=4.0 \ + "ark:gunzip -c $dir/lat.tmp.JOB.gz|" ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post $silence_weight $silphonelist $adapt_model ark:- ark:- \| \ + gmm-est-basis-fmllr --fmllr-min-count=200 \ + --spk2utt=ark:$sdata/JOB/spk2utt --write-weights=ark:$dir/trans_tmp_wgt.JOB \ + $adapt_model $srcdir/fmllr.basis "$pass1feats" ark,s,cs:- ark:$dir/trans_tmp.JOB '&&' \ + compose-transforms --b-is-affine=true ark:$dir/trans_tmp.JOB ark:$dir/pre_trans.JOB \ + ark:$dir/trans.JOB || exit 1; +fi +## + +feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |" + +# Rescore the state-level lattices with the final adapted features, and the final model +# (which by default is $srcdir/final.mdl, but which may be specified on the command line, +# useful in case of discriminatively trained systems). +# At this point we prune and determinize the lattices and write them out, ready for +# language model rescoring. + +if [ $stage -le 4 ]; then + echo "$0: doing a final pass of acoustic rescoring." + $cmd JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \ + gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat.tmp.JOB.gz|" "$feats" ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ + "ark:|gzip -c > $dir/lat.JOB.gz" '&&' rm $dir/lat.tmp.JOB.gz || exit 1; +fi + +[ ! -x local/score.sh ] && \ + echo "$0: not scoring because local/score.sh does not exist or not executable." && exit 1; +local/score.sh --cmd "$cmd" $data $graphdir $dir + +rm $dir/{trans_tmp,pre_trans}.* + +exit 0; diff --git a/egs/kaldi-vystadial-recipe/s5/steps/decode_biglm.sh b/egs/kaldi-vystadial-recipe/s5/steps/decode_biglm.sh new file mode 100755 index 00000000000..1586db1bb13 --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/decode_biglm.sh @@ -0,0 +1,84 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# Begin configuration. +nj=4 +cmd=run.pl +maxactive=7000 +beam=13.0 +latbeam=6.0 +acwt=0.083333 +# End configuration. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 5 ]; then + echo "Usage: steps/decode_si_biglm.sh [options] " + echo "... where is assumed to be a sub-directory of the directory" + echo " where the model is." 
+ echo "e.g.: steps/decode_si.sh exp/mono/graph_tgpr data/test_dev93 exp/mono/decode_dev93_tgpr" + echo "" + echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out" + echo "what type of features you used (assuming it's one of these two)" + echo "" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + + +graphdir=$1 +oldlm_fst=$2 +newlm_fst=$3 +data=$4 +dir=$5 + +srcdir=`dirname $dir`; # The model directory is one level up from decoding directory. +sdata=$data/split$nj; +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + + +for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $srcdir/final.mdl $graphdir/HCLG.fst $oldlm_fst $newlm_fst; do + [ ! -f $f ] && echo "decode_si.sh: no such file $f" && exit 1; +done + + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "decode_si.sh: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + +[ -f `dirname $oldlm_fst`/words.txt ] && ! cmp `dirname $oldlm_fst`/words.txt $graphdir/words.txt && \ + echo "Warning: old LM words.txt does not match with that in $graphdir .. probably will not work."; +[ -f `dirname $newlm_fst`/words.txt ] && ! cmp `dirname $oldlm_fst`/words.txt $graphdir/words.txt && \ + echo "Warning: new LM words.txt does not match with that in $graphdir .. probably will not work."; + +oldlm_cmd="fstproject --project_output=true $oldlm_fst | fstarcsort --sort_type=ilabel |" +newlm_cmd="fstproject --project_output=true $newlm_fst | fstarcsort --sort_type=ilabel |" + +$cmd JOB=1:$nj $dir/log/decode.JOB.log \ + gmm-latgen-biglm-faster --max-active=$maxactive --beam=$beam --lattice-beam=$latbeam \ + --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \ + $srcdir/final.mdl $graphdir/HCLG.fst "$oldlm_cmd" "$newlm_cmd" "$feats" \ + "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + +[ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; +local/score.sh --cmd "$cmd" $data $graphdir $dir + +exit 0; diff --git a/egs/kaldi-vystadial-recipe/s5/steps/decode_combine.sh b/egs/kaldi-vystadial-recipe/s5/steps/decode_combine.sh new file mode 100755 index 00000000000..b8ac5ede10b --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/decode_combine.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# Combine two decoding directories by composing the lattices (we +# apply a weight to each of the original weights, by default 0.5 each). + +# Begin configuration section. +weight1=0.5 # Weight on 1st set of lattices. +cmd=run.pl +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. 
parse_options.sh || exit 1; + +if [ $# -ne 5 ]; then + echo "Usage: steps/decode_combine.sh [options] " + echo " e.g.: steps/decode_combine.sh data/lang data/test exp/dir1/decode exp/dir2/decode exp/combine_1_2/decode" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --cmd # Command to run in parallel with" + echo " --weight1 # Weight on 1st set of lattices (default 0.5)" + exit 1; +fi + +data=$1 +lang_or_graphdir=$2 +srcdir1=$3 +srcdir2=$4 +dir=$5 + +for f in $data/utt2spk $lang_or_graphdir/phones.txt $srcdir1/lat.1.gz $srcdir2/lat.1.gz; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +nj1=`cat $srcdir1/num_jobs` || exit 1; +nj2=`cat $srcdir2/num_jobs` || exit 1; +[ $nj1 -ne $nj2 ] && echo "$0: mismatch in number of jobs $nj1 versus $nj2" && exit 1; +nj=$nj1 + +mkdir -p $dir/log +echo $nj > $dir/num_jobs + +# The lattice-interp command does the score interpolation (with composition), +# and the lattice-copy-backoff replaces the result with the 1st lattice, in +# cases where the composed result was empty. +$cmd JOB=1:$nj $dir/log/interp.JOB.log \ + lattice-interp --alpha=$weight1 "ark:gunzip -c $srcdir1/lat.JOB.gz|" \ + "ark,s,cs:gunzip -c $srcdir2/lat.JOB.gz|" ark:- \| \ + lattice-copy-backoff "ark,s,cs:gunzip -c $srcdir1/lat.JOB.gz|" ark,s,cs:- \ + "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1; + +[ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; +local/score.sh --cmd "$cmd" $data $lang_or_graphdir $dir + +exit 0; diff --git a/egs/kaldi-vystadial-recipe/s5/steps/decode_fmllr.sh b/egs/kaldi-vystadial-recipe/s5/steps/decode_fmllr.sh new file mode 100755 index 00000000000..0b17e0bf3a6 --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/decode_fmllr.sh @@ -0,0 +1,198 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) + +# Decoding script that does fMLLR. This can be on top of delta+delta-delta, or +# LDA+MLLT features. + +# There are 3 models involved potentially in this script, +# and for a standard, speaker-independent system they will all be the same. +# The "alignment model" is for the 1st-pass decoding and to get the +# Gaussian-level alignments for the "adaptation model" the first time we +# do fMLLR. The "adaptation model" is used to estimate fMLLR transforms +# and to generate state-level lattices. The lattices are then rescored +# with the "final model". +# +# The following table explains where we get these 3 models from. +# Note: $srcdir is one level up from the decoding directory. +# +# Model Default source: +# +# "alignment model" $srcdir/final.alimdl --alignment-model +# (or $srcdir/final.mdl if alimdl absent) +# "adaptation model" $srcdir/final.mdl --adapt-model +# "final model" $srcdir/final.mdl --final-model + + +# Begin configuration section +first_beam=10.0 # Beam used in initial, speaker-indep. pass +first_max_active=2000 # max-active used in initial pass. +alignment_model= +adapt_model= +final_model= +stage=0 +acwt=0.083333 # Acoustic weight used in getting fMLLR transforms, and also in + # lattice generation. +max_active=7000 +beam=13.0 +lattice_beam=6.0 +nj=4 +silence_weight=0.01 +cmd=run.pl +si_dir= +fmllr_update_type=full +# End configuration section + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. 
parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Usage: steps/decode_fmllr.sh [options] " + echo " e.g.: steps/decode_fmllr.sh exp/tri2b/graph_tgpr data/test_dev93 exp/tri2b/decode_dev93_tgpr" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd # Command to run in parallel with" + echo " --adapt-model # Model to compute transforms with" + echo " --alignment-model # Model to get Gaussian-level alignments for" + echo " # 1st pass of transform computation." + echo " --final-model # Model to finally decode with" + echo " --si-dir # use this to skip 1st pass of decoding" + echo " # Caution-- must be with same tree" + echo " --acwt # default 0.08333 ... used to get posteriors" + + exit 1; +fi + + +graphdir=$1 +data=$2 +dir=`echo $3 | sed 's:/$::g'` # remove any trailing slash. + +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. +sdata=$data/split$nj; + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. + +silphonelist=`cat $graphdir/phones/silence.csl` || exit 1; + +# Some checks. Note: we don't need $srcdir/tree but we expect +# it should exist, given the current structure of the scripts. +for f in $graphdir/HCLG.fst $data/feats.scp $srcdir/tree; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +## Work out name of alignment model. ## +if [ -z "$alignment_model" ]; then + if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl; + else alignment_model=$srcdir/final.mdl; fi +fi +[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1; +## + +## Do the speaker-independent decoding, if --si-dir option not present. ## +if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass. + si_dir=${dir}.si # Name it as our decoding dir, but with suffix ".si". + if [ $stage -le 0 ]; then + steps/decode.sh --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam --model $alignment_model --max-active $first_max_active $graphdir $data $si_dir || exit 1; + fi +fi +## + +## Some checks, and setting of defaults for variables. +[ "$nj" -ne "`cat $si_dir/num_jobs`" ] && echo "Mismatch in #jobs with si-dir" && exit 1; +[ ! -f "$si_dir/lat.1.gz" ] && echo "No such file $si_dir/lat.1.gz" && exit 1; +[ -z "$adapt_model" ] && adapt_model=$srcdir/final.mdl +[ -z "$final_model" ] && final_model=$srcdir/final.mdl +for f in $adapt_model $final_model; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done +## + +## Set up the unadapted features "$sifeats" +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type"; +case $feat_type in + delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac +## + +## Now get the first-pass fMLLR transforms. +if [ $stage -le 1 ]; then + echo "$0: getting first-pass fMLLR transforms." 
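+  # The pipeline below: lattice-to-post converts the SI lattices to
+  # per-frame posteriors; weight-silence-post scales silence frames by
+  # $silence_weight so they barely affect adaptation; gmm-post-to-gpost
+  # converts them to Gaussian-level posteriors under the alignment model;
+  # gmm-est-fmllr-gpost then estimates one transform per speaker (via
+  # --spk2utt) with the adaptation model.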
+ $cmd JOB=1:$nj $dir/log/fmllr_pass1.JOB.log \ + gunzip -c $si_dir/lat.JOB.gz \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post $silence_weight $silphonelist $alignment_model ark:- ark:- \| \ + gmm-post-to-gpost $alignment_model "$sifeats" ark:- ark:- \| \ + gmm-est-fmllr-gpost --fmllr-update-type=$fmllr_update_type \ + --spk2utt=ark:$sdata/JOB/spk2utt $adapt_model "$sifeats" ark,s,cs:- \ + ark:$dir/pre_trans.JOB || exit 1; +fi +## + +pass1feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/pre_trans.JOB ark:- ark:- |" + +## Do the main lattice generation pass. Note: we don't determinize the lattices at +## this stage, as we're going to use them in acoustic rescoring with the larger +## model, and it's more correct to store the full state-level lattice for this purpose. +if [ $stage -le 2 ]; then + echo "$0: doing main lattice generation phase" + $cmd JOB=1:$nj $dir/log/decode.JOB.log \ + gmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \ + --acoustic-scale=$acwt \ + --determinize-lattice=false --allow-partial=true --word-symbol-table=$graphdir/words.txt \ + $adapt_model $graphdir/HCLG.fst "$pass1feats" "ark:|gzip -c > $dir/lat.tmp.JOB.gz" \ + || exit 1; +fi +## + +## Do a second pass of estimating the transform-- this time with the lattices +## generated from the alignment model. Compose the transforms to get +## $dir/trans.1, etc. +if [ $stage -le 3 ]; then + echo "$0: estimating fMLLR transforms a second time." + $cmd JOB=1:$nj $dir/log/fmllr_pass2.JOB.log \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=4.0 \ + "ark:gunzip -c $dir/lat.tmp.JOB.gz|" ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post $silence_weight $silphonelist $adapt_model ark:- ark:- \| \ + gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \ + --spk2utt=ark:$sdata/JOB/spk2utt $adapt_model "$pass1feats" \ + ark,s,cs:- ark:$dir/trans_tmp.JOB '&&' \ + compose-transforms --b-is-affine=true ark:$dir/trans_tmp.JOB ark:$dir/pre_trans.JOB \ + ark:$dir/trans.JOB || exit 1; +fi +## + +feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |" + +# Rescore the state-level lattices with the final adapted features, and the final model +# (which by default is $srcdir/final.mdl, but which may be specified on the command line, +# useful in case of discriminatively trained systems). +# At this point we prune and determinize the lattices and write them out, ready for +# language model rescoring. + +if [ $stage -le 4 ]; then + echo "$0: doing a final pass of acoustic rescoring." + $cmd JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \ + gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat.tmp.JOB.gz|" "$feats" ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ + "ark:|gzip -c > $dir/lat.JOB.gz" '&&' rm $dir/lat.tmp.JOB.gz || exit 1; +fi + +[ ! -x local/score.sh ] && \ + echo "$0: not scoring because local/score.sh does not exist or not executable." 
&& exit 1; +local/score.sh --cmd "$cmd" $data $graphdir $dir + +rm $dir/{trans_tmp,pre_trans}.* + +exit 0; + diff --git a/egs/kaldi-vystadial-recipe/s5/steps/decode_fmmi.sh b/egs/kaldi-vystadial-recipe/s5/steps/decode_fmmi.sh new file mode 100755 index 00000000000..5f1571faefc --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/decode_fmmi.sh @@ -0,0 +1,95 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 +# Decoding of fMMI or fMPE models (feature-space discriminative training). +# If transform-dir supplied, expects e.g. fMLLR transforms in that dir. + +# Begin configuration section. +iter=final +nj=4 +cmd=run.pl +maxactive=7000 +beam=13.0 +latbeam=6.0 +acwt=0.083333 # note: only really affects pruning (scoring is on lattices). +ngselect=2; # Just use the 2 top Gaussians for fMMI/fMPE. Should match train. +transform_dir= +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Usage: steps/decode_fmmi.sh [options] " + echo "... where is assumed to be a sub-directory of the directory" + echo " where the model is." + echo "e.g.: steps/decode_fmmi.sh exp/mono/graph_tgpr data/test_dev93 exp/mono/decode_dev93_tgpr" + echo "" + echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out" + echo "what type of features you used (assuming it's one of these two)" + echo "You can also use fMLLR features-- you have to supply --transform-dir option." + echo "" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --iter # Iteration of model to test." + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --transform-dir # where to find fMLLR transforms." + exit 1; +fi + + +graphdir=$1 +data=$2 +dir=$3 +srcdir=`dirname $dir`; # The model directory is one level up from decoding directory. +sdata=$data/split$nj; +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +model=$srcdir/$iter.mdl + +for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $model $graphdir/HCLG.fst; do + [ ! -f $f ] && echo "decode_fmmi.sh: no such file $f" && exit 1; +done + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "decode_fmmi.sh: feature type is $feat_type"; + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + +if [ ! -z "$transform_dir" ]; then # add transforms to features... + echo "Using fMLLR transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." 
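+  # (Illustrative, with hypothetical paths: a suitable --transform-dir is
+  # usually the output of an fMLLR decode of the underlying SAT system,
+  # e.g.
+  #   steps/decode_fmllr.sh --nj $nj exp/tri3b/graph data/test exp/tri3b/decode_test
+  # which writes the per-job transforms exp/tri3b/decode_test/trans.1 ... trans.$nj.)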
+ [ "`cat $transform_dir/num_jobs`" -ne $nj ] && \ + echo "Mismatch in number of jobs with $transform_dir"; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" +fi + +fmpefeats="$feats fmpe-apply-transform $srcdir/$iter.fmpe ark:- 'ark,s,cs:gunzip -c $dir/gselect.JOB.gz|' ark:- |" + +# Get Gaussian selection info. +$cmd JOB=1:$nj $dir/log/gselect.JOB.log \ + gmm-gselect --n=$ngselect $srcdir/$iter.fmpe "$feats" \ + "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; + +$cmd JOB=1:$nj $dir/log/decode.JOB.log \ + gmm-latgen-faster --max-active=$maxactive --beam=$beam --lattice-beam=$latbeam \ + --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \ + $model $graphdir/HCLG.fst "$fmpefeats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + +[ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; +local/score.sh --cmd "$cmd" $data $graphdir $dir + +exit 0; diff --git a/egs/kaldi-vystadial-recipe/s5/steps/decode_fromlats.sh b/egs/kaldi-vystadial-recipe/s5/steps/decode_fromlats.sh new file mode 100755 index 00000000000..5b8f41a868f --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/decode_fromlats.sh @@ -0,0 +1,90 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# Decode, limited to the word-sequences that were present in a set +# of lattices on disk. The other lattices do not have to be built +# with the same tree or the same context size-- however, you do +# have to be using the same vocabulary (words.txt)-- if not you'd +# have to map the vocabulary somehow. + +# Note: if the trees are identical, you can use gmm-rescore-lattice. + +# Mechanism: create an unweighted acceptor (on words) for each utterance, +# compose that with G, determinize, and then use compile-train-graphs-fsts +# to compile a graph for each utterance, to decode with. + +# Begin configuration. +cmd=run.pl +maxactive=7000 +beam=20.0 +latbeam=7.0 +acwt=0.083333 +batch_size=75 # Limits memory blowup in compile-train-graphs-fsts +scale_opts="--transition-scale=1.0 --self-loop-scale=0.1" +# End configuration. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + + + +if [ $# != 4 ]; then + echo "Usage: steps/decode_si_fromlats.sh [options] " + echo "e.g.: steps/decode_si_fromlats.sh data/test_dev93 data/lang_test_tg exp/tri2b/decode_tgpr_dev93 exp/tri2a/decode_tgpr_dev93_fromlats" + echo "" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + + +data=$1 +lang=$2 +olddir=$3 +dir=$4 +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. + +mkdir -p $dir/log + +nj=`cat $olddir/num_jobs` || exit 1; +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` +sdata=$data/split$nj +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj >$dir/num_jobs + +for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $srcdir/final.mdl $olddir/lat.1.gz \ + $srcdir/tree $lang/L_disambig.fst $lang/phones.txt; do + [ ! 
-f $f ] && echo "decode_si_fromlats.sh: no such file $f" && exit 1; +done + + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "decode_si.sh: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + + +$cmd JOB=1:$nj $dir/log/decode_lats.JOB.log \ + lattice-to-fst "ark:gunzip -c $olddir/lat.JOB.gz|" ark:- \| \ + fsttablecompose "fstproject --project_output=true $lang/G.fst | fstarcsort |" ark:- ark:- \| \ + fstdeterminizestar ark:- ark:- \| \ + compile-train-graphs-fsts --read-disambig-syms=$lang/phones/disambig.int \ + --batch-size=$batch_size $scale_opts $srcdir/tree $srcdir/final.mdl $lang/L_disambig.fst ark:- ark:- \| \ + gmm-latgen-faster --max-active=$maxactive --beam=$beam --lattice-beam=$latbeam --acoustic-scale=$acwt \ + --allow-partial=true --word-symbol-table=$lang/words.txt \ + $srcdir/final.mdl ark:- "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + +[ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; +local/score.sh --cmd "$cmd" $data $lang $dir + +exit 0; diff --git a/egs/kaldi-vystadial-recipe/s5/steps/decode_nnet.sh b/egs/kaldi-vystadial-recipe/s5/steps/decode_nnet.sh new file mode 100755 index 00000000000..8bc37539c60 --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/decode_nnet.sh @@ -0,0 +1,125 @@ +#!/bin/bash + +# Copyright 2012 Karel Vesely, Daniel Povey +# Apache 2.0 + +# Begin configuration section. +iter= +nnet= # You can specify the nnet to use (e.g. if you want to use the .alinnet) +model= # You can specify the transition model to use (e.g. if you want to use the .alimdl) + +nj=4 +cmd=run.pl +max_active=7000 +beam=19.0 # GMM:13.0 +latbeam=9.0 # GMM:6.0 +acwt=0.12 # GMM:0.0833, note: only really affects pruning (scoring is on lattices). +min_lmwt=4 +max_lmwt=15 +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Usage: $0 [options] " + echo "... where is assumed to be a sub-directory of the directory" + echo " where the model is." + echo "e.g.: $0 exp/mono/graph_tgpr data/test_dev93 exp/mono/decode_dev93_tgpr" + echo "" + echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out" + echo "what type of features you used (assuming it's one of these two)" + echo "" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --iter # Iteration of model to test." + echo " --nnet # which nnet to use (e.g. to" + echo " --model # which model to use (e.g. to" + echo " # specify the final.nnet)" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --transform-dir # dir to find fMLLR transforms " + echo " # speaker-adapted decoding" + exit 1; +fi + + +graphdir=$1 +data=$2 +dir=$3 +srcdir=`dirname $dir`; # The model directory is one level up from decoding directory. 
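+# (Illustrative, hypothetical path: with dir=exp/tri2a_nnet/decode_test,
+# srcdir resolves to exp/tri2a_nnet, which is where final.nnet, final.mdl,
+# hamm_dct.mat and cmvn_glob.mat are looked up below.)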
+sdata=$data/split$nj; + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +if [ -z "$nnet" ]; then # if --nnet was not specified on the command line... + if [ -z $iter ]; then nnet=$srcdir/final.nnet; + else nnet=$(find $srcdir/nnet/ -name nnet_*_iter{,0}${iter}_lrate*); fi +fi +[ -z "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1; + +if [ -z "$model" ]; then # if --model was not specified on the command line... + model=$srcdir/final.mdl; +fi + +#hard-select feature-extraction files +hamm_dct=$srcdir/hamm_dct.mat +cmvn_g=$srcdir/cmvn_glob.mat + +#remove the softmax from the nnet +nnet_i=$nnet; nnet=$dir/$(basename $nnet)_nosoftmax; +nnet-trim-n-last-transforms --n=1 --binary=false $nnet_i $nnet 2>$dir/$(basename $nnet)_nosoftmax_log || exit 1; + +for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $hamm_dct $cmvn_g $nnet_i $nnet $model $graphdir/HCLG.fst; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +# PREPARE THE LOG-POSTERIOR COMPUTATION PIPELINE +norm_vars=$(cat $srcdir/norm_vars 2>/dev/null) +splice_opts=$(cat $srcdir/splice_opts 2>/dev/null) +feat_type=$(cat $srcdir/feat_type 2>/dev/null) + +# We use the pre-computed CMVN as well as pre-defined splicing +feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" + +# Transform feats +echo "Feature type : $feat_type" +case $feat_type in + plain) + ;; + traps) + transf=$srcdir/hamm_dct.mat + feats="$feats transform-feats $transf ark:- ark:- |" + ;; + transf) + feats="$feats transform-feats $srcdir/final.mat ark:- ark:- |" + ;; + transf-sat) + echo yet unimplemented... + exit 1; + ;; + *) + echo "Unknown feature type $feat_type" + exit 1 + ;; +esac + +# Global normalization and the MLP +feats="$feats apply-cmvn --norm-vars=true $cmvn_g ark:- ark:- | nnet-forward --no-softmax=true --class-frame-counts=$srcdir/ali_train.counts $nnet ark:- ark:- |" + +# Run the decoding in the queue +$cmd JOB=1:$nj $dir/log/decode.JOB.log \ + latgen-faster-mapped --max-active=$max_active --beam=$beam --lattice-beam=$latbeam \ + --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \ + $model $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + +# Run the scoring +[ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; +local/score.sh --min-lmwt $min_lmwt --max-lmwt $max_lmwt --cmd "$cmd" $data $graphdir $dir 2>$dir/scoring.log || exit 1; + +exit 0; diff --git a/egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm.sh b/egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm.sh new file mode 100755 index 00000000000..211cb03921c --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm.sh @@ -0,0 +1,254 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# This script does decoding with an SGMM system, with speaker vectors. +# If the SGMM system was +# built on top of fMLLR transforms from a conventional system, you should +# provide the --transform-dir option. + +# Begin configuration section. +stage=1 +alignment_model= +transform_dir= # dir to find fMLLR transforms. +nj=4 # number of decoding jobs. +acwt=0.1 # Just a default value, used for adaptation and beam-pruning.. +cmd=run.pl +beam=15.0 +gselect=15 # Number of Gaussian-selection indices for SGMMs. 
[Note:
+            # the first_pass_gselect variable is used for the 1st pass of
+            # decoding and can be tighter.]
+first_pass_gselect=3 # Use a smaller number of Gaussian-selection indices in
+                     # the 1st pass of decoding (lattice generation).
+max_active=7000
+lat_beam=8.0 # Beam we use in lattice generation.
+vecs_beam=4.0 # Beam we use to prune lattices while getting posteriors for
+              # speaker-vector computation. Can be quite tight (actually we
+              # could probably just do best-path).
+use_fmllr=false
+fmllr_iters=10
+fmllr_min_count=1000
+
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+ echo "Usage: steps/decode_sgmm.sh [options] <graph-dir> <data-dir> <decode-dir>"
+ echo " e.g.: steps/decode_sgmm.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\"
+ echo " exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr"
+ echo "main options (for others, see top of script file)"
+ echo " --transform-dir <decoding-dir> # directory of previous decoding"
+ echo " # where we can find transforms for SAT systems."
+ echo " --alignment-model <ali-mdl> # Model for the first-pass decoding."
+ echo " --config <config-file> # config containing options"
+ echo " --nj <nj> # number of parallel jobs"
+ echo " --cmd <cmd> # Command to run in parallel with"
+ echo " --beam <beam> # Decoding beam; default 15.0"
+ exit 1;
+fi
+
+graphdir=$1
+data=$2
+dir=$3
+srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
+
+for f in $graphdir/HCLG.fst $data/feats.scp $srcdir/final.mdl; do
+ [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
+done
+
+sdata=$data/split$nj;
+silphonelist=`cat $graphdir/phones/silence.csl` || exit 1
+splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
+gselect_opt="--gselect=ark:gunzip -c $dir/gselect.JOB.gz|"
+gselect_opt_1stpass="$gselect_opt copy-gselect --n=$first_pass_gselect ark:- ark:- |"
+
+mkdir -p $dir/log
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+echo $nj > $dir/num_jobs
+
+
+## Set up features.
+if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "$0: feature type is $feat_type"
+
+case $feat_type in
+ delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
+ lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
+ ;;
+ *) echo "$0: invalid feature type $feat_type" && exit 1;
+esac
+if [ ! -z "$transform_dir" ]; then
+ echo "$0: using transforms from $transform_dir"
+ [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1;
+ [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
+   && echo "$0: #jobs mismatch with transform-dir." && exit 1;
+ feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
+elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then
+ echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms,"
+ echo " but you are not providing the --transform-dir option at test time."
+fi
+##
+
+## Calculate fMLLR pre-transforms if needed. We are doing this here since this
+## step is required by models both with and without speaker vectors.
+if $use_fmllr; then
+ if [ !
-f $srcdir/final.fmllr_mdl ] || [ $srcdir/final.fmllr_mdl -ot $srcdir/final.mdl ]; then + echo "$0: computing pre-transform for fMLLR computation." + sgmm-comp-prexform $srcdir/final.mdl $srcdir/final.occs $srcdir/final.fmllr_mdl || exit 1; + fi +fi + +## Save Gaussian-selection info to disk. +# Note: we can use final.mdl regardless of whether there is an alignment model-- +# they use the same UBM. +if [ $stage -le 1 ]; then + $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ + sgmm-gselect --full-gmm-nbest=$gselect $srcdir/final.mdl \ + "$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; +fi + +## Work out name of alignment model. ## +if [ -z "$alignment_model" ]; then + if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl; + else alignment_model=$srcdir/final.mdl; fi +fi +[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1; + +# Generate state-level lattice which we can rescore. This is done with the +# alignment model and no speaker-vectors. +if [ $stage -le 2 ]; then + $cmd JOB=1:$nj $dir/log/decode_pass1.JOB.log \ + sgmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$lat_beam \ + --acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \ + --word-symbol-table=$graphdir/words.txt "$gselect_opt_1stpass" $alignment_model \ + $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/pre_lat.JOB.gz" || exit 1; +fi + +## Check if the model has speaker vectors +spkdim=`sgmm-info $srcdir/final.mdl | grep 'speaker vector' | awk '{print $NF}'` + +if [ $spkdim -gt 0 ]; then ### For models with speaker vectors: + +# Estimate speaker vectors (1st pass). Prune before determinizing +# because determinization can take a while on un-pruned lattices. +# Note: the sgmm-post-to-gpost stage is necessary because we have +# a separate alignment-model and final model, otherwise we'd skip it +# and use sgmm-est-spkvecs. + if [ $stage -le 3 ]; then + $cmd JOB=1:$nj $dir/log/vecs_pass1.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $alignment_model ark:- ark:- \| \ + sgmm-post-to-gpost "$gselect_opt" $alignment_model "$feats" ark:- ark:- \| \ + sgmm-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \ + $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/pre_vecs.JOB" || exit 1; + fi + +# Estimate speaker vectors (2nd pass). Since we already have spk vectors, +# at this point we need to rescore the lattice to get the correct posteriors. 
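+# Concretely, the pipeline below re-attaches acoustic scores computed with
+# the pass-1 vectors (sgmm-rescore-lattice), then prunes, determinizes,
+# converts to posteriors, down-weights silence, and re-estimates vecs.JOB
+# per speaker with sgmm-est-spkvecs.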
+ if [ $stage -le 4 ]; then + $cmd JOB=1:$nj $dir/log/vecs_pass2.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + sgmm-rescore-lattice --spk-vecs=ark:$dir/pre_vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \ + "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ + sgmm-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/pre_vecs.JOB \ + $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/vecs.JOB" || exit 1; + fi + rm $dir/pre_vecs.* + + if $use_fmllr; then + # Estimate fMLLR transforms (note: these may be on top of any + # fMLLR transforms estimated with the baseline GMM system. + if [ $stage -le 5 ]; then # compute fMLLR transforms. + echo "$0: computing fMLLR transforms." + $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + sgmm-rescore-lattice --spk-vecs=ark:$dir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \ + "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ + sgmm-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/vecs.JOB \ + --fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \ + $srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1; + fi + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" + fi + +# Now rescore the state-level lattices with the adapted features and the +# corresponding model. Prune and determinize the lattices to limit +# their size. + if [ $stage -le 6 ]; then + $cmd JOB=1:$nj $dir/log/rescore.JOB.log \ + sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \ + $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lat_beam ark:- \ + "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + fi + rm $dir/pre_lat.*.gz + +else ### For models without speaker vectors: + + if $use_fmllr; then + # Estimate fMLLR transforms (note: these may be on top of any + # fMLLR transforms estimated with the baseline GMM system. + if [ $stage -le 5 ]; then # compute fMLLR transforms. + echo "$0: computing fMLLR transforms." 
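+      # Same recipe as the speaker-vector branch above, minus the
+      # --spk-vecs options: rescore, prune, determinize, convert to
+      # posteriors, down-weight silence, then estimate per-speaker fMLLR
+      # transforms against final.fmllr_mdl.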
+ $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + sgmm-rescore-lattice --utt2spk=ark:$sdata/JOB/utt2spk \ + "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ + sgmm-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" \ + --fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \ + $srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1; + fi + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" + fi + +# Now rescore the state-level lattices with the adapted features and the +# corresponding model. Prune and determinize the lattices to limit +# their size. + if [ $stage -le 6 ] && $use_fmllr; then + $cmd JOB=1:$nj $dir/log/rescore.JOB.log \ + sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk \ + $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lat_beam ark:- \ + "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + rm $dir/pre_lat.*.gz + else # Already done with decoding if no adaptation needed. + for n in `seq 1 $nj`; do + mv $dir/pre_lat.${n}.gz $dir/lat.${n}.gz + done + fi + +fi + +# The output of this script is the files "lat.*.gz"-- we'll rescore this at +# different acoustic scales to get the final output. + + +if [ $stage -le 7 ]; then + [ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; + echo "score best paths" + local/score.sh --cmd "$cmd" $data $graphdir $dir + echo "score confidence and timing with sclite" + #local/score_sclite_conf.sh --cmd "$cmd" --language turkish $data $graphdir $dir +fi +echo "Decoding done." +exit 0; diff --git a/egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm2.sh b/egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm2.sh new file mode 100755 index 00000000000..53c2f67e3a3 --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm2.sh @@ -0,0 +1,190 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# This script does decoding with an SGMM system, with speaker vectors. +# If the SGMM system was +# built on top of fMLLR transforms from a conventional system, you should +# provide the --transform-dir option. + +# Begin configuration section. +stage=1 +transform_dir= # dir to find fMLLR transforms. +nj=4 # number of decoding jobs. +acwt=0.1 # Just a default value, used for adaptation and beam-pruning.. +cmd=run.pl +beam=13.0 +gselect=15 # Number of Gaussian-selection indices for SGMMs. [Note: + # the first_pass_gselect variable is used for the 1st pass of + # decoding and can be tighter. +first_pass_gselect=3 # Use a smaller number of Gaussian-selection indices in + # the 1st pass of decoding (lattice generation). +max_active=7000 +lat_beam=6.0 # Beam we use in lattice generation. +vecs_beam=4.0 # Beam we use to prune lattices while getting posteriors for + # speaker-vector computation. Can be quite tight (actually we could + # probably just do best-path. +use_fmllr=false +fmllr_iters=10 +fmllr_min_count=1000 +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . 
./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: steps/decode_sgmm2.sh [options] " + echo " e.g.: steps/decode_sgmm2.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\" + echo " exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr" + echo "main options (for others, see top of script file)" + echo " --transform-dir # directory of previous decoding" + echo " # where we can find transforms for SAT systems." + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd # Command to run in parallel with" + echo " --beam # Decoding beam; default 13.0" + exit 1; +fi + +graphdir=$1 +data=$2 +dir=$3 +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. + +for f in $graphdir/HCLG.fst $data/feats.scp $srcdir/final.mdl; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +sdata=$data/split$nj; +silphonelist=`cat $graphdir/phones/silence.csl` || exit 1 +gselect_opt="--gselect=ark:gunzip -c $dir/gselect.JOB.gz|" +gselect_opt_1stpass="$gselect_opt copy-gselect --n=$first_pass_gselect ark:- ark:- |" + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. + +## Set up features. +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then + echo "$0: using transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; + [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ + && echo "$0: #jobs mismatch with transform-dir." && exit 1; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" +elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then + echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," + echo " but you are not providing the --transform-dir option in test time." +fi +## + +## Save Gaussian-selection info to disk. +# Note: we can use final.mdl regardless of whether there is an alignment model-- +# they use the same UBM. + +if [ $stage -le 1 ]; then + $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ + sgmm2-gselect --full-gmm-nbest=$gselect $srcdir/final.mdl \ + "$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; +fi + +# Generate state-level lattice which we can rescore. This is done with the alignment +# model and no speaker-vectors. 
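+# (Note: unlike decode_sgmm.sh, which falls back to final.mdl when no
+# final.alimdl exists, this script passes $srcdir/final.alimdl to the
+# decoder below unconditionally, so the alignment model must be present.)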
+if [ $stage -le 2 ]; then + $cmd JOB=1:$nj $dir/log/decode_pass1.JOB.log \ + sgmm2-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$lat_beam \ + --acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \ + --word-symbol-table=$graphdir/words.txt "$gselect_opt_1stpass" $srcdir/final.alimdl \ + $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/pre_lat.JOB.gz" || exit 1; +fi + +# Estimate speaker vectors (1st pass). Prune before determinizing +# because determinization can take a while on un-pruned lattices. +# Note: the sgmm2-post-to-gpost stage is necessary because we have +# a separate alignment-model and final model, otherwise we'd skip it +# and use sgmm2-est-spkvecs. +if [ $stage -le 3 ]; then + $cmd JOB=1:$nj $dir/log/vecs_pass1.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $srcdir/final.alimdl ark:- ark:- \| \ + sgmm2-post-to-gpost "$gselect_opt" $srcdir/final.alimdl "$feats" ark:- ark:- \| \ + sgmm2-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \ + $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/pre_vecs.JOB" || exit 1; +fi + +# Estimate speaker vectors (2nd pass). Since we already have spk vectors, +# at this point we need to rescore the lattice to get the correct posteriors. +if [ $stage -le 4 ]; then + $cmd JOB=1:$nj $dir/log/vecs_pass2.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + sgmm2-rescore-lattice --spk-vecs=ark:$dir/pre_vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \ + "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ + sgmm2-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/pre_vecs.JOB \ + $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/vecs.JOB" || exit 1; +fi +rm $dir/pre_vecs.* + +if $use_fmllr; then + # Estimate fMLLR transforms (note: these may be on top of any + # fMLLR transforms estimated with the baseline GMM system. + if [ $stage -le 5 ]; then # compute fMLLR transforms. + echo "$0: computing fMLLR transforms." + if [ ! -f $srcdir/final.fmllr_mdl ] || [ $srcdir/final.fmllr_mdl -ot $srcdir/final.mdl ]; then + echo "$0: computing pre-transform for fMLLR computation." 
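+      # sgmm2-comp-prexform below derives final.fmllr_mdl from final.mdl
+      # and final.occs; the -ot test above only recomputes it when it is
+      # missing or older than final.mdl.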
+ sgmm2-comp-prexform $srcdir/final.mdl $srcdir/final.occs $srcdir/final.fmllr_mdl || exit 1; + fi + $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ + gunzip -c $dir/pre_lat.JOB.gz \| \ + sgmm2-rescore-lattice --spk-vecs=ark:$dir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \ + "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ + lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ + sgmm2-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/vecs.JOB \ + --fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \ + $srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1; + fi + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" +fi + +# Now rescore the state-level lattices with the adapted features and the +# corresponding model. Prune and determinize the lattices to limit +# their size. +if [ $stage -le 6 ]; then + $cmd JOB=1:$nj $dir/log/rescore.JOB.log \ + sgmm2-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \ + $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \ + lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lat_beam ark:- \ + "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; +fi +rm $dir/pre_lat.*.gz + +# The output of this script is the files "lat.*.gz"-- we'll rescore this at different +# acoustic scales to get the final output. + + +if [ $stage -le 7 ]; then + [ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; + local/score.sh --cmd "$cmd" $data $graphdir $dir +fi +exit 0; diff --git a/egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm2_rescore.sh b/egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm2_rescore.sh new file mode 100755 index 00000000000..c8467a66924 --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm2_rescore.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# This script does decoding with an SGMM system, by rescoring lattices +# generated from a previous SGMM system. The directory with the lattices +# is assumed to contain speaker vectors, if used. Basically it rescores +# the lattices one final time, using the same setup as the final decoding +# pass of the source dir. The assumption is that the model may have +# been discriminatively trained. + +# If the system was built on top of fMLLR transforms from a conventional system, +# you should provide the --transform-dir option. + +# Begin configuration section. +transform_dir= # dir to find fMLLR transforms. +cmd=run.pl +iter=final +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# -ne 4 ]; then + echo "Usage: steps/decode_sgmm_rescore.sh [options] " + echo " e.g.: steps/decode_sgmm_rescore.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\" + echo " exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr exp/sgmm3a_mmi/decode_dev93_tgpr" + echo "main options (for others, see top of script file)" + echo " --transform-dir # directory of previous decoding" + echo " # where we can find transforms for SAT systems." 
+ echo " --config # config containing options" + echo " --cmd # Command to run in parallel with" + echo " --iter # iteration of model to use (default: final)" + exit 1; +fi + +graphdir=$1 +data=$2 +olddir=$3 +dir=$4 +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. + +for f in $graphdir/words.txt $data/feats.scp $olddir/lat.1.gz $olddir/gselect.1.gz \ + $srcdir/$iter.mdl; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +nj=`cat $olddir/num_jobs` || exit 1; +sdata=$data/split$nj; +gselect_opt="--gselect=ark:gunzip -c $olddir/gselect.JOB.gz|" +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +if [ -f $olddir/vecs.1 ]; then + echo "$0: using speaker vectors from $olddir" + spkvecs_opt="--spk-vecs=ark:$olddir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk" +else + echo "$0: no speaker vectors found." + spkvecs_opt= +fi + + +## Set up features. +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then + echo "$0: using transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; + [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ + && echo "$0: #jobs mismatch with transform-dir." && exit 1; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" +elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then + echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," + echo " but you are not providing the --transform-dir option in test time." +fi + +if [ -f $olddir/trans.1 ]; then + echo "$0: using (in addition to any previous transforms) transforms from $olddir" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$olddir/trans.JOB ark:- ark:- |" +fi +## + +# Rescore the state-level lattices with the model provided. Just +# one command in this script. +echo "$0: rescoring lattices with SGMM model in $srcdir/$iter.mdl" +$cmd JOB=1:$nj $dir/log/rescore.JOB.log \ + sgmm2-rescore-lattice "$gselect_opt" $spkvecs_opt \ + $srcdir/$iter.mdl "ark:gunzip -c $olddir/lat.JOB.gz|" "$feats" \ + "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + +[ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; +local/score.sh --cmd "$cmd" $data $graphdir $dir + +exit 0; diff --git a/egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm2_rescore_project.sh b/egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm2_rescore_project.sh new file mode 100755 index 00000000000..eb8347f7532 --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm2_rescore_project.sh @@ -0,0 +1,172 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 
+ +# This script does decoding with an SGMM system, by rescoring lattices +# generated from a previous SGMM system. This version does the "predictive" +# SGMM, where we subtract some constant times the log-prob of the left +# few spliced frames, and the same for the right few. +# The directory with the lattices +# is assumed to contain any speaker vectors, if used. This script just +# adds into the acoustic scores, (some constant, default -0.25) times +# the acoustic score of the left model, and the same for the right model. + +# the lattices one final time, using the same setup as the final decoding +# pass of the source dir. The assumption is that the model may have +# been discriminatively trained. + +# If the system was built on top of fMLLR transforms from a conventional system, +# you should provide the --transform-dir option. + +# Begin configuration section. +stage=0 +transform_dir= # dir to find fMLLR transforms. +cmd=run.pl +iter=final +prob_scale=-0.25 +dimensions=0:13:104:117 +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# -ne 5 ]; then + echo "Usage: steps/decode_sgmm_rescore_project.sh [options] " + echo " e.g.: steps/decode_sgmm_rescore_project.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\" + echo " exp/tri2b/full.mat exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr exp/sgmm3a/decode_dev93_tgpr_predict" + echo "main options (for others, see top of script file)" + echo " --transform-dir # directory of previous decoding" + echo " # where we can find transforms for SAT systems." + echo " --config # config containing options" + echo " --cmd # Command to run in parallel with" + echo " --prob-scale # Default -0.25, scale on left and right models." + exit 1; +fi + +full_lda_mat=$1 +graphdir=$2 +data=$3 +olddir=$4 +dir=$5 +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. + +for f in $full_lda_mat $graphdir/words.txt $data/feats.scp $olddir/lat.1.gz \ + $olddir/gselect.1.gz $srcdir/$iter.mdl; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +nj=`cat $olddir/num_jobs` || exit 1; +sdata=$data/split$nj; +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +if [ -f $olddir/vecs.1 ]; then + echo "$0: using speaker vectors from $olddir" + spkvecs_opt="--spk-vecs=ark:$olddir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk" +else + echo "$0: no speaker vectors found." + spkvecs_opt= +fi + +if [ $stage -le 0 ]; then + # Get full LDA+MLLT mat and its inverse. Note: the full LDA+MLLT mat is + # the LDA+MLLT mat, plus the "rejected" rows of the LDA matrix. + $cmd $dir/log/get_full_lda.log \ + get-full-lda-mat $srcdir/final.mat $full_lda_mat $dir/full.mat $dir/full_inv.mat || exit 1; +fi + +if [ $stage -le 1 ]; then + left_start=`echo $dimensions | cut '-d:' -f 1`; + left_end=`echo $dimensions | cut '-d:' -f 2`; + right_start=`echo $dimensions | cut '-d:' -f 3`; + right_end=`echo $dimensions | cut '-d:' -f 4`; + + # Prepare left and right models. For now, the dimensions are hardwired (e.g., 13 MFCCs and splice 9 frames). + # Note: the choice of dividing by the prob of the left 4 and the right 4 frames is a bit arbitrary and + # we could investigate different configurations. 
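+ # With the default dimensions=0:13:104:117 and 13-dim MFCCs spliced across
+ # 9 frames (9 * 13 = 117 dims in the full, pre-LDA space), dims [0,13) are
+ # roughly the static MFCCs of the leftmost frame and dims [104,117) those of
+ # the rightmost frame, so the left and right models cover context that the
+ # main model's LDA projection mostly discards.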
+ $cmd $dir/log/left.log \
+ sgmm2-project --start-dim=$left_start --end-dim=$left_end $srcdir/final.mdl $dir/full.mat $dir/left.mdl $dir/left.mat || exit 1;
+ $cmd $dir/log/right.log \
+ sgmm2-project --start-dim=$right_start --end-dim=$right_end $srcdir/final.mdl $dir/full.mat $dir/right.mdl $dir/right.mat || exit 1;
+fi
+
+
+# We want the final scores to be (old scores) + prob_scale * (new scores), but
+# sgmm2-rescore-lattice can only scale the *old* acoustic probs, not the new
+# ones. So we scale the old probs by 1/prob_scale here, add the new probs
+# unscaled, and invert the scaling again at the end.
+inverse_prob_scale=`perl -e "print (1.0 / $prob_scale);"`
+cur_lats="ark:gunzip -c $olddir/lat.JOB.gz | lattice-scale --acoustic-scale=$inverse_prob_scale ark:- ark:- |"
+
+## Set up features. Note: we only support LDA+MLLT features; this
+## is inherent in the method, we could not support deltas.
+
+for model_type in left right; do
+
+ feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" # spliced features.
+ if [ ! -z "$transform_dir" ]; then # using speaker-specific transforms.
+ # we want to transform in the sequence: $dir/full.mat, then the result of
+ # (extend-transform-dim $transform_dir/trans.JOB), then $dir/full_inv.mat to
+ # get back to the spliced space, then the left.mat or right.mat. But
+ # note that compose-transforms operates in matrix-multiplication order,
+ # which is opposite from the "order of applying the transforms" order.
+ new_dim=$[`copy-matrix --binary=false $dir/full.mat - | wc -l` - 1]; # 117 in normal case.
+ feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk 'ark:extend-transform-dim --new-dimension=$new_dim ark:$transform_dir/trans.JOB ark:- | compose-transforms ark:- $dir/full.mat ark:- | compose-transforms $dir/full_inv.mat ark:- ark:- | compose-transforms $dir/${model_type}.mat ark:- ark:- |' ark:- ark:- |"
+ else # else, we transform with the "left" or "right" matrix; these transform from the
+ # spliced space.
+ feats="$feats transform-feats $dir/${model_type}.mat ark:- ark:- |"
+ # If we don't have the --transform-dir option, make sure the model was
+ # trained in the same way.
+ if grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then
+ echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms,"
+ echo " but you are not providing the --transform-dir option in test time."
+ fi
+ fi
+ if [ -f $olddir/trans.1 ]; then
+ echo "$0: warning: not using transforms in $olddir (this is just a "
+ echo " limitation of the script right now, and could be fixed)."
+ fi
+
+ if [ $stage -le 2 ]; then
+ echo "Getting gselect info for $model_type model."
+ $cmd JOB=1:$nj $dir/log/gselect.$model_type.JOB.log \
+ sgmm2-gselect $dir/$model_type.mdl "$feats" \
+ "ark,t:|gzip -c >$dir/gselect.$model_type.JOB.gz" || exit 1;
+ fi
+ gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.$model_type.JOB.gz|"
+
+
+ # Rescore the state-level lattices with the model provided. Just
+ # one command in this script.
+ # The --old-acoustic-scale=1.0 option means we just add the scores
+ # to the old scores.
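+ # Concretely: the old scores were scaled by 1/prob_scale above, each of the
+ # two passes adds the new model's unscaled scores on top, and the final
+ # lattice-scale by prob_scale (stage 4) leaves, per arc,
+ # old_score + prob_scale * (left_score + right_score),
+ # i.e. with the default prob_scale=-0.25 we subtract a quarter of the
+ # left and right models' log-probs.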
+ if [ $stage -le 3 ]; then + echo "$0: rescoring lattices with $model_type model" + $cmd JOB=1:$nj $dir/log/rescore.${model_type}.JOB.log \ + sgmm2-rescore-lattice --old-acoustic-scale=1.0 "$gselect_opt" $spkvecs_opt \ + $dir/$model_type.mdl "$cur_lats" "$feats" \ + "ark:|gzip -c > $dir/lat.${model_type}.JOB.gz" || exit 1; + fi + cur_lats="ark:gunzip -c $dir/lat.${model_type}.JOB.gz |" +done + +if [ $stage -le 4 ]; then + echo "$0: getting final lattices." + $cmd JOB=1:$nj $dir/log/scale_lats.JOB.log \ + lattice-scale --acoustic-scale=$prob_scale "$cur_lats" "ark:|gzip -c >$dir/lat.JOB.gz" \ + || exit 1; +fi + +rm $dir/lat.{left,right}.*.gz 2>/dev/null # note: if these still exist, it will + # confuse the scoring script. + +[ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; +local/score.sh --cmd "$cmd" $data $graphdir $dir + +exit 0; diff --git a/egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm_rescore.sh b/egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm_rescore.sh new file mode 100755 index 00000000000..8650776539b --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/decode_sgmm_rescore.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# This script does decoding with an SGMM system, by rescoring lattices +# generated from a previous SGMM system. The directory with the lattices +# is assumed to contain speaker vectors, if used. Basically it rescores +# the lattices one final time, using the same setup as the final decoding +# pass of the source dir. The assumption is that the model may have +# been discriminatively trained. + +# If the system was built on top of fMLLR transforms from a conventional system, +# you should provide the --transform-dir option. + +# Begin configuration section. +transform_dir= # dir to find fMLLR transforms. +cmd=run.pl +iter=final +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# -ne 4 ]; then + echo "Usage: steps/decode_sgmm_rescore.sh [options] " + echo " e.g.: steps/decode_sgmm_rescore.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\" + echo " exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr exp/sgmm3a_mmi/decode_dev93_tgpr" + echo "main options (for others, see top of script file)" + echo " --transform-dir # directory of previous decoding" + echo " # where we can find transforms for SAT systems." + echo " --config # config containing options" + echo " --cmd # Command to run in parallel with" + echo " --iter # iteration of model to use (default: final)" + exit 1; +fi + +graphdir=$1 +data=$2 +olddir=$3 +dir=$4 +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. + +for f in $graphdir/words.txt $data/feats.scp $olddir/lat.1.gz $olddir/gselect.1.gz \ + $srcdir/$iter.mdl; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +nj=`cat $olddir/num_jobs` || exit 1; +sdata=$data/split$nj; +gselect_opt="--gselect=ark:gunzip -c $olddir/gselect.JOB.gz|" +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +if [ -f $olddir/vecs.1 ]; then + echo "$0: using speaker vectors from $olddir" + spkvecs_opt="--spk-vecs=ark:$olddir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk" +else + echo "$0: no speaker vectors found." 
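+ # (rescoring will then run without speaker-vector adaptation, which is
+ # what we want if the source decode was speaker-independent.)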
+ spkvecs_opt= +fi + + +## Set up features. +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then + echo "$0: using transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; + [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ + && echo "$0: #jobs mismatch with transform-dir." && exit 1; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" +elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then + echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," + echo " but you are not providing the --transform-dir option in test time." +fi + +if [ -f $olddir/trans.1 ]; then + echo "$0: using (in addition to any previous transforms) transforms from $olddir" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$olddir/trans.JOB ark:- ark:- |" +fi +## + +# Rescore the state-level lattices with the model provided. Just +# one command in this script. +echo "$0: rescoring lattices with SGMM model in $srcdir/$iter.mdl" +$cmd JOB=1:$nj $dir/log/rescore.JOB.log \ + sgmm-rescore-lattice "$gselect_opt" $spkvecs_opt \ + $srcdir/$iter.mdl "ark:gunzip -c $olddir/lat.JOB.gz|" "$feats" \ + "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + +[ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; +local/score.sh --cmd "$cmd" $data $graphdir $dir + +exit 0; diff --git a/egs/kaldi-vystadial-recipe/s5/steps/decode_si.sh b/egs/kaldi-vystadial-recipe/s5/steps/decode_si.sh new file mode 100755 index 00000000000..b4618cb1439 --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/decode_si.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +# Begin configuration section. +transform_dir= +iter= +model= # You can specify the model to use (e.g. if you want to use the .alimdl) +nj=4 +cmd=run.pl +max_active=7000 +beam=13.0 +latbeam=6.0 +acwt=0.083333 # note: only really affects pruning (scoring is on lattices). +min_lmwt=9 +max_lmwt=20 +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Usage: steps/decode.sh [options] " + echo "... where is assumed to be a sub-directory of the directory" + echo " where the model is." + echo "e.g.: steps/decode.sh exp/mono/graph_tgpr data/test_dev93 exp/mono/decode_dev93_tgpr" + echo "" + echo "This script works on CMN + (delta+delta-delta | LDA+MLLT) features; it works out" + echo "what type of features you used (assuming it's one of these two)" + echo "" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --iter # Iteration of model to test." 
+ echo " --model # which model to use (e.g. to" + echo " # specify the final.alimdl)" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --transform-dir # dir to find fMLLR transforms " + echo " --acwt # acoustic scale used for lattice generation " + echo " --min-lmwt # minumum LM-weight for lattice rescoring " + echo " --max-lmwt # maximum LM-weight for lattice rescoring " + echo " # speaker-adapted decoding" + exit 1; +fi + + +graphdir=$1 +data=$2 +dir=$3 +srcdir=`dirname $dir`; # The model directory is one level up from decoding directory. +sdata=$data/split$nj; + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +if [ -z "$model" ]; then # if --model was not specified on the command line... + if [ -z $iter ]; then model=$srcdir/final.mdl; + else model=$srcdir/$iter.mdl; fi +fi + +for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $model $graphdir/HCLG.fst; do + [ ! -f $f ] && echo "decode.sh: no such file $f" && exit 1; +done + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "decode.sh: feature type is $feat_type"; + +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then # add transforms to features... + echo "Using fMLLR transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." + [ "`cat $transform_dir/num_jobs`" -ne $nj ] && \ + echo "Mismatch in number of jobs with $transform_dir"; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" +fi + + +$cmd JOB=1:$nj $dir/log/decode.JOB.log \ + gmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$latbeam \ + --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \ + $model $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; + +[ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; +local/score.sh --cmd "$cmd" --min_lmwt $min_lmwt --max_lmwt $max_lmwt $data $graphdir $dir + +exit 0; diff --git a/egs/kaldi-vystadial-recipe/s5/steps/get_fmllr_basis.sh b/egs/kaldi-vystadial-recipe/s5/steps/get_fmllr_basis.sh new file mode 100755 index 00000000000..9ae46bc245d --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/get_fmllr_basis.sh @@ -0,0 +1,95 @@ +#!/bin/bash + +# Copyright 2012 Carnegie Mellon University (Author: Yajie Miao) +# Johns Hopkins University (Author: Daniel Povey) + +# Decoding script that computes basis for basis-fMLLR (see decode_fmllr_basis.sh). +# This can be on top of delta+delta-delta, or LDA+MLLT features. + +stage=0 +# Parameters in alignment of training data +scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +per_utt=true # If true, then treat each utterance as a separate speaker for purposes of + # basis training... 
this is recommended if the number of actual speakers in your + # training set is less than (feature-dim) * (feature-dim+1). +align_beam=10 +retry_beam=40 +silence_weight=0.01 +cmd=run.pl +# End configuration section + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Usage: steps/get_fmllr_basis.sh [options] " + echo " e.g.: steps/decode_basis_fmllr.sh data/train_si84 data/lang exp/tri3b/" + echo "Note: we currently assume that this is the same data you trained the model with." + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --cmd # Command to run in parallel with" + exit 1; +fi + +data=$1 +lang=$2 +dir=$3 + +nj=`cat $dir/num_jobs` || exit 1; +sdata=$data/split$nj; +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + +splice_opts=`cat $dir/splice_opts 2>/dev/null` # frame-splicing options. + +silphonelist=`cat $lang/phones/silence.csl` || exit 1; + +for f in $data/feats.scp $dir/final.alimdl $dir/final.mdl $dir/ali.1.gz; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + + +# Set up the unadapted features "$sifeats". +if [ -f $dir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "$0: feature type is $feat_type"; +case $feat_type in + delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |";; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + + # Set up the adapted features "$feats" for training set. +if [ -f $srcdir/trans.1 ]; then + feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$sdata/trans.JOB ark:- ark:- |"; +else + feats="$sifeats"; +fi + + +if $per_utt; then + spk2utt_opt= # treat each utterance as separate speaker when computing basis. + echo "Doing per-utterance adaptation for purposes of computing the basis." +else + echo "Doing per-speaker adaptation for purposes of computing the basis." + [ `cat $sdata/spk2utt | wc -l` -lt $[41*40] ] && \ + echo "Warning: number of speakers is small, might be better to use --per-utt=true." + spk2utt_opt="--spk2utt=ark:$sdata/JOB/spk2utt" +fi + +# Note: we get Gaussian level alignments with the "final.mdl" and the +# speaker adapted features. +$cmd JOB=1:$nj $dir/log/basis_acc.JOB.log \ + ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \ + weight-silence-post $silence_weight $silphonelist $dir/final.mdl ark:- ark:- \| \ + gmm-post-to-gpost $dir/final.mdl "$feats" ark:- ark:- \| \ + gmm-basis-fmllr-accs-gpost $spk2utt_opt \ + $dir/final.mdl "$sifeats" ark,s,cs:- $dir/basis.acc.JOB || exit 1; + +# Compute the basis matrices. +$cmd $dir/log/basis_training.log \ + gmm-basis-fmllr-training $dir/final.mdl $dir/fmllr.basis $dir/basis.acc.* || exit 1; +rm $dir/basis.acc.* 2>/dev/null + +exit 0; + diff --git a/egs/kaldi-vystadial-recipe/s5/steps/lmrescore.sh b/egs/kaldi-vystadial-recipe/s5/steps/lmrescore.sh new file mode 100755 index 00000000000..3553a40ea33 --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/lmrescore.sh @@ -0,0 +1,117 @@ +#!/bin/bash + +# Begin configuration section. 
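+# mode=1 is the original, inexact method; mode=2 is equivalent to 1 but built
+# from more basic operations; modes 3 and 4 are exact: 3 subtracts the old LM
+# scores and composes in the new LM with a phi (backoff) matcher, and 4
+# discards the old graph scores entirely and rebuilds them from the lexicon,
+# the new grammar and the transition model. See the case statement below.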
+mode=4 +cmd=run.pl +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +for x in `seq 2`; do + [ "$1" == "--cmd" ] && cmd=$2 && shift 2; + [ "$1" == "--mode" ] && mode=$2 && shift 2; +done + +if [ $# != 5 ]; then + echo "Do language model rescoring of lattices (remove old LM, add new LM)" + echo "Usage: steps/lmrescore.sh [options] " + echo "options: [--cmd (run.pl|queue.pl [queue opts])] [--mode (1|2|3|4)]" + exit 1; +fi + +[ -f path.sh ] && . ./path.sh; + +oldlang=$1 +newlang=$2 +data=$3 +indir=$4 +outdir=$5 + +oldlm=$oldlang/G.fst +newlm=$newlang/G.fst +! cmp $oldlang/words.txt $newlang/words.txt && echo "Warning: vocabularies may be incompatible." +[ ! -f $oldlm ] && echo Missing file $oldlm && exit 1; +[ ! -f $newlm ] && echo Missing file $newlm && exit 1; +! ls $indir/lat.*.gz >/dev/null && echo "No lattices input directory $indir" && exit 1; + +oldlmcommand="fstproject --project_output=true $oldlm |" +newlmcommand="fstproject --project_output=true $newlm |" + +mkdir -p $outdir/log + +phi=`grep -w '#0' $newlang/words.txt | awk '{print $2}'` + +if [ "$mode" == 4 ]; then + # we have to prepare $outdir/Ldet.fst in this case: determinized + # lexicon (determinized on phones), with disambig syms removed. + # take L_disambig.fst; get rid of transition with "#0 #0" on it; determinize + # with epsilon removal; remove disambiguation symbols. + fstprint $newlang/L_disambig.fst | awk '{if($4 != '$phi'){print;}}' | fstcompile | \ + fstdeterminizestar | fstrmsymbols $newlang/phones/disambig.int >$outdir/Ldet.fst || exit 1; +fi + +nj=`cat $indir/num_jobs` || exit 1; +cp $indir/num_jobs $outdir + + +#for lat in $indir/lat.*.gz; do +# number=`basename $lat | cut -d. -f2`; +# newlat=$outdir/`basename $lat` + +case "$mode" in + 1) # 1 is inexact, it's the original way of doing it. + $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \ + lattice-lmrescore --lm-scale=-1.0 "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlmcommand" ark:- \| \ + lattice-lmrescore --lm-scale=1.0 ark:- "$newlmcommand" "ark,t:|gzip -c>$outdir/lat.JOB.gz" \ + || exit 1; + ;; + 2) # 2 is equivalent to 1, but using more basic operations, combined. + $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \ + gunzip -c $indir/lat.JOB.gz \| \ + lattice-scale --acoustic-scale=-1 --lm-scale=-1 ark:- ark:- \| \ + lattice-compose ark:- "fstproject --project_output=true $oldlm |" ark:- \| \ + lattice-determinize ark:- ark:- \| \ + lattice-scale --acoustic-scale=-1 --lm-scale=-1 ark:- ark:- \| \ + lattice-compose ark:- "fstproject --project_output=true $newlm |" ark:- \| \ + lattice-determinize ark:- ark:- \| \ + gzip -c \>$outdir/lat.JOB.gz || exit 1; + ;; + 3) # 3 is "exact" in that we remove the old LM scores accepting any path + # through G.fst (which is what we want as that happened in lattice + # generation), but we add the new one with "phi matcher", only taking + # backoff arcs if an explicit arc did not exist. 
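+ # ($phi is the integer id of the #0 backoff symbol in words.txt; with
+ # --phi-label, composition takes a #0 arc out of a state only when the
+ # label being matched has no explicit arc there, which mirrors the
+ # ARPA backoff semantics.)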
+ $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
+ gunzip -c $indir/lat.JOB.gz \| \
+ lattice-scale --acoustic-scale=-1 --lm-scale=-1 ark:- ark:- \| \
+ lattice-compose ark:- "fstproject --project_output=true $oldlm |" ark:- \| \
+ lattice-determinize ark:- ark:- \| \
+ lattice-scale --acoustic-scale=-1 --lm-scale=-1 ark:- ark:- \| \
+ lattice-compose --phi-label=$phi ark:- $newlm ark:- \| \
+ lattice-determinize ark:- ark:- \| \
+ gzip -c \>$outdir/lat.JOB.gz || exit 1;
+ ;;
+ 4) # 4 is also exact (like 3), but instead of subtracting the old LM-scores,
+ # it removes the old graph scores entirely and adds in the lexicon,
+ # grammar and transition weights.
+ mdl=`dirname $indir`/final.mdl
+ [ ! -f $mdl ] && echo No such model $mdl && exit 1;
+ $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
+ gunzip -c $indir/lat.JOB.gz \| \
+ lattice-scale --lm-scale=0.0 ark:- ark:- \| \
+ lattice-to-phone-lattice $mdl ark:- ark:- \| \
+ lattice-compose ark:- $outdir/Ldet.fst ark:- \| \
+ lattice-determinize ark:- ark:- \| \
+ lattice-compose --phi-label=$phi ark:- $newlm ark:- \| \
+ lattice-add-trans-probs --transition-scale=1.0 --self-loop-scale=0.1 \
+ $mdl ark:- ark:- \| \
+ gzip -c \>$outdir/lat.JOB.gz || exit 1;
+ ;;
+esac
+
+rm $outdir/Ldet.fst 2>/dev/null
+
+[ ! -x local/score.sh ] && \
+ echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
+local/score.sh --cmd "$cmd" $data $newlang $outdir
+
+exit 0;
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/make_bn_feats.sh b/egs/kaldi-vystadial-recipe/s5/steps/make_bn_feats.sh
new file mode 100755
index 00000000000..0ee91959d5a
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/make_bn_feats.sh
@@ -0,0 +1,141 @@
+#!/bin/bash
+
+# Copyright 2012 Karel Vesely, Daniel Povey
+# Apache 2.0
+# To be run from .. (one directory up from here)
+# see ../run.sh for example
+
+# Begin configuration section.
+nj=4
+cmd=run.pl
+trim_transforms=4
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+if [ $# != 5 ]; then
+ echo "usage: $0 [options] <data-dir> <src-data-dir> <nnet-dir> <log-dir> <bn-feat-dir>";
+ echo "options: "
+ echo " --trim-transforms <N> # number of NNet Components to remove from the end"
+ echo " --nj <nj> # number of parallel jobs"
+ echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+ exit 1;
+fi
+
+data=$1
+srcdata=$2
+nndir=$3
+logdir=$4
+bnfeadir=$5
+
+######## CONFIGURATION
+norm_vars=$(cat $nndir/norm_vars)
+splice_opts=$(cat $nndir/splice_opts)
+feat_type=$(cat $nndir/feat_type)
+cmvn_g=$nndir/cmvn_glob.mat
+
+# copy the dataset metadata from srcdata.
+mkdir -p $data || exit 1;
+cp $srcdata/* $data 2>/dev/null; rm $data/feats.scp $data/cmvn.scp 2>/dev/null;
+
+# make $bnfeadir an absolute pathname.
+bnfeadir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $bnfeadir ${PWD}`
+
+# use "name" as part of name of the archive.
+name=`basename $data`
+
+mkdir -p $bnfeadir || exit 1;
+mkdir -p $data || exit 1;
+mkdir -p $logdir || exit 1;
+
+
+srcscp=$srcdata/feats.scp
+scp=$data/feats.scp
+
+required="$srcscp $nndir/final.nnet $cmvn_g $srcdata/cmvn.scp"
+
+for f in $required; do
+ if [ ! -f $f ]; then
+ echo "$0: no such file $f"
+ exit 1;
+ fi
+done
+
+if [ ! -d $srcdata/split$nj -o $srcdata/split$nj -ot $srcdata/feats.scp ]; then
+ utils/split_data.sh $srcdata $nj
+fi
+
+
+# cut the MLP: remove the last $trim_transforms components, so that the
+# forward pass below outputs the bottleneck-layer activations.
+nnet=$bnfeadir/feature_extractor.nnet
+nnet-trim-n-last-transforms --n=$trim_transforms --binary=false $nndir/final.nnet $nnet 2>$logdir/feature_extractor.log
+
+
+rm $data/.error 2>/dev/null
+
+echo "Creating bn-feats into $data"
+
+
+# note: in general, the double-parenthesis construct in bash "((" is "C-style
+# syntax" where we can get rid of the $ for variable names, and omit spaces.
+# The "for" loop in this style is a special construct.
+for ((n=1; n<=nj; n++)); do
+ log=$logdir/make_bnfeats.$n.log
+ # Prepare features : do per-speaker CMVN and splicing
+ feats="ark:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$srcdata/split$nj/$n/utt2spk scp:$srcdata/cmvn.scp scp:$srcdata/split$nj/$n/feats.scp ark:- | splice-feats --print-args=false $splice_opts ark:- ark:- |"
+ # Choose further processing according to feat_type
+ case $feat_type in
+ plain)
+ ;;
+ traps)
+ transf=$nndir/hamm_dct.mat
+ feats="$feats transform-feats --print-args=false $transf ark:- ark:- |"
+ ;;
+ transf)
+ feats="$feats transform-feats $nndir/final.mat ark:- ark:- |"
+ ;;
+ transf-sat)
+ echo "$0: feat_type transf-sat is not yet implemented"
+ exit 1;
+ ;;
+ *)
+ echo "Unknown feature type $feat_type"
+ exit 1;
+ esac
+ # Rescale to zero mean and unit variance
+ feats="$feats apply-cmvn --print-args=false --norm-vars=true $cmvn_g ark:- ark:- |"
+
+ # MLP forward
+ $cmd $log \
+ nnet-forward $nnet "$feats" \
+ ark,scp:$bnfeadir/raw_bnfea_$name.$n.ark,$bnfeadir/raw_bnfea_$name.$n.scp \
+ || touch $data/.error &
+
+done
+wait;
+
+N0=$(cat $srcdata/feats.scp | wc -l)
+N1=$(cat $bnfeadir/raw_bnfea_$name.*.scp | wc -l)
+if [[ -f $data/.error && "$N0" != "$N1" ]]; then
+ echo "Error producing bnfea features for $name:"
+ echo "Original feats : $N0 Bottleneck feats : $N1"
+ exit 1;
+fi
+
+if [[ -f $data/.error ]]; then
+ echo "Warning: a job touched .error while producing bnfea features, but all the $N1 features were computed...";
+fi
+
+# concatenate the .scp files together.
+for ((n=1; n<=nj; n++)); do
+ cat $bnfeadir/raw_bnfea_$name.$n.scp >> $data/feats.scp
+done
+
+
+echo "Succeeded creating MLP-BN features for $name ($data)"
+
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/make_denlats.sh b/egs/kaldi-vystadial-recipe/s5/steps/make_denlats.sh
new file mode 100755
index 00000000000..be0fe5e9fb8
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/make_denlats.sh
@@ -0,0 +1,139 @@
+#!/bin/bash
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
+
+# Create denominator lattices for MMI/MPE training.
+# Creates its output in $dir/lat.*.gz
+
+# Begin configuration section.
+nj=4
+cmd=run.pl
+sub_split=1
+beam=13.0
+lattice_beam=7.0
+acwt=0.1
+max_active=5000
+transform_dir=
+max_mem=20000000 # This will stop the processes getting too large.
+# This is in bytes, but not "real" bytes-- you have to multiply
+# by something like 5 or 10 to get real bytes (not sure why so large)
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# != 4 ]; then
+ echo "Usage: steps/make_denlats.sh [options] <data-dir> <lang-dir> <src-dir> <exp-dir>"
+ echo " e.g.: steps/make_denlats.sh data/train data/lang exp/tri1 exp/tri1_denlats"
+ echo "Works for (delta|lda) features, and (with --transform-dir option) such features"
+ echo " plus transforms."
+ echo "" + echo "Main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --sub-split # e.g. 40; use this for " + echo " # large databases so your jobs will be smaller and" + echo " # will (individually) finish reasonably soon." + echo " --transform-dir # directory to find fMLLR transforms." + exit 1; +fi + +data=$1 +lang=$2 +srcdir=$3 +dir=$4 + +sdata=$data/split$nj +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +oov=`cat $lang/oov.int` || exit 1; + +mkdir -p $dir + +cp -r $lang $dir/ + +# Compute grammar FST which corresponds to unigram decoding graph. + +cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \ + awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \ + utils/make_unigram_grammar.pl | fstcompile > $dir/lang/G.fst \ + || exit 1; + +# mkgraph.sh expects a whole directory "lang", so put everything in one directory... +# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and +# final.mdl from $srcdir; the output HCLG.fst goes in $dir/graph. + + +if [ -s $dir/dengraph/HCLG.fst ]; then + echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation." +else + utils/mkgraph.sh $dir/lang $srcdir $dir/dengraph || exit 1; +fi + +if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "align_si.sh: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + cp $srcdir/final.mat $dir + ;; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + +if [ ! -z "$transform_dir" ]; then # add transforms to features... + echo "$0: using fMLLR transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." + [ "`cat $transform_dir/num_jobs`" -ne "$nj" ] \ + && echo "$0: mismatch in number of jobs with $transform_dir" && exit 1; + [ -f $srcdir/final.mat ] && ! cmp $transform_dir/final.mat $srcdir/final.mat && \ + echo "$0: LDA transforms differ between $srcdir and $transform_dir" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" +else + if [ -f $srcdir/final.alimdl ]; then + echo "$0: you seem to have a SAT system but you did not supply the --transform-dir option."; + exit 1; + fi +fi + + +if [ $sub_split -eq 1 ]; then + $cmd JOB=1:$nj $dir/log/decode_den.JOB.log \ + gmm-latgen-faster --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \ + --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \ + $dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1; +else + for n in `seq $nj`; do + if [ -f $dir/.done.$n ]; then + echo "Not processing subset $n as already done (delete $dir/.done.$n if not)"; + else + sdata2=$data/split$nj/$n/split$sub_split; + if [ ! 
-d $sdata2 ] || [ $sdata2 -ot $sdata/$n/feats.scp ]; then + split_data.sh --per-utt $sdata/$n $sub_split || exit 1; + fi + mkdir -p $dir/log/$n + mkdir -p $dir/part + feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g` + $cmd JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \ + gmm-latgen-faster --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \ + --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \ + $dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1; + echo Merging archives for data subset $n + rm $dir/.error 2>/dev/null; + for k in `seq $sub_split`; do + gunzip -c $dir/lat.$n.$k.gz || touch $dir/.error; + done | gzip -c > $dir/lat.$n.gz || touch $dir/.error; + [ -f $dir/.error ] && echo Merging lattices for subset $n failed && exit 1; + rm $dir/lat.$n.*.gz + touch $dir/.done.$n + fi + done +fi + + +echo "$0: done generating denominator lattices." diff --git a/egs/kaldi-vystadial-recipe/s5/steps/make_denlats_sgmm.sh b/egs/kaldi-vystadial-recipe/s5/steps/make_denlats_sgmm.sh new file mode 100755 index 00000000000..a18934d04ed --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/make_denlats_sgmm.sh @@ -0,0 +1,157 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# Create denominator lattices for MMI/MPE training, with SGMM models. If the +# features have fMLLR transforms you have to supply the --transform-dir option. +# It gets any speaker vectors from the "alignment dir" ($alidir). Note: this is +# possibly a slight mismatch because the speaker vectors come from supervised +# adaptation. + +# Begin configuration section. +nj=4 +cmd=run.pl +sub_split=1 +beam=13.0 +lattice_beam=7.0 +acwt=0.1 +max_active=5000 +transform_dir= +max_mem=20000000 # This will stop the processes getting too large. +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 4 ]; then + echo "Usage: steps/make_denlats_sgmm.sh [options] " + echo " e.g.: steps/make_denlats_sgmm.sh data/train data/lang exp/sgmm4a_ali exp/sgmm4a_denlats" + echo "Works for (delta|lda) features, and (with --transform-dir option) such features" + echo " plus transforms." + echo "" + echo "Main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --sub-split # e.g. 40; use this for " + echo " # large databases so your jobs will be smaller and" + echo " # will (individually) finish reasonably soon." + echo " --transform-dir # directory to find fMLLR transforms." + exit 1; +fi + +data=$1 +lang=$2 +alidir=$3 # could also be $srcdir, but only if no vectors supplied. +dir=$4 + +sdata=$data/split$nj +splice_opts=`cat $alidir/splice_opts 2>/dev/null` +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + +oov=`cat $lang/oov.int` || exit 1; + +mkdir -p $dir + +cp -r $lang $dir/ + +# Compute grammar FST which corresponds to unigram decoding graph. 
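+# (The pipeline below converts the training transcripts to integer symbols,
+# strips the utterance ids, and estimates a unigram grammar over them; a
+# deliberately weak LM like this is usual for denominator lattices, so they
+# are not over-constrained by the language model.)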
+
+cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \
+ awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \
+ utils/make_unigram_grammar.pl | fstcompile > $dir/lang/G.fst \
+ || exit 1;
+
+# mkgraph.sh expects a whole directory "lang", so put everything in one directory...
+# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and
+# final.mdl from $alidir; the output HCLG.fst goes in $dir/dengraph.
+
+if [ -s $dir/dengraph/HCLG.fst ]; then
+ echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation."
+else
+ utils/mkgraph.sh $dir/lang $alidir $dir/dengraph || exit 1;
+fi
+
+if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "$0: feature type is $feat_type"
+
+case $feat_type in
+ delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
+ lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
+ cp $alidir/final.mat $dir
+ ;;
+ *) echo "Invalid feature type $feat_type" && exit 1;
+esac
+
+if [ ! -z "$transform_dir" ]; then # add transforms to features...
+ echo "$0: using fMLLR transforms from $transform_dir"
+ [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." && exit 1;
+ [ "`cat $transform_dir/num_jobs`" -ne "$nj" ] \
+ && echo "$0: mismatch in number of jobs with $transform_dir" && exit 1;
+ [ -f $alidir/final.mat ] && ! cmp $transform_dir/final.mat $alidir/final.mat && \
+ echo "$0: LDA transforms differ between $alidir and $transform_dir"
+ feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |"
+else
+ echo "Assuming you don't have a SAT system, since no --transform-dir option supplied"
+fi
+
+if [ -f $alidir/gselect.1.gz ]; then
+ gselect_opt="--gselect=ark:gunzip -c $alidir/gselect.JOB.gz|"
+else
+ echo "$0: no such file $alidir/gselect.1.gz" && exit 1;
+fi
+
+if [ -f $alidir/vecs.1 ]; then
+ spkvecs_opt="--spk-vecs=ark:$alidir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk"
+else
+ if [ -f $alidir/final.alimdl ]; then
+ echo "You seem to have an SGMM system with speaker vectors,"
+ echo "yet we can't find speaker vectors. Perhaps you supplied"
+ echo "the model directory instead of the alignment directory?"
+ exit 1;
+ fi
+fi
+
+if [ $sub_split -eq 1 ]; then
+ $cmd JOB=1:$nj $dir/log/decode_den.JOB.log \
+ sgmm-latgen-faster $spkvecs_opt "$gselect_opt" --beam=$beam \
+ --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
+ --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $alidir/final.mdl \
+ $dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
+else
+ for n in `seq $nj`; do
+ if [ -f $dir/.done.$n ]; then
+ echo "Not processing subset $n as already done (delete $dir/.done.$n if not)";
+ else
+ sdata2=$data/split$nj/$n/split$sub_split;
+ if [ ! -d $sdata2 ] || [ $sdata2 -ot $sdata/$n/feats.scp ]; then
+ split_data.sh --per-utt $sdata/$n $sub_split || exit 1;
+ fi
+ mkdir -p $dir/log/$n
+ mkdir -p $dir/part
+ feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g`
+ spkvecs_opt_subset=`echo $spkvecs_opt | sed "s/JOB/$n/g"`
+ gselect_opt_subset=`echo $gselect_opt | sed "s/JOB/$n/g"`
+ $cmd JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
+ sgmm-latgen-faster $spkvecs_opt_subset "$gselect_opt_subset" \
+ --beam=$beam --lattice-beam=$lattice_beam \
+ --acoustic-scale=$acwt --max-mem=$max_mem --max-active=$max_active \
+ --word-symbol-table=$lang/words.txt $alidir/final.mdl \
+ $dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1;
+ echo Merging archives for data subset $n
+ rm $dir/.error 2>/dev/null;
+ for k in `seq $sub_split`; do
+ gunzip -c $dir/lat.$n.$k.gz || touch $dir/.error;
+ done | gzip -c > $dir/lat.$n.gz || touch $dir/.error;
+ [ -f $dir/.error ] && echo Merging lattices for subset $n failed && exit 1;
+ rm $dir/lat.$n.*.gz
+ touch $dir/.done.$n
+ fi
+ done
+fi
+
+
+echo "$0: done generating denominator lattices with SGMMs."
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/make_denlats_sgmm2.sh b/egs/kaldi-vystadial-recipe/s5/steps/make_denlats_sgmm2.sh
new file mode 100755
index 00000000000..dc5dccdf684
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/make_denlats_sgmm2.sh
@@ -0,0 +1,157 @@
+#!/bin/bash
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
+
+# Create denominator lattices for MMI/MPE training, with SGMM models. If the
+# features have fMLLR transforms you have to supply the --transform-dir option.
+# It gets any speaker vectors from the "alignment dir" ($alidir). Note: this is
+# possibly a slight mismatch because the speaker vectors come from supervised
+# adaptation.
+
+# Begin configuration section.
+nj=4
+cmd=run.pl
+sub_split=1
+beam=13.0
+lattice_beam=7.0
+acwt=0.1
+max_active=5000
+transform_dir=
+max_mem=20000000 # This will stop the processes getting too large.
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# != 4 ]; then
+ echo "Usage: steps/make_denlats_sgmm2.sh [options] <data-dir> <lang-dir> <ali-dir> <exp-dir>"
+ echo " e.g.: steps/make_denlats_sgmm2.sh data/train data/lang exp/sgmm4a_ali exp/sgmm4a_denlats"
+ echo "Works for (delta|lda) features, and (with --transform-dir option) such features"
+ echo " plus transforms."
+ echo ""
+ echo "Main options (for others, see top of script file)"
+ echo " --config <config-file> # config containing options"
+ echo " --nj <nj> # number of parallel jobs"
+ echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+ echo " --sub-split <n-split> # e.g. 40; use this for "
+ echo " # large databases so your jobs will be smaller and"
+ echo " # will (individually) finish reasonably soon."
+ echo " --transform-dir <transform-dir> # directory to find fMLLR transforms."
+ exit 1;
+fi
+
+data=$1
+lang=$2
+alidir=$3 # could also be $srcdir, but only if no vectors supplied.
+dir=$4
+
+sdata=$data/split$nj
+splice_opts=`cat $alidir/splice_opts 2>/dev/null`
+mkdir -p $dir/log
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+echo $nj > $dir/num_jobs
+
+oov=`cat $lang/oov.int` || exit 1;
+
+mkdir -p $dir
+
+cp -r $lang $dir/
+
+# Compute grammar FST which corresponds to unigram decoding graph.
+ +cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \ + awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \ + utils/make_unigram_grammar.pl | fstcompile > $dir/lang/G.fst \ + || exit 1; + +# mkgraph.sh expects a whole directory "lang", so put everything in one directory... +# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and +# final.mdl from $alidir; the output HCLG.fst goes in $dir/graph. + +if [ -s $dir/dengraph/HCLG.fst ]; then + echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation." +else + utils/mkgraph.sh $dir/lang $alidir $dir/dengraph || exit 1; +fi + +if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi +echo "align_si.sh: feature type is $feat_type" + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |" + cp $alidir/final.mat $dir + ;; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + +if [ ! -z "$transform_dir" ]; then # add transforms to features... + echo "$0: using fMLLR transforms from $transform_dir" + [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." + [ "`cat $transform_dir/num_jobs`" -ne "$nj" ] \ + && echo "$0: mismatch in number of jobs with $transform_dir" && exit 1; + [ -f $alidir/final.mat ] && ! cmp $transform_dir/final.mat $alidir/final.mat && \ + echo "$0: LDA transforms differ between $alidir and $transform_dir" + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" +else + echo "Assuming you don't have a SAT system, since no --transform-dir option supplied " +fi + +if [ -f $alidir/gselect.1.gz ]; then + gselect_opt="--gselect=ark:gunzip -c $alidir/gselect.JOB.gz|" +else + echo "$0: no such file $alidir/gselect.1.gz" && exit 1; +fi + +if [ -f $alidir/vecs.1 ]; then + spkvecs_opt="--spk-vecs=ark:$alidir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk" +else + if [ -f $alidir/final.alimdl ]; then + echo "$0: You seem to have an SGMM system with speaker vectors," + echo "yet we can't find speaker vectors. Perhaps you supplied" + echo "the model director instead of the alignment directory?" + exit 1; + fi +fi + +if [ $sub_split -eq 1 ]; then + $cmd JOB=1:$nj $dir/log/decode_den.JOB.log \ + sgmm2-latgen-faster $spkvecs_opt "$gselect_opt" --beam=$beam \ + --lattice-beam=$lattice_beam --acoustic-scale=$acwt \ + --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $alidir/final.mdl \ + $dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1; +else + for n in `seq $nj`; do + if [ -f $dir/.done.$n ]; then + echo "Not processing subset $n as already done (delete $dir/.done.$n if not)"; + else + sdata2=$data/split$nj/$n/split$sub_split; + if [ ! 
-d $sdata2 ] || [ $sdata2 -ot $sdata/$n/feats.scp ]; then + split_data.sh --per-utt $sdata/$n $sub_split || exit 1; + fi + mkdir -p $dir/log/$n + mkdir -p $dir/part + feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g` + spkvecs_opt_subset=`echo $spkvecs_opt | sed "s/JOB/$n/g"` + gselect_opt_subset=`echo $gselect_opt | sed "s/JOB/$n/g"` + $cmd JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \ + sgmm2-latgen-faster $spkvecs_opt_subset "$gselect_opt_subset" \ + --beam=$beam --lattice-beam=$lattice_beam \ + --acoustic-scale=$acwt --max-mem=$max_mem --max-active=$max_active \ + --word-symbol-table=$lang/words.txt $alidir/final.mdl \ + $dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1; + echo Merging archives for data subset $n + rm $dir/.error 2>/dev/null; + for k in `seq $sub_split`; do + gunzip -c $dir/lat.$n.$k.gz || touch $dir/.error; + done | gzip -c > $dir/lat.$n.gz || touch $dir/.error; + [ -f $dir/.error ] && echo Merging lattices for subset $n failed && exit 1; + rm $dir/lat.$n.*.gz + touch $dir/.done.$n + fi + done +fi + + +echo "$0: done generating denominator lattices with SGMMs." diff --git a/egs/kaldi-vystadial-recipe/s5/steps/make_fbank.sh b/egs/kaldi-vystadial-recipe/s5/steps/make_fbank.sh new file mode 100755 index 00000000000..d482e4737ae --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/make_fbank.sh @@ -0,0 +1,111 @@ +#!/bin/bash + +# Copyright 2012 Karel Vesely Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 +# To be run from .. (one directory up from here) +# see ../run.sh for example + +# Begin configuration section. +nj=4 +cmd=run.pl +fbank_config=conf/fbank.conf +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "usage: make_fbank.sh [options] "; + echo "options: " + echo " --fbank-config # config passed to compute-fbank-feats " + echo " --nj # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + exit 1; +fi + +data=$1 +logdir=$2 +fbankdir=$3 + + +# make $fbankdir an absolute pathname. +fbankdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $fbankdir ${PWD}` + +# use "name" as part of name of the archive. +name=`basename $data` + +mkdir -p $fbankdir || exit 1; +mkdir -p $logdir || exit 1; + +scp=$data/wav.scp + +required="$scp $fbank_config" + +for f in $required; do + if [ ! -f $f ]; then + echo "make_fbank.sh: no such file $f" + exit 1; + fi +done + +# note: in general, the double-parenthesis construct in bash "((" is "C-style +# syntax" where we can get rid of the $ for variable names, and omit spaces. +# The "for" loop in this style is a special construct. + + +if [ -f $data/segments ]; then + echo "$0 [info]: segments file exists: using that." + split_segments="" + for ((n=1; n<=nj; n++)); do + split_segments="$split_segments $logdir/segments.$n" + done + + utils/split_scp.pl $data/segments $split_segments || exit 1; + rm $logdir/.error 2>/dev/null + + $cmd JOB=1:$nj $logdir/make_fbank.JOB.log \ + extract-segments scp:$scp $logdir/segments.JOB ark:- \| \ + compute-fbank-feats --verbose=2 --config=$fbank_config ark:- \ + ark,scp:$fbankdir/raw_fbank_$name.JOB.ark,$fbankdir/raw_fbank_$name.JOB.scp \ + || exit 1; + +else + echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance." 
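+ # wav.scp is split into nj pieces and each job writes an archive plus a
+ # matching scp index (the "ark,scp:..." output below); the per-job scp
+ # files are concatenated into $data/feats.scp at the end of the script.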
+ split_scps=""
+ for ((n=1; n<=nj; n++)); do
+ split_scps="$split_scps $logdir/wav.$n.scp"
+ done
+
+ utils/split_scp.pl $scp $split_scps || exit 1;
+
+ $cmd JOB=1:$nj $logdir/make_fbank.JOB.log \
+ compute-fbank-feats --verbose=2 --config=$fbank_config scp:$logdir/wav.JOB.scp \
+ ark,scp:$fbankdir/raw_fbank_$name.JOB.ark,$fbankdir/raw_fbank_$name.JOB.scp \
+ || exit 1;
+
+fi
+
+
+if [ -f $logdir/.error.$name ]; then
+ echo "Error producing fbank features for $name:"
+ tail $logdir/make_fbank.*.log
+ exit 1;
+fi
+
+# concatenate the .scp files together.
+for ((n=1; n<=nj; n++)); do
+ cat $fbankdir/raw_fbank_$name.$n.scp || exit 1;
+done > $data/feats.scp
+
+rm $logdir/wav.*.scp $logdir/segments.* 2>/dev/null
+
+nf=`cat $data/feats.scp | wc -l`
+nu=`cat $data/utt2spk | wc -l`
+if [ $nf -ne $nu ]; then
+ echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
+ echo "consider using utils/fix_data_dir.sh $data"
+fi
+
+echo "Succeeded creating filterbank features for $name"
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/make_mfcc.sh b/egs/kaldi-vystadial-recipe/s5/steps/make_mfcc.sh
new file mode 100755
index 00000000000..5951bf96fc0
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/make_mfcc.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
+# Apache 2.0
+# To be run from .. (one directory up from here)
+# see ../run.sh for example
+
+# Begin configuration section.
+nj=4
+cmd=run.pl
+mfcc_config=conf/mfcc.conf
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+if [ $# != 3 ]; then
+ echo "usage: make_mfcc.sh [options] <data-dir> <log-dir> <path-to-mfccdir>";
+ echo "options: "
+ echo " --mfcc-config <config-file> # config passed to compute-mfcc-feats "
+ echo " --nj <nj> # number of parallel jobs"
+ echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+ exit 1;
+fi
+
+data=$1
+logdir=$2
+mfccdir=$3
+
+
+# make $mfccdir an absolute pathname.
+mfccdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $mfccdir ${PWD}`
+
+# use "name" as part of name of the archive.
+name=`basename $data`
+
+mkdir -p $mfccdir || exit 1;
+mkdir -p $logdir || exit 1;
+
+scp=$data/wav.scp
+
+required="$scp $mfcc_config"
+
+for f in $required; do
+ if [ ! -f $f ]; then
+ echo "make_mfcc.sh: no such file $f"
+ exit 1;
+ fi
+done
+
+# note: in general, the double-parenthesis construct in bash "((" is "C-style
+# syntax" where we can get rid of the $ for variable names, and omit spaces.
+# The "for" loop in this style is a special construct.
+
+
+if [ -f $data/segments ]; then
+ echo "$0 [info]: segments file exists: using that."
+ split_segments=""
+ for ((n=1; n<=nj; n++)); do
+ split_segments="$split_segments $logdir/segments.$n"
+ done
+
+ utils/split_scp.pl $data/segments $split_segments || exit 1;
+ rm $logdir/.error 2>/dev/null
+
+ $cmd JOB=1:$nj $logdir/make_mfcc.JOB.log \
+ extract-segments scp:$scp $logdir/segments.JOB ark:- \| \
+ compute-mfcc-feats --verbose=2 --config=$mfcc_config ark:- \
+ ark,scp:$mfccdir/raw_mfcc_$name.JOB.ark,$mfccdir/raw_mfcc_$name.JOB.scp \
+ || exit 1;
+
+else
+ echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance."
+ split_scps=""
+ for ((n=1; n<=nj; n++)); do
+ split_scps="$split_scps $logdir/wav.$n.scp"
+ done
+
+ utils/split_scp.pl $scp $split_scps || exit 1;
+
+ $cmd JOB=1:$nj $logdir/make_mfcc.JOB.log \
+ compute-mfcc-feats --verbose=2 --config=$mfcc_config scp:$logdir/wav.JOB.scp \
+ ark,scp:$mfccdir/raw_mfcc_$name.JOB.ark,$mfccdir/raw_mfcc_$name.JOB.scp \
+ || exit 1;
+
+fi
+
+
+if [ -f $logdir/.error.$name ]; then
+ echo "Error producing mfcc features for $name:"
+ tail $logdir/make_mfcc.*.log
+ exit 1;
+fi
+
+# concatenate the .scp files together.
+for ((n=1; n<=nj; n++)); do
+ cat $mfccdir/raw_mfcc_$name.$n.scp || exit 1;
+done > $data/feats.scp
+
+rm $logdir/wav.*.scp $logdir/segments.* 2>/dev/null
+
+nf=`cat $data/feats.scp | wc -l`
+nu=`cat $data/utt2spk | wc -l`
+if [ $nf -ne $nu ]; then
+ echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
+ echo "consider using utils/fix_data_dir.sh $data"
+fi
+
+echo "Succeeded creating MFCC features for $name"
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/make_plp.sh b/egs/kaldi-vystadial-recipe/s5/steps/make_plp.sh
new file mode 100755
index 00000000000..0e58e9aa058
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/make_plp.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
+# Apache 2.0
+# To be run from .. (one directory up from here)
+# see ../run.sh for example
+
+# Begin configuration section.
+nj=4
+cmd=run.pl
+plp_config=conf/plp.conf
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+if [ $# != 3 ]; then
+ echo "usage: make_plp.sh [options] <data-dir> <log-dir> <path-to-plpdir>";
+ echo "options: "
+ echo " --plp-config <config-file> # config passed to compute-plp-feats "
+ echo " --nj <nj> # number of parallel jobs"
+ echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+ exit 1;
+fi
+
+data=$1
+logdir=$2
+plpdir=$3
+
+
+# make $plpdir an absolute pathname.
+plpdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $plpdir ${PWD}`
+
+# use "name" as part of name of the archive.
+name=`basename $data`
+
+mkdir -p $plpdir || exit 1;
+mkdir -p $logdir || exit 1;
+
+scp=$data/wav.scp
+
+required="$scp $plp_config"
+
+for f in $required; do
+ if [ ! -f $f ]; then
+ echo "make_plp.sh: no such file $f"
+ exit 1;
+ fi
+done
+
+# note: in general, the double-parenthesis construct in bash "((" is "C-style
+# syntax" where we can get rid of the $ for variable names, and omit spaces.
+# The "for" loop in this style is a special construct.
+
+
+if [ -f $data/segments ]; then
+ echo "$0 [info]: segments file exists: using that."
+ split_segments=""
+ for ((n=1; n<=nj; n++)); do
+ split_segments="$split_segments $logdir/segments.$n"
+ done
+
+ utils/split_scp.pl $data/segments $split_segments || exit 1;
+ rm $logdir/.error 2>/dev/null
+
+ $cmd JOB=1:$nj $logdir/make_plp.JOB.log \
+ extract-segments scp:$scp $logdir/segments.JOB ark:- \| \
+ compute-plp-feats --verbose=2 --config=$plp_config ark:- \
+ ark,scp:$plpdir/raw_plp_$name.JOB.ark,$plpdir/raw_plp_$name.JOB.scp \
+ || exit 1;
+
+else
+ echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance."
+ split_scps=""
+ for ((n=1; n<=nj; n++)); do
+ split_scps="$split_scps $logdir/wav.$n.scp"
+ done
+
+ utils/split_scp.pl $scp $split_scps || exit 1;
+
+ $cmd JOB=1:$nj $logdir/make_plp.JOB.log \
+ compute-plp-feats --verbose=2 --config=$plp_config scp:$logdir/wav.JOB.scp \
+ ark,scp:$plpdir/raw_plp_$name.JOB.ark,$plpdir/raw_plp_$name.JOB.scp \
+ || exit 1;
+
+fi
+
+
+if [ -f $logdir/.error.$name ]; then
+ echo "Error producing plp features for $name:"
+ tail $logdir/make_plp.*.log
+ exit 1;
+fi
+
+# Concatenate the per-job .scp files together: write them to stdout and let
+# the single redirection on "done" collect them.
+for ((n=1; n<=nj; n++)); do
+ cat $plpdir/raw_plp_$name.$n.scp || exit 1;
+done > $data/feats.scp
+
+rm $logdir/wav.*.scp $logdir/segments.* 2>/dev/null
+
+nf=`cat $data/feats.scp | wc -l`
+nu=`cat $data/utt2spk | wc -l`
+if [ $nf -ne $nu ]; then
+ echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
+ echo "consider using utils/fix_data_dir.sh $data"
+fi
+
+echo "Succeeded creating PLP features for $name"
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/mixup.sh b/egs/kaldi-vystadial-recipe/s5/steps/mixup.sh
new file mode 100755
index 00000000000..f22d51244ca
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/mixup.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
+
+# mix up (or down); do 3 iters of model training; realign; then do two more
+# iterations of model training.
+
+# Begin configuration section.
+cmd=run.pl
+beam=10
+retry_beam=40
+boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment
+scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
+num_iters=5
+realign_iters=3 # Space-separated list of iterations to realign on.
+stage=0
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+[ -f path.sh ] && . ./path.sh;
+. parse_options.sh || exit 1;
+
+if [ $# != 5 ]; then
+ echo "Usage: steps/mixup.sh <num-gauss> <data-dir> <lang-dir> <old-exp-dir> <exp-dir>"
+ echo " e.g.: steps/mixup.sh 20000 data/train_si84 data/lang exp/tri3b exp/tri3b_20k"
+ echo "main options (for others, see top of script file)"
+ echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+ echo " --config <config-file> # config containing options"
+ echo " --stage <stage> # stage to do partial re-run from."
+ exit 1;
+fi
+
+numgauss=$1
+data=$2
+lang=$3
+srcdir=$4
+dir=$5
+
+for f in $data/feats.scp $srcdir/final.mdl $srcdir/final.mat; do
+ [ ! -f $f ] && echo "mixup.sh: no such file $f" && exit 1;
+done
+
+nj=`cat $srcdir/num_jobs` || exit 1;
+sdata=$data/split$nj;
+splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
+
+mkdir -p $dir/log
+cp $srcdir/splice_opts $dir 2>/dev/null
+echo $nj > $dir/num_jobs
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+
+cp $srcdir/tree $dir
+
+
+## Set up features.
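+# The presence of final.mat marks an LDA+MLLT (or LDA+MLLT+SAT) system;
+# otherwise delta features are assumed. $sifeats below are the
+# speaker-independent features; fMLLR transforms from $srcdir are applied
+# on top of them if they exist.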
+if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "$0: feature type is $feat_type"
+
+case $feat_type in
+ delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
+ lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
+ cp $srcdir/final.mat $dir
+ ;;
+ *) echo "Invalid feature type $feat_type" && exit 1;
+esac
+if [ -f $srcdir/trans.1 ]; then
+ echo Using transforms from $srcdir;
+ rm $dir/trans.* 2>/dev/null
+ ln.pl $srcdir/trans.* $dir # Link those transforms to current directory.
+ feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |"
+else
+ feats="$sifeats"
+fi
+## Done setting up features.
+
+rm $dir/fsts.*.gz 2>/dev/null
+ln.pl $srcdir/fsts.*.gz $dir # Link training-graph FSTs to current directory.
+
+## Mix up old model
+if [ $stage -le 0 ]; then
+ echo Mixing up old model to $numgauss Gaussians
+# Note: this script also works for mixing down.
+ $cmd $dir/log/mixup.log \
+ gmm-mixup --mix-up=$numgauss --mix-down=$numgauss \
+ $srcdir/final.mdl $srcdir/final.occs $dir/1.mdl || exit 1;
+fi
+## Done.
+
+cur_alidir=$srcdir # dir to find alignments.
+[ -z "$realign_iters" ] && ln.pl $srcdir/ali.*.gz $dir; # link alignments, if
+ # we won't be generating them.
+
+x=1
+while [ $x -le $num_iters ]; do
+ echo "$0: iteration $x"
+ if echo $realign_iters | grep -w $x >/dev/null; then
+ if [ $stage -le $x ]; then
+ echo "$0: realigning data"
+ mdl="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $dir/$x.mdl - |"
+ $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \
+ gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl" \
+ "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \
+ "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
+ fi
+ cur_alidir=$dir
+ fi
+ if [ $stage -le $x ]; then
+ echo "$0: accumulating statistics"
+ $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \
+ gmm-acc-stats-ali $dir/$x.mdl "$feats" \
+ "ark,s,cs:gunzip -c $cur_alidir/ali.JOB.gz|" $dir/$x.JOB.acc || exit 1;
+ echo "$0: re-estimating model"
+ [ "`ls $dir/$x.*.acc | wc -w`" -ne $nj ] && echo "$0: wrong #accs" && exit 1;
+ $cmd $dir/log/update.$x.log \
+ gmm-est --write-occs=$dir/$[$x+1].occs $dir/$x.mdl \
+ "gmm-sum-accs - $dir/$x.*.acc |" $dir/$[$x+1].mdl || exit 1;
+ rm $dir/$x.mdl $dir/$x.*.acc
+ rm $dir/$x.occs 2>/dev/null
+ fi
+ x=$[$x+1]
+done
+
+rm $dir/final.mdl $dir/final.occs 2>/dev/null
+ln -s $x.mdl $dir/final.mdl
+ln -s $x.occs $dir/final.occs
+
+if [ -f $dir/trans.1 ]; then
+ echo "$0: accumulating stats for alignment model."
+ $cmd JOB=1:$nj $dir/log/acc_alimdl.JOB.log \
+ ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \
+ gmm-acc-stats-twofeats $dir/$x.mdl "$feats" "$sifeats" \
+ ark,s,cs:- $dir/$x.JOB.acc || exit 1;
+ [ "`ls $dir/$x.*.acc | wc -w`" -ne $nj ] && echo "$0: wrong #accs" && exit 1;
+ echo "$0: Re-estimating alignment model."
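+ # The alignment model is estimated on top of the speaker-independent
+ # features ($sifeats, via gmm-acc-stats-twofeats above), so it can later
+ # be used to get first-pass alignments for data that has no fMLLR
+ # transforms yet.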
+ $cmd $dir/log/est_alimdl.log \
+ gmm-est --write-occs=$dir/final.occs --remove-low-count-gaussians=false $dir/$x.mdl \
+ "gmm-sum-accs - $dir/$x.*.acc|" $dir/$x.alimdl || exit 1;
+ rm $dir/$x.*.acc
+ rm $dir/final.alimdl 2>/dev/null
+ ln -s $x.alimdl $dir/final.alimdl
+fi
+
+utils/summarize_warnings.pl $dir/log
+
+echo Done
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/rnnlmrescore.sh b/egs/kaldi-vystadial-recipe/s5/steps/rnnlmrescore.sh
new file mode 100755
index 00000000000..e204e1acd65
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/rnnlmrescore.sh
@@ -0,0 +1,176 @@
+#!/bin/bash
+
+
+# Begin configuration section.
+N=10
+inv_acwt=12
+cmd=run.pl
+use_phi=false # This is kind of an obscure option. If true, we'll remove the old
+ # LM weights (times 1-RNN_scale) using a phi (failure) matcher, which is
+ # appropriate if the old LM weights were added in this way, e.g. by
+ # lmrescore.sh. Otherwise we'll use normal composition, which is appropriate
+ # if the lattices came directly from decoding. This won't actually make much
+ # difference (if any) to WER, it's more so we know we are doing the right thing.
+test=false # Activate a testing option.
+stage=1 # Stage of this script, for partial reruns.
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh
+. utils/parse_options.sh
+
+
+if [ $# != 6 ]; then
+ echo "Do language model rescoring of lattices (partially remove old LM, add new LM)"
+ echo "This version applies an RNNLM and mixes it with the LM scores"
+ echo "previously in the lattices, controlled by the first parameter (rnnlm-weight)."
+ echo ""
+ echo "Usage: utils/rnnlmrescore.sh <rnn-weight> <old-lang-dir> <rnn-dir> <data-dir> <input-decode-dir> <output-decode-dir>"
+ echo "Main options:"
+ echo " --inv-acwt <inv-acwt> # default 12. e.g. --inv-acwt 17. Equivalent to LM scale to use."
+ echo " # for N-best list generation... note, we'll score at different acwt's"
+ echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+ echo " --use-phi (true|false) # Should be set to true if the source lattices were created"
+ echo " # by lmrescore.sh, false if they came from decoding."
+ echo " --N <N> # Value of N in N-best rescoring (default: 10)"
+ exit 1;
+fi
+
+
+
+rnnweight=$1
+oldlang=$2
+rnndir=$3
+data=$4
+indir=$5
+dir=$6
+
+
+acwt=`perl -e "print (1.0/$inv_acwt);"` # Note: we'll actually produce lattices
+ # that will be scored at a range of acoustic weights. This acwt should be close
+ # to the final one we'll pick, though, for best performance (it controls the
+ # N-best list generation).
+
+for f in $oldlang/G.fst $rnndir/rnnlm $data/feats.scp $indir/lat.1.gz; do
+ [ ! -f $f ] && echo "$0: expected file $f to exist." && exit 1;
+done
+
+nj=`cat $indir/num_jobs` || exit 1;
+oldlm=$oldlang/G.fst
+adir=$dir/archives
+
+mkdir -p $dir;
+phi=`grep -w '#0' $oldlang/words.txt | awk '{print $2}'`
+
+rm $dir/.error 2>/dev/null
+mkdir -p $dir/log
+
+# First convert lattice to N-best. Be careful because this
+# will be quite sensitive to the acoustic scale; this should be close
+# to the one we'll finally get the best WERs with.
+# Note: the lattice-rmali part here is just because we don't
+# need the alignments for what we're doing.
+if [ $stage -le 1 ]; then
+ echo "$0: converting lattices to N-best."
+ $cmd JOB=1:$nj $dir/log/lat2nbest.JOB.log \
+ lattice-to-nbest --acoustic-scale=$acwt --n=$N \
+ "ark:gunzip -c $indir/lat.JOB.gz|" ark:- \| \
+ lattice-rmali ark:- "ark:|gzip -c >$dir/nbest1.JOB.gz" || exit 1;
+fi
+
+# next remove part of the old LM probs.
+if $use_phi; then
+ if [ $stage -le 2 ]; then
+ echo "$0: removing old LM scores."
+ # Use the phi-matcher style of composition.. this is appropriate + # if the old LM scores were added e.g. by lmrescore.sh, using + # phi-matcher composition. + $cmd JOB=1:$nj $dir/log/remove_old.JOB.log \ + lattice-compose --phi-label=$phi "ark:gunzip -c $dir/nbest1.JOB.gz|" $oldlm \ + "ark:|gzip -c >$dir/nbest2.JOB.gz" || exit 1; + fi +else + if [ $stage -le 2 ]; then + echo "$0: removing old LM scores." + # this approach chooses the best path through the old LM FST, while + # subtracting the old scores. If the lattices came straight from decoding, + # this is what we want. + $cmd JOB=1:$nj $dir/log/remove_old.JOB.log \ + lattice-scale --acoustic-scale=-1 --lm-scale=-1 "ark:gunzip -c $dir/nbest1.JOB.gz|" ark:- \| \ + lattice-compose ark:- "fstproject --project_output=true $oldlm |" ark:- \| \ + lattice-1best ark:- ark:- \| \ + lattice-scale --acoustic-scale=-1 --lm-scale=-1 ark:- "ark:|gzip -c >$dir/nbest2.JOB.gz" \ + || exit 1; + fi +fi + +if [ $stage -le 3 ]; then +# Decompose the n-best lists into 4 archives. + echo "$0: creating separate-archive form of N-best lists." + $cmd JOB=1:$nj $dir/log/make_new_archives.JOB.log \ + mkdir -p $adir.JOB '&&' \ + nbest-to-linear "ark:gunzip -c $dir/nbest2.JOB.gz|" \ + "ark,t:$adir.JOB/ali" "ark,t:$adir.JOB/words" \ + "ark,t:$adir.JOB/lmwt.nolm" "ark,t:$adir.JOB/acwt" || exit 1; +fi + +if [ $stage -le 4 ]; then + echo "$0: doing the same with old LM scores." +# Create an archive with the LM scores before we +# removed the LM probs (will help us do interpolation). +$cmd JOB=1:$nj $dir/log/make_old_archives.JOB.log \ + nbest-to-linear "ark:gunzip -c $dir/nbest1.JOB.gz|" "ark:/dev/null" \ + "ark:/dev/null" "ark,t:$adir.JOB/lmwt.withlm" "ark:/dev/null" || exit 1; +fi + +if $test; then # This branch is a sanity check that at the acwt where we generated + # the N-best list, we get the same WER. + echo "$0 [testing branch]: generating lattices without changing scores." + $cmd JOB=1:$nj $dir/log/test.JOB.log \ + linear-to-nbest "ark:$adir.JOB/ali" "ark:$adir.JOB/words" "ark:$adir.JOB/lmwt.withlm" \ + "ark:$adir.JOB/acwt" ark:- \| \ + nbest-to-lattice ark:- "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1; + exit 0; +fi + +if [ $stage -le 5 ]; then + echo "$0: Creating archives with text-form of words, and LM scores without graph scores." + # Do some small tasks; for these we don't use the queue, it will only slow us down. + for n in `seq $nj`; do + utils/int2sym.pl -f 2- $oldlang/words.txt < $adir.$n/words > $adir.$n/words_text || exit 1; + mkdir -p $adir.$n/temp + paste $adir.$n/lmwt.nolm $adir.$n/lmwt.withlm | awk '{print $1, ($4-$2);}' > \ + $adir.$n/lmwt.lmonly || exit 1; + done +fi +if [ $stage -le 6 ]; then + echo "$0: invoking rnnlm_compute_scores.sh which calls rnnlm, to get RNN LM scores." + $cmd JOB=1:$nj $dir/log/rnnlm_compute_scores.JOB.log \ + utils/rnnlm_compute_scores.sh $rnndir $adir.JOB/temp $adir.JOB/words_text $adir.JOB/lmwt.rnn \ + || exit 1; +fi +if [ $stage -le 7 ]; then + echo "$0: reconstructing total LM+graph scores including interpolation of RNNLM and old LM scores." + for n in `seq $nj`; do + paste $adir.$n/lmwt.nolm $adir.$n/lmwt.lmonly $adir.$n/lmwt.rnn | awk -v rnnweight=$rnnweight \ + '{ key=$1; graphscore=$2; lmscore=$4; rnnscore=$6; + score = graphscore+(rnnweight*rnnscore)+((1-rnnweight)*lmscore); + print $1,score; } ' > $adir.$n/lmwt.interp.$rnnweight || exit 1; + done +fi + +if [ $stage -le 8 ]; then + echo "$0: reconstructing archives back into lattices." 
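+ # linear-to-nbest reassembles each N-best entry from the four archives
+ # (alignments, words, interpolated LM+graph scores, acoustic scores), and
+ # nbest-to-lattice merges the entries for each utterance back into a
+ # single lattice that can be scored as usual.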
+ $cmd JOB=1:$nj $dir/log/reconstruct_lattice.JOB.log \
+ linear-to-nbest "ark:$adir.JOB/ali" "ark:$adir.JOB/words" \
+ "ark:$adir.JOB/lmwt.interp.$rnnweight" "ark:$adir.JOB/acwt" ark:- \| \
+ nbest-to-lattice ark:- "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
+fi
+
+[ ! -x local/score.sh ] && \
+ echo "Not scoring because local/score.sh does not exist or is not executable." && exit 1;
+local/score.sh --cmd "$cmd" $data $oldlang $dir
+
+exit 0;
+
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/train_deltas.sh b/egs/kaldi-vystadial-recipe/s5/steps/train_deltas.sh
new file mode 100755
index 00000000000..daa23acddec
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/train_deltas.sh
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
+# Apache 2.0
+
+# Begin configuration.
+stage=-4 # This allows restarting partway through, when something went wrong.
+config=
+cmd=run.pl
+scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
+realign_iters="10 20 30";
+num_iters=35 # Number of iterations of training
+max_iter_inc=25 # Last iter to increase #Gauss on.
+beam=10
+retry_beam=40
+boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment
+power=0.2 # Exponent for number of gaussians according to occurrence counts
+cluster_thresh=-1 # for build-tree: controls the final bottom-up clustering of leaves
+# End configuration.
+
+echo "$0 $@" # Print the command line for logging
+
+[ -f path.sh ] && . ./path.sh;
+. parse_options.sh || exit 1;
+
+if [ $# != 6 ]; then
+ echo "Usage: steps/train_deltas.sh <num-leaves> <tot-gauss> <data-dir> <lang-dir> <alignment-dir> <exp-dir>"
+ echo "e.g.: steps/train_deltas.sh 2000 10000 data/train_si84_half data/lang exp/mono_ali exp/tri1"
+ echo "main options (for others, see top of script file)"
+ echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+ echo " --config <config-file> # config containing options"
+ echo " --stage <stage> # stage to do partial re-run from."
+ exit 1;
+fi
+
+numleaves=$1
+totgauss=$2
+data=$3
+lang=$4
+alidir=$5
+dir=$6
+
+for f in $alidir/final.mdl $alidir/ali.1.gz $data/feats.scp $lang/phones.txt; do
+ [ ! -f $f ] && echo "train_deltas.sh: no such file $f" && exit 1;
+done
+
+numgauss=$numleaves
+incgauss=$[($totgauss-$numgauss)/$max_iter_inc] # per-iter increment for #Gauss
+oov=`cat $lang/oov.int` || exit 1;
+ciphonelist=`cat $lang/phones/context_indep.csl` || exit 1;
+nj=`cat $alidir/num_jobs` || exit 1;
+mkdir -p $dir/log
+echo $nj > $dir/num_jobs
+
+sdata=$data/split$nj;
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+
+feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |"
+
+rm $dir/.error 2>/dev/null
+
+if [ $stage -le -3 ]; then
+ echo "$0: accumulating tree stats"
+ $cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \
+ acc-tree-stats --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \
+ "ark:gunzip -c $alidir/ali.JOB.gz|" $dir/JOB.treeacc || exit 1;
+ sum-tree-stats $dir/treeacc $dir/*.treeacc 2>$dir/log/sum_tree_acc.log || exit 1;
+ rm $dir/*.treeacc
+fi
+
+if [ $stage -le -2 ]; then
+ echo "$0: getting questions for tree-building, via clustering"
+ # preparing questions, roots file...
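+ # cluster-phones derives the "questions" (sets of phones) automatically by
+ # clustering the tree statistics; the hand-specified extra_questions.int
+ # from the lang directory is appended before compile-questions turns them
+ # into the form build-tree expects.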
+ cluster-phones $dir/treeacc $lang/phones/sets.int $dir/questions.int 2> $dir/log/questions.log || exit 1; + cat $lang/phones/extra_questions.int >> $dir/questions.int + compile-questions $lang/topo $dir/questions.int $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1; + + echo "$0: building the tree" + $cmd $dir/log/build_tree.log \ + build-tree --verbose=1 --max-leaves=$numleaves \ + --cluster-thresh=$cluster_thresh $dir/treeacc $lang/phones/roots.int \ + $dir/questions.qst $lang/topo $dir/tree || exit 1; + + gmm-init-model --write-occs=$dir/1.occs \ + $dir/tree $dir/treeacc $lang/topo $dir/1.mdl 2> $dir/log/init_model.log || exit 1; + grep 'no stats' $dir/log/init_model.log && echo "This is a bad warning."; + + gmm-mixup --mix-up=$numgauss $dir/1.mdl $dir/1.occs $dir/1.mdl 2>$dir/log/mixup.log || exit 1; + rm $dir/treeacc +fi + +if [ $stage -le -1 ]; then + # Convert the alignments. + echo "$0: converting alignments from $alidir to use current tree" + $cmd JOB=1:$nj $dir/log/convert.JOB.log \ + convert-ali $alidir/final.mdl $dir/1.mdl $dir/tree \ + "ark:gunzip -c $alidir/ali.JOB.gz|" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; +fi + +if [ $stage -le 0 ]; then + echo "$0: compiling graphs of transcripts" + $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ + compile-train-graphs $dir/tree $dir/1.mdl $lang/L.fst \ + "ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $data/split$nj/JOB/text |" \ + "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; +fi + +x=1 +while [ $x -lt $num_iters ]; do + echo "$0: training pass $x" + if [ $stage -le $x ]; then + if echo $realign_iters | grep -w $x >/dev/null; then + echo "$0: aligning data" + mdl="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $dir/$x.mdl - |" + $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl" \ + "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \ + "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; + fi + $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \ + gmm-acc-stats-ali $dir/$x.mdl "$feats" \ + "ark,s,cs:gunzip -c $dir/ali.JOB.gz|" $dir/$x.JOB.acc || exit 1; + $cmd $dir/log/update.$x.log \ + gmm-est --mix-up=$numgauss --power=$power \ + --write-occs=$dir/$[$x+1].occs $dir/$x.mdl \ + "gmm-sum-accs - $dir/$x.*.acc |" $dir/$[$x+1].mdl || exit 1; + rm $dir/$x.mdl $dir/$x.*.acc + rm $dir/$x.occs + fi + [ $x -le $max_iter_inc ] && numgauss=$[$numgauss+$incgauss]; + x=$[$x+1]; +done + +rm $dir/final.mdl 2>/dev/null +ln -s $x.mdl $dir/final.mdl +ln -s $x.occs $dir/final.occs + +# Summarize warning messages... +utils/summarize_warnings.pl $dir/log + +echo "$0: Done training system with delta+delta-delta features in $dir" + diff --git a/egs/kaldi-vystadial-recipe/s5/steps/train_diag_ubm.sh b/egs/kaldi-vystadial-recipe/s5/steps/train_diag_ubm.sh new file mode 100755 index 00000000000..e43a9cb5b8b --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/train_diag_ubm.sh @@ -0,0 +1,125 @@ +#!/bin/bash + +# Copyright Johns Hopkins University (Author: Daniel Povey), 2012. +# Apache 2.0. + +# Train a diagonal mixture of Gaussians. This is trained without +# reference to class labels-- except that, optionally, you can down-weight +# silence phones, and alignments are needed for that. +# +# The current use for this is in fMMI training. + +# Begin configuration section. 
+nj=4
+cmd=run.pl
+num_iters=3
+silence_weight=
+stage=-2
+# The value "intermediate" is a number of Gaussians we first obtain by clustering
+# the Gaussians within each state of the model, before clustering down to
+# $num_gauss. This is for efficiency. It's not a very important parameter,
+# as far as I know.
+intermediate=2000
+num_gselect=50 # Number of Gaussian-selection indices to use while training
+ # the model.
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+
+if [ $# != 5 ]; then
+ echo "Usage: steps/train_diag_ubm.sh <num-gauss> <data-dir> <lang-dir> <ali-dir> <exp-dir>"
+ echo " e.g.: steps/train_diag_ubm.sh 400 data/train_si84 data/lang exp/tri2b_ali_si84 exp/ubm3c"
+ echo "Options: "
+ echo " --silence-weight <sil-weight> # if set (e.g. 0.5), use to down-weight silence."
+ echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+ echo " --nj <nj> # number of parallel jobs to run."
+ echo " --num-iters <num-iters> # number of iterations of training (default: $num_iters)"
+ echo " --stage <stage> # stage to do partial re-run from."
+ exit 1;
+fi
+
+num_gauss=$1
+data=$2
+lang=$3
+alidir=$4
+dir=$5
+
+silphonelist=`cat $lang/phones/silence.csl` || exit 1;
+
+sdata=$data/split$nj
+splice_opts=`cat $alidir/splice_opts 2>/dev/null`
+mkdir -p $dir/log
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+echo $nj > $dir/num_jobs
+
+if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "$0: feature type is $feat_type"
+
+case $feat_type in
+ delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
+ lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
+ cp $alidir/final.mat $dir
+ ;;
+ *) echo "Invalid feature type $feat_type" && exit 1;
+esac
+
+if [ -f $alidir/trans.1 ]; then
+ echo Using transforms from $alidir;
+ [ "$nj" -ne "`cat $alidir/num_jobs`" ] && \
+ echo "The number of jobs differs from alignment directory $alidir." && exit 1;
+ feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$alidir/trans.JOB ark:- ark:- |"
+fi
+
+if [ ! -z "$silence_weight" ]; then
+ [ ! -f $alidir/ali.1.gz ] && \
+ echo "You specified weighting for silence but $alidir/ali.1.gz does not exist." && exit 1;
+ [ "$nj" -ne "`cat $alidir/num_jobs`" ] && \
+ echo "You specified silence weight but $alidir has different #jobs." && exit 1;
+ weights="--weights='ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-post ark:- ark:- | weight-silence-post $silence_weight $silphonelist $alidir/final.mdl ark:- ark:- | post-to-weights ark:- ark:- |'"
+else
+ weights=
+fi
+
+# $intermediate should be more than $num_gauss..
+[ $[$num_gauss*2] -gt $intermediate ] && intermediate=$[$num_gauss*2] \
+ && echo "Setting intermediate=$intermediate (it was too small)";
+
+if [ $stage -le -2 ]; then
+ echo "Clustering Gaussians in $alidir/final.mdl"
+ $cmd $dir/log/cluster.log \
+ init-ubm --fullcov-ubm=false --intermediate-num-gauss=$intermediate \
+ --ubm-num-gauss=$num_gauss $alidir/final.mdl $alidir/final.occs $dir/0.dubm || exit 1;
+fi
+
+# Store Gaussian selection indices on disk-- this speeds up the training passes.
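+# In the passes below only the $num_gselect best-matching Gaussians per frame
+# are evaluated exactly; the remaining components contribute negligible
+# posterior mass, so this pruning should barely affect the result while
+# greatly reducing computation.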
+if [ $stage -le -1 ]; then
+ echo Getting Gaussian-selection info
+ $cmd JOB=1:$nj $dir/log/gselect.JOB.log \
+ gmm-gselect --n=$num_gselect $dir/0.dubm "$feats" \
+ "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1;
+fi
+
+for x in `seq 0 $[$num_iters-1]`; do
+ echo "Training pass $x"
+ if [ $stage -le $x ]; then
+ # Accumulate stats.
+ $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \
+ gmm-global-acc-stats $weights "--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" \
+ $dir/$x.dubm "$feats" $dir/$x.JOB.acc || exit 1;
+ if [ $x -lt $[$num_iters-1] ]; then # Don't remove low-count Gaussians till last iter,
+ opt="--remove-low-count-gaussians=false" # or gselect info won't be valid any more.
+ else
+ opt="--remove-low-count-gaussians=true" # set explicitly, so the value from the previous iteration doesn't carry over.
+ fi
+ $cmd $dir/log/update.$x.log \
+ gmm-global-est $opt $dir/$x.dubm "gmm-global-sum-accs - $dir/$x.*.acc|" \
+ $dir/$[$x+1].dubm || exit 1;
+ rm $dir/$x.*.acc $dir/$x.dubm
+ fi
+done
+
+rm $dir/gselect.*.gz
+mv $dir/$num_iters.dubm $dir/final.dubm || exit 1;
+exit 0;
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/train_lda_mllt.sh b/egs/kaldi-vystadial-recipe/s5/steps/train_lda_mllt.sh
new file mode 100755
index 00000000000..7bd283c8658
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/train_lda_mllt.sh
@@ -0,0 +1,191 @@
+#!/bin/bash
+
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
+# Apache 2.0.
+
+# Begin configuration.
+cmd=run.pl
+config=
+stage=-4
+scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
+realign_iters="10 20 30";
+mllt_iters="2 4 6 12";
+num_iters=35 # Number of iterations of training
+max_iter_inc=25 # Last iter to increase #Gauss on.
+dim=40
+beam=10
+retry_beam=40
+boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment
+power=0.2 # Exponent for number of gaussians according to occurrence counts
+randprune=4.0 # This is approximately the ratio by which we will speed up the
+ # LDA and MLLT calculations via randomized pruning.
+splice_opts=
+cluster_thresh=-1 # for build-tree: controls the final bottom-up clustering of leaves
+# End configuration.
+
+echo "$0 $@" # Print the command line for logging
+
+[ -f path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# != 6 ]; then
+ echo "Usage: steps/train_lda_mllt.sh [options] <#leaves> <#gauss> <data> <lang> <alignments> <exp-dir>"
+ echo " e.g.: steps/train_lda_mllt.sh 2500 15000 data/train_si84 data/lang exp/tri1_ali_si84 exp/tri2b"
+ echo "Main options (for others, see top of script file)"
+ echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+ echo " --config <config-file> # config containing options"
+ echo " --stage <stage> # stage to do partial re-run from."
+ exit 1;
+fi
+
+numleaves=$1
+totgauss=$2
+data=$3
+lang=$4
+alidir=$5
+dir=$6
+
+for f in $alidir/final.mdl $alidir/ali.1.gz $data/feats.scp $lang/phones.txt; do
+ [ ! -f $f ] && echo "train_lda_mllt.sh: no such file $f" && exit 1;
+done
+
+numgauss=$numleaves
+incgauss=$[($totgauss-$numgauss)/$max_iter_inc] # per-iter #gauss increment
+oov=`cat $lang/oov.int` || exit 1;
+nj=`cat $alidir/num_jobs` || exit 1;
+silphonelist=`cat $lang/phones/silence.csl` || exit 1;
+ciphonelist=`cat $lang/phones/context_indep.csl` || exit 1;
+
+mkdir -p $dir/log
+echo $nj >$dir/num_jobs
+echo "$splice_opts" >$dir/splice_opts # keep track of frame-splicing options
+ # so that later stages of system building can know what they were.
+ +sdata=$data/split$nj; +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; + + +splicedfeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" +# Note: $feats gets overwritten later in the script. +feats="$splicedfeats transform-feats $dir/0.mat ark:- ark:- |" + + + +if [ $stage -le -4 ]; then + echo "Accumulating LDA statistics." + $cmd JOB=1:$nj $dir/log/lda_acc.JOB.log \ + ali-to-post "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $alidir/final.mdl ark:- ark:- \| \ + acc-lda --rand-prune=$randprune $alidir/final.mdl "$splicedfeats" ark,s,cs:- \ + $dir/lda.JOB.acc || exit 1; + est-lda --write-full-matrix=$dir/full.mat --dim=$dim $dir/0.mat $dir/lda.*.acc \ + 2>$dir/log/lda_est.log || exit 1; + rm $dir/lda.*.acc +fi + +cur_lda_iter=0 + +if [ $stage -le -3 ]; then + echo "Accumulating tree stats" + $cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \ + acc-tree-stats --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \ + "ark:gunzip -c $alidir/ali.JOB.gz|" $dir/JOB.treeacc || exit 1; + [ `ls $dir/*.treeacc | wc -w` -ne "$nj" ] && echo "Wrong #tree-accs" && exit 1; + $cmd $dir/log/sum_tree_acc.log \ + sum-tree-stats $dir/treeacc $dir/*.treeacc || exit 1; + rm $dir/*.treeacc +fi + + +if [ $stage -le -2 ]; then + echo "Getting questions for tree clustering." + # preparing questions, roots file... + cluster-phones $dir/treeacc $lang/phones/sets.int $dir/questions.int 2> $dir/log/questions.log || exit 1; + cat $lang/phones/extra_questions.int >> $dir/questions.int + compile-questions $lang/topo $dir/questions.int $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1; + + echo "Building the tree" + $cmd $dir/log/build_tree.log \ + build-tree --verbose=1 --max-leaves=$numleaves \ + --cluster-thresh=$cluster_thresh $dir/treeacc $lang/phones/roots.int \ + $dir/questions.qst $lang/topo $dir/tree || exit 1; + + gmm-init-model --write-occs=$dir/1.occs \ + $dir/tree $dir/treeacc $lang/topo $dir/1.mdl 2> $dir/log/init_model.log || exit 1; + grep 'no stats' $dir/log/init_model.log && echo "This is a bad warning."; + + # could mix up if we wanted: + # gmm-mixup --mix-up=$numgauss $dir/1.mdl $dir/1.occs $dir/1.mdl 2>$dir/log/mixup.log || exit 1; + rm $dir/treeacc +fi + + +if [ $stage -le -1 ]; then + # Convert the alignments. 
+ echo "Converting alignments from $alidir to use current tree" + $cmd JOB=1:$nj $dir/log/convert.JOB.log \ + convert-ali $alidir/final.mdl $dir/1.mdl $dir/tree \ + "ark:gunzip -c $alidir/ali.JOB.gz|" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; +fi + +if [ $stage -le 0 ]; then + echo "Compiling graphs of transcripts" + $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ + compile-train-graphs $dir/tree $dir/1.mdl $lang/L.fst \ + "ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $data/split$nj/JOB/text |" \ + "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; +fi + + +x=1 +while [ $x -lt $num_iters ]; do + echo Training pass $x + if echo $realign_iters | grep -w $x >/dev/null && [ $stage -le $x ]; then + echo Aligning data + mdl="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $dir/$x.mdl - |" + $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl" \ + "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \ + "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; + fi + if echo $mllt_iters | grep -w $x >/dev/null; then + if [ $stage -le $x ]; then + echo "Estimating MLLT" + $cmd JOB=1:$nj $dir/log/macc.$x.JOB.log \ + ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \ + weight-silence-post 0.0 $silphonelist $dir/$x.mdl ark:- ark:- \| \ + gmm-acc-mllt --rand-prune=$randprune $dir/$x.mdl "$feats" ark:- $dir/$x.JOB.macc \ + || exit 1; + est-mllt $dir/$x.mat.new $dir/$x.*.macc 2> $dir/log/mupdate.$x.log || exit 1; + gmm-transform-means $dir/$x.mat.new $dir/$x.mdl $dir/$x.mdl \ + 2> $dir/log/transform_means.$x.log || exit 1; + compose-transforms --print-args=false $dir/$x.mat.new $dir/$cur_lda_iter.mat $dir/$x.mat || exit 1; + rm $dir/$x.*.macc + fi + feats="$splicedfeats transform-feats $dir/$x.mat ark:- ark:- |" + cur_lda_iter=$x + fi + + if [ $stage -le $x ]; then + $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \ + gmm-acc-stats-ali $dir/$x.mdl "$feats" \ + "ark,s,cs:gunzip -c $dir/ali.JOB.gz|" $dir/$x.JOB.acc || exit 1; + $cmd $dir/log/update.$x.log \ + gmm-est --write-occs=$dir/$[$x+1].occs --mix-up=$numgauss --power=$power \ + $dir/$x.mdl "gmm-sum-accs - $dir/$x.*.acc |" $dir/$[$x+1].mdl || exit 1; + rm $dir/$x.mdl $dir/$x.*.acc $dir/$x.occs + fi + [ $x -le $max_iter_inc ] && numgauss=$[$numgauss+$incgauss]; + x=$[$x+1]; +done + +rm $dir/final.{mdl,mat,occs} 2>/dev/null +ln -s $x.mdl $dir/final.mdl +ln -s $x.occs $dir/final.occs +ln -s $cur_lda_iter.mat $dir/final.mat + +# Summarize warning messages... + +utils/summarize_warnings.pl $dir/log + +echo Done training system with LDA+MLLT features in $dir diff --git a/egs/kaldi-vystadial-recipe/s5/steps/train_mmi.sh b/egs/kaldi-vystadial-recipe/s5/steps/train_mmi.sh new file mode 100755 index 00000000000..b4b976de199 --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/steps/train_mmi.sh @@ -0,0 +1,144 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. + +# MMI training (or optionally boosted MMI, if you give the --boost option). +# 4 iterations (by default) of Extended Baum-Welch update. +# +# For the numerator we have a fixed alignment rather than a lattice-- +# this actually follows from the way lattices are defined in Kaldi, which +# is to have a single path for each word (output-symbol) sequence. + +# Begin configuration section. +cmd=run.pl +num_iters=4 +boost=0.0 +cancel=true # if true, cancel num and den counts on each frame. 
+tau=400
+weight_tau=10
+acwt=0.1
+stage=0
+# End configuration section
+
+echo "$0 $@" # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# -ne 5 ]; then
+ echo "Usage: steps/train_mmi.sh <data> <lang> <ali-dir> <denlat-dir> <exp-dir>"
+ echo " e.g.: steps/train_mmi.sh data/train_si84 data/lang exp/tri2b_ali_si84 exp/tri2b_denlats_si84 exp/tri2b_mmi"
+ echo "Main options (for others, see top of script file)"
+ echo " --boost <boost-weight> # (e.g. 0.1), for boosted MMI. (default 0)"
+ echo " --cancel (true|false) # cancel stats (true by default)"
+ echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+ echo " --config <config-file> # config containing options"
+ echo " --stage <stage> # stage to do partial re-run from."
+ echo " --tau <tau> # tau for i-smooth to last iter (default 400)"
+
+ exit 1;
+fi
+
+data=$1
+lang=$2
+alidir=$3
+denlatdir=$4
+dir=$5
+mkdir -p $dir/log
+
+for f in $data/feats.scp $alidir/{tree,final.mdl,ali.1.gz} $denlatdir/lat.1.gz; do
+ [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
+done
+nj=`cat $alidir/num_jobs` || exit 1;
+[ "$nj" -ne "`cat $denlatdir/num_jobs`" ] && \
+ echo "$alidir and $denlatdir have different num-jobs" && exit 1;
+
+sdata=$data/split$nj
+splice_opts=`cat $alidir/splice_opts 2>/dev/null`
+mkdir -p $dir/log
+cp $alidir/splice_opts $dir 2>/dev/null
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+echo $nj > $dir/num_jobs
+
+cp $alidir/{final.mdl,tree} $dir
+
+silphonelist=`cat $lang/phones/silence.csl` || exit 1;
+
+# Set up features.
+
+if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "$0: feature type is $feat_type"
+
+case $feat_type in
+ delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
+ lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
+ cp $alidir/final.mat $dir
+ ;;
+ *) echo "Invalid feature type $feat_type" && exit 1;
+esac
+
+[ -f $alidir/trans.1 ] && echo Using transforms from $alidir && \
+ feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$alidir/trans.JOB ark:- ark:- |"
+
+lats="ark:gunzip -c $denlatdir/lat.JOB.gz|"
+if [[ "$boost" != "0.0" && "$boost" != 0 ]]; then
+ lats="$lats lattice-boost-ali --b=$boost --silence-phones=$silphonelist $alidir/final.mdl ark:- 'ark,s,cs:gunzip -c $alidir/ali.JOB.gz|' ark:- |"
+fi
+
+
+cur_mdl=$alidir/final.mdl
+x=0
+while [ $x -lt $num_iters ]; do
+ echo "Iteration $x of MMI training"
+ # Note: the num and den stats are accumulated at the same time, so we
+ # can cancel them per frame.
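+ # Pipeline below: gmm-rescore-lattice replaces the lattice acoustic scores
+ # using the current model; lattice-to-post converts the lattice into
+ # per-frame posteriors; sum-post (with --scale1=-1) subtracts the
+ # denominator posteriors from the numerator (alignment) ones;
+ # gmm-acc-stats2 then accumulates the positive and negative stats in a
+ # single pass.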
+ if [ $stage -le $x ]; then
+ $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \
+ gmm-rescore-lattice $cur_mdl "$lats" "$feats" ark:- \| \
+ lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
+ sum-post --merge=$cancel --scale1=-1 \
+ ark:- "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-post ark:- ark:- |" ark:- \| \
+ gmm-acc-stats2 $cur_mdl "$feats" ark,s,cs:- \
+ $dir/num_acc.$x.JOB.acc $dir/den_acc.$x.JOB.acc || exit 1;
+
+ n=`echo $dir/{num,den}_acc.$x.*.acc | wc -w`;
+ [ "$n" -ne $[$nj*2] ] && \
+ echo "Wrong number of MMI accumulators $n versus 2*$nj" && exit 1;
+ $cmd $dir/log/den_acc_sum.$x.log \
+ gmm-sum-accs $dir/den_acc.$x.acc $dir/den_acc.$x.*.acc || exit 1;
+ rm $dir/den_acc.$x.*.acc
+ $cmd $dir/log/num_acc_sum.$x.log \
+ gmm-sum-accs $dir/num_acc.$x.acc $dir/num_acc.$x.*.acc || exit 1;
+ rm $dir/num_acc.$x.*.acc
+
+ # note: this tau value is for smoothing towards model parameters, as in
+ # the Boosted MMI paper, not towards the ML stats as in the earlier
+ # work on discriminative training (e.g. my thesis).
+ # You could use gmm-ismooth-stats to smooth to the ML stats, if you had
+ # them available [here they're not available if cancel=true].
+
+ $cmd $dir/log/update.$x.log \
+ gmm-est-gaussians-ebw --tau=$tau $cur_mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc - \| \
+ gmm-est-weights-ebw --weight-tau=$weight_tau - $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl || exit 1;
+ rm $dir/{den,num}_acc.$x.acc
+ fi
+ cur_mdl=$dir/$[$x+1].mdl
+
+ # Some diagnostics: the objective function progress and auxiliary-function
+ # improvement.
+
+ tail -n 50 $dir/log/acc.$x.*.log | perl -e '$acwt=shift @ARGV; while(<STDIN>) { if(m/gmm-acc-stats2.+Overall weighted acoustic likelihood per frame was (\S+) over (\S+) frames/) { $tot_aclike += $1*$2; $tot_frames1 += $2; } if(m|lattice-to-post.+Overall average log-like/frame is (\S+) over (\S+) frames. Average acoustic like/frame is (\S+)|) { $tot_den_lat_like += $1*$2; $tot_frames2 += $2; $tot_den_aclike += $3*$2; } } if (abs($tot_frames1 - $tot_frames2) > 0.01*($tot_frames1 + $tot_frames2)) { print STDERR "Frame-counts disagree $tot_frames1 versus $tot_frames2\n"; } $tot_den_lat_like /= $tot_frames2; $tot_den_aclike /= $tot_frames2; $tot_aclike *= ($acwt / $tot_frames1); $num_like = $tot_aclike + $tot_den_aclike; $per_frame_objf = $num_like - $tot_den_lat_like; print "$per_frame_objf $tot_frames1\n"; ' $acwt > $dir/tmpf
+ objf=`cat $dir/tmpf | awk '{print $1}'`;
+ nf=`cat $dir/tmpf | awk '{print $2}'`;
+ rm $dir/tmpf
+ impr=`grep -w Overall $dir/log/update.$x.log | awk '{x += $10*$12;} END{print x;}'`
+ impr=`perl -e "print ($impr*$acwt/$nf);"` # We multiply by acwt, and divide by $nf which is the "real" number of frames.
+ echo "Iteration $x: objf was $objf, MMI auxf change was $impr" | tee $dir/objf.$x.log
+ x=$[$x+1]
+done
+
+echo "MMI training finished"
+
+rm $dir/final.mdl 2>/dev/null
+ln -s $x.mdl $dir/final.mdl
+
+exit 0;
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/train_mmi_fmmi.sh b/egs/kaldi-vystadial-recipe/s5/steps/train_mmi_fmmi.sh
new file mode 100755
index 00000000000..b78ffa98f78
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/train_mmi_fmmi.sh
@@ -0,0 +1,221 @@
+#!/bin/bash
+# by Johns Hopkins University (Author: Daniel Povey), 2012. Apache 2.0.
+
+# This script does MMI discriminative training, including
+# feature-space (like fMPE) and model-space components.
+# If you give the --boost option it does "boosted MMI" (BMMI).
+# On the iterations of training it alternates feature-space
+# and model-space training. We do 8 iterations in total--
+# 4 of each type ((B)MMI, f(B)MMI)
+
+
+# Begin configuration section.
+cmd=run.pl
+schedule="fmmi fmmi fmmi fmmi mmi mmi mmi mmi"
+boost=0.0
+learning_rate=0.01
+tau=400 # For model. Note: we're doing smoothing "to the previous iteration"
+ # (--smooth-from-model), so 400 seems like a more sensible default
+ # than 100. We smooth to the previous iteration because now
+ # we are discriminatively training the features (and not using
+ # the indirect differential), so it seems like it wouldn't make
+ # sense to use any element of ML.
+weight_tau=10 # for model weights.
+cancel=true # if true, cancel num and den counts as described in
+ # the boosted MMI paper.
+indirect=true # if true, use indirect derivative.
+acwt=0.1
+stage=-1
+ngselect=2; # Just the 2 top Gaussians. Beyond that, adding more Gaussians
+ # wouldn't make much difference since the posteriors would be very small.
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh;
+. parse_options.sh || exit 1;
+
+
+if [ $# != 6 ]; then
+ echo "Usage: steps/train_mmi_fmmi.sh <data> <lang> <ali-dir> <dubm-dir> <denlat-dir> <exp-dir>"
+ echo " e.g.: steps/train_mmi_fmmi.sh data/train_si84 data/lang exp/tri2b_ali_si84 exp/ubm2d exp/tri2b_denlats_si84 exp/tri2b_fmmi"
+ echo "Main options (for others, see top of script file)"
+ echo " --boost <boost-weight> # (e.g. 0.1) ... boosted MMI."
+ echo " --cancel (true|false) # cancel stats (true by default)"
+ echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+ echo " --config <config-file> # config containing options"
+ echo " --stage <stage> # stage to do partial re-run from."
+ echo " --tau <tau> # tau for i-smooth to last iter (default 400)"
+ echo " --learning-rate <learning-rate> # learning rate for fMMI, default 0.01"
+ echo " --schedule <schedule> # learning schedule: by default,"
+ echo " # \"fmmi fmmi fmmi fmmi mmi mmi mmi mmi\""
+ exit 1;
+fi
+
+
+data=$1
+lang=$2
+alidir=$3
+dubmdir=$4 # where diagonal UBM is.
+denlatdir=$5
+dir=$6
+
+silphonelist=`cat $lang/phones/silence.csl`
+mkdir -p $dir/log
+
+for f in $data/feats.scp $lang/phones.txt $dubmdir/final.dubm $alidir/final.mdl \
+ $alidir/ali.1.gz $denlatdir/lat.1.gz; do
+ [ ! -f $f ] && echo "Expected file $f to exist" && exit 1;
+done
+cp $alidir/final.mdl $alidir/tree $dir || exit 1;
+nj=`cat $alidir/num_jobs` || exit 1;
+[ "$nj" -ne "`cat $denlatdir/num_jobs`" ] && \
+ echo "$alidir and $denlatdir have different num-jobs" && exit 1;
+sdata=$data/split$nj
+splice_opts=`cat $alidir/splice_opts 2>/dev/null` # frame-splicing options.
+mkdir -p $dir/log
+cp $alidir/splice_opts $dir 2>/dev/null # frame-splicing options.
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+
+
+if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "$0: feature type is $feat_type"
+
+# Note: $feats is the features before fMPE.
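+# ($fmpefeats, set further below, is $feats with fmpe-apply-transform
+# appended once an fMPE/fMMI transform has been estimated; the transform
+# adds a learned offset to the base features.)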
+case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |" + cp $alidir/final.mat $dir + ;; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + +[ -f $alidir/trans.1 ] && echo Using transforms from $alidir && \ + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$alidir/trans.JOB ark:- ark:- |" + +lats="ark:gunzip -c $denlatdir/lat.JOB.gz|" +if [[ "$boost" != "0.0" && "$boost" != 0 ]]; then + lats="$lats lattice-boost-ali --b=$boost --silence-phones=$silphonelist $alidir/final.mdl ark:- 'ark,s,cs:gunzip -c $alidir/ali.JOB.gz|' ark:- |" +fi + + +fmpefeats="$feats" # At first, the features "after fMPE" are the same as the + # base features. + + +# Initialize the fMPE object. Note: we call it .fmpe because +# that's what it was called in the original paper, but since +# we're using the MMI objective function, it's really fMMI. + +fmpe-init $dubmdir/final.dubm $dir/0.fmpe 2>$dir/log/fmpe_init.log || exit 1; + + +if [ $stage -le -1 ]; then + # Get the gselect (Gaussian selection) info for fMPE. + # Note: fMPE object starts with GMM object, so can be read + # as one. + $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ + gmm-gselect --n=$ngselect $dir/0.fmpe "$feats" \ + "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; +fi + +cp $alidir/final.mdl $dir/0.mdl + +x=0 +num_iters=`echo $schedule | wc -w` + +while [ $x -lt $num_iters ]; do + iter_type=`echo $schedule | cut -d ' ' -f $[$x+1]` + case $iter_type in + fmmi) + echo "Iteration $x: doing fMMI" + if [ $stage -le $x ]; then + numpost="ark,s,cs:gunzip -c $alidir/ali.JOB.gz| ali-to-post ark:- ark:-|" + # Note: the command gmm-fmpe-acc-stats below requires the pre-fMPE features. + $cmd JOB=1:$nj $dir/log/acc_fmmi.$x.JOB.log \ + gmm-rescore-lattice $dir/$x.mdl "$lats" "$fmpefeats" ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + sum-post --scale1=-1 ark:- "$numpost" ark:- \| \ + gmm-fmpe-acc-stats $dir/$x.mdl $dir/$x.fmpe "$feats" \ + "ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" ark,s,cs:- \ + $dir/$x.JOB.fmpe_acc || exit 1; + + ( fmpe-sum-accs $dir/$x.fmpe_acc $dir/$x.*.fmpe_acc && \ + rm $dir/$x.*.fmpe_acc && \ + fmpe-est --learning-rate=$learning_rate $dir/$x.fmpe $dir/$x.fmpe_acc $dir/$[$x+1].fmpe ) \ + 2>$dir/log/est_fmpe.$x.log || exit 1; + fi + # We need to set the features to use the correct fMPE object. + fmpefeats="$feats fmpe-apply-transform $dir/$[$x+1].fmpe ark:- 'ark,s,cs:gunzip -c $dir/gselect.JOB.gz|' ark:- |" + rm $dir/$[x+1].mdl 2>/dev/null; ln -s $x.mdl $dir/$[$x+1].mdl # link previous model. + # Now, diagnostics. + objf_nf=`grep Overall $dir/log/acc_fmmi.$x.*.log | grep gmm-fmpe-acc-stats | awk '{ p+=$10*$12; nf+=$12; } END{print p/nf, nf;}'` + objf=`echo $objf_nf | awk '{print $1}'`; + nf=`echo $objf_nf | awk '{print $2}'`; + impr=`grep Objf $dir/log/est_fmpe.$x.log | awk '{print $NF}'` + impr=`perl -e "print ($impr/$nf);"` # normalize by #frames. + echo On iter $x, objf was $objf, auxf improvement from fMMI was $impr | tee $dir/objf.$x.log + ;; + mmi) # MMI iteration. + echo "Iteration $x: doing MMI (getting stats)..." + # Get denominator stats... 
For simplicity we rescore the lattice
+ # on all iterations, even though it shouldn't be necessary on the zeroth
+ # (but we want this script to work even if $alidir doesn't contain the
+ # model used to generate the lattice).
+ if [ $stage -le $x ]; then
+ $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \
+ gmm-rescore-lattice $dir/$x.mdl "$lats" "$fmpefeats" ark:- \| \
+ lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
+ sum-post --merge=$cancel --scale1=-1 \
+ ark:- "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-post ark:- ark:- |" ark:- \| \
+ gmm-acc-stats2 $dir/$x.mdl "$fmpefeats" ark,s,cs:- \
+ $dir/num_acc.$x.JOB.acc $dir/den_acc.$x.JOB.acc || exit 1;
+
+ n=`echo $dir/{num,den}_acc.$x.*.acc | wc -w`;
+ [ "$n" -ne $[$nj*2] ] && \
+ echo "Wrong number of MMI accumulators $n versus 2*$nj" && exit 1;
+ $cmd $dir/log/den_acc_sum.$x.log \
+ gmm-sum-accs $dir/den_acc.$x.acc $dir/den_acc.$x.*.acc || exit 1;
+ rm $dir/den_acc.$x.*.acc
+ $cmd $dir/log/num_acc_sum.$x.log \
+ gmm-sum-accs $dir/num_acc.$x.acc $dir/num_acc.$x.*.acc || exit 1;
+ rm $dir/num_acc.$x.*.acc
+
+ # note: this tau value is for smoothing to model parameters;
+ # you need to use gmm-ismooth-stats to smooth to the ML stats,
+ # but anyway this script does canceling of num and den stats on
+ # each frame (as suggested in the Boosted MMI paper) which would
+ # make smoothing to ML impossible without accumulating extra stats.
+ $cmd $dir/log/update.$x.log \
+ gmm-est-gaussians-ebw --tau=$tau $dir/$x.mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc - \| \
+ gmm-est-weights-ebw --weight-tau=$weight_tau - $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl || exit 1;
+ else
+ echo "not doing this iteration because --stage=$stage"
+ fi
+
+ # Some diagnostics.. note, this objf is somewhat comparable to the
+ # MMI objective function divided by the acoustic weight, and differences in it
+ # are comparable to the auxf improvement printed by the update program.
+ objf_nf=`grep Overall $dir/log/acc.$x.*.log | grep gmm-acc-stats2 | awk '{ p+=$10*$12; nf+=$12; } END{print p/nf, nf;}'`
+ objf=`echo $objf_nf | awk '{print $1}'`;
+ nf=`echo $objf_nf | awk '{print $2}'`;
+ impr=`grep -w Overall $dir/log/update.$x.log | awk '{x += $10*$12;} END{print x;}'`
+ impr=`perl -e "print ($impr/$nf);"` # renormalize by "real" #frames, to correct
+ # for the canceling of stats.
+ echo On iter $x, objf was $objf, auxf improvement was $impr | tee $dir/objf.$x.log
+ rm $dir/$[x+1].fmpe 2>/dev/null; ln -s $x.fmpe $dir/$[$x+1].fmpe # link previous fMPE transform
+ ;;
+ *) echo "Invalid --schedule option: expected only mmi or fmmi."; exit 1;;
+ esac
+ x=$[$x+1]
+done
+
+echo "Succeeded with $num_iters iterations of MMI+fMMI training (boosting factor = $boost)"
+
+rm $dir/final.mdl 2>/dev/null; ln -s $num_iters.mdl $dir/final.mdl
+rm $dir/final.fmpe 2>/dev/null; ln -s $num_iters.fmpe $dir/final.fmpe
+
+# Now do some cleanup.
+rm $dir/gselect.*.gz $dir/*.acc $dir/*.fmpe_acc
+exit 0;
+
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/train_mmi_fmmi_indirect.sh b/egs/kaldi-vystadial-recipe/s5/steps/train_mmi_fmmi_indirect.sh
new file mode 100755
index 00000000000..2bed327a3a6
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/train_mmi_fmmi_indirect.sh
@@ -0,0 +1,244 @@
+#!/bin/bash
+# by Johns Hopkins University (Author: Daniel Povey), 2012. Apache 2.0.
+
+# This script does MMI discriminative training, including
+# feature-space (like fMPE) and model-space components.
+# If you give the --boost option it does "boosted MMI" (BMMI).
+# On the iterations of training it alternates feature-space
+# and model-space training. We do 8 iterations in total--
+# 4 of each type ((B)MMI, f(B)MMI)
+
+
+# Begin configuration section.
+cmd=run.pl
+schedule="fmmi mmi fmmi mmi fmmi mmi fmmi mmi"
+boost=0.0
+learning_rate=0.02
+tau=200 # For model. Note: we're doing smoothing "to the previous iteration"
+ # (--smooth-from-model), so 200 seems like a more sensible default
+ # than 100. We smooth to the previous iteration because now
+ # we are discriminatively training the features (and not using
+ # the indirect differential), so it seems like it wouldn't make
+ # sense to use any element of ML.
+cancel=true # if true, cancel num and den counts as described in
+ # the boosted MMI paper.
+indirect=true # if true, use indirect derivative.
+acwt=0.1
+stage=-1
+ngselect=2; # Just the 2 top Gaussians. Beyond that, adding more Gaussians
+ # wouldn't make much difference since the posteriors would be very small.
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh;
+. parse_options.sh || exit 1;
+
+
+if [ $# != 6 ]; then
+ echo "Usage: steps/train_mmi_fmmi_indirect.sh <data> <lang> <ali-dir> <dubm-dir> <denlat-dir> <exp-dir>"
+ echo " e.g.: steps/train_mmi_fmmi_indirect.sh data/train_si84 data/lang exp/tri2b_ali_si84 exp/ubm2d exp/tri2b_denlats_si84 exp/tri2b_fmmi"
+ echo "Main options (for others, see top of script file)"
+ echo " --boost <boost-weight> # (e.g. 0.1) ... boosted MMI."
+ echo " --cancel (true|false) # cancel stats (true by default)"
+ echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+ echo " --config <config-file> # config containing options"
+ echo " --stage <stage> # stage to do partial re-run from."
+ echo " --tau <tau> # tau for i-smooth to last iter (default 200)"
+ echo " --learning-rate <learning-rate> # learning rate for fMMI, default 0.02"
+ echo " --schedule <schedule> # learning schedule: by default,"
+ echo " # \"fmmi mmi fmmi mmi fmmi mmi fmmi mmi\""
+ exit 1;
+fi
+
+
+data=$1
+lang=$2
+alidir=$3
+dubmdir=$4 # where diagonal UBM is.
+denlatdir=$5
+dir=$6
+
+silphonelist=`cat $lang/phones/silence.csl`
+mkdir -p $dir/log
+
+for f in $data/feats.scp $lang/phones.txt $dubmdir/final.dubm $alidir/final.mdl \
+ $alidir/ali.1.gz $denlatdir/lat.1.gz; do
+ [ ! -f $f ] && echo "Expected file $f to exist" && exit 1;
+done
+cp $alidir/final.mdl $alidir/tree $dir || exit 1;
+nj=`cat $alidir/num_jobs` || exit 1;
+[ "$nj" -ne "`cat $denlatdir/num_jobs`" ] && \
+ echo "$alidir and $denlatdir have different num-jobs" && exit 1;
+sdata=$data/split$nj
+splice_opts=`cat $alidir/splice_opts 2>/dev/null` # frame-splicing options.
+mkdir -p $dir/log
+cp $alidir/splice_opts $dir 2>/dev/null # frame-splicing options.
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+
+
+if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "$0: feature type is $feat_type"
+
+# Note: $feats is the features before fMPE.
+case $feat_type in + delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |" + cp $alidir/final.mat $dir + ;; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + +[ -f $alidir/trans.1 ] && echo Using transforms from $alidir && \ + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$alidir/trans.JOB ark:- ark:- |" + +lats="ark:gunzip -c $denlatdir/lat.JOB.gz|" +if [[ "$boost" != "0.0" && "$boost" != 0 ]]; then + lats="$lats lattice-boost-ali --b=$boost --silence-phones=$silphonelist $alidir/final.mdl ark:- 'ark,s,cs:gunzip -c $alidir/ali.JOB.gz|' ark:- |" +fi + + +fmpefeats="$feats" # At first, the features "after fMPE" are the same as the + # base features. + + +# Initialize the fMPE object. Note: we call it .fmpe because +# that's what it was called in the original paper, but since +# we're using the MMI objective function, it's really fMMI. + +fmpe-init $dubmdir/final.dubm $dir/0.fmpe 2>$dir/log/fmpe_init.log || exit 1; + + +if [ $stage -le -1 ]; then + # Get the gselect (Gaussian selection) info for fMPE. + # Note: fMPE object starts with GMM object, so can be read + # as one. + $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ + gmm-gselect --n=$ngselect $dir/0.fmpe "$feats" \ + "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; +fi + +cp $alidir/final.mdl $dir/0.mdl + +x=0 +num_iters=`echo $schedule | wc -w` + +while [ $x -lt $num_iters ]; do + iter_type=`echo $schedule | cut -d ' ' -f $[$x+1]` + case $iter_type in + fmmi) fmmi_iter=true; local_cancel=false;; + mmi) fmmi_iter=false; local_cancel=$cancel;; + *) echo "Bad iteration type $iter_type"; exit 1;; + esac + + echo "Getting MMI stats (needed for fMMI and MMI iterations)."; + if [ $stage -le $x ]; then + $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \ + gmm-rescore-lattice $dir/$x.mdl "$lats" "$fmpefeats" ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + sum-post --merge=$local_cancel --scale1=-1 \ + ark:- "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-post ark:- ark:- |" ark:- \| \ + gmm-acc-stats2 $dir/$x.mdl "$fmpefeats" ark,s,cs:- \ + $dir/num_acc.$x.JOB.acc $dir/den_acc.$x.JOB.acc || exit 1; + n=`echo $dir/{num,den}_acc.$x.*.acc | wc -w`; + [ "$n" -ne $[$nj*2] ] && \ + echo "Wrong number of MMI accumulators $n versus 2*$nj" && exit 1; + rm $dir/.error 2>/dev/null + $cmd $dir/log/den_acc_sum.$x.log \ + gmm-sum-accs $dir/den_acc.$x.acc $dir/den_acc.$x.*.acc || touch $dir/.error & + $cmd $dir/log/num_acc_sum.$x.log \ + gmm-sum-accs $dir/num_acc.$x.acc $dir/num_acc.$x.*.acc || touch $dir/.error & + wait + [ -f $dir/.error ] && echo "Error summing accs" && exit 1; + rm $dir/den_acc.$x.*.acc + rm $dir/num_acc.$x.*.acc + fi + + if $fmmi_iter; then + echo "Iteration $x: doing fMMI" + if [ $stage -le $x ]; then + # Get model derivative. Note: the "ml accumulator" is the same as the "numerator" + # since this is MMI. We avoided doing the "canceling of stats" on this iteration + # so that this would be true (this canceling wouldn't affect the derivative anyway, + # so can have no benefit for fMMI, unlike MMI). 
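+ # gmm-get-stats-deriv turns the (num, den, ml) accumulators into a
+ # derivative of the objective function w.r.t. the model parameters;
+ # passing it to gmm-fmpe-acc-stats below adds the "indirect differential"
+ # that this script's name refers to.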
+ $cmd $dir/log/get_stats_deriv.$x.log \ + gmm-get-stats-deriv $dir/$x.mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc \ + $dir/num_acc.$x.acc $dir/model_deriv.$x.gmmacc + numpost="ark,s,cs:gunzip -c $alidir/ali.JOB.gz| ali-to-post ark:- ark:-|" + # Note: the command gmm-fmpe-acc-stats below requires the pre-fMPE features. + $cmd JOB=1:$nj $dir/log/acc_fmmi.$x.JOB.log \ + gmm-rescore-lattice $dir/$x.mdl "$lats" "$fmpefeats" ark:- \| \ + lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ + sum-post --merge=false --scale1=-1 ark:- "$numpost" ark:- \| \ + gmm-fmpe-acc-stats --model-derivative=$dir/model_deriv.$x.gmmacc \ + $dir/$x.mdl $dir/$x.fmpe "$feats" \ + "ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" ark,s,cs:- \ + $dir/$x.JOB.fmpe_acc || exit 1; + + ( fmpe-sum-accs $dir/$x.fmpe_acc $dir/$x.*.fmpe_acc && \ + rm $dir/$x.*.fmpe_acc && \ + fmpe-est --learning-rate=$learning_rate $dir/$x.fmpe $dir/$x.fmpe_acc $dir/$[$x+1].fmpe ) \ + 2>$dir/log/est_fmpe.$x.log || exit 1; + + fmpefeats="$feats fmpe-apply-transform $dir/$[$x+1].fmpe ark:- 'ark,s,cs:gunzip -c $dir/gselect.JOB.gz|' ark:- |" + # OK, now we do one iteration of the "rescaling update" where we use the + # old and new ML accs, and we shift and rescale the model to match the new + # features. + $cmd JOB=1:$nj $dir/log/acc_ml.$x.JOB.log \ + gmm-acc-stats-ali $dir/$x.mdl "$fmpefeats" "ark:gunzip -c $alidir/ali.JOB.gz|" \ + $dir/new_ml_acc.$x.JOB.acc || exit 1; + $cmd $dir/log/new_ml_acc_sum.$x.log \ + gmm-sum-accs $dir/new_ml_acc.$x.acc $dir/new_ml_acc.$x.*.acc || exit 1; + $cmd $dir/log/update_rescale.$x.log \ + gmm-est-rescale $dir/$x.mdl $dir/num_acc.$x.acc $dir/new_ml_acc.$x.acc \ + $dir/$[$x+1].mdl || exit 1; + fi + # We need to set the features to use the correct fMPE object. + # This is a repeat of a command above-- in case we didn't do this stage. + fmpefeats="$feats fmpe-apply-transform $dir/$[$x+1].fmpe ark:- 'ark,s,cs:gunzip -c $dir/gselect.JOB.gz|' ark:- |" + # Now, diagnostics. + objf_nf=`grep Overall $dir/log/acc_fmmi.$x.*.log | grep gmm-fmpe-acc-stats | awk '{ p+=$10*$12; nf+=$12; } END{print p/nf, nf;}'` + objf=`echo $objf_nf | awk '{print $1}'`; + nf=`echo $objf_nf | awk '{print $2}'`; + impr=`grep Objf $dir/log/est_fmpe.$x.log | awk '{print $NF}'` + impr=`perl -e "print ($impr/$nf);"` # normalize by #frames. + echo On iter $x, objf was $objf, auxf improvement from fMMI was $impr | tee $dir/objf.$x.log + else # MMI iteration-- on this iteration do model-space update. + echo "Iteration $x: doing MMI update" + # note: this tau value is for smoothing to model parameters; + # you need to use gmm-ismooth-stats to smooth to the ML stats, + # but anyway this script does canceling of num and den stats on + # each frame (as suggested in the Boosted MMI paper) which would + # make smoothing to ML impossible without accumulating extra stats. + if [ $stage -le $x ]; then + $cmd $dir/log/update.$x.log \ + gmm-est-gaussians-ebw --tau=$tau $dir/$x.mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc - \| \ + gmm-est-weights-ebw - $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl || exit 1; + else + echo "not doing this iteration because --stage=$stage" + fi + + # Some diagnostics.. note, this objf is somewhat comparable to the + # MMI objective function divided by the acoustic weight, and differences in it + # are comparable to the auxf improvement printed by the update program. 
+    objf_nf=`grep Overall $dir/log/acc.$x.*.log | grep gmm-acc-stats2 | awk '{ p+=$10*$12; nf+=$12; } END{print p/nf, nf;}'`
+    objf=`echo $objf_nf | awk '{print $1}'`;
+    nf=`echo $objf_nf | awk '{print $2}'`;
+    impr=`grep Overall $dir/log/update.$x.log | head -1 | awk '{print $10*$12;}'`
+    impr=`perl -e "print ($impr/$nf);"` # renormalize by "real" #frames, to correct
+                                        # for the canceling of stats.
+    echo On iter $x, objf was $objf, auxf improvement was $impr | tee $dir/objf.$x.log
+    rm $dir/$[x+1].fmpe 2>/dev/null; ln -s $x.fmpe $dir/$[$x+1].fmpe # link previous fMPE transform
+  fi
+  x=$[$x+1]
+done
+
+echo "Succeeded with $num_iters iterations of MMI+fMMI training (boosting factor = $boost)"
+
+rm $dir/final.mdl 2>/dev/null; ln -s $num_iters.mdl $dir/final.mdl
+rm $dir/final.fmpe 2>/dev/null; ln -s $num_iters.fmpe $dir/final.fmpe
+
+# Now do some cleanup.
+rm $dir/gselect.*.gz $dir/*.acc $dir/*.fmpe_acc
+exit 0;
+
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/train_mmi_sgmm.sh b/egs/kaldi-vystadial-recipe/s5/steps/train_mmi_sgmm.sh
new file mode 100755
index 00000000000..9f7b081ca82
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/train_mmi_sgmm.sh
@@ -0,0 +1,153 @@
+#!/bin/bash
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
+
+# MMI training (or optionally boosted MMI, if you give the --boost option),
+# for SGMMs.  4 iterations (by default) of Extended Baum-Welch update.
+#
+# Begin configuration section.
+cmd=run.pl
+num_iters=4
+boost=0.0
+cancel=true # if true, cancel num and den counts on each frame.
+acwt=0.1
+stage=0
+
+update_opts=
+transform_dir=
+# End configuration section
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# -ne 5 ]; then
+  echo "Usage: steps/train_mmi_sgmm.sh <data> <lang> <ali-dir> <denlat-dir> <exp-dir>"
+  echo " e.g.: steps/train_mmi_sgmm.sh data/train_si84 data/lang exp/tri2b_ali_si84 exp/tri2b_denlats_si84 exp/tri2b_mmi"
+  echo "Main options (for others, see top of script file)"
+  echo "  --boost <boost-weight>                           # (e.g. 0.1), for boosted MMI.  (default 0)"
+  echo "  --cancel (true|false)                            # cancel stats (true by default)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo "  --config <config-file>                           # config containing options"
+  echo "  --stage <stage>                                  # stage to do partial re-run from."
+  echo "  --transform-dir <transform-dir>                  # directory to find fMLLR transforms."
+  exit 1;
+fi
+
+data=$1
+lang=$2
+alidir=$3
+denlatdir=$4
+dir=$5
+mkdir -p $dir/log
+
+for f in $data/feats.scp $alidir/{tree,final.mdl,ali.1.gz} $denlatdir/lat.1.gz; do
+  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
+done
+nj=`cat $alidir/num_jobs` || exit 1;
+[ "$nj" -ne "`cat $denlatdir/num_jobs`" ] && \
+  echo "$alidir and $denlatdir have different num-jobs" && exit 1;
+
+sdata=$data/split$nj
+splice_opts=`cat $alidir/splice_opts 2>/dev/null`
+mkdir -p $dir/log
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+cp $alidir/splice_opts $dir 2>/dev/null
+echo $nj > $dir/num_jobs
+
+cp $alidir/{final.mdl,tree} $dir
+
+silphonelist=`cat $lang/phones/silence.csl` || exit 1;
+
+# Set up features
+
+if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "$0: feature type is $feat_type"
+
+case $feat_type in
+  delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
+  lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
+    cp $alidir/final.mat $dir
+    ;;
+  *) echo "Invalid feature type $feat_type" && exit 1;
+esac
+
+if [ ! -z "$transform_dir" ]; then
+  echo "$0: using transforms from $transform_dir"
+  [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" \
+    && exit 1;
+  feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
+else
+  echo "$0: no fMLLR transforms."
+fi
+
+if [ -f $alidir/vecs.1 ]; then
+  echo "$0: using speaker vectors from $alidir"
+  spkvecs_opt="--spk-vecs=ark:$alidir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk"
+else
+  echo "$0: no speaker vectors."
+  spkvecs_opt=
+fi
+
+if [ -f $alidir/gselect.1.gz ]; then
+  echo "$0: using Gaussian-selection info from $alidir"
+  gselect_opt="--gselect=ark:gunzip -c $alidir/gselect.JOB.gz|"
+else
+  echo "$0: error: no Gaussian-selection info found" && exit 1;
+fi
+
+lats="ark:gunzip -c $denlatdir/lat.JOB.gz|"
+if [[ "$boost" != "0.0" && "$boost" != 0 ]]; then
+  lats="$lats lattice-boost-ali --b=$boost --silence-phones=$silphonelist $alidir/final.mdl ark:- 'ark,s,cs:gunzip -c $alidir/ali.JOB.gz|' ark:- |"
+fi
+
+
+cur_mdl=$alidir/final.mdl
+x=0
+while [ $x -lt $num_iters ]; do
+  echo "Iteration $x of MMI training"
+  # Note: the num and den states are accumulated at the same time, so we
+  # can cancel them per frame.
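+  # Editor's aside (illustrative sketch, not part of the original script):
+  # with --scale1=-1, sum-post forms (-1 * den + num) posteriors, and
+  # --merge=true cancels mass the two sides share on a frame: e.g. den=0.7
+  # and num=1.0 on the same pdf leave a single entry of +0.3.  Standalone
+  # shape of the call (den.post/num.post/diff.post are placeholder archives):
+  #   sum-post --merge=true --scale1=-1 ark:den.post ark:num.post ark:diff.post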
+  if [ $stage -le $x ]; then
+    $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \
+      sgmm-rescore-lattice "$gselect_opt" $spkvecs_opt $cur_mdl "$lats" "$feats" ark:- \| \
+      lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
+      sum-post --merge=$cancel --scale1=-1 \
+      ark:- "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-post ark:- ark:- |" ark:- \| \
+      sgmm-acc-stats2 "$gselect_opt" $spkvecs_opt $cur_mdl "$feats" ark,s,cs:- \
+      $dir/num_acc.$x.JOB.acc $dir/den_acc.$x.JOB.acc || exit 1;
+
+    n=`echo $dir/{num,den}_acc.$x.*.acc | wc -w`;
+    [ "$n" -ne $[$nj*2] ] && \
+      echo "Wrong number of MMI accumulators $n versus 2*$nj" && exit 1;
+    $cmd $dir/log/den_acc_sum.$x.log \
+      sgmm-sum-accs $dir/den_acc.$x.acc $dir/den_acc.$x.*.acc || exit 1;
+    rm $dir/den_acc.$x.*.acc
+    $cmd $dir/log/num_acc_sum.$x.log \
+      sgmm-sum-accs $dir/num_acc.$x.acc $dir/num_acc.$x.*.acc || exit 1;
+    rm $dir/num_acc.$x.*.acc
+
+    $cmd $dir/log/update.$x.log \
+      sgmm-est-ebw $update_opts $cur_mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl || exit 1;
+  fi
+  cur_mdl=$dir/$[$x+1].mdl
+
+
+  # Some diagnostics: the objective function progress and auxiliary-function
+  # improvement.  Note: this code is same as in train_mmi.sh
+  tail -n 50 $dir/log/acc.$x.*.log | perl -e '$acwt=shift @ARGV; while(<STDIN>) { if(m/gmm-acc-stats2.+Overall weighted acoustic likelihood per frame was (\S+) over (\S+) frames/) { $tot_aclike += $1*$2; $tot_frames1 += $2; } if(m|lattice-to-post.+Overall average log-like/frame is (\S+) over (\S+) frames. Average acoustic like/frame is (\S+)|) { $tot_den_lat_like += $1*$2; $tot_frames2 += $2; $tot_den_aclike += $3*$2; } } if (abs($tot_frames1 - $tot_frames2) > 0.01*($tot_frames1 + $tot_frames2)) { print STDERR "Frame-counts disagree $tot_frames1 versus $tot_frames2\n"; } $tot_den_lat_like /= $tot_frames2; $tot_den_aclike /= $tot_frames2; $tot_aclike *= ($acwt / $tot_frames1); $num_like = $tot_aclike + $tot_den_aclike; $per_frame_objf = $num_like - $tot_den_lat_like; print "$per_frame_objf $tot_frames1\n"; ' $acwt > $dir/tmpf
+  objf=`cat $dir/tmpf | awk '{print $1}'`;
+  nf=`cat $dir/tmpf | awk '{print $2}'`;
+  rm $dir/tmpf
+  impr=`grep -w Overall $dir/log/update.$x.log | awk '{x += $10*$12;} END{print x;}'`
+  impr=`perl -e "print ($impr*$acwt/$nf);"` # We multiply by acwt, and divide by $nf which is the "real" number of frames.
+  echo "Iteration $x: objf was $objf, MMI auxf change was $impr" | tee $dir/objf.$x.log
+  x=$[$x+1]
+done
+
+echo "MMI training finished"
+
+rm $dir/final.mdl 2>/dev/null
+ln -s $x.mdl $dir/final.mdl
+
+exit 0;
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/train_mmi_sgmm2.sh b/egs/kaldi-vystadial-recipe/s5/steps/train_mmi_sgmm2.sh
new file mode 100755
index 00000000000..ef45769fbbf
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/train_mmi_sgmm2.sh
@@ -0,0 +1,152 @@
+#!/bin/bash
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
+
+# MMI training (or optionally boosted MMI, if you give the --boost option),
+# for SGMMs.  4 iterations (by default) of Extended Baum-Welch update.
+#
+# Begin configuration section.
+cmd=run.pl
+num_iters=4
+boost=0.0
+cancel=true # if true, cancel num and den counts on each frame.
+acwt=0.1
+stage=0
+update_opts=
+transform_dir=
+# End configuration section
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# -ne 5 ]; then
+  echo "Usage: steps/train_mmi_sgmm2.sh <data> <lang> <ali-dir> <denlat-dir> <exp-dir>"
+  echo " e.g.: steps/train_mmi_sgmm2.sh data/train_si84 data/lang exp/tri2b_ali_si84 exp/tri2b_denlats_si84 exp/tri2b_mmi"
+  echo "Main options (for others, see top of script file)"
+  echo "  --boost <boost-weight>                           # (e.g. 0.1), for boosted MMI.  (default 0)"
+  echo "  --cancel (true|false)                            # cancel stats (true by default)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo "  --config <config-file>                           # config containing options"
+  echo "  --stage <stage>                                  # stage to do partial re-run from."
+  echo "  --transform-dir <transform-dir>                  # directory to find fMLLR transforms."
+  exit 1;
+fi
+
+data=$1
+lang=$2
+alidir=$3
+denlatdir=$4
+dir=$5
+mkdir -p $dir/log
+
+for f in $data/feats.scp $alidir/{tree,final.mdl,ali.1.gz} $denlatdir/lat.1.gz; do
+  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
+done
+nj=`cat $alidir/num_jobs` || exit 1;
+[ "$nj" -ne "`cat $denlatdir/num_jobs`" ] && \
+  echo "$alidir and $denlatdir have different num-jobs" && exit 1;
+
+sdata=$data/split$nj
+splice_opts=`cat $alidir/splice_opts 2>/dev/null`
+mkdir -p $dir/log
+cp $alidir/splice_opts $dir 2>/dev/null
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+echo $nj > $dir/num_jobs
+
+cp $alidir/{final.mdl,tree} $dir
+
+silphonelist=`cat $lang/phones/silence.csl` || exit 1;
+
+# Set up features
+
+if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "$0: feature type is $feat_type"
+
+case $feat_type in
+  delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
+  lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
+    cp $alidir/final.mat $dir
+    ;;
+  *) echo "Invalid feature type $feat_type" && exit 1;
+esac
+
+if [ ! -z "$transform_dir" ]; then
+  echo "$0: using transforms from $transform_dir"
+  [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" \
+    && exit 1;
+  feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
+else
+  echo "$0: no fMLLR transforms."
+fi
+
+if [ -f $alidir/vecs.1 ]; then
+  echo "$0: using speaker vectors from $alidir"
+  spkvecs_opt="--spk-vecs=ark:$alidir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk"
+else
+  echo "$0: no speaker vectors."
+  spkvecs_opt=
+fi
+
+if [ -f $alidir/gselect.1.gz ]; then
+  echo "$0: using Gaussian-selection info from $alidir"
+  gselect_opt="--gselect=ark:gunzip -c $alidir/gselect.JOB.gz|"
+else
+  echo "$0: error: no Gaussian-selection info found" && exit 1;
+fi
+
+lats="ark:gunzip -c $denlatdir/lat.JOB.gz|"
+if [[ "$boost" != "0.0" && "$boost" != 0 ]]; then
+  lats="$lats lattice-boost-ali --b=$boost --silence-phones=$silphonelist $alidir/final.mdl ark:- 'ark,s,cs:gunzip -c $alidir/ali.JOB.gz|' ark:- |"
+fi
+
+
+cur_mdl=$alidir/final.mdl
+x=0
+while [ $x -lt $num_iters ]; do
+  echo "Iteration $x of MMI training"
+  # Note: the num and den states are accumulated at the same time, so we
+  # can cancel them per frame.
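+  # Editor's aside (illustrative sketch, not part of the original script):
+  # the "$gselect_opt" used below points each job at a precomputed per-frame
+  # shortlist of Gaussian indices, produced along the lines of
+  #   sgmm2-gselect final.mdl "$feats" "ark,t:|gzip -c >gselect.1.gz"
+  # (paths here are placeholders); the rescoring and accumulation binaries
+  # then score only the shortlisted components instead of the full mixture.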
+  if [ $stage -le $x ]; then
+    $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \
+      sgmm2-rescore-lattice "$gselect_opt" $spkvecs_opt $cur_mdl "$lats" "$feats" ark:- \| \
+      lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
+      sum-post --merge=$cancel --scale1=-1 \
+      ark:- "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-post ark:- ark:- |" ark:- \| \
+      sgmm2-acc-stats2 "$gselect_opt" $spkvecs_opt $cur_mdl "$feats" ark,s,cs:- \
+      $dir/num_acc.$x.JOB.acc $dir/den_acc.$x.JOB.acc || exit 1;
+
+    n=`echo $dir/{num,den}_acc.$x.*.acc | wc -w`;
+    [ "$n" -ne $[$nj*2] ] && \
+      echo "Wrong number of MMI accumulators $n versus 2*$nj" && exit 1;
+    $cmd $dir/log/den_acc_sum.$x.log \
+      sgmm2-sum-accs $dir/den_acc.$x.acc $dir/den_acc.$x.*.acc || exit 1;
+    rm $dir/den_acc.$x.*.acc
+    $cmd $dir/log/num_acc_sum.$x.log \
+      sgmm2-sum-accs $dir/num_acc.$x.acc $dir/num_acc.$x.*.acc || exit 1;
+    rm $dir/num_acc.$x.*.acc
+
+    $cmd $dir/log/update.$x.log \
+      sgmm2-est-ebw $update_opts $cur_mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl || exit 1;
+  fi
+  cur_mdl=$dir/$[$x+1].mdl
+
+
+  # Some diagnostics: the objective function progress and auxiliary-function
+  # improvement.  Note: this code is same as in train_mmi.sh
+  tail -n 50 $dir/log/acc.$x.*.log | perl -e '$acwt=shift @ARGV; while(<STDIN>) { if(m/sgmm2-acc-stats2.+Overall weighted acoustic likelihood per frame was (\S+) over (\S+) frames/) { $tot_aclike += $1*$2; $tot_frames1 += $2; } if(m|lattice-to-post.+Overall average log-like/frame is (\S+) over (\S+) frames. Average acoustic like/frame is (\S+)|) { $tot_den_lat_like += $1*$2; $tot_frames2 += $2; $tot_den_aclike += $3*$2; } } if (abs($tot_frames1 - $tot_frames2) > 0.01*($tot_frames1 + $tot_frames2)) { print STDERR "Frame-counts disagree $tot_frames1 versus $tot_frames2\n"; } $tot_den_lat_like /= $tot_frames2; $tot_den_aclike /= $tot_frames2; $tot_aclike *= ($acwt / $tot_frames1); $num_like = $tot_aclike + $tot_den_aclike; $per_frame_objf = $num_like - $tot_den_lat_like; print "$per_frame_objf $tot_frames1\n"; ' $acwt > $dir/tmpf
+  objf=`cat $dir/tmpf | awk '{print $1}'`;
+  nf=`cat $dir/tmpf | awk '{print $2}'`;
+  rm $dir/tmpf
+  impr=`grep -w Overall $dir/log/update.$x.log | awk '{x += $10*$12;} END{print x;}'`
+  impr=`perl -e "print ($impr*$acwt/$nf);"` # We multiply by acwt, and divide by $nf which is the "real" number of frames.
+  echo "Iteration $x: objf was $objf, MMI auxf change was $impr" | tee $dir/objf.$x.log
+  x=$[$x+1]
+done
+
+echo "MMI training finished"
+
+rm $dir/final.mdl 2>/dev/null
+ln -s $x.mdl $dir/final.mdl
+
+exit 0;
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/train_mono.sh b/egs/kaldi-vystadial-recipe/s5/steps/train_mono.sh
new file mode 100755
index 00000000000..41aab425c15
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/train_mono.sh
@@ -0,0 +1,135 @@
+#!/bin/bash
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
+# Apache 2.0
+
+
+# To be run from ..
+# Flat start and monophone training, with delta-delta features.
+# This script applies cepstral mean normalization (per speaker).
+
+# Begin configuration section.
+nj=4
+cmd=run.pl
+scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
+num_iters=40    # Number of iterations of training
+max_iter_inc=30 # Last iter to increase #Gauss on.
+totgauss=1000   # Target #Gaussians.
+boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment
+realign_iters="1 2 3 4 5 6 7 8 9 10 12 14 16 18 20 23 26 29 32 35 38";
+config=  # name of config file.
+stage=-4
+power=0.2 # exponent to determine number of gaussians from occurrence counts
+feat_dim=39
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+if [ $# != 3 ]; then
+  echo "Usage: steps/train_mono.sh [options] <data-dir> <lang-dir> <exp-dir>"
+  echo " e.g.: steps/train_mono.sh data/train.1k data/lang exp/mono"
+  echo "main options (for others, see top of script file)"
+  echo "  --config <config-file>                           # config containing options"
+  echo "  --nj <nj>                                        # number of parallel jobs"
+  echo "  --feat_dim <dim>                                 # dimension of feature vector (39)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  exit 1;
+fi
+
+data=$1
+lang=$2
+dir=$3
+
+oov_sym=`cat $lang/oov.int` || exit 1;
+
+mkdir -p $dir/log
+echo $nj > $dir/num_jobs
+sdata=$data/split$nj;
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+
+
+feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |"
+example_feats="`echo '$feats' | sed s/JOB/1/g`";
+
+echo "$0: Initializing monophone system."
+
+[ ! -f $lang/phones/sets.int ] && exit 1;
+shared_phones_opt="--shared-phones=$lang/phones/sets.int"
+
+if [ $stage -le -3 ]; then
+# Note: JOB=1 just uses the 1st part of the features-- we only need a subset anyway.
+  $cmd JOB=1 $dir/log/init.log \
+    gmm-init-mono $shared_phones_opt "--train-feats=$feats subset-feats --n=10 ark:- ark:-|" $lang/topo $feat_dim \
+    $dir/0.mdl $dir/tree || exit 1;
+fi
+
+numgauss=`gmm-info --print-args=false $dir/0.mdl | grep gaussians | awk '{print $NF}'`
+incgauss=$[($totgauss-$numgauss)/$max_iter_inc] # per-iter increment for #Gauss
+
+if [ $stage -le -2 ]; then
+  echo "$0: Compiling training graphs"
+  $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
+    compile-train-graphs $dir/tree $dir/0.mdl $lang/L.fst \
+    "ark:sym2int.pl --map-oov $oov_sym -f 2- $lang/words.txt < $sdata/JOB/text|" \
+    "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
+fi
+
+if [ $stage -le -1 ]; then
+  echo "$0: Aligning data equally (pass 0)"
+  $cmd JOB=1:$nj $dir/log/align.0.JOB.log \
+    align-equal-compiled "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" ark,t:- \| \
+    gmm-acc-stats-ali --binary=true $dir/0.mdl "$feats" ark:- \
+    $dir/0.JOB.acc || exit 1;
+fi
+
+# In the following steps, the --min-gaussian-occupancy=3 option is important, otherwise
+# we fail to estimate "rare" phones and later on, they never align properly.
+
+if [ $stage -le 0 ]; then
+  gmm-est --min-gaussian-occupancy=3 --mix-up=$numgauss --power=$power \
+    $dir/0.mdl "gmm-sum-accs - $dir/0.*.acc|" $dir/1.mdl 2> $dir/log/update.0.log || exit 1;
+  rm $dir/0.*.acc
+fi
+
+
+beam=6 # will change to 10 below after 1st pass
+# note: using slightly wider beams for WSJ vs. RM.
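+# Editor's aside (illustrative, not part of the original script): with the
+# defaults above (totgauss=1000, max_iter_inc=30), if gmm-init-mono had
+# produced, say, 130 Gaussians, the per-pass increment would be
+#   echo $(( (1000-130)/30 ))   # -> 29
+# i.e. 29 Gaussians are added on each pass through iteration 30, after which
+# the model size stays fixed for the remaining passes.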
+x=1
+while [ $x -lt $num_iters ]; do
+  echo "$0: Pass $x"
+  if [ $stage -le $x ]; then
+    if echo $realign_iters | grep -w $x >/dev/null; then
+      echo "$0: Aligning data"
+      mdl="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $dir/$x.mdl - |"
+      $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \
+        gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$[$beam*4] "$mdl" \
+        "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" "ark,t:|gzip -c >$dir/ali.JOB.gz" \
+        || exit 1;
+    fi
+    $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \
+      gmm-acc-stats-ali $dir/$x.mdl "$feats" "ark:gunzip -c $dir/ali.JOB.gz|" \
+      $dir/$x.JOB.acc || exit 1;
+
+    $cmd $dir/log/update.$x.log \
+      gmm-est --write-occs=$dir/$[$x+1].occs --mix-up=$numgauss --power=$power $dir/$x.mdl \
+      "gmm-sum-accs - $dir/$x.*.acc|" $dir/$[$x+1].mdl || exit 1;
+    rm $dir/$x.mdl $dir/$x.*.acc $dir/$x.occs 2>/dev/null
+  fi
+  if [ $x -le $max_iter_inc ]; then
+    numgauss=$[$numgauss+$incgauss];
+  fi
+  beam=10
+  x=$[$x+1]
+done
+
+( cd $dir; rm final.{mdl,occs} 2>/dev/null; ln -s $x.mdl final.mdl; ln -s $x.occs final.occs )
+
+utils/summarize_warnings.pl $dir/log
+
+echo Done
+
+# example of showing the alignments:
+# show-alignments data/lang/phones.txt $dir/30.mdl "ark:gunzip -c $dir/ali.0.gz|" | head -4
+
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/train_mpe.sh b/egs/kaldi-vystadial-recipe/s5/steps/train_mpe.sh
new file mode 100755
index 00000000000..0808dea6a27
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/train_mpe.sh
@@ -0,0 +1,158 @@
+#!/bin/bash
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
+
+# MPE training (optionally on boosted lattices, if you give the --boost
+# option).  4 iterations (by default) of Extended Baum-Welch update.
+#
+# For the numerator we have a fixed alignment rather than a lattice--
+# this actually follows from the way lattices are defined in Kaldi, which
+# is to have a single path for each word (output-symbol) sequence.
+
+# Begin configuration section.
+cmd=run.pl
+num_iters=4
+boost=0.0
+cancel=true # if true, cancel num and den counts on each frame.
+tau=400
+weight_tau=10
+acwt=0.1
+stage=0
+smooth_to_model=true
+# End configuration section
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# -ne 5 ]; then
+  echo "Usage: steps/train_mpe.sh <data> <lang> <ali-dir> <denlat-dir> <exp-dir>"
+  echo " e.g.: steps/train_mpe.sh data/train_si84 data/lang exp/tri2b_ali_si84 exp/tri2b_denlats_si84 exp/tri2b_mpe"
+  echo "Main options (for others, see top of script file)"
+  echo "  --boost <boost-weight>                           # (e.g. 0.1), for boosted lattices.  (default 0)"
+  echo "  --cancel (true|false)                            # cancel stats (true by default)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo "  --config <config-file>                           # config containing options"
+  echo "  --stage <stage>                                  # stage to do partial re-run from."
+  echo "  --tau <tau>                                      # tau for i-smooth to last iter (default 400)"
+
+  exit 1;
+fi
+
+data=$1
+lang=$2
+alidir=$3
+denlatdir=$4
+dir=$5
+mkdir -p $dir/log
+
+for f in $data/feats.scp $alidir/{tree,final.mdl,ali.1.gz} $denlatdir/lat.1.gz; do
+  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
+done
+nj=`cat $alidir/num_jobs` || exit 1;
+[ "$nj" -ne "`cat $denlatdir/num_jobs`" ] && \
+  echo "$alidir and $denlatdir have different num-jobs" && exit 1;
+
+sdata=$data/split$nj
+splice_opts=`cat $alidir/splice_opts 2>/dev/null`
+mkdir -p $dir/log
+cp $alidir/splice_opts $dir 2>/dev/null
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+echo $nj > $dir/num_jobs
+
+cp $alidir/{final.mdl,tree} $dir
+
+silphonelist=`cat $lang/phones/silence.csl` || exit 1;
+
+# Set up features
+
+if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "$0: feature type is $feat_type"
+
+case $feat_type in
+  delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
+  lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
+    cp $alidir/final.mat $dir
+    ;;
+  *) echo "Invalid feature type $feat_type" && exit 1;
esac
+
+[ -f $alidir/trans.1 ] && echo Using transforms from $alidir && \
+  feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$alidir/trans.JOB ark:- ark:- |"
+
+lats="ark:gunzip -c $denlatdir/lat.JOB.gz|"
+if [[ "$boost" != "0.0" && "$boost" != 0 ]]; then
+  lats="$lats lattice-boost-ali --b=$boost --silence-phones=$silphonelist $alidir/final.mdl ark:- 'ark,s,cs:gunzip -c $alidir/ali.JOB.gz|' ark:- |"
+fi
+
+
+cur_mdl=$alidir/final.mdl
+x=0
+while [ $x -lt $num_iters ]; do
+  echo "Iteration $x of MPE training"
+  # Note: the num and den states are accumulated at the same time, so we
+  # can cancel them per frame.
+  if [ $stage -le $x ]; then
+    $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \
+      gmm-rescore-lattice $cur_mdl "$lats" "$feats" ark:- \| \
+      lattice-to-mpe-post --acoustic-scale=$acwt $cur_mdl \
+      "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-post ark:- ark:- |" ark:- ark:- \| \
+      gmm-acc-stats2 $cur_mdl "$feats" ark,s,cs:- \
+      $dir/num_acc.$x.JOB.acc $dir/den_acc.$x.JOB.acc || exit 1;
+
+    n=`echo $dir/{num,den}_acc.$x.*.acc | wc -w`;
+    [ "$n" -ne $[$nj*2] ] && \
+      echo "Wrong number of MMI accumulators $n versus 2*$nj" && exit 1;
+    $cmd $dir/log/den_acc_sum.$x.log \
+      gmm-sum-accs $dir/den_acc.$x.acc $dir/den_acc.$x.*.acc || exit 1;
+    rm $dir/den_acc.$x.*.acc
+    $cmd $dir/log/num_acc_sum.$x.log \
+      gmm-sum-accs $dir/num_acc.$x.acc $dir/num_acc.$x.*.acc || exit 1;
+    rm $dir/num_acc.$x.*.acc
+
+    # note: this tau value is for smoothing towards the model parameters, as in
+    # the Boosted MMI paper, not towards the ML stats as in the earlier
+    # work on discriminative training (e.g. my thesis).
+    # You could use gmm-ismooth-stats to smooth to the ML stats, if you had
+    # them available [here they're not available if cancel=true].
+    if ! $smooth_to_model; then
+      echo "Iteration $x of MPE: computing ml (smoothing) stats"
+      $cmd JOB=1:$nj $dir/log/acc_ml.$x.JOB.log \
+        gmm-acc-stats $cur_mdl "$feats" \
+        "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-post ark:- ark:- |" \
+        $dir/ml.$x.JOB.acc || exit 1;
+      $cmd $dir/log/acc_ml_sum.$x.log \
+        gmm-sum-accs $dir/ml.$x.acc $dir/ml.$x.*.acc || exit 1;
+      rm $dir/ml.$x.*.acc
+      num_stats="gmm-ismooth-stats --tau=$tau $dir/ml.$x.acc $dir/num_acc.$x.acc -|"
+    else
+      num_stats="gmm-ismooth-stats --smooth-from-model=true --tau=$tau $cur_mdl $dir/num_acc.$x.acc -|"
+    fi
+
+    $cmd $dir/log/update.$x.log \
+      gmm-est-gaussians-ebw $cur_mdl "$num_stats" $dir/den_acc.$x.acc - \| \
+      gmm-est-weights-ebw - $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl || exit 1;
+    rm $dir/{den,num}_acc.$x.acc
+  fi
+  cur_mdl=$dir/$[$x+1].mdl
+
+  # Some diagnostics: the objective function progress and auxiliary-function
+  # improvement.
+
+  tail -n 50 $dir/log/acc.$x.*.log | perl -e 'while(<STDIN>) { if(m/lattice-to-mpe-post.+Overall average frame-accuracy is (\S+) over (\S+) frames/) { $tot_objf += $1*$2; $tot_frames += $2; }} $tot_objf /= $tot_frames; print "$tot_objf $tot_frames\n"; ' > $dir/tmpf
+  objf=`cat $dir/tmpf | awk '{print $1}'`;
+  nf=`cat $dir/tmpf | awk '{print $2}'`;
+  rm $dir/tmpf
+  impr=`grep -w Overall $dir/log/update.$x.log | awk '{x += $10*$12;} END{print x;}'`
+  impr=`perl -e "print ($impr*$acwt/$nf);"` # We multiply by acwt, and divide by $nf which is the "real" number of frames.
+  # This gives us a projected objective function improvement.
+  echo "Iteration $x: objf was $objf, MPE auxf change was $impr" | tee $dir/objf.$x.log
+  x=$[$x+1]
+done
+
+echo "MPE training finished"
+
+rm $dir/final.mdl 2>/dev/null
+ln -s $x.mdl $dir/final.mdl
+
+exit 0;
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/train_nnet.sh b/egs/kaldi-vystadial-recipe/s5/steps/train_nnet.sh
new file mode 100755
index 00000000000..dde713fbbd1
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/train_nnet.sh
@@ -0,0 +1,284 @@
+#!/bin/bash
+
+# Copyright 2012  Karel Vesely (Brno University of Technology)
+# Apache 2.0
+
+# Begin configuration.
+cmd=run.pl
+
+# nnet config
+model_size=3000000 # nr. of parameters in MLP
+hid_layers=2       # nr. of hidden layers (prior to softmax or bottleneck)
+bn_dim=            # set value to get a bottleneck network
+hid_dim=           # set this to override the $model_size
+mlp_init=          # set this to override MLP initialization
+# training config
+learn_rate=0.008 # initial learning rate
+momentum=0.0     # momentum
+l1_penalty=0.0   # L1 regularization constant (lasso)
+l2_penalty=0.0   # L2 regularization constant (weight decay)
+# data processing config
+bunch_size=256   # size of the training block
+cache_size=16384 # size of the randomization cache
+randomize=true   # do the frame level randomization
+# feature config
+norm_vars=false # normalize the FBANKs (CVN)
+splice_lr=15    # temporal splicing
+feat_type=traps
+dct_basis=16    # nr. of DCT bases
+# scheduling config
+min_iters=   # set to enforce minimum number of iterations
+max_iters=20 # maximum number of iterations
+start_halving_inc=0.5 # frm-accuracy improvement to begin learn_rate reduction
+end_halving_inc=0.1   # frm-accuracy improvement to terminate the training
+halving_factor=0.5    # factor to multiply learn_rate
+# tool config
+TRAIN_TOOL="nnet-train-xent-hardlab-frmshuff" # training tool used for training / cross validation
+# End configuration.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f path.sh ] && . ./path.sh;
+. parse_options.sh || exit 1;
+
+
+if [ $# != 6 ]; then
+  echo "Usage: $0 <data-train> <data-cv> <lang-dir> <ali-train> <ali-cv> <exp-dir>"
+  echo " e.g.: $0 data/train data/cv data/lang exp/mono_ali exp/mono_ali_cv exp/mono_nnet"
+  echo "main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo "  --config <config-file>                           # config containing options"
+  exit 1;
+fi
+
+data=$1
+data_cv=$2
+lang=$3
+alidir=$4
+alidir_cv=$5
+dir=$6
+
+for f in $alidir/final.mdl $alidir/ali.1.gz $alidir_cv/ali.1.gz $data/feats.scp $data_cv/feats.scp $data/cmvn.scp $data_cv/cmvn.scp; do
+  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
+done
+
+echo "$0 [info]: Training Neural Network"
+printf "\t dir       : $dir \n"
+printf "\t Train-set : $data $alidir \n"
+printf "\t CV-set    : $data_cv $alidir_cv \n"
+
+mkdir -p $dir/{log,nnet}
+
+###### PREPARE ALIGNMENTS ######
+echo "Preparing alignments"
+#convert ali to pdf
+labels_tr="ark:$dir/ali_train.pdf"
+ali-to-pdf $alidir/final.mdl "ark:gunzip -c $alidir/ali.*.gz |" $labels_tr 2> $dir/ali_train.pdf_log || exit 1
+if [[ "$alidir" == "$alidir_cv" ]]; then
+  labels=$labels_tr
+else
+  #convert ali to pdf (cv set)
+  labels_cv="ark:$dir/ali_cv.pdf"
+  ali-to-pdf $alidir/final.mdl "ark:gunzip -c $alidir_cv/ali.*.gz |" $labels_cv 2> $dir/ali_cv.pdf_log || exit 1
+  #merge the two parts (scheduler expects one file in $labels)
+  labels="ark:$dir/ali_train_and_cv.pdf"
+  cat $dir/ali_train.pdf $dir/ali_cv.pdf > $dir/ali_train_and_cv.pdf
+fi
+
+#get the priors, count the class examples from alignments
+analyze-counts --binary=false $labels_tr $dir/ali_train.counts 2>$dir/ali_train.counts_log || exit 1
+#copy the old transition model, will be needed by decoder
+copy-transition-model --binary=false $alidir/final.mdl $dir/final.mdl 2>$dir/final.mdl_log || exit 1
+cp $alidir/tree $dir/tree || exit 1
+
+#analyze the train/cv alignments
+utils/nnet/analyze_alignments.sh "TRAINING SET" "ark:gunzip -c $alidir/ali.*.gz |" $dir/final.mdl $lang > $dir/__ali_stats_train
+utils/nnet/analyze_alignments.sh "VALIDATION SET" "ark:gunzip -c $alidir_cv/ali.*.gz |" $dir/final.mdl $lang > $dir/__ali_stats_cv
+
+
+###### PREPARE FEATURES ######
+# shuffle the list
+echo "Preparing train/cv lists"
+cat $data/feats.scp | utils/shuffle_list.pl ${seed:-777} > $dir/train.scp
+cp $data_cv/feats.scp $dir/cv.scp
+# print the list sizes
+wc -l $dir/train.scp $dir/cv.scp
+
+#get feature dim
+echo -n "Getting feature dim : "
+feat_dim=$(feat-to-dim --print-args=false scp:$dir/train.scp -)
+echo $feat_dim
+
+#add per-speaker CMVN
+echo "Will use CMVN statistics : $data/cmvn.scp, $data_cv/cmvn.scp"
+cmvn="scp:$data/cmvn.scp"
+cmvn_cv="scp:$data_cv/cmvn.scp"
+feats_tr="ark:apply-cmvn --print-args=false --norm-vars=$norm_vars --utt2spk=ark:$data/utt2spk $cmvn scp:$dir/train.scp ark:- |"
+feats_cv="ark:apply-cmvn --print-args=false --norm-vars=$norm_vars --utt2spk=ark:$data_cv/utt2spk $cmvn_cv scp:$dir/cv.scp ark:- |"
+# keep track of norm_vars option
+echo "$norm_vars" >$dir/norm_vars
+
+#add splicing
+splice_opts="--left-context=$splice_lr --right-context=$splice_lr"
+feats_tr="$feats_tr splice-feats --print-args=false $splice_opts ark:- ark:- |"
+feats_cv="$feats_cv splice-feats --print-args=false $splice_opts ark:- ark:- |"
+# keep track of splice_opts
+echo "$splice_opts" >$dir/splice_opts
+
+#choose further processing of spliced features
+echo "Feature type : $feat_type"
+case $feat_type in
+  plain)
+    ;;
+  traps)
+    #generate hamming+dct transform
+    transf=$dir/hamm_dct.mat
+    echo "Preparing Hamming DCT transform : $transf"
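+    # Editor's aside (illustrative, not part of the original script): with
+    # splice_lr=15 the spliced window spans 2*15+1 = 31 frames, so for an
+    # assumed 23-dim FBANK input the composed transform maps 23*31 = 713
+    # inputs down to 23*16 = 368 DCT coefficients (dct_basis=16):
+    #   echo $(( 23*(2*15+1) )) $(( 23*16 ))   # -> 713 368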
+    utils/nnet/gen_hamm_mat.py --fea-dim=$feat_dim --splice=$splice_lr > $dir/hamm.mat
+    utils/nnet/gen_dct_mat.py --fea-dim=$feat_dim --splice=$splice_lr --dct-basis=$dct_basis > $dir/dct.mat
+    compose-transforms --binary=false $dir/dct.mat $dir/hamm.mat $transf 2>${transf}_log || exit 1
+    #convert transform to NNET format
+    {
+      echo "<biasedlinearity> $((feat_dim*dct_basis)) $((feat_dim*(2*splice_lr+1)))"
+      cat $transf
+      echo -n ' [ '
+      for i in $(seq $((feat_dim*dct_basis))); do echo -n '0 '; done
+      echo ']'
+    } > $transf.net
+    #append transform to features
+    feats_tr="$feats_tr nnet-forward --print-args=false --silent=true $transf.net ark:- ark:- |"
+    feats_cv="$feats_cv nnet-forward --print-args=false --silent=true $transf.net ark:- ark:- |"
+    ;;
+  transf)
+    transf=$dir/final.mat
+    [ ! -f $alidir/final.mat ] && echo "Missing transform $alidir/final.mat" && exit 1;
+    cp $alidir/final.mat $transf
+    echo "Copied transform $transf"
+    feats_tr="$feats_tr transform-feats $transf ark:- ark:- |"
+    feats_cv="$feats_cv transform-feats $transf ark:- ark:- |"
+    ;;
+  transf-sat)
+    echo yet unimplemented...
+    exit 1;
+    ;;
+  *)
+    echo "Unknown feature type $feat_type"
+    exit 1;
+    ;;
+esac
+# keep track of feat_type
+echo $feat_type > $dir/feat_type
+
+#renormalize the MLP input to zero mean and unit variance
+cmvn_g="$dir/cmvn_glob.mat"
+echo "Renormalizing MLP input features by : $cmvn_g"
+compute-cmvn-stats --binary=false "$feats_tr" $cmvn_g 2>${cmvn_g}_log || exit 1
+feats_tr="$feats_tr apply-cmvn --print-args=false --norm-vars=true $cmvn_g ark:- ark:- |"
+feats_cv="$feats_cv apply-cmvn --print-args=false --norm-vars=true $cmvn_g ark:- ark:- |"
+
+
+###### INITIALIZE THE NNET ######
+
+if [ "" != "$mlp_init" ]; then
+  echo "Using pre-initialized network $mlp_init";
+else
+  echo -n "Initializing MLP : "
+  num_fea=$((feat_dim*dct_basis))
+  num_tgt=$(hmm-info --print-args=false $alidir/final.mdl | grep pdfs | awk '{ print $NF }')
+  # What is the topology?
+  if [ "" == "$bn_dim" ]; then #MLP w/o bottleneck
+    case "$hid_layers" in
+      1) #3-layer MLP
+        if [ "" != "$hid_dim" ]; then
+          num_hid=$hid_dim
+        else
+          num_hid=$((model_size/(num_fea+num_tgt)))
+        fi
+        mlp_init=$dir/nnet_${num_fea}_${num_hid}_${num_tgt}.init
+        echo " $mlp_init"
+        utils/nnet/gen_mlp_init.py --dim=${num_fea}:${num_hid}:${num_tgt} \
+          --gauss --negbias --seed=777 > $mlp_init
+        ;;
+      2|3|4|5|6|7|8|9|10) #(>3)-layer MLP
+        if [ "" != "$hid_dim" ]; then
+          num_hid=$hid_dim
+        else
+          a=$((hid_layers-1))
+          b=$((num_fea+num_tgt))
+          c=$((-model_size))
+          num_hid=$(awk "BEGIN{ num_hid= -$b/(2*$a) + sqrt($b^2 -4*$a*$c)/(2*$a); print int(num_hid) }")
+        fi
+        mlp_init=$dir/nnet_${num_fea}
+        dim_arg=${num_fea}
+        for i in $(seq $hid_layers); do
+          mlp_init=${mlp_init}_$num_hid
+          dim_arg=${dim_arg}:${num_hid}
+        done
+        mlp_init=${mlp_init}_${num_tgt}.init
+        dim_arg=${dim_arg}:${num_tgt}
+        echo " $mlp_init"
+        utils/nnet/gen_mlp_init.py --dim=${dim_arg} --gauss --negbias --seed=777 > $mlp_init
+        ;;
+      *)
+        echo "Unsupported number of hidden layers $hid_layers"
+        exit 1;
+    esac
+  else #bn-system
+    num_bn=$bn_dim
+    case "$hid_layers" in # i.e. number of layers in front of bottleneck
+      1) #5-layer MLP
+        if [ "" != "$hid_dim" ]; then
+          num_hid=$hid_dim
+        else
+          num_hid=$((model_size/(num_fea+num_tgt+(2*num_bn))))
+        fi
+        mlp_init=$dir/nnet_${num_fea}_${num_hid}_${num_bn}_${num_hid}_${num_tgt}.init
+        echo " $mlp_init"
+        utils/nnet/gen_mlp_init.py --dim=${num_fea}:${num_hid}:${num_bn}:${num_hid}:${num_tgt} --gauss --negbias --seed=777 --linBNdim=$num_bn > $mlp_init
+        ;;
+      2|3|4|5|6|7|8|9|10) #(>5)-layer MLP
+        if [ "" != "$hid_dim" ]; then
+          num_hid=$hid_dim
+        else
+          a=$((hid_layers-1))
+          b=$((num_fea+2*num_bn+num_tgt))
+          c=$((-model_size))
+          num_hid=$(awk "BEGIN{ num_hid= -$b/(2*$a) + sqrt($b^2 -4*$a*$c)/(2*$a); print int(num_hid) }")
+        fi
+        mlp_init=$dir/nnet_${num_fea}
+        dim_arg=${num_fea}
+        for i in $(seq $hid_layers); do
+          mlp_init=${mlp_init}_$num_hid
+          dim_arg=${dim_arg}:${num_hid}
+        done
+        mlp_init=${mlp_init}_${num_bn}lin_${num_hid}_${num_tgt}.init
+        dim_arg=${dim_arg}:${num_bn}:${num_hid}:${num_tgt}
+        echo " $mlp_init"
+        utils/nnet/gen_mlp_init.py --dim=${dim_arg} --gauss --negbias --seed=777 --linBNdim=$num_bn > $mlp_init
+        ;;
+      *)
+        echo "Unsupported number of hidden layers $hid_layers"
+        exit 1;
+    esac
+  fi
+fi
+
+
+
+###### TRAIN ######
+echo "Starting training : "
+source utils/nnet/train_nnet_scheduler.sh
+echo "Training finished."
+echo
+if [ "" == "$mlp_final" ]; then
+  echo "No final network returned!";
+  exit 1;
+else
+  ( cd $dir; ln -s nnet/${mlp_final##*/} final.nnet; )
+  echo "Final network $mlp_final linked to $dir/final.nnet";
+fi
+
+echo "Succeeded training the Neural Network : $dir/final.nnet"
+
+
+
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/train_quick.sh b/egs/kaldi-vystadial-recipe/s5/steps/train_quick.sh
new file mode 100755
index 00000000000..80638b3c8c4
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/train_quick.sh
@@ -0,0 +1,191 @@
+#!/bin/bash
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
+
+
+# Train a model on top of existing features (no feature-space learning of any
+# kind is done).  This script initializes the model from each stage of the
+# previous system's model, judging the similarities based on overlap of counts
+# in the tree stats.
+
+# Begin configuration..
+cmd=run.pl
+scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
+realign_iters="10 15"; # Only realign twice.
+num_iters=20   # Number of iterations of training
+maxiterinc=15  # Last iter to increase #Gauss on.
+batch_size=750 # batch size to use while compiling graphs... memory/speed tradeoff.
+beam=10 # alignment beam.
+retry_beam=40
+stage=-5
+cluster_thresh=-1 # for build-tree control final bottom-up clustering of leaves
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# != 6 ]; then
+  echo "Usage: steps/train_quick.sh <#leaves> <#gauss> <data> <lang> <ali-dir> <exp-dir>"
+  echo " e.g.: steps/train_quick.sh 2500 15000 data/train_si284 data/lang exp/tri3c_ali_si284 exp/tri4b"
+  echo "Main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo "  --config <config-file>                           # config containing options"
+  echo "  --stage <stage>                                  # stage to do partial re-run from."
+  exit 1;
+fi
+
+numleaves=$1
+totgauss=$2
+data=$3
+lang=$4
+alidir=$5
+dir=$6
+
+for f in $data/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl; do
+  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
+done
+
+# Set various variables.
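+# Editor's aside (illustrative, not part of the original script): starting
+# numgauss at half the target below leaves room to mix up gradually; e.g.
+# with totgauss=15000 and maxiterinc=15,
+#   numgauss = 15000/2 = 7500;  incgauss = (15000-7500)/15 = 500
+# so 500 Gaussians are added per pass through iteration 15.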
+oov=`cat $lang/oov.int`
+silphonelist=`cat $lang/phones/silence.csl`
+ciphonelist=`cat $lang/phones/context_indep.csl`
+numgauss=$[totgauss/2] # Start with half the total number of Gaussians.  We won't have
+   # to mix up much probably, as we're initializing with the old (already mixed-up) pdf's.
+[ $numgauss -lt $numleaves ] && numgauss=$numleaves
+incgauss=$[($totgauss-$numgauss)/$maxiterinc] # per-iter increment for #Gauss
+nj=`cat $alidir/num_jobs` || exit 1;
+sdata=$data/split$nj
+splice_opts=`cat $alidir/splice_opts 2>/dev/null` # frame-splicing options.
+
+mkdir -p $dir/log
+echo $nj >$dir/num_jobs
+cp $alidir/splice_opts $dir 2>/dev/null
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+
+## Set up features.
+if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "$0: feature type is $feat_type"
+
+case $feat_type in
+  delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
+  lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
+    cp $alidir/final.mat $dir
+    ;;
+  *) echo "Invalid feature type $feat_type" && exit 1;
+esac
+if [ -f $alidir/trans.1 ]; then
+  echo "$0: using transforms from $alidir"
+  ln.pl $alidir/trans.* $dir  # Link them to dest dir.
+  feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |"
+fi
+##
+
+
+if [ $stage -le -5 ]; then
+  echo "$0: accumulating tree stats"
+  $cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \
+    acc-tree-stats --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \
+    "ark:gunzip -c $alidir/ali.JOB.gz|" $dir/JOB.treeacc || exit 1;
+  [ "`ls $dir/*.treeacc | wc -w`" -ne "$nj" ] && echo "$0: Wrong #tree-stats" && exit 1;
+  sum-tree-stats $dir/treeacc $dir/*.treeacc 2>$dir/log/sum_tree_acc.log || exit 1;
+  rm $dir/*.treeacc
+fi
+
+if [ $stage -le -4 ]; then
+  echo "$0: Getting questions for tree clustering."
+  # preparing questions, roots file...
+  cluster-phones $dir/treeacc $lang/phones/sets.int $dir/questions.int 2> $dir/log/questions.log || exit 1;
+  cat $lang/phones/extra_questions.int >> $dir/questions.int
+  compile-questions $lang/topo $dir/questions.int $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1;
+
+  echo "$0: Building the tree"
+  $cmd $dir/log/build_tree.log \
+    build-tree --verbose=1 --max-leaves=$numleaves \
+    --cluster-thresh=$cluster_thresh $dir/treeacc $lang/phones/roots.int \
+    $dir/questions.qst $lang/topo $dir/tree || exit 1;
+fi

+if [ $stage -le -3 ]; then
+  echo "$0: Initializing the model"
+
+  # The gmm-init-model command (with more than the normal # of command-line args)
+  # will initialize the p.d.f.'s to the p.d.f.'s in the alignment model.
+
+  gmm-init-model  --write-occs=$dir/1.occs \
+    $dir/tree $dir/treeacc $lang/topo $dir/tmp.mdl $alidir/tree $alidir/final.mdl \
+    2>$dir/log/init_model.log || exit 1;
+
+  grep 'no stats' $dir/log/init_model.log && echo "$0: This is a bad warning.";
+  rm $dir/treeacc
+fi
+
+if [ $stage -le -2 ]; then
+  echo "$0: mixing up old model."
+  # We do both mixing-down and mixing-up to get the target #Gauss in each state,
+  # since the initial model may have either more or fewer Gaussians than we want.
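+  # Editor's aside (illustrative sketch, not part of the original script):
+  # passing equal --mix-down and --mix-up targets clamps the model to exactly
+  # that many Gaussians, merging or splitting per state according to the
+  # occupancies in 1.occs.  Standalone shape of the call (file names are
+  # placeholders for the paths used below):
+  #   gmm-mixup --mix-down=7500 --mix-up=7500 tmp.mdl 1.occs 1.mdl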
+  gmm-mixup --mix-down=$numgauss --mix-up=$numgauss $dir/tmp.mdl $dir/1.occs $dir/1.mdl \
+    2> $dir/log/mixup.log || exit 1;
+  rm $dir/tmp.mdl
+fi
+
+# Convert alignments to the new tree.
+if [ $stage -le -1 ]; then
+  echo "$0: converting old alignments"
+  $cmd JOB=1:$nj $dir/log/convert.JOB.log \
+    convert-ali $alidir/final.mdl $dir/1.mdl $dir/tree \
+    "ark:gunzip -c $alidir/ali.JOB.gz|" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
+fi
+
+if [ $stage -le 0 ]; then
+  echo "$0: compiling training graphs"
+  $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
+    compile-train-graphs --batch-size=$batch_size $dir/tree $dir/1.mdl $lang/L.fst \
+    "ark:sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $sdata/JOB/text |" \
+    "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
+fi
+
+x=1
+while [ $x -lt $num_iters ]; do
+  echo "$0: pass $x"
+  if echo $realign_iters | grep -w $x >/dev/null && [ $stage -le $x ]; then
+    echo "$0: aligning data"
+    $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \
+      gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam $dir/$x.mdl \
+      "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" \
+      || exit 1;
+  fi
+  if [ $stage -le $x ]; then
+    $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \
+      gmm-acc-stats-ali $dir/$x.mdl "$feats" \
+      "ark,s,cs:gunzip -c $dir/ali.JOB.gz|" $dir/$x.JOB.acc || exit 1;
+    [ "`ls $dir/$x.*.acc | wc -w`" -ne "$nj" ] && echo "$0: wrong #accs" && exit 1;
+    $cmd $dir/log/update.$x.log \
+      gmm-est --write-occs=$dir/$[$x+1].occs --mix-up=$numgauss $dir/$x.mdl \
+      "gmm-sum-accs - $dir/$x.*.acc |" $dir/$[$x+1].mdl || exit 1;
+    rm $dir/$x.mdl $dir/$x.*.acc $dir/$x.occs
+  fi
+  [[ $x -le $maxiterinc ]] && numgauss=$[$numgauss+$incgauss];
+  x=$[$x+1];
+done
+
+if [ -f $alidir/trans.1 ]; then
+  echo "$0: estimating alignment model"
+  $cmd JOB=1:$nj $dir/log/acc_alimdl.JOB.log \
+    ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \
+    gmm-acc-stats-twofeats $dir/$x.mdl "$feats" "$sifeats" \
+    ark,s,cs:- $dir/$x.JOB.acc || exit 1;
+  [ "`ls $dir/$x.*.acc | wc -w`" -ne "$nj" ] && echo "$0: wrong #accs" && exit 1;
+
+  $cmd $dir/log/est_alimdl.log \
+    gmm-est --write-occs=$dir/final.occs --remove-low-count-gaussians=false $dir/$x.mdl \
+    "gmm-sum-accs - $dir/$x.*.acc|" $dir/$x.alimdl || exit 1;
+  rm $dir/$x.*.acc
+  rm $dir/final.alimdl 2>/dev/null
+  ln -s $x.alimdl $dir/final.alimdl
+fi
+
+rm $dir/final.mdl 2>/dev/null
+ln -s $x.mdl $dir/final.mdl
+
+echo Done
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/train_sat.sh b/egs/kaldi-vystadial-recipe/s5/steps/train_sat.sh
new file mode 100755
index 00000000000..b9356cf2d6f
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/train_sat.sh
@@ -0,0 +1,238 @@
+#!/bin/bash
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
+
+
+# This does Speaker Adapted Training (SAT), i.e. train on
+# fMLLR-adapted features.  It can be done on top of either LDA+MLLT, or
+# delta and delta-delta features.  If there are no transforms supplied
+# in the alignment directory, it will estimate transforms itself before
+# building the tree (and in any case, it estimates transforms a number
+# of times during training).
+
+
+# Begin configuration section.
+stage=-5
+fmllr_update_type=full
+cmd=run.pl
+scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
+beam=10
+retry_beam=40
+boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment
+realign_iters="10 20 30";
+fmllr_iters="2 4 6 12";
+silence_weight=0.0 # Weight on silence in fMLLR estimation.
+num_iters=35    # Number of iterations of training
+max_iter_inc=25 # Last iter to increase #Gauss on.
+power=0.2 # Exponent for number of gaussians according to occurrence counts
+cluster_thresh=-1 # for build-tree control final bottom-up clustering of leaves
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# != 6 ]; then
+  echo "Usage: steps/train_sat.sh <#leaves> <#gauss> <data> <lang> <ali-dir> <exp-dir>"
+  echo " e.g.: steps/train_sat.sh 2500 15000 data/train_si84 data/lang exp/tri2b_ali_si84 exp/tri3b"
+  echo "Main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo "  --config <config-file>                           # config containing options"
+  echo "  --stage <stage>                                  # stage to do partial re-run from."
+  exit 1;
+fi
+
+numleaves=$1
+totgauss=$2
+data=$3
+lang=$4
+alidir=$5
+dir=$6
+
+for f in $data/feats.scp $lang/phones.txt $alidir/final.mdl $alidir/ali.1.gz; do
+  [ ! -f $f ] && echo "train_sat.sh: no such file $f" && exit 1;
+done
+
+numgauss=$numleaves
+incgauss=$[($totgauss-$numgauss)/$max_iter_inc] # per-iter #gauss increment
+oov=`cat $lang/oov.int`
+nj=`cat $alidir/num_jobs` || exit 1;
+silphonelist=`cat $lang/phones/silence.csl`
+ciphonelist=`cat $lang/phones/context_indep.csl` || exit 1;
+sdata=$data/split$nj;
+splice_opts=`cat $alidir/splice_opts 2>/dev/null` # frame-splicing options.
+
+mkdir -p $dir/log
+cp $alidir/splice_opts $dir 2>/dev/null # frame-splicing options.
+
+echo $nj >$dir/num_jobs
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+
+# Set up features.
+
+if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "$0: feature type is $feat_type"
+
+## Set up speaker-independent features.
+case $feat_type in
+  delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
+  lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
+    cp $alidir/final.mat $dir
+    ;;
+  *) echo "$0: invalid feature type $feat_type" && exit 1;
+esac
+
+## Get initial fMLLR transforms (possibly from alignment dir)
+if [ -f $alidir/trans.1 ]; then
+  echo "$0: Using transforms from $alidir"
+  feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$alidir/trans.JOB ark:- ark:- |"
+  cur_trans_dir=$alidir
+else
+  if [ $stage -le -4 ]; then
+    echo "$0: obtaining initial fMLLR transforms since not present in $alidir"
+    $cmd JOB=1:$nj $dir/log/fmllr.0.JOB.log \
+      ali-to-post "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \
+      weight-silence-post $silence_weight $silphonelist $alidir/final.mdl ark:- ark:- \| \
+      gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \
+      --spk2utt=ark:$sdata/JOB/spk2utt $alidir/final.mdl "$sifeats" \
+      ark:- ark:$dir/trans.JOB || exit 1;
+  fi
+  feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |"
+  cur_trans_dir=$dir
+fi
+
+if [ $stage -le -3 ]; then
+  # Get tree stats.
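+  # Editor's aside (illustrative sketch, not part of the original script):
+  # acc-tree-stats collects, per phone-in-context, the Gaussian sufficient
+  # statistics (counts and mean/variance stats) that build-tree later clusters
+  # into the state-tying tree.  Standalone shape of the two steps (paths and
+  # the ci-phone list are placeholders):
+  #   acc-tree-stats --ci-phones=1:2:3 final.mdl "$feats" "ark:gunzip -c ali.1.gz|" 1.treeacc
+  #   sum-tree-stats treeacc 1.treeacc 2.treeacc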
+ echo "$0: Accumulating tree stats" + $cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \ + acc-tree-stats --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \ + "ark:gunzip -c $alidir/ali.JOB.gz|" $dir/JOB.treeacc || exit 1; + [ "`ls $dir/*.treeacc | wc -w`" -ne "$nj" ] && echo "$0: Wrong #tree-accs" && exit 1; + $cmd $dir/log/sum_tree_acc.log \ + sum-tree-stats $dir/treeacc $dir/*.treeacc || exit 1; + rm $dir/*.treeacc +fi + +if [ $stage -le -2 ]; then + echo "$0: Getting questions for tree clustering." + # preparing questions, roots file... + cluster-phones $dir/treeacc $lang/phones/sets.int $dir/questions.int 2> $dir/log/questions.log || exit 1; + cat $lang/phones/extra_questions.int >> $dir/questions.int + compile-questions $lang/topo $dir/questions.int $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1; + + echo "$0: Building the tree" + $cmd $dir/log/build_tree.log \ + build-tree --verbose=1 --max-leaves=$numleaves \ + --cluster-thresh=$cluster_thresh $dir/treeacc $lang/phones/roots.int \ + $dir/questions.qst $lang/topo $dir/tree || exit 1; + + gmm-init-model --write-occs=$dir/1.occs \ + $dir/tree $dir/treeacc $lang/topo $dir/1.mdl 2> $dir/log/init_model.log || exit 1; + grep 'no stats' $dir/log/init_model.log && echo "$0: This is a bad warning."; + + rm $dir/treeacc +fi + + +if [ $stage -le -1 ]; then + # Convert the alignments. + echo "$0: Converting alignments from $alidir to use current tree" + $cmd JOB=1:$nj $dir/log/convert.JOB.log \ + convert-ali $alidir/final.mdl $dir/1.mdl $dir/tree \ + "ark:gunzip -c $alidir/ali.JOB.gz|" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; +fi + +if [ $stage -le 0 ]; then + echo "$0: Compiling graphs of transcripts" + $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ + compile-train-graphs $dir/tree $dir/1.mdl $lang/L.fst \ + "ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $sdata/JOB/text |" \ + "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; +fi + +x=1 +while [ $x -lt $num_iters ]; do + echo Pass $x + if echo $realign_iters | grep -w $x >/dev/null && [ $stage -le $x ]; then + echo Aligning data + mdl="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $dir/$x.mdl - |" + $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \ + gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$mdl" \ + "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \ + "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; + fi + + if echo $fmllr_iters | grep -w $x >/dev/null; then + if [ $stage -le $x ]; then + echo Estimating fMLLR transforms + # We estimate a transform that's additional to the previous transform; + # we'll compose them. + $cmd JOB=1:$nj $dir/log/fmllr.$x.JOB.log \ + ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \ + weight-silence-post $silence_weight $silphonelist $dir/$x.mdl ark:- ark:- \| \ + gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \ + --spk2utt=ark:$sdata/JOB/spk2utt $dir/$x.mdl \ + "$feats" ark:- ark:$dir/tmp_trans.JOB || exit 1; + for n in `seq $nj`; do + ! 
+        ! ( compose-transforms --b-is-affine=true \
+          ark:$dir/tmp_trans.$n ark:$cur_trans_dir/trans.$n ark:$dir/composed_trans.$n \
+          && mv $dir/composed_trans.$n $dir/trans.$n && \
+          rm $dir/tmp_trans.$n ) 2>$dir/log/compose_transforms.$x.log \
+          && echo "$0: Error composing transforms" && exit 1;
+      done
+    fi
+    feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |"
+    cur_trans_dir=$dir
+  fi
+
+  if [ $stage -le $x ]; then
+    $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \
+      gmm-acc-stats-ali $dir/$x.mdl "$feats" \
+      "ark,s,cs:gunzip -c $dir/ali.JOB.gz|" $dir/$x.JOB.acc || exit 1;
+    [ `ls $dir/$x.*.acc | wc -w` -ne "$nj" ] && echo "$0: Wrong #accs" && exit 1;
+    $cmd $dir/log/update.$x.log \
+      gmm-est --power=$power --write-occs=$dir/$[$x+1].occs --mix-up=$numgauss $dir/$x.mdl \
+      "gmm-sum-accs - $dir/$x.*.acc |" $dir/$[$x+1].mdl || exit 1;
+    rm $dir/$x.mdl $dir/$x.*.acc
+    rm $dir/$x.occs
+  fi
+  [ $x -le $max_iter_inc ] && numgauss=$[$numgauss+$incgauss];
+  x=$[$x+1];
+done
+
+
+if [ $stage -le $x ]; then
+  # Accumulate stats for "alignment model"-- this model is
+  # computed with the speaker-independent features, but matches Gaussian-for-Gaussian
+  # with the final speaker-adapted model.
+  $cmd JOB=1:$nj $dir/log/acc_alimdl.JOB.log \
+    ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \
+    gmm-acc-stats-twofeats $dir/$x.mdl "$feats" "$sifeats" \
+    ark,s,cs:- $dir/$x.JOB.acc || exit 1;
+  [ `ls $dir/$x.*.acc | wc -w` -ne "$nj" ] && echo "$0: Wrong #accs" && exit 1;
+  # Update model.
+  $cmd $dir/log/est_alimdl.log \
+    gmm-est --power=$power --remove-low-count-gaussians=false $dir/$x.mdl \
+    "gmm-sum-accs - $dir/$x.*.acc|" $dir/$x.alimdl || exit 1;
+  rm $dir/$x.*.acc
+fi
+
+rm $dir/final.{mdl,alimdl,occs} 2>/dev/null
+ln -s $x.mdl $dir/final.mdl
+ln -s $x.occs $dir/final.occs
+ln -s $x.alimdl $dir/final.alimdl
+
+
+
+utils/summarize_warnings.pl $dir/log
+(
+  echo "$0: Likelihood evolution:"
+  for x in `seq $[$num_iters-1]`; do
+    tail -n 30 $dir/log/acc.$x.*.log | awk '/Overall avg like/{l += $(NF-3)*$(NF-1); t += $(NF-1); }
+        /Overall average logdet/{d += $(NF-3)*$(NF-1); t2 += $(NF-1);}
+        END{ d /= t2; l /= t; printf("%s ", d+l); } '
+  done
+  echo
+) | tee $dir/log/summary.log
+
+echo Done
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/train_sgmm.sh b/egs/kaldi-vystadial-recipe/s5/steps/train_sgmm.sh
new file mode 100755
index 00000000000..8c866a3961a
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/train_sgmm.sh
@@ -0,0 +1,273 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
+
+# SGMM training, with speaker vectors.  This script would normally be called on
+# top of fMLLR features obtained from a conventional system, but it also works
+# on top of any type of speaker-independent features (based on
+# deltas+delta-deltas or LDA+MLLT).  For more info on SGMMs, see the paper "The
+# subspace Gaussian mixture model--A structured model for speech recognition".
+# (Computer Speech and Language, 2011).
+
+# Begin configuration section.
+nj=4
+cmd=run.pl
+stage=-6
+context_opts=  # e.g. set it to "--context-width=5 --central-position=2" for a
+               # quinphone system.
+scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
+num_iters=25   # Total number of iterations
+num_iters_alimdl=3 # Number of iterations for estimating alignment model.
+max_iter_inc=15 # Last iter to increase #substates on.
+realign_iters="5 10 15"; # Iters to realign on.
+spkvec_iters="5 8 12 17" # Iters to estimate speaker vectors on.
+increase_dim_iters="6 8"; # Iters on which to increase phn dim and/or spk dim;
+   # rarely necessary, and if it is, only the 1st will normally be necessary.
+rand_prune=0.1 # Randomized-pruning parameter for posteriors, to speed up training.
+phn_dim=  # You can use this to set the phonetic subspace dim. [default: feat-dim+1]
+spk_dim=  # You can use this to set the speaker subspace dim. [default: feat-dim]
+power=0.2 # Exponent for number of gaussians according to occurrence counts
+beam=8
+retry_beam=40
+cluster_thresh=-1 # for build-tree control final bottom-up clustering of leaves
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+
+if [ $# != 7 ]; then
+  echo "Usage: steps/train_sgmm.sh <num-leaves> <tot-substates> <data> <lang> <ali-dir> <ubm> <exp-dir>"
+  echo " e.g.: steps/train_sgmm.sh 3500 10000 data/train_si84 data/lang \\"
+  echo "                           exp/tri3b_ali_si84 exp/ubm4a/final.ubm exp/sgmm4a"
+  echo "main options (for others, see top of script file)"
+  echo "  --config <config-file>                           # config containing options"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo "  --silence-weight <sil-weight>                    # weight for silence (e.g. 0.5 or 0.0)"
+  echo "  --num-iters <#iters>                             # Number of iterations of E-M"
+  exit 1;
+fi
+
+
+num_leaves=$1
+totsubstates=$2
+data=$3
+lang=$4
+alidir=$5
+ubm=$6
+dir=$7
+
+# Check some files.
+for f in $data/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl $ubm; do
+  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
+done
+
+
+# Set some variables.
+oov=`cat $lang/oov.int`
+silphonelist=`cat $lang/phones/silence.csl`
+ciphonelist=`cat $lang/phones/context_indep.csl` || exit 1;
+numsubstates=$num_leaves # Initial #-substates.
+incsubstates=$[($totsubstates-$numsubstates)/$max_iter_inc] # per-iter increment for #substates
+feat_dim=`gmm-info $alidir/final.mdl 2>/dev/null | awk '/feature dimension/{print $NF}'` || exit 1;
+[ $feat_dim -eq $feat_dim ] || exit 1; # make sure it's numeric.
+[ -z $phn_dim ] && phn_dim=$[$feat_dim+1]
+[ -z $spk_dim ] && spk_dim=$feat_dim
+nj=`cat $alidir/num_jobs` || exit 1;
+
+mkdir -p $dir/log
+echo $nj > $dir/num_jobs
+sdata=$data/split$nj;
+splice_opts=`cat $alidir/splice_opts 2>/dev/null`
+cp $alidir/splice_opts $dir 2>/dev/null
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+
+spkvecs_opt=  # Empty option for now, until we estimate the speaker vectors.
+gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|"
+
+## Set up features.
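+# Editor's aside (illustrative, not part of the original script): the "feats"
+# strings built below are Kaldi rspecifiers that embed a shell pipeline; the
+# consuming binary substitutes JOB and runs it per job, so the delta case
+# behaves like
+#   apply-cmvn --norm-vars=false --utt2spk=ark:utt2spk scp:cmvn.scp \
+#     scp:feats.scp ark:- | add-deltas ark:- ark:-
+# (utt2spk/cmvn.scp/feats.scp stand in for the per-job split files).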
+if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "$0: feature type is $feat_type"
+
+case $feat_type in
+  delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
+  lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
+    cp $alidir/final.mat $dir
+    ;;
+  *) echo "$0: invalid feature type $feat_type" && exit 1;
+esac
+if [ -f $alidir/trans.1 ]; then
+  echo "$0: using transforms from $alidir"
+  feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$alidir/trans.JOB ark:- ark:- |"
+fi
+##
+
+
+if [ $stage -le -6 ]; then
+  echo "$0: accumulating tree stats"
+  $cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \
+    acc-tree-stats --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \
+    "ark:gunzip -c $alidir/ali.JOB.gz|" $dir/JOB.treeacc || exit 1;
+  [ "`ls $dir/*.treeacc | wc -w`" -ne "$nj" ] && echo "$0: Wrong #tree-stats" && exit 1;
+  sum-tree-stats $dir/treeacc $dir/*.treeacc 2>$dir/log/sum_tree_acc.log || exit 1;
+  rm $dir/*.treeacc
+fi
+
+if [ $stage -le -5 ]; then
+  echo "$0: Getting questions for tree clustering."
+  # preparing questions, roots file...
+  cluster-phones $dir/treeacc $lang/phones/sets.int $dir/questions.int 2> $dir/log/questions.log || exit 1;
+  cat $lang/phones/extra_questions.int >> $dir/questions.int
+  compile-questions $lang/topo $dir/questions.int $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1;
+
+  echo "$0: Building the tree"
+  $cmd $dir/log/build_tree.log \
+    build-tree --verbose=1 --max-leaves=$num_leaves \
+    --cluster-thresh=$cluster_thresh $dir/treeacc $lang/phones/roots.int \
+    $dir/questions.qst $lang/topo $dir/tree || exit 1;
+fi
+
+if [ $stage -le -4 ]; then
+  echo "$0: Initializing the model"
+  # Note: if phn_dim > feat_dim+1 or spk_dim > feat_dim, these dims
+  # will be truncated on initialization.
+  $cmd $dir/log/init_sgmm.log \
+    sgmm-init --phn-space-dim=$phn_dim --spk-space-dim=$spk_dim $lang/topo \
+    $dir/tree $ubm $dir/0.mdl || exit 1;
+fi
+
+if [ $stage -le -3 ]; then
+  echo "$0: doing Gaussian selection"
+  $cmd JOB=1:$nj $dir/log/gselect.JOB.log \
+    sgmm-gselect $dir/0.mdl "$feats" \
+    "ark,t:|gzip -c >$dir/gselect.JOB.gz" || exit 1;
+fi
+
+if [ $stage -le -2 ]; then
+  echo "$0: compiling training graphs"
+  text="ark:sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $sdata/JOB/text|"
+  $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
+    compile-train-graphs $dir/tree $dir/0.mdl $lang/L.fst \
+    "$text" "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
+fi
+
+if [ $stage -le -1 ]; then
+  echo "$0: Converting alignments"
+  $cmd JOB=1:$nj $dir/log/convert_ali.JOB.log \
+    convert-ali $alidir/final.mdl $dir/0.mdl $dir/tree "ark:gunzip -c $alidir/ali.JOB.gz|" \
+    "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
+fi
+
+x=0
+while [ $x -lt $num_iters ]; do
+  echo "$0: training pass $x ..."
+  if echo $realign_iters | grep -w $x >/dev/null && [ $stage -le $x ]; then
+    echo "$0: re-aligning data"
+    $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \
+      sgmm-align-compiled $spkvecs_opt $scale_opts "$gselect_opt" \
+      --utt2spk=ark:$sdata/JOB/utt2spk --beam=$beam --retry-beam=$retry_beam \
+      $dir/$x.mdl "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \
+      "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
+  fi
+  if [ $spk_dim -gt 0 ] && echo $spkvec_iters | grep -w $x >/dev/null; then
+    if [ $stage -le $x ]; then
+      $cmd JOB=1:$nj $dir/log/spkvecs.$x.JOB.log \
+        ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \
+        weight-silence-post 0.01 $silphonelist $dir/$x.mdl ark:- ark:- \| \
+        sgmm-est-spkvecs --rand-prune=$rand_prune --spk2utt=ark:$sdata/JOB/spk2utt \
+        $spkvecs_opt "$gselect_opt" $dir/$x.mdl "$feats" ark,s,cs:- \
+        ark:$dir/tmp_vecs.JOB '&&' mv $dir/tmp_vecs.JOB $dir/vecs.JOB || exit 1;
+    fi
+    spkvecs_opt="--spk-vecs=ark:$dir/vecs.JOB"
+  fi
+  if [ $x -eq 0 ]; then
+    flags=vwcSt # on the first iteration, don't update projections M or N
+  elif [ $spk_dim -gt 0 -a $[$x%2] -eq 1 -a $x -ge `echo $spkvec_iters | awk '{print $1}'` ]; then
+    # Update N if we have speaker-vector space and x is odd,
+    # and we've already updated the speaker vectors...
+    flags=vNwcSt
+  else
+    # otherwise update M.
+    flags=vMwcSt
+  fi
+
+  if [ $stage -le $x ]; then
+    $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \
+      sgmm-acc-stats $spkvecs_opt --utt2spk=ark:$sdata/JOB/utt2spk \
+      --update-flags=$flags "$gselect_opt" --rand-prune=$rand_prune \
+      $dir/$x.mdl "$feats" "ark,s,cs:gunzip -c $dir/ali.JOB.gz | ali-to-post ark:- ark:-|" \
+      $dir/$x.JOB.acc || exit 1;
+  fi
+
+  # The next option is needed if the user specifies a phone or speaker sub-space
+  # dimension that's higher than the "normal" one.
+  increase_dim_opts=
+  if echo $increase_dim_iters | grep -w $x >/dev/null; then
+    increase_dim_opts="--increase-phn-dim=$phn_dim --increase-spk-dim=$spk_dim"
+    # Note: the command below might have a null effect on some iterations.
+    if [ $spk_dim -gt $feat_dim ]; then
+      $cmd JOB=1:$nj $dir/log/copy_vecs.$x.JOB.log \
+        copy-vector --print-args=false --change-dim=$spk_dim \
+        ark:$dir/vecs.JOB ark:$dir/vecs_tmp.JOB '&&' \
+        mv $dir/vecs_tmp.JOB $dir/vecs.JOB || exit 1;
+    fi
+  fi
+
+  if [ $stage -le $x ]; then
+    $cmd $dir/log/update.$x.log \
+      sgmm-est --update-flags=$flags --split-substates=$numsubstates $increase_dim_opts \
+      --power=$power --write-occs=$dir/$[$x+1].occs $dir/$x.mdl "sgmm-sum-accs - $dir/$x.*.acc|" \
+      $dir/$[$x+1].mdl || exit 1;
+    rm $dir/$x.mdl $dir/$x.*.acc $dir/$x.occs 2>/dev/null
+  fi
+
+  if [ $x -lt $max_iter_inc ]; then
+    numsubstates=$[$numsubstates+$incsubstates]
+  fi
+  x=$[$x+1];
+done
+
+rm $dir/final.mdl $dir/final.occs 2>/dev/null
+ln -s $x.mdl $dir/final.mdl
+ln -s $x.occs $dir/final.occs
+
+if [ $spk_dim -gt 0 ]; then
+  # We need to create an "alignment model" that's been trained
+  # without the speaker vectors, to do the first-pass decoding with
+  # at test time.
+
+  # We do this for a few iters, in this recipe.
+  final_mdl=$dir/$x.mdl
+  cur_alimdl=$dir/$x.mdl
+  while [ $x -lt $[$num_iters+$num_iters_alimdl] ]; do
+    echo "$0: building alignment model (pass $x)"
+    if [ $x -eq $num_iters ]; then # 1st pass of building alimdl.
+      flags=MwcS # don't update v the first time.  Note-- we never update transitions;
+      # they wouldn't change anyway as we use the same alignment as previously.
+    else
+      flags=vMwcS
+    fi
+    if [ $stage -le $x ]; then
+      $cmd JOB=1:$nj $dir/log/acc_ali.$x.JOB.log \
+        ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \
+        sgmm-post-to-gpost $spkvecs_opt "$gselect_opt" \
+        --utt2spk=ark:$sdata/JOB/utt2spk $final_mdl "$feats" ark,s,cs:- ark:- \| \
+        sgmm-acc-stats-gpost --rand-prune=$rand_prune --update-flags=$flags \
+        $cur_alimdl "$feats" ark,s,cs:- $dir/$x.JOB.aliacc || exit 1;
+      $cmd $dir/log/update_ali.$x.log \
+        sgmm-est --update-flags=$flags --remove-speaker-space=true --power=$power $cur_alimdl \
+        "sgmm-sum-accs - $dir/$x.*.aliacc|" $dir/$[$x+1].alimdl || exit 1;
+      rm $dir/$x.*.aliacc || exit 1;
+      [ $x -gt $num_iters ] && rm $dir/$x.alimdl
+    fi
+    cur_alimdl=$dir/$[$x+1].alimdl
+    x=$[$x+1]
+  done
+  rm $dir/final.alimdl 2>/dev/null
+  ln -s $x.alimdl $dir/final.alimdl
+fi
+
+utils/summarize_warnings.pl $dir/log
+
+echo Done
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/train_sgmm2.sh b/egs/kaldi-vystadial-recipe/s5/steps/train_sgmm2.sh
new file mode 100755
index 00000000000..9cfce3ae6ab
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/train_sgmm2.sh
@@ -0,0 +1,292 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
+
+# SGMM training, with speaker vectors.  This script would normally be called on
+# top of fMLLR features obtained from a conventional system, but it also works
+# on top of any type of speaker-independent features (based on
+# deltas+delta-deltas or LDA+MLLT).  For more info on SGMMs, see the paper "The
+# subspace Gaussian mixture model--A structured model for speech recognition"
+# (Computer Speech and Language, 2011).
+
+# Begin configuration section.
+nj=4
+cmd=run.pl
+stage=-6  # use this to resume partially finished training
+context_opts=  # e.g. set it to "--context-width=5 --central-position=2" for a
+               # quinphone system.
+scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
+num_iters=25        # Total number of iterations of training
+num_iters_alimdl=3  # Number of iterations for estimating alignment model.
+max_iter_inc=15     # Last iter to increase #substates on.
+realign_iters="5 10 15";      # Iters to realign on.
+spkvec_iters="5 8 12 17"      # Iters to estimate speaker vectors on.
+increase_dim_iters="6 10 14"; # Iters on which to increase phn dim and/or spk dim;
+                              # rarely necessary, and if it is, only the 1st will normally be necessary.
+rand_prune=0.1  # Randomized-pruning parameter for posteriors, to speed up training.
+                # Bigger -> more pruning; zero = no pruning.
+phn_dim=   # You can use this to set the phonetic subspace dim. [default: feat-dim+1]
+spk_dim=   # You can use this to set the speaker subspace dim. [default: feat-dim]
+power=0.2  # Exponent for number of Gaussians according to occurrence counts
+beam=8
+self_weight=0.9
+retry_beam=40
+leaves_per_group=5  # Relates to the SCTM (state-clustered tied-mixture) aspect:
+                    # average number of pdfs in a "group" of pdfs.
+update_m_iter=4
+spk_dep_weights=true  # [Symmetric SGMM] set this to false if you don't want "u"
+                      # (i.e. to turn off the symmetric SGMM).
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+
+if [ $# != 7 ]; then
+  echo "Usage: steps/train_sgmm2.sh <num-leaves> <tot-substates> <data> <lang> <ali-dir> <ubm> <exp-dir>"
+  echo " e.g.: steps/train_sgmm2.sh 5000 8000 data/train_si84 data/lang \\"
+  echo "                            exp/tri3b_ali_si84 exp/ubm4a/final.ubm exp/sgmm4a"
+  echo "main options (for others, see top of script file)"
+  echo "  --config <config-file>                           # config containing options"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo "  --silence-weight <sil-weight>                    # weight for silence (e.g. 0.5 or 0.0)"
+  echo "  --num-iters <#iters>                             # Number of iterations of E-M"
+  echo "  --leaves-per-group <#leaves>                     # Average #leaves shared in one group"
+  exit 1;
+fi
+
+num_pdfs=$1  # final #leaves, at 2nd level of tree.
+totsubstates=$2
+data=$3
+lang=$4
+alidir=$5
+ubm=$6
+dir=$7
+
+num_groups=$[$num_pdfs/$leaves_per_group]
+first_spkvec_iter=`echo $spkvec_iters | awk '{print $1}'` || exit 1;
+
+# Check some files.
+for f in $data/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl $ubm; do
+  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
+done
+
+
+# Set some variables.
+oov=`cat $lang/oov.int`
+silphonelist=`cat $lang/phones/silence.csl`
+ciphonelist=`cat $lang/phones/context_indep.csl`  # needed by acc-tree-stats below.
+if [ "$self_weight" == "1.0" ]; then
+  numsubstates=$num_groups # Initial #-substates.
+else
+  numsubstates=$num_pdfs # Initial #-substates.
+fi
+incsubstates=$[($totsubstates-$numsubstates)/$max_iter_inc] # per-iter increment for #substates
+feat_dim=`gmm-info $alidir/final.mdl 2>/dev/null | awk '/feature dimension/{print $NF}'` || exit 1;
+[ $feat_dim -eq $feat_dim ] || exit 1; # make sure it's numeric.
+[ -z $phn_dim ] && phn_dim=$[$feat_dim+1]
+[ -z $spk_dim ] && spk_dim=$feat_dim
+nj=`cat $alidir/num_jobs` || exit 1;
+splice_opts=`cat $alidir/splice_opts 2>/dev/null` # frame-splicing options.
+
+mkdir -p $dir/log
+cp $alidir/splice_opts $dir 2>/dev/null # frame-splicing options.
+echo $nj > $dir/num_jobs
+sdata=$data/split$nj;
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+
+spkvecs_opt=  # Empty option for now, until we estimate the speaker vectors.
+gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|"
+
+## Set up features.
+if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "$0: feature type is $feat_type"
+
+case $feat_type in
+  delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
+  lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
+    cp $alidir/final.mat $dir
+    ;;
+  *) echo "$0: invalid feature type $feat_type" && exit 1;
+esac
+if [ -f $alidir/trans.1 ]; then
+  echo "$0: using transforms from $alidir"
+  feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$alidir/trans.JOB ark:- ark:- |"
+fi
+##
+
+
+if [ $stage -le -6 ]; then
+  echo "$0: accumulating tree stats"
+  $cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \
+    acc-tree-stats --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \
+    "ark:gunzip -c $alidir/ali.JOB.gz|" $dir/JOB.treeacc || exit 1;
+  [ "`ls $dir/*.treeacc | wc -w`" -ne "$nj" ] && echo "$0: Wrong #tree-stats" && exit 1;
+  sum-tree-stats $dir/treeacc $dir/*.treeacc 2>$dir/log/sum_tree_acc.log || exit 1;
+  rm $dir/*.treeacc
+fi
+
+if [ $stage -le -5 ]; then
+  echo "$0: Getting questions for tree clustering."
+  # preparing questions, roots file...
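+  # (A sketch of what this stage produces, assuming the standard Kaldi layout:
+  # questions.int holds phone clusters usable as decision-tree questions, and
+  # questions.qst is the compiled form consumed by build-tree-two-level below.)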
+  cluster-phones $dir/treeacc $lang/phones/sets.int $dir/questions.int 2> $dir/log/questions.log || exit 1;
+  cat $lang/phones/extra_questions.int >> $dir/questions.int
+  compile-questions $lang/topo $dir/questions.int $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1;
+
+  echo "$0: Building the tree"
+  $cmd $dir/log/build_tree.log \
+    build-tree-two-level --binary=false --verbose=1 --max-leaves-first=$num_groups \
+    --max-leaves-second=$num_pdfs $dir/treeacc $lang/phones/roots.int \
+    $dir/questions.qst $lang/topo $dir/tree $dir/pdf2group.map || exit 1;
+fi
+
+if [ $stage -le -4 ]; then
+  echo "$0: Initializing the model"
+  # Note: if phn_dim > feat_dim+1 or spk_dim > feat_dim, these dims
+  # will be truncated on initialization.
+  $cmd $dir/log/init_sgmm.log \
+    sgmm2-init --spk-dep-weights=$spk_dep_weights --self-weight=$self_weight \
+    --pdf-map=$dir/pdf2group.map --phn-space-dim=$phn_dim \
+    --spk-space-dim=$spk_dim $lang/topo $dir/tree $ubm $dir/0.mdl || exit 1;
+fi
+
+if [ $stage -le -3 ]; then
+  echo "$0: doing Gaussian selection"
+  $cmd JOB=1:$nj $dir/log/gselect.JOB.log \
+    sgmm2-gselect $dir/0.mdl "$feats" \
+    "ark,t:|gzip -c >$dir/gselect.JOB.gz" || exit 1;
+fi
+
+if [ $stage -le -2 ]; then
+  echo "$0: compiling training graphs"
+  text="ark:sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $sdata/JOB/text|"
+  $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
+    compile-train-graphs $dir/tree $dir/0.mdl $lang/L.fst \
+    "$text" "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
+fi
+
+if [ $stage -le -1 ]; then
+  echo "$0: converting alignments"
+  $cmd JOB=1:$nj $dir/log/convert_ali.JOB.log \
+    convert-ali $alidir/final.mdl $dir/0.mdl $dir/tree "ark:gunzip -c $alidir/ali.JOB.gz|" \
+    "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
+fi
+
+
+x=0
+while [ $x -lt $num_iters ]; do
+  echo "$0: training pass $x ..."
+  if echo $realign_iters | grep -w $x >/dev/null && [ $stage -le $x ]; then
+    echo "$0: re-aligning data"
+    $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \
+      sgmm2-align-compiled $spkvecs_opt $scale_opts "$gselect_opt" \
+      --utt2spk=ark:$sdata/JOB/utt2spk --beam=$beam --retry-beam=$retry_beam \
+      $dir/$x.mdl "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \
+      "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
+  fi
+  if [ $spk_dim -gt 0 ] && echo $spkvec_iters | grep -w $x >/dev/null; then
+    if [ $stage -le $x ]; then
+      $cmd JOB=1:$nj $dir/log/spkvecs.$x.JOB.log \
+        ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \
+        weight-silence-post 0.01 $silphonelist $dir/$x.mdl ark:- ark:- \| \
+        sgmm2-est-spkvecs --rand-prune=$rand_prune --spk2utt=ark:$sdata/JOB/spk2utt \
+        $spkvecs_opt "$gselect_opt" $dir/$x.mdl "$feats" ark,s,cs:- \
+        ark:$dir/tmp_vecs.JOB '&&' mv $dir/tmp_vecs.JOB $dir/vecs.JOB || exit 1;
+    fi
+    spkvecs_opt="--spk-vecs=ark:$dir/vecs.JOB"
+  fi
+  if [ $x -eq 0 ]; then
+    flags=vwcSt # on the first iteration, don't update projections M or N
+  elif [ $spk_dim -gt 0 -a $[$x%2] -eq 1 -a $x -ge $first_spkvec_iter ]; then
+    # Update N if we have speaker-vector space and x is odd,
+    # and we've already updated the speaker vectors...
+    flags=vNwSct
+  else
+    if [ $x -ge $update_m_iter ]; then
+      flags=vMwSct # update M.
+    else
+      flags=vwSct  # no M on early iters, if --update-m-iter option given.
+    fi
+  fi
+  $spk_dep_weights && [ $x -ge $first_spkvec_iter ] && flags=${flags}u;  # update
+  # spk-weight projections "u".
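+  # (A reading of the flag letters, assuming the conventions of the sgmm2
+  # tools: v=state vectors, M=phonetic-subspace projections, N=speaker-subspace
+  # projections, w=weight projections, c=substate weights, S=covariances,
+  # t=transitions, u=speaker-dependent weight projections.  So with the
+  # defaults above, a symmetric-SGMM run would accumulate with e.g.
+  # flags=vNwSctu on iteration 17 -- an illustrative trace, not logged output.)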
+
+  if [ $stage -le $x ]; then
+    $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \
+      sgmm2-acc-stats $spkvecs_opt --utt2spk=ark:$sdata/JOB/utt2spk \
+      --update-flags=$flags "$gselect_opt" --rand-prune=$rand_prune \
+      $dir/$x.mdl "$feats" "ark,s,cs:gunzip -c $dir/ali.JOB.gz | ali-to-post ark:- ark:-|" \
+      $dir/$x.JOB.acc || exit 1;
+  fi
+
+  # The next option is needed if the user specifies a phone or speaker sub-space
+  # dimension that's higher than the "normal" one.
+  increase_dim_opts=
+  if echo $increase_dim_iters | grep -w $x >/dev/null; then
+    increase_dim_opts="--increase-phn-dim=$phn_dim --increase-spk-dim=$spk_dim"
+    # Note: the command below might have a null effect on some iterations.
+    if [ $spk_dim -gt $feat_dim ]; then
+      $cmd JOB=1:$nj $dir/log/copy_vecs.$x.JOB.log \
+        copy-vector --print-args=false --change-dim=$spk_dim \
+        ark:$dir/vecs.JOB ark:$dir/vecs_tmp.JOB '&&' \
+        mv $dir/vecs_tmp.JOB $dir/vecs.JOB || exit 1;
+    fi
+  fi
+
+  if [ $stage -le $x ]; then
+    $cmd $dir/log/update.$x.log \
+      sgmm2-est --update-flags=$flags --split-substates=$numsubstates \
+      $increase_dim_opts --power=$power --write-occs=$dir/$[$x+1].occs \
+      $dir/$x.mdl "sgmm2-sum-accs - $dir/$x.*.acc|" $dir/$[$x+1].mdl || exit 1;
+    rm $dir/$x.mdl $dir/$x.*.acc $dir/$x.occs 2>/dev/null
+  fi
+  if [ $x -lt $max_iter_inc ]; then
+    numsubstates=$[$numsubstates+$incsubstates]
+  fi
+  x=$[$x+1];
+done
+
+rm $dir/final.mdl $dir/final.occs 2>/dev/null
+ln -s $x.mdl $dir/final.mdl
+ln -s $x.occs $dir/final.occs
+
+if [ $spk_dim -gt 0 ]; then
+  # We need to create an "alignment model" that's been trained
+  # without the speaker vectors, to do the first-pass decoding with
+  # at test time.
+
+  # We do this for a few iters, in this recipe.
+  final_mdl=$dir/$x.mdl
+  cur_alimdl=$dir/$x.mdl
+  while [ $x -lt $[$num_iters+$num_iters_alimdl] ]; do
+    echo "$0: building alignment model (pass $x)"
+    if [ $x -eq $num_iters ]; then # 1st pass of building alimdl.
+      flags=MwcS # don't update v the first time.  Note-- we never update transitions;
+      # they wouldn't change anyway as we use the same alignment as previously.
+    else
+      flags=vMwcS
+    fi
+    if [ $stage -le $x ]; then
+      $cmd JOB=1:$nj $dir/log/acc_ali.$x.JOB.log \
+        ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \
+        sgmm2-post-to-gpost $spkvecs_opt "$gselect_opt" \
+        --utt2spk=ark:$sdata/JOB/utt2spk $final_mdl "$feats" ark,s,cs:- ark:- \| \
+        sgmm2-acc-stats-gpost --rand-prune=$rand_prune --update-flags=$flags \
+        $cur_alimdl "$feats" ark,s,cs:- $dir/$x.JOB.aliacc || exit 1;
+      $cmd $dir/log/update_ali.$x.log \
+        sgmm2-est --update-flags=$flags --remove-speaker-space=true --power=$power \
+        $cur_alimdl "sgmm2-sum-accs - $dir/$x.*.aliacc|" $dir/$[$x+1].alimdl || exit 1;
+      rm $dir/$x.*.aliacc || exit 1;
+      [ $x -gt $num_iters ] && rm $dir/$x.alimdl
+    fi
+    cur_alimdl=$dir/$[$x+1].alimdl
+    x=$[$x+1]
+  done
+  rm $dir/final.alimdl 2>/dev/null
+  ln -s $x.alimdl $dir/final.alimdl
+fi
+
+utils/summarize_warnings.pl $dir/log
+
+echo Done
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/train_ubm.sh b/egs/kaldi-vystadial-recipe/s5/steps/train_ubm.sh
new file mode 100755
index 00000000000..768025e25c6
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/train_ubm.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
+
+# This trains a UBM (i.e. a mixture of Gaussians), by clustering
+# the Gaussians from a trained HMM/GMM system and then doing a few
+# iterations of UBM training.
+# We mostly use this for SGMM systems.
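+#
+# A typical call sequence feeding an SGMM build looks like the following
+# (a sketch; the directory names are illustrative, not prescribed):
+#   steps/train_ubm.sh 400 data/train data/lang exp/tri3b_ali exp/ubm4a
+#   steps/train_sgmm2.sh 5000 8000 data/train data/lang exp/tri3b_ali \
+#     exp/ubm4a/final.ubm exp/sgmm4a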
+
+# Begin configuration section.
+nj=4
+cmd=run.pl
+silence_weight=   # You can set it to e.g. 0.0, to weight down silence in training.
+stage=-2
+num_gselect1=50   # first stage of Gaussian-selection
+num_gselect2=25   # second stage.
+intermediate_num_gauss=2000
+num_iters=3
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+
+if [ $# != 5 ]; then
+  echo "Usage: steps/train_ubm.sh <num-gauss> <data> <lang> <ali-dir> <exp-dir>"
+  echo " e.g.: steps/train_ubm.sh 400 data/train_si84 data/lang exp/tri2b_ali_si84 exp/ubm3c"
+  echo "main options (for others, see top of script file)"
+  echo "  --config <config-file>                           # config containing options"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo "  --silence-weight <sil-weight>                    # weight for silence (e.g. 0.5 or 0.0)"
+  echo "  --num-iters <#iters>                             # Number of iterations of E-M"
+  exit 1;
+fi
+
+num_gauss=$1
+data=$2
+lang=$3
+alidir=$4
+dir=$5
+
+for f in $data/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl; do
+  [ ! -f $f ] && echo "No such file $f" && exit 1;
+done
+
+if [ $[$num_gauss*2] -gt $intermediate_num_gauss ]; then
+  echo "intermediate_num_gauss was too small $intermediate_num_gauss"
+  intermediate_num_gauss=$[$num_gauss*2];
+  echo "setting it to $intermediate_num_gauss"
+fi
+
+
+# Set various variables.
+silphonelist=`cat $lang/phones/silence.csl` || exit 1;
+nj=`cat $alidir/num_jobs` || exit 1;
+
+mkdir -p $dir/log
+echo $nj > $dir/num_jobs
+sdata=$data/split$nj;
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+splice_opts=`cat $alidir/splice_opts 2>/dev/null` # frame-splicing options.
+
+## Set up features.
+if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
+echo "$0: feature type is $feat_type"
+
+case $feat_type in
+  delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
+  lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
+    cp $alidir/final.mat $dir
+    ;;
+  *) echo "$0: invalid feature type $feat_type" && exit 1;
+esac
+if [ -f $alidir/trans.1 ]; then
+  echo "$0: using transforms from $alidir"
+  feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$alidir/trans.JOB ark:- ark:- |"
+fi
+##
+
+if [ ! -z "$silence_weight" ]; then
+  weights_opt="--weights='ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-post ark:- ark:- | weight-silence-post $silence_weight $silphonelist $alidir/final.mdl ark:- ark:- | post-to-weights ark:- ark:- |'"
+else
+  weights_opt=
+fi
+
+if [ $stage -le -2 ]; then
+  echo "$0: clustering model $alidir/final.mdl to get initial UBM"
+  $cmd $dir/log/cluster.log \
+    init-ubm --intermediate-num-gauss=$intermediate_num_gauss --ubm-num-gauss=$num_gauss \
+    --verbose=2 --fullcov-ubm=true $alidir/final.mdl $alidir/final.occs \
+    $dir/0.ubm || exit 1;
+fi
+
+# Do initial phase of Gaussian selection and save it to disk -- later on we'll
+# do more Gaussian selection to further prune, as the model changes.
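+# (I.e. the top $num_gselect1=50 Gaussian indices computed once here are
+# re-pruned to the best $num_gselect2=25 per frame inside each training pass
+# below -- a cheap two-stage scheme, since the top-50 set changes little
+# between iterations while the model is refined.)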
+
+
+if [ $stage -le -1 ]; then
+  echo "$0: doing Gaussian selection"
+  $cmd JOB=1:$nj $dir/log/gselect.JOB.log \
+    gmm-gselect --n=$num_gselect1 "fgmm-global-to-gmm $dir/0.ubm - |" "$feats" \
+    "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1;
+fi
+
+
+x=0
+while [ $x -lt $num_iters ]; do
+  echo "Pass $x"
+  $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \
+    gmm-gselect --n=$num_gselect2 "--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" \
+    "fgmm-global-to-gmm $dir/$x.ubm - |" "$feats" ark:- \| \
+    fgmm-global-acc-stats $weights_opt --gselect=ark,s,cs:- $dir/$x.ubm "$feats" \
+    $dir/$x.JOB.acc || exit 1;
+  lowcount_opt="--remove-low-count-gaussians=false"
+  [ $[$x+1] -eq $num_iters ] && lowcount_opt=  # Only remove low-count Gaussians
+  # on last iter-- we can't do it earlier, or the Gaussian-selection info would
+  # be mismatched.
+  $cmd $dir/log/update.$x.log \
+    fgmm-global-est $lowcount_opt --verbose=2 $dir/$x.ubm "fgmm-global-sum-accs - $dir/$x.*.acc |" \
+    $dir/$[$x+1].ubm || exit 1;
+  rm $dir/$x.*.acc $dir/$x.ubm
+  x=$[$x+1]
+done
+
+rm $dir/gselect.*.gz
+rm $dir/final.ubm 2>/dev/null
+mv $dir/$x.ubm $dir/final.ubm || exit 1;
diff --git a/egs/kaldi-vystadial-recipe/s5/steps/word_align_lattices.sh b/egs/kaldi-vystadial-recipe/s5/steps/word_align_lattices.sh
new file mode 100755
index 00000000000..2adcfdec606
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/steps/word_align_lattices.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright Johns Hopkins University (Author: Daniel Povey) 2012
+# Apache 2.0.
+
+# Begin configuration section.
+silence_label=0
+cmd=run.pl
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+for x in `seq 2`; do
+  [ "$1" == "--silence-label" ] && silence_label=$2 && shift 2;
+  [ "$1" == "--cmd" ] && cmd="$2" && shift 2;
+done
+
+if [ $# != 3 ]; then
+  echo "Word-align lattices (make the arcs sync up with words)"
+  echo ""
+  echo "Usage: scripts/walign_lats.sh [options] <lang-dir> <input-decode-dir> <output-decode-dir>"
+  echo "options: [--cmd (run.pl|queue.pl [queue opts])] [--silence-label <silence-label>]"
+  exit 1;
+fi
+
+. ./path.sh || exit 1;
+
+lang=$1
+indir=$2
+outdir=$3
+
+mdl=`dirname $indir`/final.mdl
+wbfile=$lang/phones/word_boundary.int
+
+for f in $mdl $wbfile $indir/num_jobs; do
+  [ ! -f $f ] && echo "word_align_lattices.sh: no such file $f" && exit 1;
+done
+
+mkdir -p $outdir/log
+
+
+cp $indir/num_jobs $outdir;
+nj=`cat $indir/num_jobs`
+
+$cmd JOB=1:$nj $outdir/log/align.JOB.log \
+  lattice-align-words --silence-label=$silence_label --test=true \
+  $wbfile $mdl "ark:gunzip -c $indir/lat.JOB.gz|" "ark,t:|gzip -c >$outdir/lat.JOB.gz" || exit 1;
+
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/add_disambig.pl b/egs/kaldi-vystadial-recipe/s5/utils/add_disambig.pl
new file mode 100755
index 00000000000..c605659e105
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/add_disambig.pl
@@ -0,0 +1,58 @@
+#!/usr/bin/perl
+# Copyright 2010-2011 Microsoft Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Adds some specified number of disambig symbols to a symbol table.
+# Adds these as #1, #2, etc.
+# If the --include-zero option is specified, includes an extra one,
+# #0.
+
+$include_zero = 0;
+if($ARGV[0] eq "--include-zero") {
+  $include_zero = 1;
+  shift @ARGV;
+}
+
+if(@ARGV != 2) {
+  die "Usage: add_disambig.pl [--include-zero] symtab.txt num_extra > symtab_out.txt ";
+}
+
+
+$input = $ARGV[0];
+$nsyms = $ARGV[1];
+
+open(F, "<$input") || die "Opening file $input";
+
+while(<F>) {
+  @A = split(" ", $_);
+  @A == 2 || die "Bad line $_";
+  $lastsym = $A[1];
+  print;
+}
+
+if(!defined($lastsym)){
+  die "Empty symbol file?";
+}
+
+if($include_zero) {
+  $lastsym++;
+  print "#0 $lastsym\n";
+}
+
+for($n = 1; $n <= $nsyms; $n++) {
+  $y = $n + $lastsym;
+  print "#$n $y\n";
+}
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/add_lex_disambig.pl b/egs/kaldi-vystadial-recipe/s5/utils/add_lex_disambig.pl
new file mode 100755
index 00000000000..9f9054e1795
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/add_lex_disambig.pl
@@ -0,0 +1,101 @@
+#!/usr/bin/perl
+# Copyright 2010-2011 Microsoft Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Adds disambiguation symbols to a lexicon.
+# Outputs still in the normal lexicon format.
+# Disambig syms are numbered #1, #2, #3, etc. (#0
+# is reserved for use in the grammar, G.fst).
+# Outputs the number of disambig syms to the standard output.
+
+if(@ARGV != 2) {
+  die "Usage: add_lex_disambig.pl lexicon.txt lexicon_disambig.txt "
+}
+
+
+$lexfn = shift @ARGV;
+$lexoutfn = shift @ARGV;
+
+open(L, "<$lexfn") || die "Error opening lexicon $lexfn";
+
+# (1) Read in the lexicon.
+@L = ( );
+while(<L>) {
+  @A = split(" ", $_);
+  push @L, join(" ", @A);
+}
+
+# (2) Work out the count of each phone-sequence in the
+# lexicon.
+
+foreach $l (@L) {
+  @A = split(" ", $l);
+  shift @A; # Remove word.
+  $count{join(" ",@A)}++;
+}
+
+# (3) For each left sub-sequence of each phone-sequence, note down
+# that it exists (for identifying prefixes of longer strings).
+
+foreach $l (@L) {
+  @A = split(" ", $l);
+  shift @A; # Remove word.
+  while(@A > 0) {
+    pop @A; # Remove last phone
+    $issubseq{join(" ",@A)} = 1;
+  }
+}
+
+# (4) For each entry in the lexicon:
+# if the phone sequence is unique and is not a
+# prefix of another word, no disambig symbol.
+# Else output #1, or #2, #3, ... if the same phone-seq
+# has already been assigned a disambig symbol.
+
+
+open(O, ">$lexoutfn") || die "Opening lexicon file $lexoutfn for writing.\n";
+
+$max_disambig = 0;
+foreach $l (@L) {
+  @A = split(" ", $l);
+  $word = shift @A;
+  $phnseq = join(" ",@A);
+  if(!defined $issubseq{$phnseq}
+     && $count{$phnseq}==1) {
+    ;  # Do nothing.
+  } else {
+    if($phnseq eq "") { # need disambig symbols for the empty string
+      # that are not used anywhere else.
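+      # (For example, a lexicon entry with an empty pronunciation would get
+      # the pron "#1", and "#1" is then reserved so that no ordinary set of
+      # homophones re-uses it -- an illustrative trace, not real data.)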
+      $max_disambig++;
+      $reserved{$max_disambig} = 1;
+      $phnseq = "#$max_disambig";
+    } else {
+      $curnumber = $disambig_of{$phnseq};
+      if(!defined $curnumber) { $curnumber = 0; }
+      $curnumber++; # now 1 or 2, ...
+      while(defined $reserved{$curnumber} ) { $curnumber++; } # skip over reserved symbols
+      if($curnumber > $max_disambig) {
+        $max_disambig = $curnumber;
+      }
+      $disambig_of{$phnseq} = $curnumber;
+      $phnseq = $phnseq . " #" . $curnumber;
+    }
+  }
+  print O "$word\t$phnseq\n";
+}
+
+print $max_disambig . "\n";
+
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/apply_map.pl b/egs/kaldi-vystadial-recipe/s5/utils/apply_map.pl
new file mode 100755
index 00000000000..4f89d584b36
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/apply_map.pl
@@ -0,0 +1,54 @@
+#!/usr/bin/perl -w
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
+# Apache 2.0.
+
+# This program is a bit like ./sym2int.pl in that it applies a map
+# to things in a file, but it's a bit more general in that it doesn't
+# assume the things being mapped to are single tokens, they could
+# be sequences of tokens.
+
+# This program takes one argument, the map file.  The map file must have
+# lines with one or more fields, interpreted as a map from the first field
+# (a string) to a list of strings; the data to be mapped is read from the
+# standard input.
+# if the input has as one of its lines
+# A x y
+# and the map has the lines
+# A A
+# x P
+# y Q R
+# then the output of this program will be
+# A P Q R
+#
+# Note that if a token does not appear as the first field of the map, this
+# program will die with an error rather than map it to the empty string.
+
+if(@ARGV != 1) {
+  print STDERR "Usage: apply_map.pl map <input >output\n" .
+    "e.g.: echo A B | apply_map.pl a2b.map\n";
+  exit(1);
+}
+
+$map = shift @ARGV;
+open(M, "<$map") || die "Error opening map file $map";
+
+while(<M>) {
+  @A = split(" ", $_);
+  @A >= 1 || die "apply_map.pl: empty line.";
+  $i = shift @A;
+  $o = join(" ", @A);
+  $map{$i} = $o;
+}
+
+while(<STDIN>) {
+  @A = split(" ", $_);
+  for ($x = 0; $x < @A; $x++) {
+    $a = $A[$x];
+    if (!defined $map{$a}) { die "apply_map.pl: undefined key $a\n"; }
+    $A[$x] = $map{$a};
+  }
+  print join(" ", @A) . "\n";
+}
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/best_wer.sh b/egs/kaldi-vystadial-recipe/s5/utils/best_wer.sh
new file mode 100755
index 00000000000..126d59bb87a
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/best_wer.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+#
+# Copyright 2010-2011 Microsoft Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# To be run from one directory above this script.
+
+perl -e 'while(<>){
+  if (m/[WS]ER (\S+)/ && (!defined $bestwer || $bestwer > $1)){ $bestwer = $1; $bestline=$_; } # kaldi "compute-wer" tool.
+  elsif (m: (Mean|Sum/Avg|)\s+\|\s+\S+\s+\S+\s+\|\s+\S+\s+\S+\s+\S+\s+\S+\s+(\S+)\s+\S+\s+\|:
+         && (!defined $bestwer || $bestwer > $2)){ $bestwer = $2; $bestline=$_; } }  # sclite.
+  if (defined $bestline){ print $bestline; } '
+
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/combine_data.sh b/egs/kaldi-vystadial-recipe/s5/utils/combine_data.sh
new file mode 100755
index 00000000000..7b2e2062336
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/combine_data.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
+
+
+if [ $# -le 2 ]; then
+  echo "Usage: combine_data.sh <dest-data-dir> <src-data-dir1> <src-data-dir2> ..."
+  exit 1
+fi
+
+dest=$1;
+shift;
+
+first_src=$1;
+
+mkdir -p $dest;
+
+export LC_ALL=C
+
+for file in utt2spk feats.scp text cmvn.scp segments reco2file_and_channel wav.scp; do
+  if [ -f $first_src/$file ]; then
+    ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1;
+    echo "$0: combined $file"
+  else
+    echo "$0 [info]: not combining $file as it does not exist"
+  fi
+done
+
+utils/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt
+
+utils/fix_data_dir.sh $dest || exit 1;
+
+exit 0
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/convert_ctm.pl b/egs/kaldi-vystadial-recipe/s5/utils/convert_ctm.pl
new file mode 100755
index 00000000000..7676a1d1321
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/convert_ctm.pl
@@ -0,0 +1,83 @@
+#!/usr/bin/perl
+
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
+
+# This takes as standard input a ctm file that's "relative to the utterance",
+# i.e. times are measured relative to the beginning of the segments, and it
+# uses a "segments" file (format:
+# utterance-id recording-id start-time end-time
+# ) and a "reco2file_and_channel" file (format:
+# recording-id basename-of-file channel
+# ) to produce a ctm that's relative to the recording.
+
+if (@ARGV < 2 || @ARGV > 3) {
+  print STDERR "Usage: convert_ctm.pl <segments-file> <reco2file_and_channel-file> [<utterance-ctm>] > real-ctm\n";
+  exit(1);
+}
+
+$segments = shift @ARGV;
+$reco2file_and_channel = shift @ARGV;
+
+open(S, "<$segments") || die "opening segments file $segments";
+while(<S>) {
+  @A = split(" ", $_);
+  @A == 4 || die "Bad line in segments file: $_";
+  ($utt, $recording_id, $begin_time, $end_time) = @A;
+  $utt2reco{$utt} = $recording_id;
+  $begin{$utt} = $begin_time;
+  $end{$utt} = $end_time;
+}
+close(S);
+open(R, "<$reco2file_and_channel") || die "open reco2file_and_channel file $reco2file_and_channel";
+while(<R>) {
+  @A = split(" ", $_);
+  @A == 3 || die "Bad line in reco2file_and_channel file: $_";
+  ($recording_id, $file, $channel) = @A;
+  $reco2file{$recording_id} = $file;
+  $reco2channel{$recording_id} = $channel;
+}
+
+
+# Now process the ctm file, which is either the standard input or the third
+# command-line argument.
+while(<>) {
+  @A= split(" ", $_);
+  ( @A == 5 || @A == 6 ) || die "Unexpected ctm format: $_";
+  # lines look like:
+  # utterance-id 1 begin-time length word [ confidence ]
+  ($utt, $one, $wbegin, $wlen, $w, $conf) = @A;
+  $reco = $utt2reco{$utt};
+  if (!defined $reco) { die "Utterance-id $utt not defined in segments file $segments"; }
+  $file = $reco2file{$reco};
+  $channel = $reco2channel{$reco};
+  if (!defined $file || !defined $channel) {
+    die "Recording-id $reco not defined in reco2file_and_channel file $reco2file_and_channel";
+  }
+  $b = $begin{$utt};
+  $e = $end{$utt};
+  $wbegin_r = $wbegin + $b; # Make it relative to beginning of the recording.
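+  # (E.g. a word at 8.0s inside a segment that starts at 10.0s maps to 18.0s
+  # in the recording -- cf. the test example after __END__ below.)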
+  $wbegin_r = sprintf("%.2f", $wbegin_r);
+  $wlen = sprintf("%.2f", $wlen);
+  if (defined $conf) {
+    $line = "$file $channel $wbegin_r $wlen $w $conf\n";
+  } else {
+    $line = "$file $channel $wbegin_r $wlen $w\n";
+  }
+  if ($wbegin_r + $wlen > $e + 0.01) {
+    print STDERR "Warning: word appears to be past end of recording; line is $line";
+  }
+  print $line; # goes to stdout.
+}
+
+__END__
+
+# Test example [also test it without the 0.5's]
+echo utt reco 10.0 20.0 > segments
+echo reco file A > reco2file_and_channel
+echo utt 1 8.0 1.0 word 0.5 > ctm_in
+echo file A 18.00 1.00 word 0.5 > ctm_out
+utils/convert_ctm.pl segments reco2file_and_channel ctm_in | cmp - ctm_out || echo error
+rm segments reco2file_and_channel ctm_in ctm_out
+
+
+
+
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/eps2disambig.pl b/egs/kaldi-vystadial-recipe/s5/utils/eps2disambig.pl
new file mode 100755
index 00000000000..fecbdc83368
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/eps2disambig.pl
@@ -0,0 +1,23 @@
+#!/usr/bin/perl
+# Copyright 2010-2011 Microsoft Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# This script replaces epsilon with #0 on the input side only, of the G.fst
+# acceptor.
+
+while(<>){
+  s:^(\d+\s+\d+\s+)\<eps\>(\s+):$1#0$2:;
+  print;
+}
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/filter_scp.pl b/egs/kaldi-vystadial-recipe/s5/utils/filter_scp.pl
new file mode 100755
index 00000000000..dfe4b13d14d
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/filter_scp.pl
@@ -0,0 +1,41 @@
+#!/usr/bin/perl -w
+# Copyright 2010-2011 Microsoft Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# This script takes a list of utterance-ids or any file whose first field
+# of each line is an utterance-id, and filters an scp
+# file (or any file whose first field is an utterance id), printing
+# out only those lines whose first field is in id_list.
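+#
+# For example (hypothetical files):
+#   utils/filter_scp.pl data/train_dev/utt2spk all_feats.scp > data/train_dev/feats.scp
+# keeps only the feature entries for utterances listed in utt2spk.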
+
+if(@ARGV < 1 || @ARGV > 2) {
+  die "Usage: filter_scp.pl id_list [in.scp] > out.scp ";
+}
+
+$idlist = shift @ARGV;
+open(F, "<$idlist") || die "Could not open id-list file $idlist";
+while(<F>) {
+  @A = split;
+  @A>=1 || die "Invalid id-list file line $_";
+  $seen{$A[0]} = 1;
+}
+
+while(<>) {
+  @A = split;
+  @A > 0 || die "Invalid scp file line $_";
+  if($seen{$A[0]}) {
+    print $_;
+  }
+}
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/find_arpa_oovs.pl b/egs/kaldi-vystadial-recipe/s5/utils/find_arpa_oovs.pl
new file mode 100755
index 00000000000..abd63f65e7a
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/find_arpa_oovs.pl
@@ -0,0 +1,64 @@
+#!/usr/bin/perl
+# Copyright 2010-2011 Microsoft Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+if ( @ARGV < 1 || @ARGV > 2) {
+  die "Usage: find_arpa_oovs.pl words.txt [lm.arpa]\n";
+  # This program finds words in the arpa file that are not symbols
+  # in the OpenFst-format symbol table words.txt.  It prints them
+  # on the standard output, one per line.
+}
+
+$symtab = shift @ARGV;
+open(S, "<$symtab") || die "Failed opening symbol table file $symtab\n";
+while(<S>){
+  @A = split(" ", $_);
+  @A == 2 || die "Bad line in symbol table file: $_";
+  $seen{$A[0]} = 1;
+}
+
+$curgram=0;
+while(<>) { # Find the \data\ marker.
+  if(m:^\\data\\$:) { last; }
+}
+while(<>) {
+  if(m/^\\(\d+)\-grams:\s*$/) {
+    $curgram = $1;
+    if($curgram > 1) {
+      last; # This is an optimization as we can get the vocab from the 1-grams
+    }
+  } elsif($curgram > 0) {
+    @A = split(" ", $_);
+    if(@A > 1) {
+      shift @A;
+      for($n=0;$n<$curgram;$n++) {
+        $word = $A[$n];
+        if(!defined $word) { print STDERR "Unusual line $_ (line $.) in arpa file.\n"; }
+        $in_arpa{$word} = 1;
+      }
+    } else {
+      if(@A > 0 && $A[0] !~ m:\\end\\:) {
+        print STDERR "Unusual line $_ (line $.) in arpa file\n";
+      }
+    }
+  }
+}
+
+foreach $w (keys %in_arpa) {
+  if(!defined $seen{$w} && $w ne "<s>" && $w ne "</s>") {
+    print "$w\n";
+  }
+}
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/fix_data_dir.sh b/egs/kaldi-vystadial-recipe/s5/utils/fix_data_dir.sh
new file mode 100755
index 00000000000..d8b937ac21b
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/fix_data_dir.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+# This script makes sure that only the segments present in
+# all of "feats.scp", "wav.scp" [if present], segments [if present],
+# text, and utt2spk are present in any of them.
+# It puts the original contents of data-dir into
+# data-dir/.backup
+
+if [ $# != 1 ]; then
+  echo "Usage: fix_data_dir.sh data-dir"
+  exit 1
+fi
+
+data=$1
+mkdir -p $data/.backup
+
+[ ! -d $data ] && echo "$0: no such directory $data" && exit 1;
+
+[ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1;
+
+cat $data/utt2spk | awk '{print $1}' > $data/utts
+
+# Do a check.
+export LC_ALL=C
+! cat $data/utt2spk | sort | cmp - $data/utt2spk && \
+  echo "utt2spk is not in sorted order (fix this yourself)" && exit 1;
+
+! cat $data/utt2spk | sort -k2 | cmp - $data/utt2spk && \
+  echo "utt2spk is not in sorted order when sorted first on speaker-id " && \
+  echo "(fix this by making speaker-ids prefixes of utt-ids)" && exit 1;
+
+! cat $data/spk2utt | sort | cmp - $data/spk2utt && \
+  echo "spk2utt is not in sorted order (fix this yourself)" && exit 1;
+
+maybe_wav=
+[ ! -f $data/segments ] && maybe_wav=wav.scp  # wav.scp indexed by utts only if segments does not exist.
+for x in feats.scp text segments $maybe_wav; do
+  if [ -f $data/$x ]; then
+    utils/filter_scp.pl $data/$x $data/utts > $data/utts.tmp
+    mv $data/utts.tmp $data/utts
+  fi
+done
+[ ! -s $data/utts ] && echo "fix_data_dir.sh: no utterances remained: not doing anything." && \
+  rm $data/utts && exit 1;
+
+nutts=`cat $data/utts | wc -l`
+if [ -f $data/feats.scp ]; then
+  nfeats=`cat $data/feats.scp | wc -l`
+else
+  nfeats=0
+fi
+ntext=`cat $data/text | wc -l`
+if [ "$nutts" -ne "$nfeats" -o "$nutts" -ne "$ntext" ]; then
+  echo "fix_data_dir.sh: kept $nutts utterances, vs. $nfeats features and $ntext transcriptions."
+else
+  echo "fix_data_dir.sh: kept all $nutts utterances."
+fi
+
+for x in utt2spk feats.scp text segments $maybe_wav; do
+  if [ -f $data/$x ]; then
+    mv $data/$x $data/.backup/$x
+    utils/filter_scp.pl $data/utts $data/.backup/$x > $data/$x
+  fi
+done
+
+
+if [ -f $data/segments ]; then
+  awk '{print $2}' $data/segments | sort | uniq > $data/reco # reco means the id's of the recordings.
+  [ -f $data/wav.scp ] && mv $data/wav.scp $data/.backup/ && \
+    utils/filter_scp.pl $data/reco $data/.backup/wav.scp >$data/wav.scp
+  [ -f $data/reco2file_and_channel ] && mv $data/reco2file_and_channel $data/.backup/ && \
+    utils/filter_scp.pl $data/reco $data/.backup/reco2file_and_channel >$data/reco2file_and_channel
+  rm $data/reco
+fi
+
+utils/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt
+
+rm $data/utts
+
+echo "fix_data_dir.sh: old files are kept in $data/.backup"
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/format_lm.sh b/egs/kaldi-vystadial-recipe/s5/utils/format_lm.sh
new file mode 100755
index 00000000000..b6ba4ce7d1c
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/format_lm.sh
@@ -0,0 +1,84 @@
+#!/bin/bash -u
+
+# Copyright 2012  Arnab Ghoshal
+# Copyright 2010-2011  Microsoft Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+set -o errexit
+
+if [ $# -ne 4 ]; then
+  printf "Usage: %s lang_dir LM lexicon out_dir\n" `basename $0`
+  echo "  Convert ARPA-format language models to FSTs.";
+  exit 1;
+fi
+
+lang_dir=$1
+lm=$2
+lexicon=$3
+out_dir=$4
+mkdir -p $out_dir
+
+[ -f ./path.sh ] && . ./path.sh
+
+echo "Converting '$lm' to FST"
+
+for f in phones.txt words.txt L.fst L_disambig.fst phones/; do
+  cp -r $lang_dir/$f $out_dir
+done
+
+lm_base=$(basename $lm '.gz')
+gunzip -c $lm | utils/find_arpa_oovs.pl $out_dir/words.txt \
+  > $out_dir/oovs_${lm_base}.txt
+
+# Removing all "illegal" combinations of <s> and </s>, which are supposed to
+# occur only at begin/end of utt.  These can cause determinization failures
+# of CLG [ends up being epsilon cycles].
+gunzip -c $lm \
+  | egrep -v '<s> <s>|</s> <s>|</s> </s>' \
+  | arpa2fst - | fstprint \
+  | utils/remove_oovs.pl $out_dir/oovs_${lm_base}.txt \
+  | utils/eps2disambig.pl | utils/s2eps.pl \
+  | fstcompile --isymbols=$out_dir/words.txt --osymbols=$out_dir/words.txt \
+     --keep_isymbols=false --keep_osymbols=false \
+  | fstrmepsilon > $out_dir/G.fst
+set +e
+fstisstochastic $out_dir/G.fst
+set -e
+# The output is like:
+# 9.14233e-05 -0.259833
+# we do expect the first of these 2 numbers to be close to zero (the second is
+# nonzero because the backoff weights make the states sum to >1).
+
+# Everything below is only for diagnostics.
+# Checking that G has no cycles with empty words on them (e.g. <s>, </s>);
+# this might cause determinization failure of CLG.
+# #0 is treated as an empty word.
+mkdir -p $out_dir/tmpdir.g
+awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }}
+     END{print "0 0 #0 #0"; print "0";}' \
+  < "$lexicon" > $out_dir/tmpdir.g/select_empty.fst.txt
+
+fstcompile --isymbols=$out_dir/words.txt --osymbols=$out_dir/words.txt \
+  $out_dir/tmpdir.g/select_empty.fst.txt \
+  | fstarcsort --sort_type=olabel \
+  | fstcompose - $out_dir/G.fst > $out_dir/tmpdir.g/empty_words.fst
+
+fstinfo $out_dir/tmpdir.g/empty_words.fst | grep cyclic | grep -w 'y' \
+  && echo "Language model has cycles with empty words" && exit 1
+
+rm -r $out_dir/tmpdir.g
+
+
+echo "Succeeded in formatting LM: '$lm'"
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/format_lm_sri.sh b/egs/kaldi-vystadial-recipe/s5/utils/format_lm_sri.sh
new file mode 100755
index 00000000000..8489267911f
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/format_lm_sri.sh
@@ -0,0 +1,110 @@
+#!/bin/bash -u
+
+# Copyright 2012  Arnab Ghoshal
+# Copyright 2010-2011  Microsoft Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+set -o errexit
+
+# Begin configuration section.
+srilm_opts="-subset -prune-lowprobs -unk -tolower"
+# end configuration sections
+
+help_message="Usage: "`basename $0`" [options] lang_dir LM lexicon out_dir
+Convert ARPA-format language models to FSTs. Change the LM vocabulary using SRILM.\n
+options:
+  --help               # print this message and exit
+  --srilm-opts STRING  # options to pass to SRILM tools (default: '$srilm_opts')
+";
+
+. utils/parse_options.sh
+
+if [ $# -ne 4 ]; then
+  printf "$help_message\n";
+  exit 1;
+fi
+
+lang_dir=$1
+lm=$2
+lexicon=$3
+out_dir=$4
+mkdir -p $out_dir
+
+[ -f ./path.sh ] && . ./path.sh
+( which change-lm-vocab >&/dev/null && which ngram >&/dev/null ) \
+  || { echo "SRILM not found on PATH. Exiting ..."; exit 1; }
+
+echo "Converting '$lm' to FST"
+tmpdir=$(mktemp -d);
+trap 'rm -rf "$tmpdir"' EXIT
+
+for f in phones.txt words.txt L.fst L_disambig.fst phones/; do
+  cp -r $lang_dir/$f $out_dir
+done
+
+lm_base=$(basename $lm '.gz')
+gunzip -c $lm | utils/find_arpa_oovs.pl $out_dir/words.txt \
+  > $out_dir/oovs_${lm_base}.txt
+
+# Removing all "illegal" combinations of <s> and </s>, which are supposed to
+# occur only at begin/end of utt.  These can cause determinization failures
+# of CLG [ends up being epsilon cycles].
+gunzip -c $lm \
+  | egrep -v '<s> <s>|</s> <s>|</s> </s>' \
+  | gzip -c > $tmpdir/lm.gz
+
+awk '{print $1}' $out_dir/words.txt > $tmpdir/voc
+
+# Change the LM vocabulary to be the intersection of the current LM vocabulary
+# and the set of words in the pronunciation lexicon. This also renormalizes the
+# LM by recomputing the backoff weights, and removes those n-grams whose
+# probabilities are lower than the backed-off estimates.
+change-lm-vocab -vocab $tmpdir/voc -lm $tmpdir/lm.gz -write-lm $tmpdir/out_lm \
+  $srilm_opts
+
+arpa2fst $tmpdir/out_lm | fstprint \
+  | utils/eps2disambig.pl | utils/s2eps.pl \
+  | fstcompile --isymbols=$out_dir/words.txt --osymbols=$out_dir/words.txt \
+     --keep_isymbols=false --keep_osymbols=false \
+  | fstrmepsilon > $out_dir/G.fst
+set +e
+fstisstochastic $out_dir/G.fst
+set -e
+# The output is like:
+# 9.14233e-05 -0.259833
+# we do expect the first of these 2 numbers to be close to zero (the second is
+# nonzero because the backoff weights make the states sum to >1).
+
+# Everything below is only for diagnostics.
+# Checking that G has no cycles with empty words on them (e.g. <s>, </s>);
+# this might cause determinization failure of CLG.
+# #0 is treated as an empty word.
+mkdir -p $out_dir/tmpdir.g
+awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }}
+     END{print "0 0 #0 #0"; print "0";}' \
+  < "$lexicon" > $out_dir/tmpdir.g/select_empty.fst.txt
+
+fstcompile --isymbols=$out_dir/words.txt --osymbols=$out_dir/words.txt \
+  $out_dir/tmpdir.g/select_empty.fst.txt \
+  | fstarcsort --sort_type=olabel \
+  | fstcompose - $out_dir/G.fst > $out_dir/tmpdir.g/empty_words.fst
+
+fstinfo $out_dir/tmpdir.g/empty_words.fst | grep cyclic | grep -w 'y' \
+  && echo "Language model has cycles with empty words" && exit 1
+
+rm -r $out_dir/tmpdir.g
+
+
+echo "Succeeded in formatting LM: '$lm'"
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/gen_topo.pl b/egs/kaldi-vystadial-recipe/s5/utils/gen_topo.pl
new file mode 100755
index 00000000000..1488a884d8e
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/gen_topo.pl
@@ -0,0 +1,63 @@
+#!/usr/bin/perl
+
+# Copyright 2012  Johns Hopkins University (author: Daniel Povey)
+
+# Generate a topology file.  This allows control of the number of states in the
+# non-silence HMMs, and in the silence HMMs.
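+# (E.g. "utils/gen_topo.pl 3 5 4:5:6:7:8:9:10 1:2:3" emits a <Topology> block
+# with a 3-state left-to-right entry for phones 4-10 and a 5-state, more
+# densely connected entry for silence phones 1-3 -- a sketch of intended use.)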
+
+if(@ARGV != 4) {
+  print STDERR "Usage: utils/gen_topo.pl <num-nonsilence-states> <num-silence-states> <colon-separated-nonsilence-phones> <colon-separated-silence-phones>\n";
+  print STDERR "e.g.: utils/gen_topo.pl 3 5 4:5:6:7:8:9:10 1:2:3\n";
+  exit (1);
+}
+
+($num_nonsil_states, $num_sil_states, $nonsil_phones, $sil_phones) = @ARGV;
+
+( $num_nonsil_states >= 1 && $num_nonsil_states <= 100 ) || die "Unexpected number of nonsilence-model states $num_nonsil_states\n";
+( $num_sil_states >= 3 && $num_sil_states <= 100 ) || die "Unexpected number of silence-model states $num_sil_states\n";
+
+$nonsil_phones =~ s/:/ /g;
+$sil_phones =~ s/:/ /g;
+$nonsil_phones =~ m/^\d[ \d]+$/ || die "$0: bad arguments @ARGV\n";
+$sil_phones =~ m/^\d[ \d]+$/ || die "$0: bad arguments @ARGV\n";
+
+print "<Topology>\n";
+print "<TopologyEntry>\n";
+print "<ForPhones>\n";
+print "$nonsil_phones\n";
+print "</ForPhones>\n";
+for ($state = 0; $state < $num_nonsil_states; $state++) {
+  $statep1 = $state+1;
+  print "<State> $state <PdfClass> $state <Transition> $state 0.75 <Transition> $statep1 0.25 </State>\n";
+}
+print "<State> $num_nonsil_states </State>\n"; # non-emitting final state.
+print "</TopologyEntry>\n";
+# Now silence phones.  They have a different topology-- apart from the first and
+# last states, it's fully connected.
+$transp = 1.0 / ($num_sil_states-1);
+
+print "<TopologyEntry>\n";
+print "<ForPhones>\n";
+print "$sil_phones\n";
+print "</ForPhones>\n";
+print "<State> 0 <PdfClass> 0 ";
+for ($nextstate = 0; $nextstate < $num_sil_states-1; $nextstate++) { # Transitions to all but last
+  # emitting state.
+  print "<Transition> $nextstate $transp ";
+}
+print "</State>\n";
+for ($state = 1; $state < $num_sil_states-1; $state++) { # the central states all have transitions to
+  # themselves and to the last emitting state.
+  print "<State> $state <PdfClass> $state ";
+  for ($nextstate = 1; $nextstate < $num_sil_states; $nextstate++) {
+    print "<Transition> $nextstate $transp ";
+  }
+  print "</State>\n";
+}
+# Final emitting state (non-skippable).
+$state = $num_sil_states-1;
+print "<State> $state <PdfClass> $state <Transition> $state 0.75 <Transition> $num_sil_states 0.25 </State>\n";
+# Final nonemitting state:
+print "<State> $num_sil_states </State>\n";
+print "</TopologyEntry>\n";
+print "</Topology>\n";
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/int2sym.pl b/egs/kaldi-vystadial-recipe/s5/utils/int2sym.pl
new file mode 100755
index 00000000000..13cc5ae9b1d
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/int2sym.pl
@@ -0,0 +1,71 @@
+#!/usr/bin/perl
+# Copyright 2010-2012 Microsoft Corporation  Johns Hopkins University (Author: Daniel Povey)
+# Apache 2.0.
+
+undef $field_begin;
+undef $field_end;
+
+
+if ($ARGV[0] eq "-f") {
+  shift @ARGV;
+  $field_spec = shift @ARGV;
+  if ($field_spec =~ m/^\d+$/) {
+    $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
+  }
+  if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
+    if ($1 ne "") {
+      $field_begin = $1 - 1;  # Change to zero-based indexing.
+    }
+    if ($2 ne "") {
+      $field_end = $2 - 1;    # Change to zero-based indexing.
+    }
+  }
+  if (!defined $field_begin && !defined $field_end) {
+    die "Bad argument to -f option: $field_spec";
+  }
+}
+$symtab = shift @ARGV;
+if(!defined $symtab) {
+  print STDERR "Usage: int2sym.pl [options] symtab [input] > output\n" .
+    "options: [-f (<field>|<field-start>-<field-end>)]\n" .
+    "e.g.: -f 2, or -f 3-4\n";
+  exit(1);
+}
+
+open(F, "<$symtab") || die "Error opening symbol table file $symtab";
+while(<F>) {
+  @A = split(" ", $_);
+  @A == 2 || die "bad line in symbol table file: $_";
+  $int2sym{$A[1]} = $A[0];
+}
+
+sub int2sym {
+  my $a = shift @_;
+  my $pos = shift @_;
+  if($a !~  m:^\d+$:) { # not all digits..
+    $pos1 = $pos+1; # make it one-based.
+    die "int2sym.pl: found noninteger token $a [in position $pos1]\n";
+  }
+  $s = $int2sym{$a};
+  if(!defined ($s)) {
+    die "int2sym.pl: integer $a not in symbol table $symtab.";
+  }
+  return $s;
+}
+
+$error = 0;
+while (<>) {
+  @A = split(" ", $_);
+  for ($pos = 0; $pos <= $#A; $pos++) {
+    $a = $A[$pos];
+    if ( (!defined $field_begin || $pos >= $field_begin)
+         && (!defined $field_end || $pos <= $field_end)) {
+      $a = int2sym($a, $pos);
+    }
+    print $a . " ";
+  }
+  print "\n";
+}
+
+
+
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/ln.pl b/egs/kaldi-vystadial-recipe/s5/utils/ln.pl
new file mode 100755
index 00000000000..594d3924ec8
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/ln.pl
@@ -0,0 +1,58 @@
+#!/usr/bin/perl
+use File::Spec;
+
+if ( @ARGV < 2 ) {
+  print STDERR "usage: ln.pl input1 input2 dest-dir\n" .
+    "This script does a soft link of input1, input2, etc.\n" .
+    "to dest-dir, using relative links where possible.\n" .
+    "Note: input-n and dest-dir may both be absolute pathnames,\n" .
+    "or relative pathnames, relative to the current directory.\n";
+  exit(1);
+}
+
+$dir = pop @ARGV;
+if ( ! -d $dir ) {
+  print STDERR "ln.pl: last argument must be a directory ($dir is not a directory)\n";
+  exit(1);
+}
+
+$ans = 1; # true.
+
+$absdir = File::Spec->rel2abs($dir); # Get $dir as abs path.
+defined $absdir || die "No such directory $dir";
+foreach $file (@ARGV) {
+  $absfile = File::Spec->rel2abs($file); # Get $file as abs path.
+  defined $absfile || die "No such file or directory: $file";
+  @absdir_split = split("/", $absdir);
+  @absfile_split = split("/", $absfile);
+
+  $newfile = $absdir . "/" . $absfile_split[$#absfile_split]; # we'll use this
+  # as the destination in the link command.
+  $num_removed = 0;
+  while (@absdir_split > 0 && $absdir_split[0] eq $absfile_split[0]) {
+    shift @absdir_split;
+    shift @absfile_split;
+    $num_removed++;
+  }
+  if (-l $newfile) { # newfile is already a link -> safe to delete it.
+    unlink($newfile); # "unlink" just means delete.
+  }
+  if ($num_removed == 0) { # will use absolute pathnames.
+    $oldfile = "/" . join("/", @absfile_split);
+    $ret = symlink($oldfile, $newfile);
+  } else {
+    $num_dots = @absdir_split;
+    $oldfile = join("/", @absfile_split);
+    for ($n = 0; $n < $num_dots; $n++) {
+      $oldfile = "../" . $oldfile;
+    }
+    $ret = symlink($oldfile, $newfile);
+  }
+  $ans = $ans && $ret;
+  if (! $ret) {
+    print STDERR "Error linking $oldfile to $newfile\n";
+  }
+}
+
+exit ($ans == 1 ? 0 : 1);
+
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/make_lexicon_fst.pl b/egs/kaldi-vystadial-recipe/s5/utils/make_lexicon_fst.pl
new file mode 100755
index 00000000000..a5334279c8c
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/make_lexicon_fst.pl
@@ -0,0 +1,122 @@
+#!/usr/bin/perl -w
+# Copyright 2010-2011 Microsoft Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# makes lexicon FST (no pron-probs involved).
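+# Output is a text-format FST, one arc per line ("src dest phone word [cost]").
+# E.g. with no optional silence, a word "ab" with pronunciation "A B" becomes
+# (a sketch): "0 1 A ab" followed by "1 0 B <eps>", looping back to state 0.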
+
+if(@ARGV != 1 && @ARGV != 3 && @ARGV != 4) {
+  die "Usage: make_lexicon_fst.pl lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt"
+}
+
+$lexfn = shift @ARGV;
+if(@ARGV == 0) {
+  $silprob = 0.0;
+} elsif (@ARGV == 2){
+  ($silprob,$silphone) = @ARGV;
+} else {
+  ($silprob,$silphone,$sildisambig) = @ARGV;
+}
+if($silprob != 0.0) {
+  $silprob < 1.0 || die "Sil prob cannot be >= 1.0";
+  $silcost = -log($silprob);
+  $nosilcost = -log(1.0 - $silprob);
+}
+
+
+open(L, "<$lexfn") || die "Error opening lexicon $lexfn";
+
+
+
+sub is_sil {
+  # Return true (1) if provided with a phone-sequence
+  # that means silence.
+  # @_ is the parameters of the function
+  # This function returns true if @_ equals ( $silphone )
+  # or something of the form ( "#0", $silphone, "#1" )
+  # where the "#0" and "#1" are disambiguation symbols.
+  return ( @_ == 1 && $_[0] eq $silphone ||
+           (@_ == 3 && $_[1] eq $silphone &&
+            $_[0] =~ m/^\#\d+$/ &&
+            $_[2] =~ m/^\#\d+$/));
+}
+
+if( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state which is numbered zero.
+  $loopstate = 0;
+  $nextstate = 1; # next unallocated state.
+  while(<L>) {
+    @A = split(" ", $_);
+    $w = shift @A;
+
+    $s = $loopstate;
+    $word_or_eps = $w;
+    while (@A > 0) {
+      $p = shift @A;
+      if(@A > 0) {
+        $ns = $nextstate++;
+      } else {
+        $ns = $loopstate;
+      }
+      print "$s\t$ns\t$p\t$word_or_eps\n";
+      $word_or_eps = "<eps>";
+      $s = $ns;
+    }
+  }
+  print "$loopstate\t0\n"; # final-cost.
+} else { # have silence probs.
+  $startstate = 0;
+  $loopstate = 1;
+  $silstate = 2; # state from where we go to loopstate after emitting silence.
+  print "$startstate\t$loopstate\t<eps>\t<eps>\t$nosilcost\n"; # no silence.
+  if (!defined $sildisambig) {
+    print "$startstate\t$loopstate\t$silphone\t<eps>\t$silcost\n"; # silence.
+    print "$silstate\t$loopstate\t$silphone\t<eps>\n"; # no cost.
+    $nextstate = 3;
+  } else {
+    $disambigstate = 3;
+    $nextstate = 4;
+    print "$startstate\t$disambigstate\t$silphone\t<eps>\t$silcost\n"; # silence.
+    print "$silstate\t$disambigstate\t$silphone\t<eps>\n"; # no cost.
+    print "$disambigstate\t$loopstate\t$sildisambig\t<eps>\n"; # silence disambiguation symbol.
+  }
+  while(<L>) {
+    @A = split(" ", $_);
+    $w = shift @A;
+    @pron = @A; # save the full pronunciation: @A is consumed below, and
+                # is_sil() expects the whole phone sequence.
+
+    $s = $loopstate;
+    $word_or_eps = $w;
+    while (@A > 0) {
+      $p = shift @A;
+      if(@A > 0) {
+        $ns = $nextstate++;
+        print "$s\t$ns\t$p\t$word_or_eps\n";
+        $word_or_eps = "<eps>";
+        $s = $ns;
+      } else {
+        if(!is_sil(@pron)){
+          # This is non-deterministic but relatively compact,
+          # and avoids epsilons.
+          print "$s\t$loopstate\t$p\t$word_or_eps\t$nosilcost\n";
+          print "$s\t$silstate\t$p\t$word_or_eps\t$silcost\n";
+        } else {
+          # no point putting opt-sil after silence word.
+          print "$s\t$loopstate\t$p\t$word_or_eps\n";
+        }
+        $word_or_eps = "<eps>";
+      }
+    }
+  }
+  print "$loopstate\t0\n"; # final-cost.
+}
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/make_unigram_grammar.pl b/egs/kaldi-vystadial-recipe/s5/utils/make_unigram_grammar.pl
new file mode 100755
index 00000000000..314a66a10cf
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/make_unigram_grammar.pl
@@ -0,0 +1,54 @@
+#!/usr/bin/perl
+# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# This script is used in discriminative training.
+# This script makes a simple unigram-loop version of G.fst
+# using a unigram grammar estimated from some training transcripts.
+# This is for MMI training.
+# We don't have any silences in G.fst; these are supplied by the
+# optional silences in the lexicon.
+
+# Note: the symbols in the transcripts become the input and output
+# symbols of G.txt; these can be numeric or not.
+
+if(@ARGV != 0) {
+  die "Usage: make_unigram_grammar.pl < text-transcripts > G.txt"
+}
+
+$totcount = 0;
+$nl = 0;
+while (<>) {
+  @A = split(" ", $_);
+  foreach $a (@A) {
+    $count{$a}++;
+    $totcount++;
+  }
+  $nl++;
+  $totcount++; # Treat end-of-sentence as a symbol for purposes of
+               # $totcount, so the grammar is properly stochastic. This doesn't
+               # become </s>, it just becomes the final-prob.
+}
+
+foreach $a (keys %count) {
+  $prob = $count{$a} / $totcount;
+  $cost = -log($prob); # Negated natural-log probs.
+  print "0\t0\t$a\t$a\t$cost\n";
+}
+# Final cost on the single state, from the end-of-sentence probability.
+$final_prob = $nl / $totcount;
+$final_cost = -log($final_prob);
+print "0\t$final_cost\n";
+
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/mkgraph.sh b/egs/kaldi-vystadial-recipe/s5/utils/mkgraph.sh
new file mode 100755
index 00000000000..1134ba778c0
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/mkgraph.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
+# Apache 2.0
+
+# This script creates a fully expanded decoding graph (HCLG) that represents
+# all the language-model, pronunciation dictionary (lexicon), context-dependency,
+# and HMM structure in our model. The output is a Finite State Transducer
+# that has word-ids on the output, and pdf-ids on the input (these are indexes
+# that resolve to Gaussian Mixture Models).
+# See
+#  http://kaldi.sourceforge.net/graph_recipe_test.html
+# (this is compiled from this repository using Doxygen,
+# the source for this part is in src/doc/graph_recipe_test.dox)
+
+
+N=3
+P=1
+reverse=false
+
+for x in `seq 2`; do
+  [ "$1" == "--mono" ] && N=1 && P=0 && shift;
+  [ "$1" == "--quinphone" ] && N=5 && P=2 && shift;
+  [ "$1" == "--reverse" ] && reverse=true && shift;
+done
+
+if [ $# != 3 ]; then
+  echo "Usage: utils/mkgraph.sh [options] <lang-dir> <model-dir> <graph-dir>"
+  echo "e.g.: utils/mkgraph.sh data/lang_test exp/tri1/ exp/tri1/graph"
+  echo " Options:"
+  echo " --mono # For monophone models."
+  echo " --quinphone # For models with 5-phone context (3 is default)"
+  exit 1;
+fi
+
+if [ -f path.sh ]; then . ./path.sh; fi
+
+lang=$1
+tree=$2/tree
+model=$2/final.mdl
+dir=$3
+
+mkdir -p $dir
+
+tscale=1.0
+loopscale=0.1
+
+# If $lang/tmp/LG.fst does not exist or is older than its sources, make it...
+# (note: the [[ ]] brackets make the || type operators work (inside [ ], we
+# would have to use -o instead), -f means file exists, and -ot means older than).
+
+required="$lang/L.fst $lang/G.fst $lang/phones.txt $lang/words.txt $lang/phones/silence.csl $lang/phones/disambig.int $model $tree"
+for f in $required; do
+  [ !
-f $f ] && echo "mkgraph.sh: expected $f to exist" && exit 1; +done + +mkdir -p $lang/tmp +# Note: [[ ]] is like [ ] but enables certain extra constructs, e.g. || in +# place of -o +if [[ ! -s $lang/tmp/LG.fst || $lang/tmp/LG.fst -ot $lang/G.fst || \ + $lang/tmp/LG.fst -ot $lang/L_disambig.fst ]]; then + fsttablecompose $lang/L_disambig.fst $lang/G.fst | fstdeterminizestar --use-log=true | \ + fstminimizeencoded > $lang/tmp/LG.fst || exit 1; + fstisstochastic $lang/tmp/LG.fst || echo "[info]: LG not stochastic." +fi + + +clg=$lang/tmp/CLG_${N}_${P}.fst + +if [[ ! -s $clg || $clg -ot $lang/tmp/LG.fst ]]; then + fstcomposecontext --context-size=$N --central-position=$P \ + --read-disambig-syms=$lang/phones/disambig.int \ + --write-disambig-syms=$lang/tmp/disambig_ilabels_${N}_${P}.int \ + $lang/tmp/ilabels_${N}_${P} < $lang/tmp/LG.fst >$clg + fstisstochastic $clg || echo "[info]: CLG not stochastic." +fi + +if [[ ! -s $dir/Ha.fst || $dir/Ha.fst -ot $model \ + || $dir/Ha.fst -ot $lang/tmp/ilabels_${N}_${P} ]]; then + if $reverse; then + make-h-transducer --reverse=true --push_weights=true \ + --disambig-syms-out=$dir/disambig_tid.int \ + --transition-scale=$tscale $lang/tmp/ilabels_${N}_${P} $tree $model \ + > $dir/Ha.fst || exit 1; + else + make-h-transducer --disambig-syms-out=$dir/disambig_tid.int \ + --transition-scale=$tscale $lang/tmp/ilabels_${N}_${P} $tree $model \ + > $dir/Ha.fst || exit 1; + fi +fi + +if [[ ! -s $dir/HCLGa.fst || $dir/HCLGa.fst -ot $dir/Ha.fst || \ + $dir/HCLGa.fst -ot $clg ]]; then + fsttablecompose $dir/Ha.fst $clg | fstdeterminizestar --use-log=true \ + | fstrmsymbols $dir/disambig_tid.int | fstrmepslocal | \ + fstminimizeencoded > $dir/HCLGa.fst || exit 1; + fstisstochastic $dir/HCLGa.fst || echo "HCLGa is not stochastic" +fi + +if [[ ! -s $dir/HCLG.fst || $dir/HCLG.fst -ot $dir/HCLGa.fst ]]; then + add-self-loops --self-loop-scale=$loopscale --reorder=true \ + $model < $dir/HCLGa.fst > $dir/HCLG.fst || exit 1; + + if [ $tscale == 1.0 -a $loopscale == 1.0 ]; then + # No point doing this test if transition-scale not 1, as it is bound to fail. + fstisstochastic $dir/HCLG.fst || echo "[info]: final HCLG is not stochastic." + fi +fi + +# keep a copy of the lexicon and a list of silence phones with HCLG... +# this means we can decode without reference to the $lang directory. + + +cp $lang/words.txt $dir/ || exit 1; +mkdir -p $dir/phones +cp $lang/phones/word_boundary.* $dir/phones/ 2>/dev/null # might be needed for ctm scoring, + # but ignore the error if it's not there. +cp $lang/phones/silence.csl $dir/phones/ || exit 1; +cp $lang/phones.txt $dir/ 2>/dev/null # ignore the error if it's not there. + +# to make const fst: +# fstconvert --fst_type=const $dir/HCLG.fst $dir/HCLG_c.fst + diff --git a/egs/kaldi-vystadial-recipe/s5/utils/nnet/analyze_alignments.sh b/egs/kaldi-vystadial-recipe/s5/utils/nnet/analyze_alignments.sh new file mode 100755 index 00000000000..dc01bb872e0 --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/utils/nnet/analyze_alignments.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# Copyright 2012 Karel Vesely + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# To be run from ..
+
+
+if [ $# != 4 ]; then
+  echo "Usage: $0 <tag> <alignment-rspecifier> <model-file> <lang-dir>"
+  echo " e.g.: $0 'TRAINING SET' 'ark:gunzip -c \$alidir/ali.gz |' tri1/final.mdl <lang-dir>"
+  exit 1;
+fi
+
+if [ -f path.sh ]; then . path.sh; fi
+
+tag=$1
+ali=$2
+model=$3
+lang=$4
+
+tmpfile=$(mktemp)
+
+echo "%%%%%% .pdf STATS, $tag %%%%%%"
+analyze-counts --binary=false --rescale-to-probs=true --show-histogram=true \
+  "ark:ali-to-pdf --print-args=false $model \"$ali\" ark:- 2>/dev/null |" \
+  $tmpfile.0 2>&1
+echo
+
+echo "%%%%%% .phone STATS, $tag %%%%%%"
+#prob stats
+analyze-counts --binary=false --rescale-to-probs=true --show-histogram=true \
+  "ark:ali-to-phones --print-args=false --per-frame=true $model \"$ali\" ark:- |" \
+  $tmpfile.1 2>&1
+#frame stats
+analyze-counts --binary=false \
+  "ark:ali-to-phones --print-args=false --per-frame=true $model \"$ali\" ark:- |" \
+  $tmpfile.2 2>/dev/null
+echo
+
+echo "%%%%%% .ali STATS, $tag %%%%%%"
+analyze-counts --binary=false --rescale-to-probs=true --show-histogram=true "$ali" /dev/null 2>&1
+echo
+
+echo "%%%%%% .phone STATS (VERBOSE), $tag %%%%%%"
+#paste and show the logs
+cat $tmpfile.1 | sed -e 's|^\s*\[ ||' -e 's|\]||' | tr ' ' '\n' >$tmpfile.1a
+cat $tmpfile.2 | sed -e 's|^\s*\[ ||' -e 's|\]||' | tr ' ' '\n' >$tmpfile.2a
+paste $tmpfile.1a $tmpfile.2a > $tmpfile
+paste $lang/phones.txt $tmpfile | awk '{printf "%10s %4d %f %d\n", $1, $2, $3, $4;}'
+echo
+
+echo "%%%%%% .pdf STATS (VERBOSE), $tag %%%%%%"
+cat $tmpfile.0
+echo "%%%%%% END"
+
+rm $tmpfile{,.0,.1,.2,.1a,.2a}
+
+
+
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/nnet/gen_dct_mat.py b/egs/kaldi-vystadial-recipe/s5/utils/nnet/gen_dct_mat.py
new file mode 100755
index 00000000000..bff014af447
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/nnet/gen_dct_mat.py
@@ -0,0 +1,53 @@
+#!/usr/bin/python -u
+
+# ./gen_dct_mat.py
+# script generates matrix with DCT transform
+#
+# author: Karel Vesely
+#
+
+from math import *
+import sys
+
+
+from optparse import OptionParser
+
+parser = OptionParser()
+parser.add_option('--fea-dim', dest='dim', help='feature dimension')
+parser.add_option('--splice', dest='splice', help='applied splice value')
+parser.add_option('--dct-basis', dest='dct_basis', help='number of DCT basis')
+(options, args) = parser.parse_args()
+
+if(options.dim == None):
+    parser.print_help()
+    sys.exit(1)
+
+dim=int(options.dim)
+splice=int(options.splice)
+dct_basis=int(options.dct_basis)
+
+timeContext=2*splice+1
+
+
+#generate the DCT matrix
+M_PI = 3.1415926535897932384626433832795
+M_SQRT2 = 1.4142135623730950488016887
+
+
+#generate small DCT matrix
+print '['
+for k in range(dct_basis):
+    for m in range(dim):
+        for n in range(timeContext):
+            if(n==0):
+                print m*'0 ',
+            else:
+                print (dim-1)*'0 ',
+            print str(sqrt(2.0/timeContext)*cos(M_PI/timeContext*k*(n+0.5))),
+            if(n==timeContext-1):
+                print (dim-m-1)*'0 ',
+        print
+    print
+
+print ']'
+
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/nnet/gen_hamm_mat.py b/egs/kaldi-vystadial-recipe/s5/utils/nnet/gen_hamm_mat.py
new file mode 100755
index
00000000000..31a6d877d00
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/nnet/gen_hamm_mat.py
@@ -0,0 +1,45 @@
+#!/usr/bin/python -u
+
+# ./gen_hamm_mat.py
+# script generates diagonal matrix with hamming window values
+#
+# author: Karel Vesely
+#
+
+from math import *
+import sys
+
+
+from optparse import OptionParser
+
+parser = OptionParser()
+parser.add_option('--fea-dim', dest='dim', help='feature dimension')
+parser.add_option('--splice', dest='splice', help='applied splice value')
+(options, args) = parser.parse_args()
+
+if(options.dim == None):
+    parser.print_help()
+    sys.exit(1)
+
+dim=int(options.dim)
+splice=int(options.splice)
+
+
+#generate the diagonal matrix with hammings
+M_2PI = 6.283185307179586476925286766559005
+
+dim_mat=(2*splice+1)*dim
+timeContext=2*splice+1
+print '['
+for row in range(dim_mat):
+    for col in range(dim_mat):
+        if col!=row:
+            print '0',
+        else:
+            i=int(row/dim)
+            print str(0.54 - 0.46*cos((M_2PI * i) / (timeContext-1))),
+    print
+
+print ']'
+
+
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/nnet/gen_mlp_init.py b/egs/kaldi-vystadial-recipe/s5/utils/nnet/gen_mlp_init.py
new file mode 100755
index 00000000000..305bd853c3f
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/nnet/gen_mlp_init.py
@@ -0,0 +1,83 @@
+#!/usr/bin/python -u
+
+# ./gen_mlp_init.py
+# script generating NN initialization for training with TNet
+#
+# author: Karel Vesely
+#
+
+import math, random
+import sys
+
+
+from optparse import OptionParser
+
+parser = OptionParser()
+parser.add_option('--dim', dest='dim', help='d1:d2:d3 layer dimensions in the network')
+parser.add_option('--gauss', dest='gauss', help='use gaussian noise for weights', action='store_true', default=False)
+parser.add_option('--negbias', dest='negbias', help='use uniform [-4.1,-3.9] for bias (default: all 0.0)', action='store_true', default=False)
+parser.add_option('--inputscale', dest='inputscale', help='scale the weights by 3/sqrt(Ninputs)', action='store_true', default=False)
+parser.add_option('--linBNdim', dest='linBNdim', help='dim of linear bottleneck (sigmoids will be omitted, bias will be zero)',default=0)
+parser.add_option('--seed', dest='seedval', help='seed for random generator',default=0)
+(options, args) = parser.parse_args()
+
+if(options.dim == None):
+    parser.print_help()
+    sys.exit(1)
+
+#seeding
+seedval=int(options.seedval)
+if(seedval != 0):
+    random.seed(seedval)
+
+
+dimStrL = options.dim.split(':')
+
+dimL = []
+for i in range(len(dimStrL)):
+    dimL.append(int(dimStrL[i]))
+
+
+#print dimL,'linBN',options.linBNdim
+
+for layer in range(len(dimL)-1):
+    print '<biasedlinearity>', dimL[layer+1], dimL[layer]
+    #weight matrix
+    print '['
+    for row in range(dimL[layer+1]):
+        for col in range(dimL[layer]):
+            if(options.gauss):
+                if(options.inputscale):
+                    print 3/math.sqrt(dimL[layer])*random.gauss(0.0,1.0),
+                else:
+                    print 0.1*random.gauss(0.0,1.0),
+            else:
+                if(options.inputscale):
+                    print (random.random()-0.5)*2*3/math.sqrt(dimL[layer]),
+                else:
+                    print random.random()/5.0-0.1,
+        print #newline for each row
+    print ']'
+    #bias vector
+    print '[',
+    for idx in range(dimL[layer+1]):
+        if(int(options.linBNdim) == dimL[layer+1]):
+            print '0.0',
+        elif(layer == len(dimL)-2): #last layer (softmax)
+            print '0.0',
+        elif(options.negbias):
+            print random.random()/5.0-4.1,
+        else:
+            print '0.0',
+    print ']'
+
+    if(int(options.linBNdim) != dimL[layer+1]):
+        if(layer == len(dimL)-2):
+            print '<softmax>', dimL[layer+1], dimL[layer+1]
+        else:
+            print '<sigmoid>', dimL[layer+1], dimL[layer+1]
+
+
+
+
+
diff --git
a/egs/kaldi-vystadial-recipe/s5/utils/nnet/train_nnet_scheduler.sh b/egs/kaldi-vystadial-recipe/s5/utils/nnet/train_nnet_scheduler.sh new file mode 100755 index 00000000000..612a365316c --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/utils/nnet/train_nnet_scheduler.sh @@ -0,0 +1,119 @@ +#!/bin/bash + +############################## +#check for obligatory parameters +echo +echo %%% CONFIG +echo learn_rate: ${learn_rate?$0: learn_rate not specified} +echo momentum: ${momentum?$0: momentum not specified} +echo l1_penalty: ${l1_penalty?$0: l1_penalty not specified} +echo l2_penalty: ${l2_penalty?$0: l2_penalty not specified} +echo +echo bunch_size: ${bunch_size?$0: bunch_size not specified} +echo cache_size: ${cache_size?$0: cache_size not specified} +echo randomize: ${randomize?$0: randomize not specified} +echo +echo max_iters: ${max_iters?$0: max_iters not specified} +echo start_halving_inc: ${start_halving_inc?$0: start_halving_inc not specified} +echo end_halving_inc: ${end_halving_inc?$0: end_halving_inc not specified} +echo halving_factor: ${halving_factor?$0: halving_factor not specified} +echo +echo TRAIN_TOOL: ${TRAIN_TOOL?$0: TRAIN_TOOL not specified} +echo +echo feats_cv: ${feats_cv?$0: feats_cv not specified} +echo feats_tr: ${feats_tr?$0: feats_tr not specified} +echo labels: ${labels?$0: labels not specified} +echo mlp_init: ${mlp_init?$0: mlp_init not specified} +echo ${feature_transform:+feature_transform: $feature_transform} +echo ${min_iters:+min_iters: $min_iters} +echo %%% CONFIG +echo + + +############################## +#start training + +#prerun cross-validation +$TRAIN_TOOL --cross-validate=true \ + --bunchsize=$bunch_size --cachesize=$cache_size \ + ${feature_transform:+ --feature-transform=$feature_transform} \ + $mlp_init "$feats_cv" "$labels" \ + 2> $dir/log/prerun.log || exit 1; + +acc=$(cat $dir/log/prerun.log | awk '/FRAME_ACCURACY/{ acc=$3; sub(/%/,"",acc); } END{print acc}') +echo "CROSSVAL PRERUN ACCURACY $acc" + +#training +mlp_best=$mlp_init +mlp_base=${mlp_init##*/}; mlp_base=${mlp_base%.*} + +iter=0 +halving=0 +for iter in $(seq -w $max_iters); do + echo -n "ITERATION $iter: " + mlp_next=$dir/nnet/${mlp_base}_iter${iter} + + #training + $TRAIN_TOOL \ + --learn-rate=$learn_rate --momentum=$momentum --l1-penalty=$l1_penalty --l2-penalty=$l2_penalty \ + --bunchsize=$bunch_size --cachesize=$cache_size --randomize=$randomize \ + ${feature_transform:+ --feature-transform=$feature_transform} \ + $mlp_best "$feats_tr" "$labels" $mlp_next \ + 2> $dir/log/iter$iter.log || exit 1; + + tr_acc=$(cat $dir/log/iter$iter.log | awk '/FRAME_ACCURACY/{ acc=$3; sub(/%/,"",acc); } END{print acc}') + echo -n "TRAIN ACCURACY $(printf "%.2f" $tr_acc) LRATE $(printf "%.6g" $learn_rate), " + + #cross-validation + $TRAIN_TOOL --cross-validate=true \ + --bunchsize=$bunch_size --cachesize=$cache_size \ + ${feature_transform:+ --feature-transform=$feature_transform} \ + $mlp_next "$feats_cv" "$labels" \ + 2>>$dir/log/iter$iter.log || exit 1; + + acc_new=$(cat $dir/log/iter$iter.log | awk '/FRAME_ACCURACY/{ acc=$3; sub(/%/,"",acc); } END{print acc}') + echo -n "CROSSVAL ACCURACY $(printf "%.2f" $acc_new), " + + #accept or reject new parameters + acc_prev=$acc + if [ "1" == "$(awk "BEGIN{print($acc_new>$acc);}")" ]; then + acc=$acc_new + mlp_best=$dir/nnet/${mlp_base}_iter${iter}_learnrate${learn_rate}_tr$(printf "%.2f" $tr_acc)_cv$(printf "%.2f" $acc_new) + mv $mlp_next $mlp_best + echo "nnet accepted ($(basename $mlp_best))" + else + 
mlp_reject=$dir/nnet/${mlp_base}_iter${iter}_learnrate${learn_rate}_tr$(printf "%.2f" $tr_acc)_cv$(printf "%.2f" $acc_new)_rejected
+    mv $mlp_next $mlp_reject
+    echo "nnet rejected ($(basename $mlp_reject))"
+  fi
+
+  #stopping criterion
+  if [[ "1" == "$halving" && "1" == "$(awk "BEGIN{print($acc < $acc_prev+$end_halving_inc)}")" ]]; then
+    if [[ "$min_iters" != "" ]]; then
+      if [ $min_iters -gt $iter ]; then
+        echo we were supposed to finish, but we continue, min_iters : $min_iters
+        continue
+      fi
+    fi
+    echo finished, too small improvement $(awk "BEGIN{print($acc-$acc_prev)}")
+    break
+  fi
+
+  #start annealing when improvement is low
+  if [ "1" == "$(awk "BEGIN{print($acc < $acc_prev+$start_halving_inc)}")" ]; then
+    halving=1
+  fi
+
+  #do annealing
+  if [ "1" == "$halving" ]; then
+    learn_rate=$(awk "BEGIN{print($learn_rate*$halving_factor)}")
+  fi
+done
+
+#select the best network
+if [ $mlp_best != $mlp_init ]; then
+  mlp_final=${mlp_best}_final_
+  ( cd $dir/nnet; ln -s $(basename $mlp_best) $(basename $mlp_final); )
+fi
+
+
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/parse_options.sh b/egs/kaldi-vystadial-recipe/s5/utils/parse_options.sh
new file mode 100755
index 00000000000..fa7113a0cc6
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/parse_options.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey);
+#                Arnab Ghoshal
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Parse command-line options.
+# To be sourced by another script (as in ". parse_options.sh").
+# Option format is: --option-name arg
+# and shell variable "option_name" gets set to value "arg."
+# The exception is --help, which takes no arguments, but prints the
+# $help_message variable (if defined).
+
+# The following assignment allows the --config variable to be specified
+# in all cases.
+# The following test will work even if the calling script disallows unset
+# variables (using set -u or set -o nounset).
+[ -z "${config:-}" ] && config=
+
+while true; do
+  [ -z "${1:-}" ] && break; # break if there are no arguments
+  case "$1" in
+    # If the enclosing script is called with --help option, print the help
+    # message and exit. Scripts should put help messages in $help_message
+    --help) if [ -z "$help_message" ]; then echo "No help found.";
+            else printf "$help_message\n"; fi;
+            exit 0 ;;
+    # If the first command-line argument begins with "--" (e.g. --foo-bar),
+    # then work out the variable name as $name, which will equal "foo_bar".
+    --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
+      # Next we test whether the variable in question is undefined-- if so it's
+      # an invalid option and we die. Note: $0 evaluates to the name of the
+      # enclosing script.
+      # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
+      # is undefined. We then have to wrap this test inside "eval" because
+      # foo_bar is itself inside a variable ($name).
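+      # Illustration only (not part of the original script): if the caller
+      # is invoked as "somescript.sh --foo-bar baz", then $name becomes
+      # "foo_bar" and the test below expands to
+      #   eval '[ -z "${foo_bar+xxx}" ]'
+      # which succeeds only when foo_bar was never declared, making the
+      # option invalid; a script that set "foo_bar=true" beforehand would
+      # accept it, and the later assignment becomes: eval foo_bar="baz".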
+ eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" && exit 1; + + oldval="`eval echo \\$$name`"; + # Work out whether we seem to be expecting a Boolean argument. + if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then + was_bool=true; + else + was_bool=false; + fi + + # Set the variable to the right value-- the escaped quotes make it work if + # the option had spaces, like --cmd "queue.pl -sync y" + eval $name=\"$2\"; + + # Check that Boolean-valued arguments are really Boolean. + if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then + echo "$0: expected \"true\" or \"false\": --$name $2" + exit 1; + fi + shift 2; + ;; + *) break; + esac +done + + +# Override any of the options, if --config was specified. +[ -z "$config" ] || . $config || exit 1; + +# Check for an empty argument to the --cmd option, which can easily occur as a +# result of scripting errors. +[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" && exit 1; + +true; # so this script returns code zero. + diff --git a/egs/kaldi-vystadial-recipe/s5/utils/prepare_lang.sh b/egs/kaldi-vystadial-recipe/s5/utils/prepare_lang.sh new file mode 100755 index 00000000000..c729de6474f --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/utils/prepare_lang.sh @@ -0,0 +1,275 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey); +# Arnab Ghoshal + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +# This script prepares a directory such as data/lang/, in the standard format, +# given a source directory containing a dictionary lexicon.txt in a form like: +# word phone1 phone2 ... phonen +# per line (alternate prons would be separate lines). +# and also files silence_phones.txt, nonsilence_phones.txt, optional_silence.txt +# and extra_questions.txt +# Here, silence_phones.txt and nonsilence_phones.txt are lists of silence and +# non-silence phones respectively (where silence includes various kinds of +# noise, laugh, cough, filled pauses etc., and nonsilence phones includes the +# "real" phones.) +# In each line of those files is a list of phones, and the phones on each line +# are assumed to correspond to the same "base phone", i.e. they will be +# different stress or tone variations of the same basic phone. +# The file "optional_silence.txt" contains just a single phone (typically SIL) +# which is used for optional silence in the lexicon. +# extra_questions.txt might be empty; typically will consist of lists of phones, +# all members of each list with the same stress or tone; and also possibly a +# list for the silence phones. This will augment the automtically generated +# questions (note: the automatically generated ones will treat all the +# stress/tone versions of a phone the same, so will not "get to ask" about +# stress or tone). + +# This script adds word-position-dependent phones and constructs a host of other +# derived files, that go in data/lang/. + +# Begin configuration section. 
+num_sil_states=5
+num_nonsil_states=3
+position_dependent_phones=true
+# may be set to false when position-dependent phones and word_boundary.txt
+# have been generated by another source
+reverse=false
+share_silence_phones=false # if true, then share pdfs of different silence
+                           # phones together.
+sil_prob=0.5
+# end configuration sections
+
+. utils/parse_options.sh
+
+if [ $# -ne 4 ]; then
+  echo "usage: utils/prepare_lang.sh <dict-src-dir> <oov-dict-entry> <tmp-dir> <lang-dir>"
+  echo "e.g.: utils/prepare_lang.sh data/local/dict \"<SPOKEN_NOISE>\" data/local/lang data/lang"
+  echo "options: "
+  echo " --num-sil-states <number of states>      # default: 5, #states in silence models."
+  echo " --num-nonsil-states <number of states>   # default: 3, #states in non-silence models."
+  echo " --position-dependent-phones (true|false) # default: true; if true, use _B, _E, _S & _I"
+  echo "                                          # markers on phones to indicate word-internal positions. "
+  echo " --reverse (true|false)                   # reverse lexicon."
+  echo " --share-silence-phones (true|false)      # default: false; if true, share pdfs of "
+  echo "                                          # all silence phones. "
+  echo " --sil-prob <probability of silence>      # default: 0.5 [must have 0 < silprob < 1]"
+  exit 1;
+fi
+
+srcdir=$1
+oov_word=$2
+tmpdir=$3
+dir=$4
+mkdir -p $dir $tmpdir $dir/phones
+
+[ -f path.sh ] && . ./path.sh
+
+utils/validate_dict_dir.pl $srcdir || exit 1;
+
+if $position_dependent_phones; then
+  # Create $tmpdir/lexicon.original from $srcdir/lexicon.txt by
+  # adding the markers _B, _E, _S, _I depending on word position.
+  # In this recipe, these markers apply to silence also.
+
+  perl -ane '@A=split(" ",$_); $w = shift @A; @A>0||die;
+        if(@A==1) { print "$w $A[0]_S\n"; } else { print "$w $A[0]_B ";
+        for($n=1;$n<@A-1;$n++) { print "$A[$n]_I "; } print "$A[$n]_E\n"; } ' \
+    < $srcdir/lexicon.txt > $tmpdir/lexicon.original || exit 1;
+
+  # create $tmpdir/phone_map.txt
+  # this has the format (on each line)
+  # <original phone> <version 1 of original phone> <version 2> ...
+  # where the versions depend on the position of the phone within a word.
+  # For instance, we'd have:
+  # AA AA_B AA_E AA_I AA_S
+  # for (B)egin, (E)nd, (I)nternal and (S)ingleton
+  # and in the case of silence
+  # SIL SIL SIL_B SIL_E SIL_I SIL_S
+  # [because SIL on its own is one of the variants; this is for when it doesn't
+  # occur inside a word but as an option in the lexicon.]
+
+  # This phone map expands the phone lists into all the word-position-dependent
+  # versions of the phone lists.
+
+  cat <(for x in `cat $srcdir/silence_phones.txt`; do for y in "" "" "_B" "_E" "_I" "_S"; do echo -n "$x$y "; done; echo; done) \
+    <(for x in `cat $srcdir/nonsilence_phones.txt`; do for y in "" "_B" "_E" "_I" "_S"; do echo -n "$x$y "; done; echo; done) \
+    > $tmpdir/phone_map.txt
+else
+  cp $srcdir/lexicon.txt $tmpdir/lexicon.original
+  # there might be clusters of phones on one line
+  cat $srcdir/silence_phones.txt $srcdir/nonsilence_phones.txt | \
+    sed 's/ /\n/g' | awk '(NF>0){print}' > $tmpdir/phones
+  paste -d' ' $tmpdir/phones $tmpdir/phones > $tmpdir/phone_map.txt
+fi
+
+if $reverse; then
+  echo "reversing lexicon."
+  cat $tmpdir/lexicon.original \
+    | awk '{printf "%s ",$1;for(i=NF;i>1;i--){printf "%s ",$i;}printf "\n"}' \
+    > $tmpdir/lexicon.txt
+else
+  mv $tmpdir/lexicon.original $tmpdir/lexicon.txt
+fi
+
+
+mkdir -p $dir/phones # various sets of phones...
+
+# Sets of phones for use in clustering, and making monophone systems.
+
+if $share_silence_phones; then
+  # build a roots file that will force all the silence phones to share the
+  # same pdf's. [three distinct states, only the transitions will differ.]
+  # 'shared'/'not-shared' means, do we share the 3 states of the HMM
+  # in the same tree-root?
+  # Sharing across models (phones) is achieved by writing several phones
+  # into one line of roots.txt (shared/not-shared doesn't affect this).
+  # 'shared split' means we have 1 tree-root for the 3 states of the HMM
+  # (but we get to ask about the HMM-position when we split).
+  # 'not-shared not-split' means we have separate tree roots for the 3 states,
+  # but we never split the tree so they remain stumps,
+  # so all phones in the line correspond to the same model.
+
+  cat $srcdir/silence_phones.txt | awk '{printf("%s ", $0); } END{printf("\n");}' | cat - $srcdir/nonsilence_phones.txt | \
+    utils/apply_map.pl $tmpdir/phone_map.txt > $dir/phones/sets.txt
+  cat $dir/phones/sets.txt | awk '{if(NR==1) print "not-shared", "not-split", $0; else print "shared", "split", $0;}' > $dir/phones/roots.txt
+else
+  # different silence phones will have different GMMs. [note: here, all "shared split" means
+  # is that we may have one GMM for all the states, or we can split on states. because they're
+  # context-independent phones, they don't see the context.]
+  cat $srcdir/{,non}silence_phones.txt | utils/apply_map.pl $tmpdir/phone_map.txt > $dir/phones/sets.txt
+  cat $dir/phones/sets.txt | awk '{print "shared", "split", $0;}' > $dir/phones/roots.txt
+fi
+
+cat $srcdir/silence_phones.txt | utils/apply_map.pl $tmpdir/phone_map.txt | \
+  awk '{for(n=1;n<=NF;n++) print $n;}' > $dir/phones/silence.txt
+cat $srcdir/nonsilence_phones.txt | utils/apply_map.pl $tmpdir/phone_map.txt | \
+  awk '{for(n=1;n<=NF;n++) print $n;}' > $dir/phones/nonsilence.txt
+cp $srcdir/optional_silence.txt $dir/phones/optional_silence.txt
+cp $dir/phones/silence.txt $dir/phones/context_indep.txt
+
+cat $srcdir/extra_questions.txt | utils/apply_map.pl $tmpdir/phone_map.txt \
+  >$dir/phones/extra_questions.txt
+
+# Want extra questions about the word-start/word-end stuff. Make it separate for
+# silence and non-silence. Probably doesn't matter, as silence will rarely
+# be inside a word.
+if $position_dependent_phones; then
+  for suffix in _B _E _I _S; do
+    (for x in `cat $srcdir/nonsilence_phones.txt`; do echo -n "$x$suffix "; done; echo) >>$dir/phones/extra_questions.txt
+  done
+  for suffix in "" _B _E _I _S; do
+    (for x in `cat $srcdir/silence_phones.txt`; do echo -n "$x$suffix "; done; echo) >>$dir/phones/extra_questions.txt
+  done
+fi
+
+# add disambig symbols to the lexicon in $tmpdir/lexicon.txt
+# and produce $tmpdir/lexicon_disambig.txt
+
+ndisambig=`utils/add_lex_disambig.pl $tmpdir/lexicon.txt $tmpdir/lexicon_disambig.txt`
+ndisambig=$[$ndisambig+1]; # add one disambig symbol for silence in lexicon FST.
+echo $ndisambig > $tmpdir/lex_ndisambig
+
+# Format of lexicon_disambig.txt:
+# !SIL SIL_S
+# <SPOKEN_NOISE> SPN_S #1
+# <UNK> SPN_S #2
+# <NOISE> NSN_S
+# !EXCLAMATION-POINT EH2_B K_I S_I K_I L_I AH0_I M_I EY1_I SH_I AH0_I N_I P_I OY2_I N_I T_E
+
+( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) >$dir/phones/disambig.txt
+
+# Create phone symbol table.
+echo "<eps>" | cat - $dir/phones/{silence,nonsilence,disambig}.txt | \
+  awk '{n=NR-1; print $1, n;}' > $dir/phones.txt
+
+# Create a file that describes the word-boundary information for
+# each phone. 5 categories.
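+# Illustration only (not from the original script): with
+# position-dependent phones the resulting word_boundary.txt pairs each
+# phone with one of the 5 categories, e.g.:
+#   AA_B begin
+#   AA_E end
+#   AA_I internal
+#   AA_S singleton
+#   SIL nonword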
+if $position_dependent_phones; then
+  cat $dir/phones/{silence,nonsilence}.txt | \
+    awk '/_I$/{print $1, "internal"; next;} /_B$/{print $1, "begin"; next; }
+         /_S$/{print $1, "singleton"; next;} /_E$/{print $1, "end"; next; }
+         {print $1, "nonword";} ' > $dir/phones/word_boundary.txt
+else
+  # word_boundary.txt might have been generated by another source
+  [ -f $srcdir/word_boundary.txt ] && cp $srcdir/word_boundary.txt $dir/phones/word_boundary.txt
+fi
+
+# Create word symbol table.
+cat $tmpdir/lexicon.txt | awk '{print $1}' | sort | uniq | \
+  awk 'BEGIN{print "<eps> 0";} {printf("%s %d\n", $1, NR);} END{printf("#0 %d\n", NR+1);} ' \
+  > $dir/words.txt || exit 1;
+
+# format of $dir/words.txt:
+#<eps> 0
+#!EXCLAMATION-POINT 1
+#!SIL 2
+#"CLOSE-QUOTE 3
+#...
+
+silphone=`cat $srcdir/optional_silence.txt` || exit 1;
+
+# Create the basic L.fst without disambiguation symbols, for use
+# in training.
+utils/make_lexicon_fst.pl $tmpdir/lexicon.txt $sil_prob $silphone | \
+  fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \
+    --keep_isymbols=false --keep_osymbols=false | \
+  fstarcsort --sort_type=olabel > $dir/L.fst || exit 1;
+
+# The file oov.txt contains a word that we will map any OOVs to during
+# training.
+echo "$oov_word" > $dir/oov.txt || exit 1;
+cat $dir/oov.txt | utils/sym2int.pl $dir/words.txt >$dir/oov.int # integer version of oov
+# symbol, used in some scripts.
+
+
+
+# Create these lists of phones in colon-separated integer list form too,
+# for purposes of being given to programs as command-line options.
+for f in silence nonsilence optional_silence disambig context_indep; do
+  utils/sym2int.pl $dir/phones.txt <$dir/phones/$f.txt >$dir/phones/$f.int
+  utils/sym2int.pl $dir/phones.txt <$dir/phones/$f.txt | \
+    awk '{printf(":%d", $1);} END{printf "\n"}' | sed s/:// > $dir/phones/$f.csl || exit 1;
+done
+
+for x in sets extra_questions; do
+  utils/sym2int.pl $dir/phones.txt <$dir/phones/$x.txt > $dir/phones/$x.int || exit 1;
+done
+
+utils/sym2int.pl -f 3- $dir/phones.txt <$dir/phones/roots.txt \
+  > $dir/phones/roots.int || exit 1;
+
+#if $position_dependent_phones; then
+if [ -f $dir/phones/word_boundary.txt ]; then
+  utils/sym2int.pl -f 1 $dir/phones.txt <$dir/phones/word_boundary.txt \
+    > $dir/phones/word_boundary.int || exit 1;
+fi
+
+silphonelist=`cat $dir/phones/silence.csl`
+nonsilphonelist=`cat $dir/phones/nonsilence.csl`
+utils/gen_topo.pl $num_nonsil_states $num_sil_states $nonsilphonelist $silphonelist >$dir/topo
+
+
+# Create the lexicon FST with disambiguation symbols, and put it in lang_test.
+# There is an extra step where we create a loop to "pass through" the
+# disambiguation symbols from G.fst.
+phone_disambig_symbol=`grep \#0 $dir/phones.txt | awk '{print $2}'`
+word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'`
+
+utils/make_lexicon_fst.pl $tmpdir/lexicon_disambig.txt $sil_prob $silphone '#'$ndisambig | \
+  fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \
+    --keep_isymbols=false --keep_osymbols=false | \
+  fstaddselfloops "echo $phone_disambig_symbol |" "echo $word_disambig_symbol |" | \
+  fstarcsort --sort_type=olabel > $dir/L_disambig.fst || exit 1;
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/queue.pl b/egs/kaldi-vystadial-recipe/s5/utils/queue.pl
new file mode 100755
index 00000000000..8ceff97f4b6
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/queue.pl
@@ -0,0 +1,263 @@
+#!/usr/bin/perl
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey).
+# Apache 2.0.
+use File::Basename; +use Cwd; + +# queue.pl has the same functionality as run.pl, except that +# it runs the job in question on the queue (Sun GridEngine). +# This version of queue.pl uses the task array functionality +# of the grid engine. Note: it's different from the queue.pl +# in the s4 and earlier scripts. + +$qsub_opts = ""; +$sync = 0; + +for ($x = 1; $x <= 3; $x++) { # This for-loop is to + # allow the JOB=1:n option to be interleaved with the + # options to qsub. + while (@ARGV >= 2 && $ARGV[0] =~ m:^-:) { + $switch = shift @ARGV; + if ($switch eq "-V") { + $qsub_opts .= "-V "; + } else { + $option = shift @ARGV; + if ($switch eq "-sync" && $option =~ m/^[yY]/) { + $sync = 1; + } + $qsub_opts .= "$switch $option "; + if ($switch eq "-pe") { # e.g. -pe smp 5 + $option2 = shift @ARGV; + $qsub_opts .= "$option2 "; + } + } + } + if ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+):(\d+)$/) { + $jobname = $1; + $jobstart = $2; + $jobend = $3; + shift; + if ($jobstart > $jobend) { + die "queue.pl: invalid job range $ARGV[0]"; + } + } elsif ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+)$/) { # e.g. JOB=1. + $jobname = $1; + $jobstart = $2; + $jobend = $2; + shift; + } elsif ($ARGV[0] =~ m/.+\=.*\:.*$/) { + print STDERR "Warning: suspicious first argument to queue.pl: $ARGV[0]\n"; + } +} + + +if (@ARGV < 2) { + print STDERR + "Usage: queue.pl [options to qsub] [JOB=1:n] log-file command-line arguments...\n" . + "e.g.: queue.pl foo.log echo baz\n" . + " (which will echo \"baz\", with stdout and stderr directed to foo.log)\n" . + "or: queue.pl -q all.q\@xyz foo.log echo bar \| sed s/bar/baz/ \n" . + " (which is an example of using a pipe; you can provide other escaped bash constructs)\n" . + "or: queue.pl -q all.q\@qyz JOB=1:10 foo.JOB.log echo JOB \n" . + " (which illustrates the mechanism to submit parallel jobs; note, you can use \n" . + " another string other than JOB)\n" . + "Note: if you pass the \"-sync y\" option to qsub, this script will take note\n" . + "and change its behavior. Otherwise it uses qstat to work out when the job finished\n"; + exit 1; +} + +$cwd = getcwd(); +$logfile = shift @ARGV; + +if (defined $jobname && $logfile !~ m/$jobname/ + && $jobend > $jobstart) { + print STDERR "run.pl: you are trying to run a parallel job but " + . "you are putting the output into just one log file ($logfile)\n"; + exit(1); +} + +# +# Work out the command; quote escaping is done here. +# Note: the rules for escaping stuff are worked out pretty +# arbitrarily, based on what we want it to do. Some things that +# we pass as arguments to queue.pl, such as "|", we want to be +# interpreted by bash, so we don't escape them. Other things, +# such as archive specifiers like 'ark:gunzip -c foo.gz|', we want +# to be passed, in quotes, to the Kaldi program. Our heuristic +# is that stuff with spaces in should be quoted. This doesn't +# always work. +# +$cmd = ""; + +foreach $x (@ARGV) { + if ($x =~ m/^\S+$/) { $cmd .= $x . " "; } # If string contains no spaces, take + # as-is. + elsif ($x =~ m:\":) { $cmd .= "'\''$x'\'' "; } # else if no dbl-quotes, use single + else { $cmd .= "\"$x\" "; } # else use double. +} + +# +# Work out the location of the script file, and open it for writing. +# +$dir = dirname($logfile); +$base = basename($logfile); +$qdir = "$dir/q"; +$qdir =~ s:/(log|LOG)/*q:/q:; # If qdir ends in .../log/q, make it just .../q. +$queue_logfile = "$qdir/$base"; + +if (!-d $dir) { system "mkdir $dir 2>/dev/null"; } # another job may be doing this... 
+if (!-d $dir) { die "Cannot make the directory $dir\n"; }
+if (!-d "$qdir") { system "mkdir $qdir 2>/dev/null"; } # make a directory called "q",
+# where we will put the log created by qsub... normally this doesn't contain
+# anything interesting, everything goes to $logfile.
+
+if (defined $jobname) { # It's an array job.
+  $queue_array_opt = "-t $jobstart:$jobend";
+  $logfile =~ s/$jobname/\$SGE_TASK_ID/g; # This variable will get
+  # replaced by qsub, in each job, with the job-id.
+  $cmd =~ s/$jobname/\$SGE_TASK_ID/g; # same for the command...
+  $queue_logfile =~ s/\.?$jobname//; # the log file in the q/ subdirectory
+  # is for the queue to put its log, and this doesn't need the task array subscript
+  # so we remove it.
+}
+
+# queue_scriptfile is as $queue_logfile [e.g. dir/q/foo.log] but
+# with the suffix .sh.
+$queue_scriptfile = $queue_logfile;
+($queue_scriptfile =~ s/\.[a-zA-Z]{1,5}$/.sh/) || ($queue_scriptfile .= ".sh");
+if ($queue_scriptfile !~ m:^/:) {
+  $queue_scriptfile = $cwd . "/" . $queue_scriptfile; # just in case.
+}
+
+# We'll write to the standard input of "qsub" (the file-handle Q),
+# the job that we want it to execute.
+# Also keep our current PATH around, just in case there was something
+# in it that we need (although we also source ./path.sh)
+
+$syncfile = "$qdir/done.$$";
+
+system("rm $queue_logfile $syncfile 2>/dev/null");
+
+# The qsub command line we will submit with (it is also echoed into the
+# script below, for reference).
+$qsub_cmd = "qsub -S /bin/bash -v PATH -cwd -j y -o $queue_logfile $qsub_opts $queue_array_opt $queue_scriptfile >>$queue_logfile 2>&1";
+
+#
+# Write to the script file, and then close it.
+#
+open(Q, ">$queue_scriptfile") || die "Failed to write to $queue_scriptfile";
+
+print Q "#!/bin/bash\n";
+print Q "cd $cwd\n";
+print Q ". ./path.sh\n";
+print Q "( echo '#' Running on \`hostname\`\n";
+print Q "  echo '#' Started at \`date\`\n";
+print Q "  echo -n '# '; cat <$logfile\n";
+print Q "  ( $cmd ) 2>>$logfile >>$logfile\n";
+print Q "ret=\$?\n";
+print Q "echo '#' Finished at \`date\` with status \$ret >>$logfile\n";
+if (!defined $jobname) { # not an array job
+  print Q "touch $syncfile\n"; # so we know it's done.
+} else {
+  print Q "touch $syncfile.\$SGE_TASK_ID\n"; # touch a bunch of sync-files.
+}
+print Q "exit \$[\$ret ? 1 : 0]\n"; # avoid status 100 which grid-engine
+print Q "## submitted with:\n";     # treats specially.
+print Q "# $qsub_cmd\n";
+if (!close(Q)) { # close was not successful... || die "Could not close script file $shfile";
+  die "Failed to close the script file (full disk?)";
+}
+
+$ret = system ($qsub_cmd);
+if ($ret != 0) {
+  if ($sync && $ret == 256) { # this is the exit status when a job failed (bad exit status)
+    if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/*/g; }
+    print STDERR "queue.pl: job writing to $logfile failed\n";
+  } else {
+    print STDERR "queue.pl: error submitting jobs to queue (return status was $ret)\n";
+    print STDERR `tail $queue_logfile`;
+  }
+  exit(1);
+}
+
+if (! $sync) { # We're not submitting with -sync y, so we
+  # need to wait for the jobs to finish. We wait for the
+  # sync-files we "touched" in the script to exist.
+  @syncfiles = ();
+  if (!defined $jobname) { # not an array job.
+    push @syncfiles, $syncfile;
+  } else {
+    for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
+      push @syncfiles, "$syncfile.$jobid";
+    }
+  }
+  $wait = 0.1;
+  foreach $f (@syncfiles) {
+    # wait for them to finish one by one.
+    while (! -f $f) {
+      sleep($wait);
+      $wait *= 1.2;
+      if ($wait > 1.0) {
+        $wait = 1.0; # never wait more than 1 second.
+ } + } + } + $all_syncfiles = join(" ", @syncfiles); + system("rm $all_syncfiles 2>/dev/null"); +} + +# OK, at this point we are synced; we know the job is done. +# But we don't know about its exit status. We'll look at $logfile for this. +# First work out an array @logfiles of file-locations we need to +# read (just one, unless it's an array job). +@logfiles = (); +if (!defined $jobname) { # not an array job. + push @logfiles, $logfile; +} else { + for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) { + $l = $logfile; + $l =~ s/\$SGE_TASK_ID/$jobid/g; + push @logfiles, $l; + } +} + +$num_failed = 0; +foreach $l (@logfiles) { + @wait_times = (0.1, 0.2, 0.2, 0.3, 0.5, 0.5, 1.0, 2.0, 5.0, 5.0, 5.0, 10.0, 25.0); + for ($iter = 0; $iter <= @wait_times; $iter++) { + $line = `tail -1 $l 2>/dev/null`; + if ($line =~ m/with status (\d+)/) { + $status = $1; + last; + } else { + if ($iter < @wait_times) { + sleep($wait_times[$iter]); + } else { + if (! -f $l) { + print STDERR "Log-file $l does not exist.\n"; + } else { + print STDERR "The last line of log-file $l does not seem to indicate the " + . "return status as expected\n"; + } + exit(1); # Something went wrong with the queue, or the + # machine it was running on, probably. + } + } + } + # OK, now we have $status, which is the return-status of + # the command in the job. + if ($status != 0) { $num_failed++; } +} +if ($num_failed == 0) { exit(0); } +else { # we failed. + if (@logfiles == 1) { + if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/$jobstart/g; } + print STDERR "queue.pl: job failed with status $status, log is in $logfile\n"; + if ($logfile =~ m/JOB/) { + print STDERR "queue.pl: probably you forgot to put JOB=1:\$nj in your script.\n"; + } + } else { + if (defined $jobname) { $logfile =~ s/\$SGE_TASK_ID/*/g; } + $numjobs = 1 + $jobend - $jobstart; + print STDERR "queue.pl: $num_failed / $numjobs failed, log is in $logfile\n"; + } + exit(1); +} diff --git a/egs/kaldi-vystadial-recipe/s5/utils/remove_oovs.pl b/egs/kaldi-vystadial-recipe/s5/utils/remove_oovs.pl new file mode 100755 index 00000000000..5bcab59840c --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/utils/remove_oovs.pl @@ -0,0 +1,43 @@ +#!/usr/bin/perl +# Copyright 2010-2011 Microsoft Corporation + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +# This script removes lines that contain these OOVs on either the +# third or fourth fields of the line. It is intended to remove arcs +# with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). 
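+# For example (illustration only, assuming unk_list.txt contains the
+# single word FOO): an arc line of a printed G.fst such as
+#   0 7 FOO FOO 3.25
+# has FOO in its third field, so the line is removed; arcs whose third
+# and fourth fields are in-vocabulary words pass through unchanged.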
+
+if ( @ARGV < 1 || @ARGV > 2) {
+  die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n";
+}
+
+$unklist = shift @ARGV;
+open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n";
+while(<S>){
+  @A = split(" ", $_);
+  @A == 1 || die "Bad line in unknown-symbol list: $_";
+  $unk{$A[0]} = 1;
+}
+
+$num_removed = 0;
+while(<>){
+  @A = split(" ", $_);
+  if(defined $unk{$A[2]} || defined $unk{$A[3]}) {
+    $num_removed++;
+  } else {
+    print;
+  }
+}
+print STDERR "remove_oovs.pl: removed $num_removed lines.\n";
+
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/rnnlm_compute_scores.sh b/egs/kaldi-vystadial-recipe/s5/utils/rnnlm_compute_scores.sh
new file mode 100755
index 00000000000..d904fdc995f
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/rnnlm_compute_scores.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+# Compute scores from RNNLM. This script takes a directory
+# $dir (e.g. dir=local/rnnlm/rnnlm.voc30.hl30 ),
+# where it expects the files:
+# rnnlm wordlist.rnn unk.probs,
+# and also an input file location where it can get the sentences to score, and
+# an output file location to put the scores (negated logprobs) for each
+# sentence. This script uses the Kaldi-style "archive" format, so the input and
+# output files will have a first field that corresponds to some kind of
+# utterance-id or, in practice, utterance-id-1, utterance-id-2, etc., for the
+# N-best list.
+#
+# Here, "wordlist.rnn" is the set of words, like a vocabulary,
+# that the RNN was trained on (note, it won't include <s> or </s>),
+# plus <RNN_UNK> which is a kind of class where we put low-frequency
+# words; unk.probs gives the probs for words given this class, and it
+# has, on each line, "word prob".
+
+. ./path.sh || exit 1;
+
+rnnlm=$KALDI_ROOT/tools/rnnlm-0.3e/rnnlm
+
+[ ! -f $rnnlm ] && echo No such program $rnnlm && exit 1;
+
+if [ $# != 4 ]; then
+  echo "Usage: rnnlm_compute_scores.sh <rnn-dir> <temp-dir> <input-text> <output-scores>"
+  exit 1;
+fi
+
+dir=$1
+tempdir=$2
+text_in=$3
+scores_out=$4
+
+for x in rnnlm wordlist.rnn unk.probs; do
+  if [ ! -f $dir/$x ]; then
+    echo "rnnlm_compute_scores.sh: expected file $dir/$x to exist."
+    exit 1;
+  fi
+done
+
+mkdir -p $tempdir
+cat $text_in | awk '{for (x=2;x<=NF;x++) {printf("%s ", $x)} printf("\n");}' >$tempdir/text
+cat $text_in | awk '{print $1}' > $tempdir/ids # e.g. utterance ids.
+cat $tempdir/text | awk -v voc=$dir/wordlist.rnn -v unk=$dir/unk.probs \
+  -v logprobs=$tempdir/loglikes.oov \
+  'BEGIN{ while((getline<voc)>0) { invoc[$1]=1; } while ((getline<unk)>0){ unkprob[$1]=$2;} }
+   { logprob=0; for (x=1;x<=NF;x++) { w=$x;
+     if (invoc[w]) { printf("%s ",w); } else {
+       printf("<RNN_UNK> ");
+       if (unkprob[w] != 0) { logprob += log(unkprob[w]); }
+       else { print "Warning: unknown word ", w >"/dev/stderr"; logprob += log(1.0e-07); }}}
+     printf("\n"); print logprob > logprobs } ' > $tempdir/text.nounk
+
+# OK, now we compute the scores on the text with OOVs replaced
+# with <RNN_UNK>
+
+$rnnlm -independent -rnnlm $dir/rnnlm -test $tempdir/text.nounk -nbest -debug 0 | \
+  awk '{print $1*log(10);}' > $tempdir/loglikes.rnn
+
+[ `cat $tempdir/loglikes.rnn | wc -l` -ne `cat $tempdir/loglikes.oov | wc -l` ] && \
+  echo "rnnlm rescoring failed" && exit 1;
+
+paste $tempdir/loglikes.rnn $tempdir/loglikes.oov | awk '{print -($1+$2);}' >$tempdir/scores
+
+# scores out, with utterance-ids.
+paste $tempdir/ids $tempdir/scores > $scores_out + diff --git a/egs/kaldi-vystadial-recipe/s5/utils/run.pl b/egs/kaldi-vystadial-recipe/s5/utils/run.pl new file mode 100755 index 00000000000..efb2ed4f8f9 --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/utils/run.pl @@ -0,0 +1,123 @@ +#!/usr/bin/perl -w + +# In general, doing +# run.pl some.log a b c is like running the command a b c in +# the bash shell, and putting the standard error and output into some.log. +# To run parallel jobs (backgrounded on the host machine), you can do (e.g.) +# run.pl JOB=1:4 some.JOB.log a b c JOB is like running the command a b c JOB +# and putting it in some.JOB.log, for each one. [Note: JOB can be any identifier]. +# If any of the jobs fails, this script will fail. + +# A typical example is: +# run.pl some.log my-prog "--opt=foo bar" foo \| other-prog baz +# and run.pl will run something like: +# ( my-prog '--opt=foo bar' foo | other-prog baz ) >& some.log +# +# Basically it takes the command-line arguments, quotes them +# as necessary to preserve spaces, and evaluates them with bash. +# In addition it puts the command line at the top of the log, and +# the start and end times of the command at the beginning and end. +# The reason why this is useful is so that we can create a different +# version of this program that uses a queueing system instead. + +@ARGV < 2 && die "usage: run.pl log-file command-line arguments..."; + +$jobstart=1; +$jobend=1; + +# First parse an option like JOB=1:4 + +if (@ARGV > 0) { + if ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+):(\d+)$/) { + $jobname = $1; + $jobstart = $2; + $jobend = $3; + shift; + if ($jobstart > $jobend) { + die "queue.pl: invalid job range $ARGV[0]"; + } + } elsif ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+)$/) { # e.g. JOB=1. + $jobname = $1; + $jobstart = $2; + $jobend = $2; + shift; + } elsif ($ARGV[0] =~ m/.+\=.*\:.*$/) { + print STDERR "Warning: suspicious first argument to queue.pl: $ARGV[0]\n"; + } +} + +$logfile = shift @ARGV; + +if (defined $jobname && $logfile !~ m/$jobname/ && + $jobend > $jobstart) { + print STDERR "run.pl: you are trying to run a parallel job but " + . "you are putting the output into just one log file ($logfile)\n"; + exit(1); +} + +$cmd = ""; + +foreach $x (@ARGV) { + if ($x =~ m/^\S+$/) { $cmd .= $x . " "; } + elsif ($x =~ m:\":) { $cmd .= "'$x' "; } + else { $cmd .= "\"$x\" "; } +} + + +for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) { + $childpid = fork(); + if (!defined $childpid) { die "Error forking in run.pl (writing to $logfile)"; } + if ($childpid == 0) { # We're in the child... this branch + # executes the job and returns (possibly with an error status). + if (defined $jobname) { + $cmd =~ s/$jobname/$jobid/g; + $logfile =~ s/$jobname/$jobid/g; + } + system("mkdir -p `dirname $logfile` 2>/dev/null"); + open(F, ">$logfile") || die "Error opening log file $logfile"; + print F "# " . $cmd . "\n"; + print F "# Started at " . `date`; + $starttime = `date +'%s'`; + print F "#\n"; + close(F); + + # Pipe into bash.. make sure we're not using any other shell. + open(B, "|bash") || die "Error opening shell command"; + print B "( " . $cmd . ") 2>>$logfile >> $logfile"; + close(B); # If there was an error, exit status is in $? + $ret = $?; + + $endtime = `date +'%s'`; + open(F, ">>$logfile") || die "Error opening log file $logfile (again)"; + $enddate = `date`; + chop $enddate; + print F "# Ended (code $ret) at " . $enddate . ", elapsed time " . ($endtime-$starttime) . " seconds\n"; + close(F); + exit($ret == 0 ? 
0 : 1);
+  }
+}
+
+$ret = 0;
+$numfail = 0;
+for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
+  $r = wait();
+  if ($r == -1) { die "Error waiting for child process"; } # should never happen.
+  if ($? != 0) { $numfail++; $ret = 1; } # The child process failed.
+}
+
+if ($ret != 0) {
+  $njobs = $jobend - $jobstart + 1;
+  if ($njobs == 1) {
+    print STDERR "run.pl: job failed, log is in $logfile\n";
+    if ($logfile =~ m/JOB/) {
+      print STDERR "run.pl: probably you forgot to put JOB=1:\$nj in your script.\n";
+    }
+  }
+  else {
+    $logfile =~ s/$jobname/*/g;
+    print STDERR "run.pl: $numfail / $njobs failed, log is in $logfile\n";
+  }
+}
+
+
+exit ($ret);
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/s2eps.pl b/egs/kaldi-vystadial-recipe/s5/utils/s2eps.pl
new file mode 100755
index 00000000000..de993db67f7
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/s2eps.pl
@@ -0,0 +1,27 @@
+#!/usr/bin/perl
+# Copyright 2010-2011 Microsoft Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# This script replaces <s> and </s> with <eps> (on both input and output sides),
+# for the G.fst acceptor.
+
+while(<>){
+  @A = split(" ", $_);
+  if ( @A >= 4 ) {
+    if ($A[2] eq "<s>" || $A[2] eq "</s>") { $A[2] = "<eps>"; }
+    if ($A[3] eq "<s>" || $A[3] eq "</s>") { $A[3] = "<eps>"; }
+  }
+  print join("\t", @A) . "\n";
+}
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/shuffle_list.pl b/egs/kaldi-vystadial-recipe/s5/utils/shuffle_list.pl
new file mode 100755
index 00000000000..3144c263053
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/shuffle_list.pl
@@ -0,0 +1,31 @@
+#!/usr/bin/perl
+
+# Copyright 2010-2011 Microsoft Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# seeding is optional...
+if ($#ARGV==0) {
+  srand($ARGV[0]);
+} else {
+  srand(0); # Seems to give inconsistent behavior if we don't seed.
+}
+
+
+# This script shuffles lines of a list.
+# The list is read from stdin and written to stdout.
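+# Example usage (illustration only):
+#   utils/shuffle_list.pl 777 < train.scp > train_shuffled.scp
+# shuffles train.scp with seed 777; with no seed argument srand(0) is
+# used, so repeated runs produce the same "random" order.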
+@X = <STDIN>;
+@X = sort { rand() <=> rand() } @X;
+print @X;
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/spk2utt_to_utt2spk.pl b/egs/kaldi-vystadial-recipe/s5/utils/spk2utt_to_utt2spk.pl
new file mode 100755
index 00000000000..ca8a6a1249c
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/spk2utt_to_utt2spk.pl
@@ -0,0 +1,27 @@
+#!/usr/bin/perl
+# Copyright 2010-2011 Microsoft Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+while(<>){
+  @A = split(" ", $_);
+  @A > 1 || die "Invalid line in spk2utt file: $_";
+  $s = shift @A;
+  foreach $u ( @A ) {
+    print "$u $s\n";
+  }
+}
+
+
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/split_data.sh b/egs/kaldi-vystadial-recipe/s5/utils/split_data.sh
new file mode 100755
index 00000000000..bee31a29643
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/split_data.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+# Copyright 2010-2012 Microsoft Corporation; Johns Hopkins University (Author: Daniel Povey)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+split_per_spk=true
+if [ "$1" == "--per-utt" ]; then
+  split_per_spk=false
+  shift
+fi
+
+if [ $# != 2 ]; then
+  echo "Usage: split_data.sh [--per-utt] <data-dir> <num-to-split>"
+  exit 1
+fi
+
+data=$1
+numsplit=$2
+
+if [ $numsplit -le 0 ]; then
+  echo "Invalid num-split argument $numsplit";
+  exit 1;
+fi
+
+n=0;
+feats=""
+wavs=""
+utt2spks=""
+texts=""
+
+nu=`cat $data/utt2spk | wc -l`
+nf=`cat $data/feats.scp | wc -l`
+nt=`cat $data/text | wc -l`
+if [ $nu -ne $nf ]; then
+  echo "split_data.sh: warning: #lines in (utt2spk,feats.scp) is ($nu,$nf); this script "
+  echo " may produce incorrectly split data."
+  echo "Use utils/fix_data_dir.sh to fix this."
+fi
+if [ $nt -ne 0 -a $nu -ne $nt ]; then
+  echo "split_data.sh: warning: #lines in (utt2spk,text) is ($nu,$nt); this script "
+  echo " may produce incorrectly split data."
+  echo "Use utils/fix_data_dir.sh to fix this."
+fi
+
+# `utils/get_split.pl` returns "0 1 2 3" or "00 01 .. 18 19" or whatever.
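+# For example, "split_data.sh data/train 4" (a hypothetical data dir)
+# creates data/train/split4/{1,2,3,4}, each holding its share of
+# feats.scp, text and utt2spk, as the loop below shows.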
+for n in `seq $numsplit`; do
+  mkdir -p $data/split$numsplit/$n
+  feats="$feats $data/split$numsplit/$n/feats.scp"
+  texts="$texts $data/split$numsplit/$n/text"
+  utt2spks="$utt2spks $data/split$numsplit/$n/utt2spk"
+done
+
+if $split_per_spk; then
+  utt2spk_opt="--utt2spk=$data/utt2spk"
+else
+  utt2spk_opt=
+fi
+
+utils/split_scp.pl $utt2spk_opt $data/utt2spk $utt2spks || exit 1
+
+utils/split_scp.pl $utt2spk_opt $data/feats.scp $feats || exit 1
+[ -f $data/text ] && \
+  utils/split_scp.pl $utt2spk_opt $data/text $texts
+
+for n in `seq $numsplit`; do
+  dsn=$data/split$numsplit/$n
+  utils/utt2spk_to_spk2utt.pl $dsn/utt2spk > $dsn/spk2utt || exit 1;
+  # for completeness, also split the spk2gender file
+  [ -f $data/spk2gender ] && \
+    utils/filter_scp.pl $dsn/spk2utt $data/spk2gender > $dsn/spk2gender
+  [ -f $data/cmvn.scp ] && \
+    utils/filter_scp.pl $dsn/spk2utt $data/cmvn.scp > $dsn/cmvn.scp
+  if [ -f $data/segments ]; then
+    utils/filter_scp.pl $dsn/utt2spk $data/segments > $dsn/segments
+    awk '{print $2;}' $dsn/segments |sort|uniq > $data/tmp.reco # recording-ids.
+    [ -f $data/reco2file_and_channel ] &&
+      utils/filter_scp.pl $data/tmp.reco $data/reco2file_and_channel > $dsn/reco2file_and_channel
+    [ -f $data/wav.scp ] && utils/filter_scp.pl $data/tmp.reco $data/wav.scp > $dsn/wav.scp
+    rm $data/tmp.reco
+  else # else wav is indexed by utterance -> filter on this.
+    [ -f $data/wav.scp ] &&
+      utils/filter_scp.pl $dsn/utt2spk $data/wav.scp > $dsn/wav.scp
+  fi
+done
+
+exit 0
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/split_scp.pl b/egs/kaldi-vystadial-recipe/s5/utils/split_scp.pl
new file mode 100755
index 00000000000..18abcdb2fb1
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/split_scp.pl
@@ -0,0 +1,221 @@
+#!/usr/bin/perl -w
+# Copyright 2010-2011 Microsoft Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+
+# This program splits up any kind of .scp or archive-type file.
+# If there is no utt2spk option it will work on any text file and
+# will split it up with an approximately equal number of lines in
+# each.
+# With the --utt2spk option it will work on anything that has the
+# utterance-id as the first entry on each line; the utt2spk file is
+# of the form "utterance speaker" (on each line).
+# It splits it into equal-size chunks as far as it can. If you use
+# the utt2spk option it will make sure these chunks coincide with
+# speaker boundaries. In this case, if there are more chunks
+# than speakers (and in some other circumstances), some of the
+# resulting chunks will be empty and it
+# will print a warning.
+# You will normally call this like:
+# split_scp.pl scp scp.1 scp.2 scp.3 ...
+# or
+# split_scp.pl --utt2spk=utt2spk scp scp.1 scp.2 scp.3 ...
+# Note that you can use this script to split the utt2spk file itself,
+# e.g. split_scp.pl --utt2spk=utt2spk utt2spk utt2spk.1 utt2spk.2 ...
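+# A worked example of the case without --utt2spk (sizes are hypothetical):
+# a 10-line scp split into 3 outputs gets 4, 3 and 3 lines, since the
+# first (10 mod 3) = 1 output file receives one extra line; see the
+# else-branch at the bottom of this script.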
+
+# You can also call the scripts like:
+# split_scp.pl -j 3 0 scp scp.0
+# [note: with this option, it assumes zero-based indexing of the split parts,
+# i.e. the second number must be 0 <= n < num-jobs.]
+
+$num_jobs = 0;
+$job_id = 0;
+$utt2spk_file = "";
+
+for ($x = 1; $x <= 2; $x++) {
+  if ($ARGV[0] eq "-j") {
+    shift @ARGV;
+    $num_jobs = shift @ARGV;
+    $job_id = shift @ARGV;
+    if ($num_jobs <= 0 || $job_id < 0 || $job_id >= $num_jobs) {
+      die "Invalid num-jobs and job-id: $num_jobs and $job_id";
+    }
+  }
+  if ($ARGV[0] =~ "--utt2spk=(.+)") {
+    $utt2spk_file=$1;
+    shift;
+  }
+}
+
+if(($num_jobs == 0 && @ARGV < 2) || ($num_jobs > 0 && (@ARGV < 1 || @ARGV > 2))) {
+  die "Usage: split_scp.pl [--utt2spk=<utt2spk_file>] in.scp out1.scp out2.scp ... \n" .
+      " or: split_scp.pl -j num-jobs job-id [--utt2spk=<utt2spk_file>] in.scp [out.scp]\n" .
+      " ... where 0 <= job-id < num-jobs.";
+}
+
+$error = 0;
+$inscp = shift @ARGV;
+if ($num_jobs == 0) { # without -j option
+  @OUTPUTS = @ARGV;
+} else {
+  for ($j = 0; $j < $num_jobs; $j++) {
+    if ($j == $job_id) {
+      if (@ARGV > 0) { push @OUTPUTS, $ARGV[0]; }
+      else { push @OUTPUTS, "-"; }
+    } else {
+      push @OUTPUTS, "/dev/null";
+    }
+  }
+}
+
+if ($utt2spk_file ne "") { # We have the --utt2spk option...
+  open(U, "<$utt2spk_file") || die "Failed to open utt2spk file $utt2spk_file";
+  while(<U>) {
+    @A = split;
+    @A == 2 || die "Bad line $_ in utt2spk file $utt2spk_file";
+    ($u,$s) = @A;
+    $utt2spk{$u} = $s;
+  }
+  open(I, "<$inscp") || die "Opening input scp file $inscp";
+  @spkrs = ();
+  while(<I>) {
+    @A = split;
+    if(@A == 0) { die "Empty or space-only line in scp file $inscp"; }
+    $u = $A[0];
+    $s = $utt2spk{$u};
+    if(!defined $s) { die "No such utterance $u in utt2spk file $utt2spk_file"; }
+    if(!defined $spk_count{$s}) {
+      push @spkrs, $s;
+      $spk_count{$s} = 0;
+      $spk_data{$s} = "";
+    }
+    $spk_count{$s}++;
+    $spk_data{$s} = $spk_data{$s} . $_;
+  }
+  # Now split as equally as possible ..
+  # First allocate spks to files by allocating an approximately
+  # equal number of speakers.
+  $numspks = @spkrs;   # number of speakers.
+  $numscps = @OUTPUTS; # number of output files.
+  for($scpidx = 0; $scpidx < $numscps; $scpidx++) {
+    $scparray[$scpidx] = []; # [] is array reference.
+  }
+  for ($spkidx = 0; $spkidx < $numspks; $spkidx++) {
+    $scpidx = int(($spkidx*$numscps) / $numspks);
+    $spk = $spkrs[$spkidx];
+    push @{$scparray[$scpidx]}, $spk;
+    $scpcount[$scpidx] += $spk_count{$spk};
+  }
+
+  # Now we will try to reassign beginning + ending speakers
+  # to different scp's and see if it gets more balanced.
+  # Suppose the objective function we're minimizing is
+  # sum_i (num utts in scp[i] - average)^2.
+  # We can show that when considering changing just 2 scp's, we minimize
+  # this by minimizing the squared difference in sizes. This is
+  # equivalent to minimizing the absolute difference in sizes. This
+  # shows this method is bound to converge.
+
+  $changed = 1;
+  while($changed) {
+    $changed = 0;
+    for($scpidx = 0; $scpidx < $numscps; $scpidx++) {
+      # First try to reassign ending spk of this scp.
+      if($scpidx < $numscps-1) {
+        $sz = @{$scparray[$scpidx]};
+        if($sz > 0) {
+          $spk = $scparray[$scpidx]->[$sz-1];
+          $count = $spk_count{$spk};
+          $nutt1 = $scpcount[$scpidx];
+          $nutt2 = $scpcount[$scpidx+1];
+          if( abs( ($nutt2+$count) - ($nutt1-$count))
+              < abs($nutt2 - $nutt1)) { # Would decrease
+            # size-diff by reassigning spk...
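+            # e.g. with sizes 12 and 4 and a boundary speaker holding 3
+            # utterances (hypothetical numbers), the swap gives 9 and 7:
+            # |9-7| < |12-4|, so $changed is set and the loop repeats.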
+            $scpcount[$scpidx+1] += $count;
+            $scpcount[$scpidx] -= $count;
+            pop @{$scparray[$scpidx]};
+            unshift @{$scparray[$scpidx+1]}, $spk;
+            $changed = 1;
+          }
+        }
+      }
+      if($scpidx > 0 && @{$scparray[$scpidx]} > 0) {
+        $spk = $scparray[$scpidx]->[0];
+        $count = $spk_count{$spk};
+        $nutt1 = $scpcount[$scpidx-1];
+        $nutt2 = $scpcount[$scpidx];
+        if( abs( ($nutt2-$count) - ($nutt1+$count))
+            < abs($nutt2 - $nutt1)) { # Would decrease
+          # size-diff by reassigning spk...
+          $scpcount[$scpidx-1] += $count;
+          $scpcount[$scpidx] -= $count;
+          shift @{$scparray[$scpidx]};
+          push @{$scparray[$scpidx-1]}, $spk;
+          $changed = 1;
+        }
+      }
+    }
+  }
+  # Now print out the files...
+  for($scpidx = 0; $scpidx < $numscps; $scpidx++) {
+    $scpfn = $OUTPUTS[$scpidx];
+    open(F, ">$scpfn") || die "Could not open scp file $scpfn for writing.";
+    $count = 0;
+    if(@{$scparray[$scpidx]} == 0) {
+      print STDERR "Error: split_scp.pl producing empty .scp file $scpfn (too many splits and too few speakers?)\n";
+      $error = 1;
+    } else {
+      foreach $spk ( @{$scparray[$scpidx]} ) {
+        print F $spk_data{$spk};
+        $count += $spk_count{$spk};
+      }
+      if($count != $scpcount[$scpidx]) { die "Count mismatch [code error]"; }
+    }
+    close(F);
+  }
+} else {
+  # This block is the "normal" case where there is no --utt2spk
+  # option and we just break into equal-size chunks.
+
+  open(I, "<$inscp") || die "Opening input scp file $inscp";
+
+  $numscps = @OUTPUTS; # size of array.
+  @F = ();
+  while(<I>) {
+    push @F, $_;
+  }
+  $numlines = @F;
+  if($numlines == 0) {
+    print STDERR "split_scp.pl: error: empty input scp file $inscp";
+    $error = 1;
+  }
+  $linesperscp = int( $numlines / $numscps); # the "whole part"..
+  $linesperscp >= 1 || die "You are splitting into too many pieces!";
+  $remainder = $numlines - ($linesperscp * $numscps);
+  ($remainder >= 0 && $remainder < $numlines) || die "bad remainder $remainder";
+  # [just doing int() rounds down].
+  $n = 0;
+  for($scpidx = 0; $scpidx < @OUTPUTS; $scpidx++) {
+    $scpfile = $OUTPUTS[$scpidx];
+    open(O, ">$scpfile") || die "Opening output scp file $scpfile";
+    for($k = 0; $k < $linesperscp + ($scpidx < $remainder ? 1 : 0); $k++) {
+      print O $F[$n++];
+    }
+    close(O) || die "Closing scp file $scpfile";
+  }
+  $n == $numlines || die "split_scp.pl: code error: $n != $numlines";
+}
+
+exit ($error ? 1 : 0);
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/subset_data_dir.sh b/egs/kaldi-vystadial-recipe/s5/utils/subset_data_dir.sh
new file mode 100755
index 00000000000..6afb49481db
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/subset_data_dir.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# Copyright 2010-2012 Microsoft Corporation; Johns Hopkins University (Author: Daniel Povey)
+# Apache 2.0
+
+
+# This script operates on a directory, such as in data/train/,
+# that contains some subset of the following files:
+#  feats.scp
+#  wav.scp
+#  spk2utt
+#  utt2spk
+#  text
+# It creates a subset of that data, consisting of some specified
+# number of utterances. (The selected utterances are distributed
+# evenly throughout the file, by the program ./subset_scp.pl).
+
+
+# There are four options, mutually exclusive:
+
+# If you give the --per-spk option, it will attempt to select the supplied
+# number of utterances for each speaker (typically you would supply a much
+# smaller number in this case).
+
+# If you give the --shortest option, it will give you the n shortest utterances.
+
+# If you give the --first option it will just give you the n first utterances.
+
+# If you give the --last option it will just give you the n last utterances.
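+# Example invocations (data-dir names are hypothetical):
+#   utils/subset_data_dir.sh data/train 1000 data/train_1k
+#   utils/subset_data_dir.sh --per-spk data/train 10 data/train_10perspk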
+
+shortest=false
+perspk=false
+first_opt=""
+
+if [ "$1" == "--per-spk" ]; then
+  perspk=true;
+  shift;
+elif [ "$1" == "--shortest" ]; then
+  shortest=true;
+  shift;
+elif [ "$1" == "--first" ]; then
+  first_opt="--first";
+  shift;
+elif [ "$1" == "--last" ]; then
+  first_opt="--last";
+  shift;
+fi
+
+
+
+if [ $# != 3 ]; then
+  echo "Usage: subset_data_dir.sh [--per-spk|--shortest|--first|--last] <srcdir> <num-utt> <destdir>"
+  exit 1;
+fi
+
+srcdir=$1
+numutt=$2
+destdir=$3
+
+
+if [ ! -f $srcdir/utt2spk ]; then
+  echo "subset_data_dir.sh: no such file $srcdir/utt2spk"
+  exit 1;
+fi
+
+
+function do_filtering {
+  # assumes the utt2spk and spk2utt files already exist.
+  [ -f $srcdir/feats.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp
+  [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp
+  [ -f $srcdir/text ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text
+  [ -f $srcdir/spk2gender ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender
+  [ -f $srcdir/cmvn.scp ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp
+  if [ -f $srcdir/segments ]; then
+    utils/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments
+    awk '{print $2;}' $destdir/segments | sort | uniq > $destdir/reco # recordings.
+    # The next line overrides the wav.scp created above (filtered on utt2spk),
+    # which is what we want here, since with a segments file wav.scp is
+    # indexed by recording-id rather than by utterance.
+    [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp
+    [ -f $srcdir/reco2file_and_channel ] && \
+      utils/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel
+    rm $destdir/reco
+  fi
+  srcutts=`cat $srcdir/utt2spk | wc -l`
+  destutts=`cat $destdir/utt2spk | wc -l`
+  echo "Retained $numutt utterances per speaker from data-dir $srcdir and put it in $destdir, reducing #utt from $srcutts to $destutts"
+}
+
+
+## scripting note: $perspk evaluates to true or false
+## so this becomes the command true or false.
+if $perspk; then
+  mkdir -p $destdir
+  awk '{ n='$numutt'; printf("%s ",$1); skip=1; while(n*(skip+1) <= NF-1) { skip++; }
+         for(x=2; x<=NF && x <= n*skip; x += skip) { printf("%s ", $x); }
+         printf("\n"); }' <$srcdir/spk2utt >$destdir/spk2utt
+  utils/spk2utt_to_utt2spk.pl < $destdir/spk2utt > $destdir/utt2spk
+  do_filtering; # bash function.
+  exit 0;
+else
+  if [ $numutt -gt `cat $srcdir/feats.scp | wc -l` ]; then
+    echo "subset_data_dir.sh: cannot subset to more utterances than you originally had."
+    exit 1;
+  fi
+  mkdir -p $destdir || exit 1;
+
+  ## scripting note: $shortest evaluates to true or false
+  ## so this becomes the command true or false.
+  if $shortest; then
+    # select the n shortest utterances.
+    . ./path.sh
+    [ ! -f $srcdir/feats.scp ] && echo "$0: you selected --shortest but no feats.scp exists." && exit 1;
+    feat-to-len scp:$srcdir/feats.scp ark,t:$destdir/tmp.len || exit 1;
+    sort -n -k2 $destdir/tmp.len | awk '{print $1}' | head -$numutt >$destdir/tmp.uttlist
+    utils/filter_scp.pl $destdir/tmp.uttlist $srcdir/utt2spk >$destdir/utt2spk
+    rm $destdir/tmp.uttlist $destdir/tmp.len
+  else
+    utils/subset_scp.pl $first_opt $numutt $srcdir/utt2spk > $destdir/utt2spk || exit 1;
+  fi
+  utils/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt
+  do_filtering;
+  exit 0;
+fi
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/subset_scp.pl b/egs/kaldi-vystadial-recipe/s5/utils/subset_scp.pl
new file mode 100755
index 00000000000..2ee3b338027
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/subset_scp.pl
@@ -0,0 +1,84 @@
+#!/usr/bin/perl -w
+# Copyright 2010-2011 Microsoft Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# This program selects a subset of N elements in the scp.
+
+# By default, it selects them evenly from throughout the scp, in order to avoid
+# selecting too many from the same speaker. It prints them on the standard
+# output.
+# With the option --first, it just selects the N first utterances; with
+# --last, the N last ones.
+
+
+$first = 0;
+$last = 0;
+if ($ARGV[0] eq "--first") {
+  shift;
+  $first = 1;
+}
+if ($ARGV[0] eq "--last") {
+  shift;
+  $last = 1;
+}
+
+if(@ARGV < 2 ) {
+  die "Usage: subset_scp.pl [--first|--last] N in.scp ";
+}
+
+$N = shift @ARGV;
+if($N == 0) {
+  die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\"";
+}
+$inscp = shift @ARGV;
+open(I, "<$inscp") || die "Opening input scp file $inscp";
+
+@F = ();
+while(<I>) {
+  push @F, $_;
+}
+$numlines = @F;
+if($N > $numlines) {
+  die "You requested from subset_scp.pl more elements than available: $N > $numlines";
+}
+
+sub select_n {
+  my ($start,$end,$num_needed) = @_;
+  my $diff = $end - $start;
+  if($num_needed > $diff) { die "select_n: code error"; }
+  if($diff == 1 ) {
+    if($num_needed > 0) {
+      print $F[$start];
+    }
+  } else {
+    my $halfdiff = int($diff/2);
+    my $halfneeded = int($num_needed/2);
+    select_n($start, $start+$halfdiff, $halfneeded);
+    select_n($start+$halfdiff, $end, $num_needed - $halfneeded);
+  }
+}
+
+if ( ! $first && ! $last) {
+  select_n(0, $numlines, $N);
+} else {
+  if ($first) { # --first option: same as head.
+    for ($n = 0; $n < $N; $n++) {
+      print $F[$n];
+    }
+  } else { # --last option: same as tail.
+    for ($n = @F - $N; $n < @F; $n++) {
+      print $F[$n];
+    }
+  }
+}
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/summarize_warnings.pl b/egs/kaldi-vystadial-recipe/s5/utils/summarize_warnings.pl
new file mode 100755
index 00000000000..ccbeb4186b9
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/summarize_warnings.pl
@@ -0,0 +1,46 @@
+#!/usr/bin/perl
+
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
+
+@ARGV != 1 && print STDERR "Usage: summarize_warnings.pl <log-dir>\n" && exit 1;
+
+$dir = $ARGV[0];
+
+! -d $dir && print STDERR "summarize_warnings.pl: no such directory $dir\n" && exit 1;
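+# Typical use (the exp path is hypothetical):
+#   utils/summarize_warnings.pl exp/tri1/log
+# prints one "N warnings in <pattern>" line per family of log files.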
+
+$dir =~ s:/$::; # Remove trailing slash.
+
+
+# Group the files into categories where all have the same base-name.
+foreach $f (glob ("$dir/*.log")) {
+  $f_category = $f;
+  # do next expression twice; s///g doesn't work as they overlap.
+  $f_category =~ s:\.\d+\.:.*.:;
+  $f_category =~ s:\.\d+\.:.*.:;
+  $fmap{$f_category} .= " $f";
+}
+
+sub split_hundreds { # split list of filenames into groups of 100.
+  my $names = shift @_;
+  my @A = split(" ", $names);
+  my @ans = ();
+  while (@A > 0) {
+    my $group = "";
+    for ($x = 0; $x < 100 && @A>0; $x++) {
+      $fname = pop @A;
+      $group .= "$fname ";
+    }
+    push @ans, $group;
+  }
+  return @ans;
+}
+
+foreach $c (keys %fmap) {
+  $n = 0;
+  foreach $fgroup (split_hundreds($fmap{$c})) {
+    $n += `grep -w WARNING $fgroup | wc -l`;
+  }
+  if ($n != 0) {
+    print "$n warnings in $c\n"
+  }
+}
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/sym2int.pl b/egs/kaldi-vystadial-recipe/s5/utils/sym2int.pl
new file mode 100755
index 00000000000..f7334b7b4dd
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/sym2int.pl
@@ -0,0 +1,99 @@
+#!/usr/bin/perl
+# Copyright 2010-2012 Microsoft Corporation; Johns Hopkins University (Author: Daniel Povey)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+$ignore_oov = 0;
+$ignore_first_field = 0;
+for($x = 0; $x < 2; $x++) {
+  if ($ARGV[0] eq "--map-oov") {
+    shift @ARGV; $map_oov = shift @ARGV;
+  }
+  if ($ARGV[0] eq "-f") {
+    shift @ARGV;
+    $field_spec = shift @ARGV;
+    if ($field_spec =~ m/^\d+$/) {
+      $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
+    }
+    if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
+      if ($1 ne "") {
+        $field_begin = $1 - 1; # Change to zero-based indexing.
+      }
+      if ($2 ne "") {
+        $field_end = $2 - 1; # Change to zero-based indexing.
+      }
+    }
+    if (!defined $field_begin && !defined $field_end) {
+      die "Bad argument to -f option: $field_spec";
+    }
+  }
+}
+
+$symtab = shift @ARGV;
+if (!defined $symtab) {
+  print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" .
+    "options: [--map-oov <oov-symbol>] [-f <field-range>]\n" .
+    "note: <field-range> can look like 4-5, or 4-, or 5-, or 1.\n";
+  exit(1);
+}
+open(F, "<$symtab") || die "Error opening symbol table file $symtab";
+while(<F>) {
+  @A = split(" ", $_);
+  @A == 2 || die "bad line in symbol table file: $_";
+  $sym2int{$A[0]} = $A[1] + 0;
+}
+
+if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric -> look it up
+  if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; }
+  $map_oov = $sym2int{$map_oov};
+}
+
+$num_warning = 0;
+# $max_warning = 20; # Original
+$max_warning = 2000; # Ondra Changed
+
+while (<>) {
+  @A = split(" ", $_);
+  if (@A == 0) {
+    die "Empty line in transcriptions input.";
+  }
+  @B = ();
+  for ($n = 0; $n < @A; $n++) {
+    $a = $A[$n];
+    if ( (!defined $field_begin || $n >= $field_begin)
+         && (!defined $field_end || $n <= $field_end)) {
+      $i = $sym2int{$a};
+      if (!defined ($i)) {
+        if (defined $map_oov) {
+          if ($num_warning++ < $max_warning) {
+            print STDERR "sym2int.pl: replacing $a with $map_oov\n";
+            if ($num_warning == $max_warning) {
+              print STDERR "sym2int.pl: not warning for OOVs any more times\n";
+            }
+          }
+          $i = $map_oov;
+        } else {
+          $pos = $n+1;
+          die "sym2int.pl: undefined symbol $a (in position $pos)\n";
+        }
+      }
+      $a = $i;
+    }
+    push @B, $a;
+  }
+  print join(" ", @B);
+  print "\n";
+}
+
+exit(0);
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/utt2spk_to_spk2utt.pl b/egs/kaldi-vystadial-recipe/s5/utils/utt2spk_to_spk2utt.pl
new file mode 100755
index 00000000000..0c9e6417c82
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/utt2spk_to_spk2utt.pl
@@ -0,0 +1,39 @@
+#!/usr/bin/perl
+# Copyright 2010-2011 Microsoft Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# Converts an utt2spk file to a spk2utt file.
+# Takes input from stdin or from a file argument;
+# output goes to standard out.
+
+if ( @ARGV > 1 ) {
+  die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt";
+}
+
+while(<>){
+  @A = split(" ", $_);
+  @A == 2 || die "Invalid line in utt2spk file: $_";
+  ($u,$s) = @A;
+  if(!$seen_spk{$s}) {
+    $seen_spk{$s} = 1;
+    push @spklist, $s;
+  }
+  $uttlist{$s} = $uttlist{$s} . "$u ";
+}
+foreach $s (@spklist) {
+  $l = $uttlist{$s};
+  $l =~ s: $::; # remove trailing space.
+ print "$s $l\n"; +} diff --git a/egs/kaldi-vystadial-recipe/s5/utils/validate_dict_dir.pl b/egs/kaldi-vystadial-recipe/s5/utils/validate_dict_dir.pl new file mode 100755 index 00000000000..7654e8ffcdb --- /dev/null +++ b/egs/kaldi-vystadial-recipe/s5/utils/validate_dict_dir.pl @@ -0,0 +1,142 @@ +#!/usr/bin/perl + +# Guoguo Chen (guoguo@jhu.edu) +# +# Validation script for data/local/dict + +if(@ARGV != 1) { + die "Usage: validate_dict_dir.pl dict_directory\n"; +} + +$dict = shift @ARGV; + +$exit = 0; +# Checking silence_phones.txt ------------------------------- +print "Checking $dict/silence_phones.txt ...\n"; +if(-z "$dict/silence_phones.txt") {print "--> ERROR: $dict/silence_phones.txt is empty or not exists\n"; exit 1;} +if(!open(S, "<$dict/silence_phones.txt")) {print "--> ERROR: fail to open $dict/silence_phones.txt\n"; exit 1;} +$idx = 1; +%silence = (); +$success = 1; +print "--> reading $dict/silence_phones.txt\n"; +while() { + chomp; + my @col = split(" ", $_); + foreach(0 .. @col-1) { + if($silence{@col[$_]}) {$exit = 1; print "--> ERROR: phone \"@col[$_]\" duplicates in $dict/silence_phones.txt (line $idx)\n"; $success = 0;} + else {$silence{@col[$_]} = 1;} + } + $idx ++; +} +close(S); +$success == 0 || print "--> $dict/silence_phones.txt is OK\n"; +print "\n"; + +# Checking optional_silence.txt ------------------------------- +print "Checking $dict/optional_silence.txt ...\n"; +if(-z "$dict/optional_silence.txt") {print "--> ERROR: $dict/optional_silence.txt is empty or not exists\n"; exit 1;} +if(!open(OS, "<$dict/optional_silence.txt")) {print "--> ERROR: fail to open $dict/optional_silence.txt\n"; exit 1;} +$idx = 1; +$success = 1; +print "--> reading $dict/optional_silence.txt\n"; +while() { + chomp; + my @col = split(" ", $_); + if ($idx > 1 or @col > 1) { + $exit = 1; print "--> ERROR: only 1 phone expected in $dict/optional_silence.txt\n"; $success = 0; + } elsif (!$silence{$col[0]}) { + $exit = 1; print "--> ERROR: phone $col[0] not found in $dict/silence_phones.txt\n"; $success = 0; + } + $idx ++; +} +close(OS); +$success == 0 || print "--> $dict/optional_silence.txt is OK\n"; +print "\n"; + +# Checking nonsilence_phones.txt ------------------------------- +print "Checking $dict/nonsilence_phones.txt ...\n"; +if(-z "$dict/nonsilence_phones.txt") {print "--> ERROR: $dict/nonsilence_phones.txt is empty or not exists\n"; exit 1;} +if(!open(NS, "<$dict/nonsilence_phones.txt")) {print "--> ERROR: fail to open $dict/nonsilence_phones.txt\n"; exit 1;} +$idx = 1; +%nonsilence = (); +$success = 1; +print "--> reading $dict/nonsilence_phones.txt\n"; +while() { + chomp; + my @col = split(" ", $_); + foreach(0 .. 
+    if($nonsilence{@col[$_]}) {$exit = 1; print "--> ERROR: phone \"@col[$_]\" duplicates in $dict/nonsilence_phones.txt (line $idx)\n"; $success = 0;}
+    else {$nonsilence{@col[$_]} = 1;}
+  }
+  $idx ++;
+}
+close(NS);
+$success == 0 || print "--> $dict/nonsilence_phones.txt is OK\n";
+print "\n";
+
+# Checking disjoint -------------------------------
+sub intersect {
+  my ($a, $b) = @_;
+  @itset = ();
+  %itset = ();
+  foreach(keys %$a) {
+    if(exists $b->{$_} and !$itset{$_}) {
+      push(@itset, $_);
+      $itset{$_} = 1;
+    }
+  }
+  return @itset;
+}
+
+print "Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n";
+@itset = intersect(\%silence, \%nonsilence);
+if(@itset == 0) {print "--> disjoint property is OK.\n";}
+else {$exit = 1; print "--> ERROR: silence_phones.txt and nonsilence_phones.txt have overlap: "; foreach(@itset) {print "$_ ";} print "\n";}
+print "\n";
+
+# Checking lexicon.txt -------------------------------
+print "Checking $dict/lexicon.txt\n";
+if(-z "$dict/lexicon.txt") {$exit = 1; print "--> ERROR: $dict/lexicon.txt is empty or does not exist\n";}
+if(!open(L, "<$dict/lexicon.txt")) {$exit = 1; print "--> ERROR: failed to open $dict/lexicon.txt\n";}
+$idx = 1;
+$success = 1;
+print "--> reading $dict/lexicon.txt\n";
+while(<L>) {
+  chomp;
+  my @col = split(" ", $_);
+  $word = shift @col;
+  foreach(0 .. @col-1) {
+    if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) {
+      $exit = 1; print "--> ERROR: phone \"@col[$_]\" is not in {,non}silence_phones.txt (line $idx)\n";
+      $success = 0;
+    }
+  }
+  $idx ++;
+}
+close(L);
+$success == 0 || print "--> $dict/lexicon.txt is OK\n";
+print "\n";
+
+# Checking extra_questions.txt -------------------------------
+print "Checking $dict/extra_questions.txt ...\n";
+if(-s "$dict/extra_questions.txt") {
+  if(!open(EX, "<$dict/extra_questions.txt")) {$exit = 1; print "--> ERROR: failed to open $dict/extra_questions.txt\n";}
+  $idx = 1;
+  $success = 1;
+  print "--> reading $dict/extra_questions.txt\n";
+  while(<EX>) {
+    chomp;
+    my @col = split(" ", $_);
+    foreach(0 .. @col-1) {
+      if(!$silence{@col[$_]} and !$nonsilence{@col[$_]}) {
+        $exit = 1; print "--> ERROR: phone \"@col[$_]\" is not in {,non}silence_phones.txt (line $idx, block ", $_+1, ")\n";
+        $success = 0;
+      }
+    }
+    $idx ++;
+  }
+  close(EX);
+  $success == 0 || print "--> $dict/extra_questions.txt is OK\n";
+} else {print "--> $dict/extra_questions.txt is empty\n";}
+
+if($exit == 1) {exit 1;}
diff --git a/egs/kaldi-vystadial-recipe/s5/utils/validate_lang.pl b/egs/kaldi-vystadial-recipe/s5/utils/validate_lang.pl
new file mode 100755
index 00000000000..ab74dbda983
--- /dev/null
+++ b/egs/kaldi-vystadial-recipe/s5/utils/validate_lang.pl
@@ -0,0 +1,501 @@
+#!/usr/bin/perl
+
+# Guoguo Chen (guoguo@jhu.edu)
+#
+# Validation script for data/lang
+
+if(@ARGV != 1) {
+  die "Usage: validate_lang.pl lang_directory\n";
+}
+
+$lang = shift @ARGV;
+$exit = 0;
+# Checking phones.txt -------------------------------
+print "Checking $lang/phones.txt ...\n";
+if(-z "$lang/phones.txt") {print "--> ERROR: $lang/phones.txt is empty or does not exist\n"; exit 1;}
+if(!open(P, "<$lang/phones.txt")) {print "--> ERROR: failed to open $lang/phones.txt\n"; exit 1;}
+$idx = 1;
+%psymtab = ();
+while(<P>) {
+  chomp;
+  my @col = split(" ", $_);
+  if(@col != 2) {print "--> ERROR: expect 2 columns in $lang/phones.txt (break at line $idx)\n"; exit 1;}
+  my $phone = shift @col;
+  my $id = shift @col;
+  $psymtab{$phone} = $id;
+  $idx ++;
+}
+close(P);
+%pint2sym = ();
+foreach(keys %psymtab) {
+  if($pint2sym{$psymtab{$_}}) {print "--> ERROR: ID \"$psymtab{$_}\" duplicates\n"; exit 1;}
+  else {$pint2sym{$psymtab{$_}} = $_;}
+}
+print "--> $lang/phones.txt is OK\n";
+print "\n";
+
+# Checking words.txt -------------------------------
+print "Checking words.txt: #0 ...\n";
+if(-z "$lang/words.txt") {print "--> ERROR: $lang/words.txt is empty or does not exist\n"; exit 1;}
+if(!open(W, "<$lang/words.txt")) {print "--> ERROR: failed to open $lang/words.txt\n"; exit 1;}
+$idx = 1;
+%wsymtab = ();
+while(<W>) {
+  chomp;
+  my @col = split(" ", $_);
+  if(@col != 2) {print "--> ERROR: expect 2 columns in $lang/words.txt (line $idx)\n"; exit 1;}
+  $word = shift @col;
+  $id = shift @col;
+  $wsymtab{$word} = $id;
+  $idx ++;
+}
+close(W);
+%wint2sym = ();
+foreach(keys %wsymtab) {
+  if($wint2sym{$wsymtab{$_}}) {print "--> ERROR: ID \"$wsymtab{$_}\" duplicates\n"; exit 1;}
+  else {$wint2sym{$wsymtab{$_}} = $_;}
+}
+if(exists $wsymtab{"#0"}) {
+  print "--> $lang/words.txt has \"#0\"\n";
+  print "--> $lang/words.txt is OK\n";
+} else {print "--> ERROR: $lang/words.txt doesn't have \"#0\"\n"; $exit = 1;}
+print "\n";
+
+# Checking phones/* -------------------------------
+sub check_txt_int_csl {
+  my ($cat, $symtab) = @_;
+  print "Checking $cat.\{txt, int, csl\} ...\n";
+  if(-z "$cat.txt") {$exit = 1; return print "--> ERROR: $cat.txt is empty or does not exist\n";}
+  if(-z "$cat.int") {$exit = 1; return print "--> ERROR: $cat.int is empty or does not exist\n";}
+  if(-z "$cat.csl") {$exit = 1; return print "--> ERROR: $cat.csl is empty or does not exist\n";}
+  if(!open(TXT, "<$cat.txt")) {$exit = 1; return print "--> ERROR: failed to open $cat.txt\n";}
+  if(!open(INT, "<$cat.int")) {$exit = 1; return print "--> ERROR: failed to open $cat.int\n";}
+  if(!open(CSL, "<$cat.csl")) {$exit = 1; return print "--> ERROR: failed to open $cat.csl\n";}
+
+  $idx1 = 1;
+  while(<TXT>) {
+    chomp;
+    my @col = split(" ", $_);
+    if(@col != 1) {$exit = 1; return print "--> ERROR: expect 1 column in $cat.txt (break at line $idx1)\n";}
+    $entry[$idx1] = shift @col;
+    $idx1 ++;
+  }
+  close(TXT); $idx1 --;
+  print "--> $idx1 entry/entries in $cat.txt\n";
+
+  $idx2 = 1;
+  while(<INT>) {
+    chomp;
+    my @col = split(" ", $_);
+    if(@col != 1) {$exit = 1; return print "--> ERROR: expect 1 column in $cat.int (break at line $idx2)\n";}
+    if($symtab->{$entry[$idx2]} ne shift @col) {$exit = 1; return print "--> ERROR: $cat.int doesn't correspond to $cat.txt (break at line $idx2)\n";}
+    $idx2 ++;
+  }
+  close(INT); $idx2 --;
+  if($idx1 != $idx2) {$exit = 1; return print "--> ERROR: $cat.int doesn't correspond to $cat.txt (break at line ", $idx2+1, ")\n";}
+  print "--> $cat.int corresponds to $cat.txt\n";
+
+  $idx3 = 1;
+  while(<CSL>) {
+    chomp;
+    my @col = split(":", $_);
+    if(@col != $idx1) {$exit = 1; return print "--> ERROR: expect $idx1 block/blocks in $cat.csl (break at line $idx3)\n";}
+    foreach(1 .. $idx1) {
+      if($symtab->{$entry[$_]} ne @col[$_-1]) {$exit = 1; return print "--> ERROR: $cat.csl doesn't correspond to $cat.txt (break at line $idx3, block $_)\n";}
+    }
+    $idx3 ++;
+  }
+  close(CSL); $idx3 --;
+  if($idx3 != 1) {$exit = 1; return print "--> ERROR: expect 1 row in $cat.csl (break at line ", $idx3+1, ")\n";}
+  print "--> $cat.csl corresponds to $cat.txt\n";
+
+  return print "--> $cat.\{txt, int, csl\} are OK\n";
+}
+
+sub check_txt_int {
+  my ($cat, $symtab) = @_;
+  print "Checking $cat.\{txt, int\} ...\n";
+  if(-z "$cat.txt") {$exit = 1; return print "--> ERROR: $cat.txt is empty or does not exist\n";}
+  if(-z "$cat.int") {$exit = 1; return print "--> ERROR: $cat.int is empty or does not exist\n";}
+  if(!open(TXT, "<$cat.txt")) {$exit = 1; return print "--> ERROR: failed to open $cat.txt\n";}
+  if(!open(INT, "<$cat.int")) {$exit = 1; return print "--> ERROR: failed to open $cat.int\n";}
+
+  $idx1 = 1;
+  while(<TXT>) {
+    chomp;
+    s/^(shared|not-shared) (split|not-split) //g;
+    s/ nonword$//g;
+    s/ begin$//g;
+    s/ end$//g;
+    s/ internal$//g;
+    s/ singleton$//g;
+    $entry[$idx1] = $_;
+    $idx1 ++;
+  }
+  close(TXT); $idx1 --;
+  print "--> $idx1 entry/entries in $cat.txt\n";
+
+  $idx2 = 1;
+  while(<INT>) {
+    chomp;
+    s/^(shared|not-shared) (split|not-split) //g;
+    s/ nonword$//g;
+    s/ begin$//g;
+    s/ end$//g;
+    s/ internal$//g;
+    s/ singleton$//g;
+    my @col = split(" ", $_);
+    @set = split(" ", $entry[$idx2]);
+    if(@set != @col) {$exit = 1; return print "--> ERROR: $cat.int doesn't correspond to $cat.txt (break at line $idx2)\n";}
+    foreach(0 .. @set-1) {
+      if($symtab->{@set[$_]} ne @col[$_]) {$exit = 1; return print "--> ERROR: $cat.int doesn't correspond to $cat.txt (break at line $idx2, block ", $_+1, ")\n";}
+    }
+    $idx2 ++;
+  }
+  close(INT); $idx2 --;
+  if($idx1 != $idx2) {$exit = 1; return print "--> ERROR: $cat.int doesn't correspond to $cat.txt (break at line ", $idx2+1, ")\n";}
+  print "--> $cat.int corresponds to $cat.txt\n";
+
+  return print "--> $cat.\{txt, int\} are OK\n";
+}
+
+@list1 = ("context_indep", "disambig", "nonsilence", "silence", "optional_silence");
+@list2 = ("extra_questions", "roots", "sets");
+foreach(@list1) {
+  check_txt_int_csl("$lang/phones/$_", \%psymtab); print "\n";
+}
+foreach(@list2) {
+  check_txt_int("$lang/phones/$_", \%psymtab); print "\n";
+}
+if(-e "$lang/phones/word_boundary.txt") {
+  check_txt_int("$lang/phones/word_boundary", \%psymtab); print "\n";
+}
+
+# Check disjoint and summation -------------------------------
+sub intersect {
+  my ($a, $b) = @_;
+  @itset = ();
+  %itset = ();
+  foreach(keys %$a) {
+    if(exists $b->{$_} and !$itset{$_}) {
+      push(@itset, $_);
+      $itset{$_} = 1;
+    }
+  }
+  return @itset;
+}
+
+sub check_disjoint {
+  print "Checking disjoint: silence.txt, nonsilence.txt, disambig.txt ...\n";
+  if(!open(S, "<$lang/phones/silence.txt")) {$exit = 1; return print "--> ERROR: failed to open $lang/phones/silence.txt\n";}
+  if(!open(N, "<$lang/phones/nonsilence.txt")) {$exit = 1; return print "--> ERROR: failed to open $lang/phones/nonsilence.txt\n";}
+  if(!open(D, "<$lang/phones/disambig.txt")) {$exit = 1; return print "--> ERROR: failed to open $lang/phones/disambig.txt\n";}
+
+  $idx = 1;
+  while(<S>) {
+    chomp;
+    my @col = split(" ", $_);
+    $phone = shift @col;
+    if($silence{$phone}) {$exit = 1; print "--> ERROR: phone \"$phone\" duplicates in $lang/phones/silence.txt (line $idx)\n";}
+    $silence{$phone} = 1;
+    push(@silence, $phone);
+    $idx ++;
+  }
+  close(S);
+
+  $idx = 1;
+  while(<N>) {
+    chomp;
+    my @col = split(" ", $_);
+    $phone = shift @col;
+    if($nonsilence{$phone}) {$exit = 1; print "--> ERROR: phone \"$phone\" duplicates in $lang/phones/nonsilence.txt (line $idx)\n";}
+    $nonsilence{$phone} = 1;
+    push(@nonsilence, $phone);
+    $idx ++;
+  }
+  close(N);
+
+  $idx = 1;
+  while(<D>) {
+    chomp;
+    my @col = split(" ", $_);
+    $phone = shift @col;
+    if($disambig{$phone}) {$exit = 1; print "--> ERROR: phone \"$phone\" duplicates in $lang/phones/disambig.txt (line $idx)\n";}
+    $disambig{$phone} = 1;
+    $idx ++;
+  }
+  close(D);
+
+  my @itsect1 = intersect(\%silence, \%nonsilence);
+  my @itsect2 = intersect(\%silence, \%disambig);
+  my @itsect3 = intersect(\%disambig, \%nonsilence);
+
+  $success = 1;
+  if(@itsect1 != 0) {
+    $success = 0;
+    $exit = 1; print "--> ERROR: silence.txt and nonsilence.txt have intersection -- ";
+    foreach(@itsect1) {
+      print $_, " ";
+    }
+    print "\n";
+  } else {print "--> silence.txt and nonsilence.txt are disjoint\n";}
+
+  if(@itsect2 != 0) {
+    $success = 0;
+    $exit = 1; print "--> ERROR: silence.txt and disambig.txt have intersection -- ";
+    foreach(@itsect2) {
+      print $_, " ";
+    }
+    print "\n";
+  } else {print "--> silence.txt and disambig.txt are disjoint\n";}
+
+  if(@itsect3 != 0) {
+    $success = 0;
+    $exit = 1; print "--> ERROR: disambig.txt and nonsilence.txt have intersection -- ";
+    foreach(@itsect3) {
+      print $_, " ";
+    }
+    print "\n";
+  } else {print "--> disambig.txt and nonsilence.txt are disjoint\n";}
+
+  $success == 0 || print "--> disjoint property is OK\n";
+  return;
+}
+
+sub check_summation {
+  print "Checking summation: silence.txt, nonsilence.txt, disambig.txt ...\n";
+  if(scalar(keys %silence) == 0) {$exit = 1; return print "--> ERROR: $lang/phones/silence.txt is empty or does not exist\n";}
+  if(scalar(keys %nonsilence) == 0) {$exit = 1; return print "--> ERROR: $lang/phones/nonsilence.txt is empty or does not exist\n";}
+  if(scalar(keys %disambig) == 0) {$exit = 1; return print "--> ERROR: $lang/phones/disambig.txt is empty or does not exist\n";}
+
+  %sum = (%silence, %nonsilence, %disambig);
+  $sum{"<eps>"} = 1;
+
+  my @itset = intersect(\%sum, \%psymtab);
+  my @key1 = keys %sum;
+  my @key2 = keys %psymtab;
+  my %itset = (); foreach(@itset) {$itset{$_} = 1;}
+  if(@itset < @key1) {
+    $exit = 1; print "--> ERROR: phones in silence.txt, nonsilence.txt, disambig.txt but not in phones.txt -- ";
+    foreach(@key1) {
+      if(!$itset{$_}) {print "$_ ";}
+    }
+    print "\n";
+  }
+
+  if(@itset < @key2) {
+    $exit = 1; print "--> ERROR: phones in phones.txt but not in silence.txt, nonsilence.txt, disambig.txt -- ";
+    foreach(@key2) {
+      if(!$itset{$_}) {print "$_ ";}
+    }
+    print "\n";
+  }
+
+  if(@itset == @key1 and @itset == @key2) {
+    print "--> summation property is OK\n";
+  }
+  return;
+}
+
+%silence = ();
+@silence = ();
+%nonsilence = ();
+@nonsilence = ();
+%disambig = ();
+check_disjoint; print "\n";
+check_summation; print "\n";
+
+# Checking optional_silence.txt -------------------------------
+print "Checking optional_silence.txt ...\n";
+$idx = 1;
+$success = 1;
+if(-z "$lang/phones/optional_silence.txt") {$exit = 1; $success = 0; print "--> ERROR: $lang/phones/optional_silence.txt is empty or does not exist\n";}
+if(!open(OS, "<$lang/phones/optional_silence.txt")) {$exit = 1; $success = 0; print "--> ERROR: failed to open $lang/phones/optional_silence.txt\n";}
+print "--> reading $lang/phones/optional_silence.txt\n";
+while(<OS>) {
+  chomp;
+  my @col = split(" ", $_);
+  if ($idx > 1 or @col > 1) {
+    $exit = 1; print "--> ERROR: only 1 phone expected in $lang/phones/optional_silence.txt\n"; $success = 0;
+  } elsif (!$silence{$col[0]}) {
+    $exit = 1; print "--> ERROR: phone $col[0] not found in $lang/phones/silence.txt\n"; $success = 0;
+  }
+  $idx ++;
+}
+close(OS);
+$success == 0 || print "--> $lang/phones/optional_silence.txt is OK\n";
+print "\n";
+
+# Check disambiguation symbols -------------------------------
+print "Checking disambiguation symbols: #0 and #1\n";
+if(scalar(keys %disambig) == 0) {$exit = 1; print "--> ERROR: $lang/phones/disambig.txt is empty or does not exist\n";}
+if(exists $disambig{"#0"} and exists $disambig{"#1"}) {
+  print "--> $lang/phones/disambig.txt has \"#0\" and \"#1\"\n";
+  print "--> $lang/phones/disambig.txt is OK\n\n";
+} else {
+  $exit = 1; print "--> ERROR: $lang/phones/disambig.txt doesn't have \"#0\" or \"#1\"\n";
+}
+
+
+# Check topo -------------------------------
+print "Checking topo ...\n";
+if(-z "$lang/topo") {$exit = 1; print "--> ERROR: $lang/topo is empty or does not exist\n";}
+if(!open(T, "<$lang/topo")) {$exit = 1; print "--> ERROR: failed to open $lang/topo\n";}
+$idx = 1;
+while(<T>) {
+  chomp;
+  next if(m/^<.*>[ ]*$/);
+  if($idx == 1) {$nonsilence_seq = $_; $idx ++;}
+  if($idx == 2) {$silence_seq = $_;}
+}
+close(T);
+if($silence_seq == 0 || $nonsilence_seq == 0) {$exit = 1; print "--> ERROR: $lang/topo doesn't have nonsilence section or silence section\n";}
+@silence_seq = split(" ", $silence_seq);
+@nonsilence_seq = split(" ", $nonsilence_seq);
+$success1 = 1;
+if(@nonsilence_seq != @nonsilence) {$exit = 1; print "--> ERROR: $lang/topo's nonsilence section doesn't correspond to nonsilence.txt\n";}
+else {
+  foreach(0 .. scalar(@nonsilence)-1) {
+    if($psymtab{@nonsilence[$_]} ne @nonsilence_seq[$_]) {
+      $exit = 1; print "--> ERROR: $lang/topo's nonsilence section doesn't correspond to nonsilence.txt\n";
+      $success1 = 0;
+    }
+  }
+}
+$success1 != 1 || print "--> $lang/topo's nonsilence section is OK\n";
+$success2 = 1;
+if(@silence_seq != @silence) {$exit = 1; print "--> ERROR: $lang/topo's silence section doesn't correspond to silence.txt\n";}
+else {
+  foreach(0 .. scalar(@silence)-1) {
+    if($psymtab{@silence[$_]} ne @silence_seq[$_]) {
+      $exit = 1; print "--> ERROR: $lang/topo's silence section doesn't correspond to silence.txt\n";
+      $success2 = 0;
+    }
+  }
+}
+$success2 != 1 || print "--> $lang/topo's silence section is OK\n";
+$success1 != 1 or $success2 != 1 || print "--> $lang/topo is OK\n";
+print "\n";
+
+# Check word_boundary -------------------------------
+$nonword = "";
+$begin = "";
+$end = "";
+$internal = "";
+$singleton = "";
+if(-s "$lang/phones/word_boundary.txt") {
+  print "Checking word_boundary.txt: silence.txt, nonsilence.txt, disambig.txt ...\n";
+  if(!open (W, "<$lang/phones/word_boundary.txt")) {$exit = 1; print "--> ERROR: failed to open $lang/phones/word_boundary.txt\n";}
+  $idx = 1;
+  %wb = ();
+  while(<W>) {
+    chomp;
+    my @col;
+    if (m/^.*nonword$/  ) {s/ nonword//g;    @col = split(" ", $_); if (@col == 1) {$nonword .= "$col[0] ";}}
+    if (m/^.*begin$/    ) {s/ begin$//g;     @col = split(" ", $_); if (@col == 1) {$begin .= "$col[0] ";}}
+    if (m/^.*end$/      ) {s/ end$//g;       @col = split(" ", $_); if (@col == 1) {$end .= "$col[0] ";}}
+    if (m/^.*internal$/ ) {s/ internal$//g;  @col = split(" ", $_); if (@col == 1) {$internal .= "$col[0] ";}}
+    if (m/^.*singleton$/) {s/ singleton$//g; @col = split(" ", $_); if (@col == 1) {$singleton .= "$col[0] ";}}
+    if(@col != 1) {$exit = 1; print "--> ERROR: expect 1 column in $lang/phones/word_boundary.txt (line $idx)\n";}
+    $wb{shift @col} = 1;
+    $idx ++;
+  }
+  close(W);
+
+  @itset = intersect(\%disambig, \%wb);
+  $success1 = 1;
+  if(@itset != 0) {
+    $success1 = 0;
+    $exit = 1; print "--> ERROR: $lang/phones/word_boundary.txt has disambiguation symbols -- ";
+    foreach(@itset) {print "$_ ";}
+    print "\n";
+  }
+  $success1 == 0 || print "--> $lang/phones/word_boundary.txt doesn't include disambiguation symbols\n";
+
+  %sum = (%silence, %nonsilence);
+  @itset = intersect(\%sum, \%wb);
+  %itset = (); foreach(@itset) {$itset{$_} = 1;}
+  $success2 = 1;
+  if(@itset < scalar(keys %sum)) {
+    $success2 = 0;
+    $exit = 1; print "--> ERROR: phones in nonsilence.txt and silence.txt but not in word_boundary.txt -- ";
+    foreach(keys %sum) {
+      if(!$itset{$_}) {print "$_ ";}
+    }
+    print "\n";
+  }
+  if(@itset < scalar(keys %wb)) {
+    $success2 = 0;
+    $exit = 1; print "--> ERROR: phones in word_boundary.txt but not in nonsilence.txt or silence.txt -- ";
+    foreach(keys %wb) {
+      if(!$itset{$_}) {print "$_ ";}
+    }
+    print "\n";
+  }
+  $success2 == 0 || print "--> $lang/phones/word_boundary.txt is the union of nonsilence.txt and silence.txt\n";
+  $success1 != 1 or $success2 != 1 || print "--> $lang/phones/word_boundary.txt is OK\n";
+
+
+  # Check L.fst -------------------------------
+  print "--> checking L.fst and L_disambig.fst...\n";
+  $nonword =~ s/ $//g;
+  $nonword =~ s/ / |/g;
+  $begin =~ s/ $//g;
+  $begin =~ s/ / |/g;
+  $end =~ s/ $//g;
+  $end =~ s/ / |/g;
+  $internal =~ s/ $//g;
+  $internal =~ s/ / |/g;
+  $singleton =~ s/ $//g;
+  $singleton =~ s/ / |/g;
+
+  # Now handle the escape characters
+  foreach $esc(("^", "\$", "(", ")", "/", "@", "[", "]", "{", "}", "?", ".", "+", "*")) {
+    $tmp = "\\" . $esc;
+    $nonword =~ s/$tmp/\\$esc/g;
+    $begin =~ s/$tmp/\\$esc/g;
+    $end =~ s/$tmp/\\$esc/g;
+    $internal =~ s/$tmp/\\$esc/g;
+    $singleton =~ s/$tmp/\\$esc/g;
+  }
+
+  $wlen = int(rand(100)) + 1;
+  print "--> generating a $wlen-word sequence\n";
+  $wordseq = "";
+  $sid = 0;
+  foreach(1 ..
$wlen) { + $id = int(rand(scalar(%wint2sym))); + while($wint2sym{$id} =~ m/^#[0-9]*$/ or $id == 0) {$id = int(rand(scalar(%wint2sym)));} + $wordseq = $wordseq . "$sid ". ($sid + 1) . " $id $id 0\n"; + $sid ++; + } + $wordseq = $wordseq . "$sid 0"; + $phoneseq = `echo \"$wordseq" | fstcompile > tmp.fst; fstcompose $lang/L.fst tmp.fst | fstproject | fstrandgen | fstrmepsilon | fsttopsort | fstprint --isymbols=$lang/phones.txt --osymbols=$lang/phones.txt | awk '{if(NF > 2) {print \$3}}'; rm tmp.fst`; + $phoneseq =~ s/\s/ /g; + $phoneseq =~ m/^($nonword )*(((($begin )($internal )*($end ))|($singleton ))($nonword )*){$wlen}$/; + if(length($2) == 0) { + $exit = 1; print "--> ERROR: resulting phone sequence from L.fst doesn't correspond to the word sequence; check L.log.fst\n"; + open(LOG, ">L.log.fst"); print LOG $wordseq; close(LOG); + } else { + print "--> resulting phone sequence from L.fst corresponds to the word sequence\n"; + print "--> L.fst is OK\n"; + } + + $phoneseq = `echo \"$wordseq" | fstcompile > tmp.fst; fstcompose $lang/L_disambig.fst tmp.fst | fstproject | fstrandgen | fstrmepsilon | fsttopsort | fstprint --isymbols=$lang/phones.txt --osymbols=$lang/phones.txt | awk '{if(NF > 2) {print \$3}}'; rm tmp.fst`; + $phoneseq =~ s/\s/ /g; + $phoneseq =~ m/^(($nonword )(#[0-9]* )*)*(((($begin )($internal )*($end ))|($singleton ))(#[0-9]* )*(($nonword )(#[0-9]* )*)*){$wlen}$/; + if(length($4) == 0) { + $exit = 1; print "--> ERROR: resulting phone sequence from L_disambig.fst doesn't correspond to the word sequence; check L_disambig.log.fst\n"; + open(LOG, ">L_disambig.log.fst"); print LOG $wordseq; close(LOG); + } else { + print "--> resulting phone sequence from L_disambig.fst corresponds to the word sequence\n"; + print "--> L_disambig.fst is OK\n"; + } + print "\n"; +} + +# Check oov ------------------------------- +check_txt_int("$lang/oov", \%wsymtab); print "\n"; + + +if ($exit == 1) {exit 1;} diff --git a/egs/voxforge/online_demo/.gitignore b/egs/voxforge/online_demo/.gitignore new file mode 100644 index 00000000000..893758f420d --- /dev/null +++ b/egs/voxforge/online_demo/.gitignore @@ -0,0 +1,9 @@ +.gitignore +online-data +online-data.tar.bz2 +online-data_original/ +online-data_voxforge.zip +online-data_voxforge/ +online-data_vystadial.zip +online-data_vystadial/ +work/ diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 00000000000..573171ce2b9 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,115 @@ +*.o +*.a +*.fst +*.scp +*.tmp +*.gz +tmpf +bin/build-pfile-from-ali +bin/copy-post +bin/duplicate-matrix +bin/extract-ctx +bin/get-post-on-ali +bin/latgen-tracking-mapped +bin/logprob-to-post +bin/matrix-logprob +bin/matrix-sum +bin/pdf-to-counts +bin/post-to-pdf-post +bin/post-to-phone-post +bin/prob-to-post +featbin/copy-feats-to-htk +featbin/interpolate-pitch +featbin/paste-feats +featbin/process-pitch-feats +featbin/select-feats +featbin/subsample-feats +fstbin/fstpushspecial +fstbin/fstrhocompose +fstbin/fsts-to-transcripts +gmmbin/gmm-adapt-map +gmmbin/gmm-est-fmllr-raw +gmmbin/gmm-est-fmllr-raw-gpost +gmmbin/gmm-latgen-faster-parallel +gmmbin/gmm-latgen-tracking +kwsbin/generate-proxy-keywords +kwsbin/kws-index-union +kwsbin/kws-search +kwsbin/lattice-to-kws-index +kwsbin/transcripts-to-fsts +latbin/lattice-add-penalty +latbin/lattice-align-phones +latbin/lattice-align-words-lexicon +latbin/lattice-combine +latbin/lattice-depth +latbin/lattice-determinize-pruned-parallel +latbin/lattice-push +latbin/lattice-rescore-mapped +latbin/lattice-reverse 
+latbin/lattice-to-smbr-post
+nnet-cpubin/nnet-align-compiled
+nnet-cpubin/nnet-am-average
+nnet-cpubin/nnet-am-combine
+nnet-cpubin/nnet-am-compute
+nnet-cpubin/nnet-am-copy
+nnet-cpubin/nnet-am-fix
+nnet-cpubin/nnet-am-info
+nnet-cpubin/nnet-am-init
+nnet-cpubin/nnet-am-limit-rank
+nnet-cpubin/nnet-am-mixup
+nnet-cpubin/nnet-am-rescale
+nnet-cpubin/nnet-am-shrink
+nnet-cpubin/nnet-am-stats
+nnet-cpubin/nnet-combine
+nnet-cpubin/nnet-combine-a
+nnet-cpubin/nnet-combine-fast
+nnet-cpubin/nnet-compute-prob
+nnet-cpubin/nnet-copy-egs
+nnet-cpubin/nnet-get-egs
+nnet-cpubin/nnet-get-preconditioner
+nnet-cpubin/nnet-gradient
+nnet-cpubin/nnet-init
+nnet-cpubin/nnet-insert
+nnet-cpubin/nnet-latgen-faster
+nnet-cpubin/nnet-latgen-faster-parallel
+nnet-cpubin/nnet-logprob
+nnet-cpubin/nnet-logprob-parallel
+nnet-cpubin/nnet-logprob2
+nnet-cpubin/nnet-logprob2-parallel
+nnet-cpubin/nnet-precondition
+nnet-cpubin/nnet-randomize-frames
+nnet-cpubin/nnet-select-egs
+nnet-cpubin/nnet-shrink
+nnet-cpubin/nnet-shuffle-egs
+nnet-cpubin/nnet-subset-egs
+nnet-cpubin/nnet-train
+nnet-cpubin/nnet-train-lbfgs
+nnet-cpubin/nnet-train-parallel
+nnet-cpubin/nnet-train-simple
+nnet-cpubin/nnet-train-transitions
+nnetbin/cmvn-to-nnet
+nnetbin/nnet-concat
+nnetbin/nnet-train-mpe-sequential
+nnetbin/nnet-train-xent-hardlab-frmshuff-prior
+nnetbin/transf-to-nnet
+onlinebin/online-gmm-decode-faster
+onlinebin/online-net-client
+onlinebin/online-server-gmm-decode-faster
+onlinebin/online-wav-gmm-decode-faster
+feat/tmp.test.wav.*
+fstext/push-special-test
+gmm/tmp_stats
+gmm/tmpfb
+gmm/tmpfeats
+nnet-cpu/nnet-precondition-test
+sgmm/estimate-am-sgmm-multi-test
+sgmm/tmpfb
+sgmm2/tmpfb
+thread/kaldi-task-sequence-test
+thread/kaldi-thread-test
+tied/tmpfb
+transform/fmllr-raw-test
+transform/tmp_regtree
+transform/tmp_stats
+transform/tmpfb
+sgmm2bin/sgmm2-latgen-faster-parallel
diff --git a/src/Makefile b/src/Makefile
index e6af5e650c2..ab2d6089e8f 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -10,7 +10,7 @@ SUBDIRS = base matrix util feat tree thread gmm tied transform sgmm \
           nnetbin latbin sgmm2 sgmm2bin nnet-cpu nnet-cpubin kwsbin
 
 # Optional subdirectories
-EXT_SUBDIRS = online onlinebin
+EXT_SUBDIRS = online onlinebin python-kaldi-decoding
 
 all: test_install kaldi.mk $(SUBDIRS)
 	echo Done
@@ -89,5 +89,6 @@ nnet: base util matrix cudamatrix
 nnet-cpu: base util matrix thread
 #3)Dependencies for optional parts of Kaldi
 onlinebin: base matrix util feat tree optimization gmm tied transform sgmm sgmm2 fstext hmm lm decoder lat cudamatrix nnet nnet-cpu online
+python-kaldi-decoding: base matrix util feat tree optimization thread gmm tied transform sgmm sgmm2 fstext hmm decoder lat online
 online: decoder
 kwsbin: fstext lat base util
diff --git a/src/configure b/src/configure
index cd3748767de..13afd9791a7 100755
--- a/src/configure
+++ b/src/configure
@@ -62,13 +62,14 @@ unset MKLLIBDIR
 function usage {
    echo 'Usage: ./configure [--threaded-atlas={yes|no}] [--atlas-root=ATLASROOT] [--fst-root=FSTROOT]
     [--openblas-root=OPENBLASROOOT] [--clapack-root=CLAPACKROOT] [--mkl-root=MKLROOT] [--mkl-libdir=MKLLIBDIR]
-    [--omp-libdir=OMPDIR] [--static-math={yes|no}] [--threaded-math={yes|no}] [--mathlib=ATLAS|MKL|CLAPACK|OPENBLAS]
+    [--omp-libdir=OMPDIR] [--static-fst={yes|no}] [--static-math={yes|no}] [--threaded-math={yes|no}] [--mathlib=ATLAS|MKL|CLAPACK|OPENBLAS]
     [--use-cuda={yes|no}] [--cudatk-dir=CUDATKDIR]';
 }
 
 threaded_atlas=false # By default, use the un-threaded version of ATLAS.
threaded_math=${threaded_atlas} static_math=false +static_fst=false use_cuda=true while [ $# -gt 0 ]; @@ -93,6 +94,10 @@ do static_math=true; shift ;; --static-math=no) static_math=false; shift ;; + --static-fst=yes) + static_fst=true; shift ;; + --static-fst=no) + static_fst=false; shift ;; --fst-root=*) FSTROOT=`read_dirname $1`; shift ;; --clapack-root=*) @@ -531,9 +536,17 @@ if [ "`uname -o`" == "Cygwin" ]; then fi if [ "`uname`" == "Linux" ]; then - if [ ! -f $FSTROOT/lib/libfst.a ]; then - failure "Static OpenFST library not found: See ../tools/INSTALL" + if $static_fst ; then + OPENFSTLIBS="$FSTROOT/lib/libfst.a" + fst_type='a' + else + OPENFSTLIBS="-L${FSTROOT}/lib -lfst -Wl,-rpath=${FSTROOT}/lib" + fst_type='so' + fi + if [ ! -f "$FSTROOT/lib/libfst.${fst_type}" ]; then + failure "Static=[$static_fst] OpenFST library not found: See ../tools/INSTALL" fi + echo OPENFSTLIBS = $OPENFSTLIBS >> kaldi.mk echo FSTROOT = $FSTROOT >> kaldi.mk echo "On Linux: Checking for linear algebra header files ..." diff --git a/src/makefiles/cygwin.mk b/src/makefiles/cygwin.mk index a1dce3f3e65..db49336cfb2 100644 --- a/src/makefiles/cygwin.mk +++ b/src/makefiles/cygwin.mk @@ -5,6 +5,7 @@ $(error FSTROOT not defined.) endif CXXFLAGS = -msse -msse2 -Wall -I.. -DKALDI_DOUBLEPRECISION=0 \ + -fPIC \ -DHAVE_POSIX_MEMALIGN -DHAVE_CLAPACK -I ../../tools/CLAPACK/ \ -Wno-sign-compare -Winit-self \ -I ../../tools/CLAPACK/ \ diff --git a/src/makefiles/darwin_10_5.mk b/src/makefiles/darwin_10_5.mk index 30a392fcfd4..a543b47a042 100644 --- a/src/makefiles/darwin_10_5.mk +++ b/src/makefiles/darwin_10_5.mk @@ -5,6 +5,7 @@ $(error FSTROOT not defined.) endif CXXFLAGS = -msse -msse2 -Wall -I.. \ + -fPIC \ -DKALDI_DOUBLEPRECISION=0 \ -Wno-sign-compare -Winit-self \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ diff --git a/src/makefiles/darwin_10_6.mk b/src/makefiles/darwin_10_6.mk index 6fd0d360c28..62802ee6b5b 100644 --- a/src/makefiles/darwin_10_6.mk +++ b/src/makefiles/darwin_10_6.mk @@ -5,6 +5,7 @@ $(error FSTROOT not defined.) endif CXXFLAGS = -msse -msse2 -Wall -I.. \ + -fPIC \ -DKALDI_DOUBLEPRECISION=0 -DHAVE_POSIX_MEMALIGN \ -Wno-sign-compare -Winit-self \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -rdynamic \ diff --git a/src/makefiles/darwin_10_7.mk b/src/makefiles/darwin_10_7.mk index 63d762a6758..1138202043e 100644 --- a/src/makefiles/darwin_10_7.mk +++ b/src/makefiles/darwin_10_7.mk @@ -5,6 +5,7 @@ $(error FSTROOT not defined.) endif CXXFLAGS = -msse -msse2 -Wall -I.. \ + -fPIC \ -DKALDI_DOUBLEPRECISION=0 -DHAVE_POSIX_MEMALIGN \ -Wno-sign-compare -Winit-self \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -rdynamic \ diff --git a/src/makefiles/darwin_10_8.mk b/src/makefiles/darwin_10_8.mk index 63d762a6758..1138202043e 100644 --- a/src/makefiles/darwin_10_8.mk +++ b/src/makefiles/darwin_10_8.mk @@ -5,6 +5,7 @@ $(error FSTROOT not defined.) endif CXXFLAGS = -msse -msse2 -Wall -I.. \ + -fPIC \ -DKALDI_DOUBLEPRECISION=0 -DHAVE_POSIX_MEMALIGN \ -Wno-sign-compare -Winit-self \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -rdynamic \ diff --git a/src/makefiles/linux_atlas.mk b/src/makefiles/linux_atlas.mk index 95ff3a886d6..be9bb0b9cfd 100644 --- a/src/makefiles/linux_atlas.mk +++ b/src/makefiles/linux_atlas.mk @@ -14,6 +14,7 @@ endif CXXFLAGS = -msse -msse2 -Wall -I.. 
\ + -fPIC \ -DKALDI_DOUBLEPRECISION=0 -DHAVE_POSIX_MEMALIGN \ -Wno-sign-compare -Winit-self \ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ diff --git a/src/makefiles/linux_atlas_64bit.mk b/src/makefiles/linux_atlas_64bit.mk index b00a1f76cd5..444c917de81 100644 --- a/src/makefiles/linux_atlas_64bit.mk +++ b/src/makefiles/linux_atlas_64bit.mk @@ -36,6 +36,7 @@ endif CXXFLAGS = -msse -msse2 -Wall -I.. \ + -fPIC \ -DKALDI_DOUBLEPRECISION=0 -DHAVE_POSIX_MEMALIGN \ -Wno-sign-compare -Winit-self \ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ diff --git a/src/makefiles/linux_clapack.mk b/src/makefiles/linux_clapack.mk index 04947a1b9b5..8d826f5a957 100644 --- a/src/makefiles/linux_clapack.mk +++ b/src/makefiles/linux_clapack.mk @@ -1,6 +1,7 @@ # You have to make sure CLAPACKLIBS is set... CXXFLAGS = -msse -Wall -I.. \ + -fPIC \ -DKALDI_DOUBLEPRECISION=0 -msse2 -DHAVE_POSIX_MEMALIGN \ -Wno-sign-compare \ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ diff --git a/src/makefiles/linux_openblas.mk b/src/makefiles/linux_openblas.mk index c5a42a65d17..9b799d430fe 100644 --- a/src/makefiles/linux_openblas.mk +++ b/src/makefiles/linux_openblas.mk @@ -1,16 +1,18 @@ # You have to make sure CLAPACKLIBS is set... CXXFLAGS = -msse -Wall -I.. \ + -fPIC \ -DKALDI_DOUBLEPRECISION=0 -msse2 -DHAVE_POSIX_MEMALIGN \ -Wno-sign-compare \ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ -DUSE_KALDI_SVD -DHAVE_OPENBLAS -I $(OPENBLASROOT)/include \ -I ../../tools/openfst/include \ + -I $(FSTROOT)/include \ $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID LDFLAGS = -rdynamic -LDLIBS = $(EXTRA_LDLIBS) ../../tools/openfst/lib/libfst.a -ldl $(OPENBLASLIBS) -lm -lpthread +LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm -lpthread -ldl CC = g++ CXX = g++ AR = ar diff --git a/src/python-kaldi-decoding/.gitignore b/src/python-kaldi-decoding/.gitignore new file mode 100644 index 00000000000..988c8be5cd9 --- /dev/null +++ b/src/python-kaldi-decoding/.gitignore @@ -0,0 +1,17 @@ +.valgrind +*.o +*.so +*.a +valgrind.out +.depend.mk +data_voip_en +decode +mfcc +utils +data_voip_en +compute-mfcc-feats-test +gmm-latgen-faster-test +online-wav-gmm-decode-faster-test +compute-wer-test +lattice-best-path-test +*.d diff --git a/src/python-kaldi-decoding/Makefile b/src/python-kaldi-decoding/Makefile new file mode 100644 index 00000000000..61591c68b4c --- /dev/null +++ b/src/python-kaldi-decoding/Makefile @@ -0,0 +1,82 @@ +# We suppose that this Makefile sits in kaldi-trunk/src/ThisDirectory +all: + +EXTRA_CXXFLAGS = -Wno-sign-compare -I ../../tools/portaudio/install/include +EXTRA_LDLIBS = + +include ../kaldi.mk +CC = gcc + +TESTFILES = compute-wer-test gmm-latgen-faster-test compute-mfcc-feats-test \ + lattice-best-path-test online-wav-gmm-decode-faster-test +OBJFILES = compute-wer.o gmm-latgen-faster.o compute-mfcc-feats.o \ + lattice-best-path.o online-wav-gmm-decode-faster.o + +LDLIBFILE = libkaldi-cffi.so +LIBFILE = kaldi-cffi.a + + +UNAME=$(shell uname) +ifeq ($(UNAME), Linux) + EXTRA_LDLIBS += ../../tools/portaudio/install/lib/libportaudio.a +ifneq ($(wildcard ../../tools/portaudio/install/include/pa_linux_alsa.h),) + EXTRA_LDLIBS += -lasound +else + EXTRA_LDLIBS += -lrt +endif +else + EXTRA_LDLIBS += -L $(PA_LDD) -lportaudio +endif + +all: $(LIBFILE) $(LDLIBFILE) + +# I can not use ../decoder/kaldi-decoder.a because there are two types of decoders for two types sgmm vs sgmm2 +$(LDLIBFILE): $(OBJFILES) \ + ../decoder/decodable-am-diag-gmm.o ../decoder/lattice-faster-decoder.o ../decoder/faster-decoder.o \ + 
../online/kaldi-online.a ../thread/kaldi-thread.a ../lat/kaldi-lat.a ../hmm/kaldi-hmm.a \
+	../transform/kaldi-transform.a ../gmm/kaldi-gmm.a ../fstext/kaldi-fstext.a ../tree/kaldi-tree.a \
+	../matrix/kaldi-matrix.a ../feat/kaldi-feature.a ../util/kaldi-util.a ../base/kaldi-base.a
+	$(CC) -fPIC -shared -o $@ -Wl,-export-dynamic,--whole-archive $^ -Wl,--no-whole-archive $(LDLIBS)
+
+
+$(LIBFILE): $(OBJFILES)
+	$(AR) -cru $(LIBFILE) $(OBJFILES)
+	$(RANLIB) $(LIBFILE)
+
+# The rule below expands to, e.g.:
+# ../base/kaldi-base.a:
+#	$(MAKE) -C ../base kaldi-base.a
+# (the -C option tells make to change into that directory first).
+%.a:
+	$(MAKE) -C ${@D} ${@F}
+
+clean:
+	-rm -f *.o *.a tmp* *.tmp *.so .depend.mk $(TESTFILES) valgrind.out
+
+depend:
+	-$(CXX) -M $(CXXFLAGS) *.cc > .depend.mk
+
+# Automatic regeneration of "depend" is disabled because it is quite slow.
+# .depend.mk: depend
+
+-include .depend.mk
+
+.valgrind: $(TESTFILES)
+
+.PHONY: test_dyn python_test test
+
+### TESTS - launch each command with --help ###
+test_dyn: $(LDLIBFILE) $(TESTFILES)
+	# Before running the binaries, set the LD_LIBRARY_PATH variable as we do here!
+	@result=0; for x in $(TESTFILES); do \
+	  echo -n "Running $$x : "; \
+	  LD_LIBRARY_PATH=$(OBLAS_LDD):$(FST_LDD):`pwd` ./$$x --help > /dev/null 2>&1; \
+	  if [ $$? -ne 0 ]; then echo "... FAIL"; result=1; else echo "... SUCCESS"; fi; \
+	done; exit $$result
+
+python_test: run.py $(LDLIBFILE)
+	LD_LIBRARY_PATH=$(OBLAS_LDD):$(FST_LDD):`pwd` python $<
+	echo $?
+
+# test: test_dyn python_test
+test: test_dyn
diff --git a/src/python-kaldi-decoding/README.md b/src/python-kaldi-decoding/README.md
new file mode 100644
index 00000000000..efdd7d48181
--- /dev/null
+++ b/src/python-kaldi-decoding/README.md
@@ -0,0 +1,65 @@
+Intro
+-----
+The goal of this project is to test the
+Kaldi decoding pipeline when called from Python.
+
+Prerequisites
+-------------
+
+ * Install *cffi*! See the docs at
+[http://cffi.readthedocs.org/](http://cffi.readthedocs.org/) for more info.
+ * Build Kaldi with `OpenBLAS` support and the `-fPIC` flag in `CXXFLAGS` or `EXTRA_CXXFLAGS` in the main Makefile.
+ * Before building Kaldi, build `OpenBLAS` and OpenFst by
+
+ ```sh
+ cd kaldi-trunk/tools
+ make openblas
+ ```
+
+ and
+
+```sh
+cd kaldi-trunk/tools
+# change line 37 of kaldi-trunk/tools/Makefile as in the following "patch",
+# i.e. switch from --disable-shared to --enable-shared
+*** Makefile
+************
+*** 34,38 ****
+
+openfst-1.3.2/Makefile: openfst-1.3.2/.patched
+	cd openfst-1.3.2/; \
+!	./configure --prefix=`pwd` --enable-static --disable-shared --enable-far --enable-ngram-fsts
+
+--- 34,38 ----
+
+openfst-1.3.2/Makefile: openfst-1.3.2/.patched
+	cd openfst-1.3.2/; \
+!	./configure --prefix=`pwd` --enable-static --enable-shared --enable-far --enable-ngram-fsts
+
+# and build it
+make openfst_tgt
+```
+
+
+Running and building examples
+-----------------------------
+
+To build the shared libraries and run the C test binaries:
+```sh
+make all
+```
+To run `run.py`, first specify where the shared libraries are, e.g.
+by running from `kaldi-trunk/src/python-kaldi-decoding`:
+
+```sh
+LD_LIBRARY_PATH=`pwd`/../../tools/OpenBLAS:`pwd`/../../tools/openfst/lib:`pwd` ./run.py
+```
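+
+For orientation, this is a condensed sketch of the pattern `run.py` uses to
+call one of the `*_like_main` entry points through *cffi* (the library and
+function names come from this directory; the rspecifier/wspecifier arguments
+below are placeholders):
+
+```py
+from cffi import FFI
+
+ffi = FFI()
+ffi.cdef('int compute_mfcc_feats_like_main(int argc, char **argv);')
+lib = ffi.dlopen('libkaldi-cffi.so')  # found via LD_LIBRARY_PATH set above
+
+# argv[0] is unused; the rest are ordinary command-line arguments.
+args = ['unused', 'scp:wav.scp', 'ark,scp:mfcc.ark,mfcc.scp']
+argv_keepalive = [ffi.new('char[]', a) for a in args]  # keep refs alive
+argv = ffi.new('char *[]', argv_keepalive)
+assert lib.compute_mfcc_feats_like_main(len(args), argv) == 0
+```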
+
+
+Remarks on linking
+------------------
+ * [How to use dlopen](http://www.isotton.com/devel/docs/C++-dlopen-mini-HOWTO/C++-dlopen-mini-HOWTO.html)
+ * [A slightly off-topic explanation on Stack Overflow](http://stackoverflow.com/questions/12762910/c-undefined-symbols-when-loading-shared-library-with-dlopen)
+ * [See "Missing the ATLAS implementation of CLAPACK"](http://kaldi.sourceforge.net/matrixwrap.html)
+ * I spent a lot of time getting the linking right:
+   I was linking the `lapack` libraries instead of `lapack_atlas`
+   and kept getting the error `undefined symbol: clapack_dgetrf`.
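+
+When chasing similar `undefined symbol` errors, it can help to inspect the
+built library directly with standard binutils tools (shown here on the
+library produced by this Makefile):
+
+```sh
+# Which shared libraries will be loaded at runtime?
+ldd libkaldi-cffi.so
+# Is the offending LAPACK symbol defined (T) or merely undefined (U)?
+nm -D libkaldi-cffi.so | grep clapack_dgetrf
+```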
"); + po.Register("vtln-warp", &vtln_warp, "Vtln warp factor (only applicable if vtln-map not specified)"); + po.Register("vtln-map", &vtln_map_rspecifier, "Map from utterance or speaker-id to vtln warp factor (rspecifier)"); + po.Register("utt2spk", &utt2spk_rspecifier, "Utterance to speaker-id map (if doing VTLN and you have warps per speaker)"); + po.Register("channel", &channel, "Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right)"); + po.Register("min-duration", &min_duration, "Minimum duration of segments to process (in seconds)."); + + // OPTION PARSING .......................................................... + // + + // parse options (+filling the registered variables) + po.Read(argc, argv); + + if (po.NumArgs() != 2) { + po.PrintUsage(); + exit(1); + } + + std::string wav_rspecifier = po.GetArg(1); + + std::string output_wspecifier = po.GetArg(2); + + Mfcc mfcc(mfcc_opts); + + SequentialTableReader reader(wav_rspecifier); + BaseFloatMatrixWriter kaldi_writer; // typedef to TableWriter. + TableWriter htk_writer; + + if (utt2spk_rspecifier != "") + KALDI_ASSERT(vtln_map_rspecifier != "" && "the utt2spk option is only " + "needed if the vtln-map option is used."); + RandomAccessBaseFloatReaderMapped vtln_map_reader(vtln_map_rspecifier, + utt2spk_rspecifier); + + if (output_format == "kaldi") { + if (!kaldi_writer.Open(output_wspecifier)) + KALDI_ERR << "Could not initialize output with wspecifier " + << output_wspecifier; + } else if (output_format == "htk") { + if (!htk_writer.Open(output_wspecifier)) + KALDI_ERR << "Could not initialize output with wspecifier " + << output_wspecifier; + } else { + KALDI_ERR << "Invalid output_format string " << output_format; + } + + int32 num_utts = 0, num_success = 0; + for (; !reader.Done(); reader.Next()) { + num_utts++; + std::string utt = reader.Key(); + const WaveData &wave_data = reader.Value(); + if (wave_data.Duration() < min_duration) { + KALDI_WARN << "File: " << utt << " is too short (" + << wave_data.Duration() << " sec): producing no output."; + continue; + } + int32 num_chan = wave_data.Data().NumRows(), this_chan = channel; + { // This block works out the channel (0=left, 1=right...) + KALDI_ASSERT(num_chan > 0); // should have been caught in + // reading code if no channels. + if (channel == -1) { + this_chan = 0; + if (num_chan != 1) + KALDI_WARN << "Channel not specified but you have data with " + << num_chan << " channels; defaulting to zero"; + } else { + if (this_chan >= num_chan) { + KALDI_WARN << "File with id " << utt << " has " + << num_chan << " channels but you specified channel " + << channel << ", producing no output."; + continue; + } + } + } + BaseFloat vtln_warp_local; // Work out VTLN warp factor. + if (vtln_map_rspecifier != "") { + if (!vtln_map_reader.HasKey(utt)) { + KALDI_WARN << "No vtln-map entry for utterance-id (or speaker-id) " + << utt; + continue; + } + vtln_warp_local = vtln_map_reader.Value(utt); + } else { + vtln_warp_local = vtln_warp; + } + if (mfcc_opts.frame_opts.samp_freq != wave_data.SampFreq()) + KALDI_ERR << "Sample frequency mismatch: you specified " + << mfcc_opts.frame_opts.samp_freq << " but data has " + << wave_data.SampFreq() << " (use --sample-frequency option)"; + + SubVector waveform(wave_data.Data(), this_chan); + Matrix features; + try { + mfcc.Compute(waveform, vtln_warp_local, &features, NULL); + } catch (...) 
+        KALDI_WARN << "Failed to compute features for utterance "
+                   << utt;
+        continue;
+      }
+      if (subtract_mean) {
+        Vector<BaseFloat> mean(features.NumCols());
+        mean.AddRowSumMat(1.0, features);
+        mean.Scale(1.0 / features.NumRows());
+        for (int32 i = 0; i < features.NumRows(); i++)
+          features.Row(i).AddVec(-1.0, mean);
+      }
+      if (output_format == "kaldi") {
+        kaldi_writer.Write(utt, features);
+      } else {
+        std::pair<Matrix<BaseFloat>, HtkHeader> p;
+        p.first.Resize(features.NumRows(), features.NumCols());
+        p.first.CopyFromMat(features);
+        HtkHeader header = {
+          features.NumRows(),
+          100000,  // 10ms shift
+          sizeof(float)*features.NumCols(),
+          006 |  // MFCC
+          (mfcc_opts.use_energy ? 0100 : 020000)  // energy; otherwise c0
+        };
+        p.second = header;
+        htk_writer.Write(utt, p);
+      }
+      if (num_utts % 10 == 0)
+        KALDI_LOG << "Processed " << num_utts << " utterances";
+      KALDI_VLOG(2) << "Processed features for key " << utt;
+      num_success++;
+    }
+    KALDI_LOG << " Done " << num_success << " out of " << num_utts
+              << " utterances.";
+    return (num_success != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
diff --git a/src/python-kaldi-decoding/compute-mfcc-feats.h b/src/python-kaldi-decoding/compute-mfcc-feats.h
new file mode 100644
index 00000000000..530bcf82c18
--- /dev/null
+++ b/src/python-kaldi-decoding/compute-mfcc-feats.h
@@ -0,0 +1,16 @@
+// -*- coding: utf-8 -*-
+
+#ifndef COMPUTE_MFCC_FEATS_H
+#define COMPUTE_MFCC_FEATS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int compute_mfcc_feats_like_main(int argc, char **argv);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // #ifndef COMPUTE_MFCC_FEATS_H
diff --git a/src/python-kaldi-decoding/compute-wer-test.c b/src/python-kaldi-decoding/compute-wer-test.c
new file mode 100644
index 00000000000..797b2651fe8
--- /dev/null
+++ b/src/python-kaldi-decoding/compute-wer-test.c
@@ -0,0 +1,5 @@
+#include "test_cffi_python_dyn.h"
+
+int main(int argc, char **argv) {
+  return testSharedLib("libkaldi-cffi.so", "compute_wer_like_main", argc, argv);
+}
diff --git a/src/python-kaldi-decoding/compute-wer.cc b/src/python-kaldi-decoding/compute-wer.cc
new file mode 100644
index 00000000000..76f694fbd9f
--- /dev/null
+++ b/src/python-kaldi-decoding/compute-wer.cc
@@ -0,0 +1,144 @@
+// bin/compute-wer.cc
+
+// Copyright 2009-2011  Microsoft Corporation
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
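+
+// As with the other files in this directory, this is a copy of a Kaldi
+// binary (bin/compute-wer.cc) with main() renamed to an extern "C"
+// *_like_main() entry point (declared in compute-wer.h) so that it can be
+// resolved with dlopen()/cffi from Python.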
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "util/parse-options.h"
+#include "tree/context-dep.h"
+#include "util/edit-distance.h"
+
+#include "compute-wer.h"
+
+
+int compute_wer_like_main(int argc, char *argv[]) {
+  using namespace kaldi;
+  typedef kaldi::int32 int32;
+
+  try {
+    const char *usage =
+        "Compute WER by comparing different transcriptions\n"
+        "Takes two transcription files, in kaldi integer format\n"
+        "Usage: compute-wer [options] <ref-rspecifier> <hyp-rspecifier>\n";
+    ParseOptions po(usage);
+
+    std::string mode = "strict";
+    bool text_input = false;  // if this is true, we expect symbols as strings,
+
+    po.Register("mode", &mode,
+                "Scoring mode: \"present\"|\"all\"|\"strict\":\n"
+                "  \"present\" means score those we have transcriptions for\n"
+                "  \"all\" means treat absent transcriptions as empty\n"
+                "  \"strict\" means die if all in ref not also in hyp");
+    po.Register("text", &text_input, "Expect strings, not integers, as input.");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string ref_rspecifier = po.GetArg(1);
+    std::string hyp_rspecifier = po.GetArg(2);
+
+    if (mode != "strict"
+        && mode != "present"
+        && mode != "all") {
+      KALDI_ERR << "--mode option invalid: expected \"present\"|\"all\"|\"strict\", got "
+                << mode;
+    }
+
+    int32 num_words = 0, word_errs = 0, num_sent = 0, sent_errs = 0,
+        num_absent_sents = 0, num_ins = 0, num_del = 0, num_sub = 0;
+
+    if (!text_input) {
+      SequentialInt32VectorReader ref_reader(ref_rspecifier);
+      RandomAccessInt32VectorReader hyp_reader(hyp_rspecifier);
+
+      for (; !ref_reader.Done(); ref_reader.Next()) {
+        std::string key = ref_reader.Key();
+        const std::vector<int32> &ref_sent = ref_reader.Value();
+        std::vector<int32> hyp_sent;
+        if (!hyp_reader.HasKey(key)) {
+          if (mode == "strict")
+            KALDI_ERR << "No hypothesis for key " << key << " and strict "
+                "mode specifier.";
+          num_absent_sents++;
+          if (mode == "present")  // do not score this one.
+            continue;
+        } else {
+          hyp_sent = hyp_reader.Value(key);
+        }
+        num_words += ref_sent.size();
+        int32 ins, del, sub;
+        word_errs += LevenshteinEditDistance(ref_sent, hyp_sent, &ins, &del, &sub);
+        num_ins += ins; num_del += del; num_sub += sub;
+
+        num_sent++;
+        sent_errs += (ref_sent != hyp_sent);
+      }
+    } else {
+      SequentialTokenVectorReader ref_reader(ref_rspecifier);
+      RandomAccessTokenVectorReader hyp_reader(hyp_rspecifier);
+
+      for (; !ref_reader.Done(); ref_reader.Next()) {
+        std::string key = ref_reader.Key();
+        const std::vector<std::string> &ref_sent = ref_reader.Value();
+        std::vector<std::string> hyp_sent;
+        if (!hyp_reader.HasKey(key)) {
+          if (mode == "strict")
+            KALDI_ERR << "No hypothesis for key " << key << " and strict "
+                "mode specifier.";
+          num_absent_sents++;
+          if (mode == "present")  // do not score this one.
+            continue;
+        } else {
+          hyp_sent = hyp_reader.Value(key);
+        }
+        num_words += ref_sent.size();
+        int32 ins, del, sub;
+        word_errs += LevenshteinEditDistance(ref_sent, hyp_sent, &ins, &del, &sub);
+        num_ins += ins; num_del += del; num_sub += sub;
+
+        num_sent++;
+        sent_errs += (ref_sent != hyp_sent);
+      }
+    }
+
+    BaseFloat percent_wer = 100.0 * static_cast<BaseFloat>(word_errs)
+        / static_cast<BaseFloat>(num_words);
+    std::cout.precision(2);
+    std::cerr.precision(2);
+    std::cout << "%WER " << percent_wer << " [ " << word_errs << " / "
+              << num_words << ", " << num_ins << " ins, " << num_del
+              << " del, " << num_sub << " sub ]" << std::endl;
+    BaseFloat percent_ser = 100.0 * static_cast<BaseFloat>(sent_errs)
+        / static_cast<BaseFloat>(num_sent);
+    std::cout << "%SER " << percent_ser << " [ " << sent_errs << " / "
+              << num_sent << " ]" << std::endl;
+    std::cout << "Scored " << num_sent << " sentences, " << num_absent_sents
+              << " not present in hyp." << std::endl;
+    return 0;
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
diff --git a/src/python-kaldi-decoding/compute-wer.h b/src/python-kaldi-decoding/compute-wer.h
new file mode 100644
--- /dev/null
+++ b/src/python-kaldi-decoding/compute-wer.h
@@ -0,0 +1,16 @@
+// -*- coding: utf-8 -*-
+
+#ifndef COMPUTE_WER_H
+#define COMPUTE_WER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int compute_wer_like_main(int argc, char **argv);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // #ifndef COMPUTE_WER_H
diff --git a/src/python-kaldi-decoding/gmm-latgen-faster-test.c b/src/python-kaldi-decoding/gmm-latgen-faster-test.c
new file mode 100644
--- /dev/null
+++ b/src/python-kaldi-decoding/gmm-latgen-faster-test.c
@@ -0,0 +1,5 @@
+#include "test_cffi_python_dyn.h"
+
+int main(int argc, char **argv) {
+  return testSharedLib("libkaldi-cffi.so", "gmm_latgen_faster_like_main", argc, argv);
+}
diff --git a/src/python-kaldi-decoding/gmm-latgen-faster.cc b/src/python-kaldi-decoding/gmm-latgen-faster.cc
new file mode 100644
--- /dev/null
+++ b/src/python-kaldi-decoding/gmm-latgen-faster.cc
@@ -0,0 +1,196 @@
+// gmmbin/gmm-latgen-faster.cc
+
+// Copyright 2009-2012  Microsoft Corporation
+//                      Johns Hopkins University (author: Daniel Povey)
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "gmm/am-diag-gmm.h"
+#include "hmm/transition-model.h"
+#include "fstext/fstext-lib.h"
+#include "decoder/lattice-faster-decoder.h"
+#include "decoder/decodable-am-diag-gmm.h"
+#include "util/timer.h"
+
+#include "gmm-latgen-faster.h"
+
+int gmm_latgen_faster_like_main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    typedef kaldi::int32 int32;
+    using fst::SymbolTable;
+    using fst::VectorFst;
+    using fst::StdArc;
+
+    const char *usage =
+        "Generate lattices using GMM-based model.\n"
+        "Usage: gmm-latgen-faster [options] model-in (fst-in|fsts-rspecifier) features-rspecifier"
+        " lattice-wspecifier [ words-wspecifier [alignments-wspecifier] ]\n";
+    ParseOptions po(usage);
+    Timer timer;
+    bool allow_partial = false;
+    BaseFloat acoustic_scale = 0.1;
+    LatticeFasterDecoderConfig config;
+
+    std::string word_syms_filename;
+    config.Register(&po);
+    po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic likelihoods");
+    po.Register("word-symbol-table", &word_syms_filename, "Symbol table for words [for debug output]");
+    po.Register("allow-partial", &allow_partial, "If true, produce output even if end state was not reached.");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() < 4 || po.NumArgs() > 6) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string model_in_filename = po.GetArg(1),
+        fst_in_str = po.GetArg(2),
+        feature_rspecifier = po.GetArg(3),
+        lattice_wspecifier = po.GetArg(4),
+        words_wspecifier = po.GetOptArg(5),
+        alignment_wspecifier = po.GetOptArg(6);
+
+    TransitionModel trans_model;
+    AmDiagGmm am_gmm;
+    {
+      bool binary;
+      Input ki(model_in_filename, &binary);
+      trans_model.Read(ki.Stream(), binary);
+      am_gmm.Read(ki.Stream(), binary);
+    }
+
+    bool determinize = config.determinize_lattice;
+    CompactLatticeWriter compact_lattice_writer;
+    LatticeWriter lattice_writer;
+    if (! (determinize ?
compact_lattice_writer.Open(lattice_wspecifier) + : lattice_writer.Open(lattice_wspecifier))) + KALDI_ERR << "Could not open table for writing lattices: " + << lattice_wspecifier; + + Int32VectorWriter words_writer(words_wspecifier); + + Int32VectorWriter alignment_writer(alignment_wspecifier); + + fst::SymbolTable *word_syms = NULL; + if (word_syms_filename != "") + if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename))) + KALDI_ERR << "Could not read symbol table from file " + << word_syms_filename; + + double tot_like = 0.0; + kaldi::int64 frame_count = 0; + int num_done = 0, num_err = 0; + + if (ClassifyRspecifier(fst_in_str, NULL, NULL) == kNoRspecifier) { + SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); + // Input FST is just one FST, not a table of FSTs. + VectorFst *decode_fst = NULL; + { + std::ifstream is(fst_in_str.c_str(), std::ifstream::binary); + if (!is.good()) KALDI_ERR << "Could not open decoding-graph FST " + << fst_in_str; + decode_fst = + VectorFst::Read(is, fst::FstReadOptions(fst_in_str)); + if (decode_fst == NULL) // fst code will warn. + exit(1); + } + + { + LatticeFasterDecoder decoder(*decode_fst, config); + + for (; !feature_reader.Done(); feature_reader.Next()) { + std::string utt = feature_reader.Key(); + Matrix features (feature_reader.Value()); + feature_reader.FreeCurrent(); + if (features.NumRows() == 0) { + KALDI_WARN << "Zero-length utterance: " << utt; + num_err++; + continue; + } + + DecodableAmDiagGmmScaled gmm_decodable(am_gmm, trans_model, features, + acoustic_scale); + + double like; + if (DecodeUtteranceLatticeFaster( + decoder, gmm_decodable, word_syms, utt, acoustic_scale, + determinize, allow_partial, &alignment_writer, &words_writer, + &compact_lattice_writer, &lattice_writer, &like)) { + tot_like += like; + frame_count += features.NumRows(); + num_done++; + } else num_err++; + } + } + delete decode_fst; // delete this only after decoder goes out of scope. + } else { // We have different FSTs for different utterances. 
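+      // Here fst_in_str is an rspecifier (e.g. "ark:HCLG.fsts"): a separate
+      // decoding graph is read for each utterance and matched to the
+      // features table by utterance key.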
+ SequentialTableReader fst_reader(fst_in_str); + RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); + for (; !fst_reader.Done(); fst_reader.Next()) { + std::string utt = fst_reader.Key(); + if (!feature_reader.HasKey(utt)) { + KALDI_WARN << "Not decoding utterance " << utt + << " because no features available."; + num_err++; + continue; + } + const Matrix &features = feature_reader.Value(utt); + if (features.NumRows() == 0) { + KALDI_WARN << "Zero-length utterance: " << utt; + num_err++; + continue; + } + + LatticeFasterDecoder decoder(fst_reader.Value(), config); + DecodableAmDiagGmmScaled gmm_decodable(am_gmm, trans_model, features, + acoustic_scale); + double like; + if (DecodeUtteranceLatticeFaster( + decoder, gmm_decodable, word_syms, utt, acoustic_scale, + determinize, allow_partial, &alignment_writer, &words_writer, + &compact_lattice_writer, &lattice_writer, &like)) { + tot_like += like; + frame_count += features.NumRows(); + num_done++; + } else num_err++; + } + } + + double elapsed = timer.Elapsed(); + KALDI_LOG << "Time taken "<< elapsed + << "s: real-time factor assuming 100 frames/sec is " + << (elapsed*100.0/frame_count); + KALDI_LOG << "Done " << num_done << " utterances, failed for " + << num_err; + KALDI_LOG << "Overall log-likelihood per frame is " << (tot_like/frame_count) << " over " + << frame_count << " frames."; + + if (word_syms) delete word_syms; + if (num_done != 0) return 0; + else return 1; + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} diff --git a/src/python-kaldi-decoding/gmm-latgen-faster.h b/src/python-kaldi-decoding/gmm-latgen-faster.h new file mode 100644 index 00000000000..c5cd487275e --- /dev/null +++ b/src/python-kaldi-decoding/gmm-latgen-faster.h @@ -0,0 +1,16 @@ +// -*- coding: utf-8 -*- + +#ifndef GMM_LATGEN_FASTER_H +#define GMM_LATGEN_FASTER_H + +#ifdef __cplusplus +extern "C" { +#endif + +int gmm_latgen_faster_like_main(int argc, char **argv); + +#ifdef __cplusplus +} +#endif + +#endif // #ifndef GMM_LATGEN_FASTER_H diff --git a/src/python-kaldi-decoding/lattice-best-path-test.c b/src/python-kaldi-decoding/lattice-best-path-test.c new file mode 100644 index 00000000000..656052adc8e --- /dev/null +++ b/src/python-kaldi-decoding/lattice-best-path-test.c @@ -0,0 +1,5 @@ +#include "test_cffi_python_dyn.h" + +int main(int argc, char **argv) { + return testSharedLib("libkaldi-cffi.so", "lattice_best_path_like_main", argc, argv); +} diff --git a/src/python-kaldi-decoding/lattice-best-path.cc b/src/python-kaldi-decoding/lattice-best-path.cc new file mode 100644 index 00000000000..67b293f87fa --- /dev/null +++ b/src/python-kaldi-decoding/lattice-best-path.cc @@ -0,0 +1,136 @@ +// latbin/lattice-best-path.cc + +// Copyright 2009-2011 Microsoft Corporation + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
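+
+// In the run.py pipeline this is the scoring step: it reads the lattices
+// written by gmm_latgen_faster_like_main() and outputs 1-best transcriptions.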
+ + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "fstext/fstext-lib.h" +#include "lat/kaldi-lattice.h" +#include "lat/lattice-functions.h" + +#include "lattice-best-path.h" + +int lattice_best_path_like_main(int argc, char *argv[]) { + try { + using namespace kaldi; + typedef kaldi::int32 int32; + typedef kaldi::int64 int64; + using fst::SymbolTable; + using fst::VectorFst; + using fst::StdArc; + + const char *usage = + "Generate 1-best path through lattices; output as transcriptions and alignments\n" + "Note: if you want output as FSTs, use lattice-1best; if you want output\n" + "with acoustic and LM scores, use lattice-1best | nbest-to-linear\n" + "Usage: lattice-best-path [options] lattice-rspecifier [ transcriptions-wspecifier [ alignments-wspecifier] ]\n" + " e.g.: lattice-best-path --acoustic-scale=0.1 ark:1.lats ark:1.tra ark:1.ali\n"; + + ParseOptions po(usage); + BaseFloat acoustic_scale = 1.0; + BaseFloat lm_scale = 1.0; + + std::string word_syms_filename; + po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic likelihoods"); + po.Register("lm-scale", &lm_scale, "Scaling factor for LM probabilities. " + "Note: the ratio acoustic-scale/lm-scale is all that matters."); + po.Register("word-symbol-table", &word_syms_filename, "Symbol table for words [for debug output]"); + + po.Read(argc, argv); + + if (po.NumArgs() < 1 || po.NumArgs() > 3) { + po.PrintUsage(); + exit(1); + } + + std::string lats_rspecifier = po.GetArg(1), + transcriptions_wspecifier = po.GetOptArg(2), + alignments_wspecifier = po.GetOptArg(3); + + SequentialCompactLatticeReader clat_reader(lats_rspecifier); + + Int32VectorWriter transcriptions_writer(transcriptions_wspecifier); + + Int32VectorWriter alignments_writer(alignments_wspecifier); + + fst::SymbolTable *word_syms = NULL; + if (word_syms_filename != "") + if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename))) + KALDI_ERR << "Could not read symbol table from file " + << word_syms_filename; + + + int32 n_done = 0, n_fail = 0; + int64 n_frame = 0; + LatticeWeight tot_weight = LatticeWeight::One(); + + for (; !clat_reader.Done(); clat_reader.Next()) { + std::string key = clat_reader.Key(); + CompactLattice clat = clat_reader.Value(); + clat_reader.FreeCurrent(); + fst::ScaleLattice(fst::LatticeScale(lm_scale, acoustic_scale), &clat); + CompactLattice clat_best_path; + CompactLatticeShortestPath(clat, &clat_best_path); // A specialized + // implementation of shortest-path for CompactLattice. 
+ Lattice best_path; + ConvertLattice(clat_best_path, &best_path); + if (best_path.Start() == fst::kNoStateId) { + KALDI_WARN << "Best-path failed for key " << key; + n_fail++; + } else { + std::vector alignment; + std::vector words; + LatticeWeight weight; + GetLinearSymbolSequence(best_path, &alignment, &words, &weight); + KALDI_LOG << "For utterance " << key << ", best cost " + << weight.Value1() << " + " << weight.Value2() << " = " + << (weight.Value1() + weight.Value2()); + if (transcriptions_wspecifier != "") + transcriptions_writer.Write(key, words); + if (alignments_wspecifier != "") + alignments_writer.Write(key, alignment); + if (word_syms != NULL) { + std::cerr << key << ' '; + for (size_t i = 0; i < words.size(); i++) { + std::string s = word_syms->Find(words[i]); + if (s == "") + KALDI_ERR << "Word-id " << words[i] <<" not in symbol table."; + std::cerr << s << ' '; + } + std::cerr << '\n'; + } + n_done++; + n_frame += alignment.size(); + tot_weight = Times(tot_weight, weight); + } + } + + BaseFloat tot_weight_float = tot_weight.Value1() + tot_weight.Value2(); + KALDI_LOG << "Overall score per frame is " << (tot_weight_float/n_frame) + << " = " << (tot_weight.Value1()/n_frame) << " [graph]" + << " + " << (tot_weight.Value2()/n_frame) << " [acoustic]" + << " over " << n_frame << " frames."; + KALDI_LOG << "Done " << n_done << " lattices, failed for " << n_fail; + + if (word_syms) delete word_syms; + if (n_done != 0) return 0; + else return 1; + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} diff --git a/src/python-kaldi-decoding/lattice-best-path.h b/src/python-kaldi-decoding/lattice-best-path.h new file mode 100644 index 00000000000..d810489ff11 --- /dev/null +++ b/src/python-kaldi-decoding/lattice-best-path.h @@ -0,0 +1,16 @@ +// -*- coding: utf-8 -*- + +#ifndef LATTICE_BEST_PATH_H +#define LATTICE_BEST_PATH_H + +#ifdef __cplusplus +extern "C" { +#endif + +int lattice_best_path_like_main(int argc, char **argv); + +#ifdef __cplusplus +} +#endif + +#endif // #ifndef LATTICE_BEST_PATH_H diff --git a/src/python-kaldi-decoding/little_wavs_data_void_en.scp b/src/python-kaldi-decoding/little_wavs_data_void_en.scp new file mode 100644 index 00000000000..0a4368fd499 --- /dev/null +++ b/src/python-kaldi-decoding/little_wavs_data_void_en.scp @@ -0,0 +1,4 @@ +fj228x-001-100517_171607_0001813_0001882.wav ./data_voip_en/test/fj228x-001-100517_171607_0001813_0001882.wav +fj228x-001-100517_182933_0001674_0002000.wav ./data_voip_en/test/fj228x-001-100517_182933_0001674_0002000.wav +fj228x-001-100517_183334_0001637_0001921.wav ./data_voip_en/test/fj228x-001-100517_183334_0001637_0001921.wav +fj228x-001-100517_200151_0002054_0002337.wav ./data_voip_en/test/fj228x-001-100517_200151_0002054_0002337.wav diff --git a/src/python-kaldi-decoding/online-wav-gmm-decode-faster-test.c b/src/python-kaldi-decoding/online-wav-gmm-decode-faster-test.c new file mode 100644 index 00000000000..ac035502c03 --- /dev/null +++ b/src/python-kaldi-decoding/online-wav-gmm-decode-faster-test.c @@ -0,0 +1,5 @@ +#include "test_cffi_python_dyn.h" + +int main(int argc, char **argv) { + return testSharedLib("libkaldi-cffi.so", "online_wav_gmm_decode_faster_like_main", argc, argv); +} diff --git a/src/python-kaldi-decoding/online-wav-gmm-decode-faster.cc b/src/python-kaldi-decoding/online-wav-gmm-decode-faster.cc new file mode 100644 index 00000000000..de3261fb8d6 --- /dev/null +++ b/src/python-kaldi-decoding/online-wav-gmm-decode-faster.cc @@ -0,0 +1,247 @@ +// 
onlinebin/online-wav-gmm-decode-faster.cc + +// Copyright 2012 Cisco Systems (author: Matthias Paulik) + +// Modifications to the original contribution by Cisco Systems made by: +// Vassil Panayotov + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include "feat/feature-mfcc.h" +#include "feat/wave-reader.h" +#include "online/online-audio-source.h" +#include "online/online-feat-input.h" +#include "online/online-decodable.h" +#include "online/online-faster-decoder.h" +#include "online/onlinebin-util.h" + +#include "online-wav-gmm-decode-faster.h" + +int online_wav_gmm_decode_faster_like_main(int argc, char *argv[]) { + try { + using namespace kaldi; + using namespace fst; + + typedef kaldi::int32 int32; + typedef OnlineFeInput FeInput; + + // up to delta-delta derivative features are calculated (unless LDA is used) + const int32 kDeltaOrder = 2; + + const char *usage = + "Reads in wav file(s) and simulates online decoding.\n" + "Writes .tra and .ali files for WER computation. Utterance " + "segmentation is done on-the-fly.\n" + "Feature splicing/LDA transform is used, if the optional(last) argument " + "is given.\n" + "Otherwise delta/delta-delta(i.e. 2-nd order) features are produced.\n" + "Caution: the last few frames of the wav file may not be decoded properly.\n" + "Hence, don't use one wav file per utterance, but " + "rather use one wav file per show.\n\n" + "Usage: ./online-wav-gmm-decode-faster [options] wav-rspecifier model-in" + "fst-in word-symbol-table silence-phones transcript-wspecifier " + "alignments-wspecifier [lda-matrix-in]\n\n" + "Example: ./online-wav-gmm-decode-faster --rt-min=0.3 --rt-max=0.5 " + "--max-active=4000 --beam=12.0 --acoustic-scale=0.0769 " + "scp:wav.scp model HCLG.fst words.txt '1:2:3:4:5' ark,t:trans.txt ark,t:ali.txt"; + ParseOptions po(usage); + BaseFloat acoustic_scale = 0.1; + int32 cmn_window = 600, + min_cmn_window = 100; // adds 1 second latency, only at utterance start. + int32 channel = -1; + int32 right_context = 4, left_context = 4; + + OnlineFasterDecoderOpts decoder_opts; + decoder_opts.Register(&po, true); + OnlineFeatureMatrixOptions feature_reading_opts; + feature_reading_opts.Register(&po); + + po.Register("left-context", &left_context, "Number of frames of left context"); + po.Register("right-context", &right_context, "Number of frames of right context"); + po.Register("acoustic-scale", &acoustic_scale, + "Scaling factor for acoustic likelihoods"); + po.Register("cmn-window", &cmn_window, + "Number of feat. 
vectors used in the running average CMN calculation"); + po.Register("min-cmn-window", &min_cmn_window, + "Minumum CMN window used at start of decoding (adds " + "latency only at start)"); + po.Register("channel", &channel, + "Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right)"); + po.Read(argc, argv); + if (po.NumArgs() != 7 && po.NumArgs() != 8) { + po.PrintUsage(); + return 1; + } + if (po.NumArgs() == 7) + if (left_context % kDeltaOrder != 0 || left_context != right_context) + KALDI_ERR << "Invalid left/right context parameters!"; + + std::string wav_rspecifier = po.GetArg(1), + model_rspecifier = po.GetArg(2), + fst_rspecifier = po.GetArg(3), + word_syms_filename = po.GetArg(4), + silence_phones_str = po.GetArg(5), + words_wspecifier = po.GetArg(6), + alignment_wspecifier = po.GetArg(7), + lda_mat_rspecifier = po.GetOptArg(8); + + std::vector silence_phones; + if (!SplitStringToIntegers(silence_phones_str, ":", false, &silence_phones)) + KALDI_ERR << "Invalid silence-phones string " << silence_phones_str; + if (silence_phones.empty()) + KALDI_ERR << "No silence phones given!"; + + Int32VectorWriter words_writer(words_wspecifier); + Int32VectorWriter alignment_writer(alignment_wspecifier); + + Matrix lda_transform; + if (lda_mat_rspecifier != "") { + bool binary_in; + Input ki(lda_mat_rspecifier, &binary_in); + lda_transform.Read(ki.Stream(), binary_in); + } + + TransitionModel trans_model; + AmDiagGmm am_gmm; + { + bool binary; + Input ki(model_rspecifier, &binary); + trans_model.Read(ki.Stream(), binary); + am_gmm.Read(ki.Stream(), binary); + } + + fst::SymbolTable *word_syms = NULL; + if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename))) + KALDI_ERR << "Could not read symbol table from file " + << word_syms_filename; + + fst::Fst *decode_fst = ReadDecodeGraph(fst_rspecifier); + + // We are not properly registering/exposing MFCC and frame extraction options, + // because there are parts of the online decoding code, where some of these + // options are hardwired(ToDo: we should fix this at some point) + MfccOptions mfcc_opts; + mfcc_opts.use_energy = false; + int32 frame_length = mfcc_opts.frame_opts.frame_length_ms = 25; + int32 frame_shift = mfcc_opts.frame_opts.frame_shift_ms = 10; + + int32 window_size = right_context + left_context + 1; + decoder_opts.batch_size = std::max(decoder_opts.batch_size, window_size); + + OnlineFasterDecoder decoder(*decode_fst, decoder_opts, + silence_phones, trans_model); + SequentialTableReader reader(wav_rspecifier); + VectorFst out_fst; + for (; !reader.Done(); reader.Next()) { + std::string wav_key = reader.Key(); + std::cerr << "File: " << wav_key << std::endl; + const WaveData &wav_data = reader.Value(); + if(wav_data.SampFreq() != 16000) + KALDI_ERR << "Sampling rates other than 16kHz are not supported!"; + int32 num_chan = wav_data.Data().NumRows(), this_chan = channel; + { // This block works out the channel (0=left, 1=right...) + KALDI_ASSERT(num_chan > 0); // should have been caught in + // reading code if no channels. 
+ if (channel == -1) { + this_chan = 0; + if (num_chan != 1) + KALDI_WARN << "Channel not specified but you have data with " + << num_chan << " channels; defaulting to zero"; + } else { + if (this_chan >= num_chan) { + KALDI_WARN << "File with id " << wav_key << " has " + << num_chan << " channels but you specified channel " + << channel << ", producing no output."; + continue; + } + } + } + OnlineVectorSource au_src(wav_data.Data().Row(this_chan)); + Mfcc mfcc(mfcc_opts); + FeInput fe_input(&au_src, &mfcc, + frame_length*(wav_data.SampFreq()/1000), + frame_shift*(wav_data.SampFreq()/1000)); + OnlineCmnInput cmn_input(&fe_input, cmn_window, min_cmn_window); + OnlineFeatInputItf *feat_transform = 0; + if (lda_mat_rspecifier != "") { + feat_transform = new OnlineLdaInput( + &cmn_input, lda_transform, + left_context, right_context); + } else { + DeltaFeaturesOptions opts; + opts.order = kDeltaOrder; + // Note from Dan: keeping the next statement for back-compatibility, + // but I don't think this is really the right way to set the window-size + // in the delta computation: it should be a separate config. + opts.window = left_context / 2; + feat_transform = new OnlineDeltaInput(opts, &cmn_input); + } + + // feature_reading_opts contains timeout, batch size. + OnlineFeatureMatrix feature_matrix(feature_reading_opts, + feat_transform); + + OnlineDecodableDiagGmmScaled decodable(am_gmm, trans_model, acoustic_scale, + &feature_matrix); + int32 start_frame = 0; + bool partial_res = false; + while (1) { + OnlineFasterDecoder::DecodeState dstate = decoder.Decode(&decodable); + if (dstate & (decoder.kEndFeats | decoder.kEndUtt)) { + std::vector word_ids; + decoder.FinishTraceBack(&out_fst); + fst::GetLinearSymbolSequence(out_fst, + static_cast *>(0), + &word_ids, + static_cast(0)); + PrintPartialResult(word_ids, word_syms, partial_res || word_ids.size()); + partial_res = false; + + decoder.GetBestPath(&out_fst); + std::vector tids; + fst::GetLinearSymbolSequence(out_fst, + &tids, + &word_ids, + static_cast(0)); + std::stringstream res_key; + res_key << wav_key << '_' << start_frame << '-' << decoder.frame(); + if (!word_ids.empty()) + words_writer.Write(res_key.str(), word_ids); + alignment_writer.Write(res_key.str(), tids); + if (dstate == decoder.kEndFeats) + break; + start_frame = decoder.frame(); + } else { + std::vector word_ids; + if (decoder.PartialTraceback(&out_fst)) { + fst::GetLinearSymbolSequence(out_fst, + static_cast *>(0), + &word_ids, + static_cast(0)); + PrintPartialResult(word_ids, word_syms, false); + if (!partial_res) + partial_res = (word_ids.size() > 0); + } + } + } + if (feat_transform) delete feat_transform; + } + if (word_syms) delete word_syms; + if (decode_fst) delete decode_fst; + return 0; + } catch(const std::exception& e) { + std::cerr << e.what(); + return -1; + } +} // main() diff --git a/src/python-kaldi-decoding/online-wav-gmm-decode-faster.h b/src/python-kaldi-decoding/online-wav-gmm-decode-faster.h new file mode 100644 index 00000000000..334bbdb8cd3 --- /dev/null +++ b/src/python-kaldi-decoding/online-wav-gmm-decode-faster.h @@ -0,0 +1,17 @@ + +// -*- coding: utf-8 -*- + +#ifndef ONLINE_WAV_GMM_DECODE_FASTER_H +#define ONLINE_WAV_GMM_DECODE_FASTER_H + +#ifdef __cplusplus +extern "C" { +#endif + +int online_wav_gmm_decode_faster_like_main(int argc, char *argv[]) ; + +#ifdef __cplusplus +} +#endif + +#endif // #ifndef ONLINE_WAV_GMM_DECODE_FASTER_H diff --git a/src/python-kaldi-decoding/ordereddefaultdict.py b/src/python-kaldi-decoding/ordereddefaultdict.py new file 
mode 100644 index 00000000000..085836792ae --- /dev/null +++ b/src/python-kaldi-decoding/ordereddefaultdict.py @@ -0,0 +1,46 @@ +""" +Combine functionality from ordered and default dict. +The implementation is taken from: +http://stackoverflow.com/questions/6190331/can-i-do-an-ordered-default-dict-in-python +""" +from collections import OrderedDict, Callable + + +class DefaultOrderedDict(OrderedDict): + def __init__(self, default_factory=None, *a, **kw): + if (default_factory is not None and not isinstance(default_factory, Callable)): + raise TypeError('first argument must be callable') + OrderedDict.__init__(self, *a, **kw) + self.default_factory = default_factory + + def __getitem__(self, key): + try: + return OrderedDict.__getitem__(self, key) + except KeyError: + return self.__missing__(key) + + def __missing__(self, key): + if self.default_factory is None: + raise KeyError(key) + self[key] = value = self.default_factory() + return value + + def __reduce__(self): + if self.default_factory is None: + args = tuple() + else: + args = self.default_factory, + return type(self), args, None, None, self.items() + + def copy(self): + return self.__copy__() + + def __copy__(self): + return type(self)(self.default_factory, self) + + def __deepcopy__(self, memo): + import copy + return type(self)(self.default_factory, copy.deepcopy(self.items())) + + def __repr__(self): + return 'OrderedDefaultDict(%s, %s)' % (self.default_factory, OrderedDict.__repr__(self)) diff --git a/src/python-kaldi-decoding/run.py b/src/python-kaldi-decoding/run.py new file mode 100755 index 00000000000..e49dceb15ac --- /dev/null +++ b/src/python-kaldi-decoding/run.py @@ -0,0 +1,291 @@ +#!/usr/bin/env python +from cffi import FFI +from collections import namedtuple +# import sys +import os +import errno +from ordereddefaultdict import DefaultOrderedDict +from subprocess import check_output + +cwd = os.path.abspath(os.path.curdir) + +MfccParams = namedtuple( + 'MfccParams', ['mfcc_dir', 'mfcc_config', 'wav_scp', 'mfcc_ark', 'mfcc_scp']) +LatgenParams = namedtuple( + 'LatgenParams', ['decode_dir', 'max_active', 'beam', 'latbeam', 'acoustic_scale', 'wst', 'model', + 'hclg', 'utt2spk', 'cmvn_scp', 'feats_scp', 'lattice_arch']) +BestPathParams = namedtuple('BestPathParams', ['lm_scale', 'wst', 'lattice_arch', 'trans']) +WerParams = namedtuple('WerParams', ['reference', 'hypothesis']) +OnlineParams = namedtuple( + 'OnlineParams', ['decode_dir', 'rt_min', 'rt_max', 'max_active', 'beam', 'acoustic_scale', + 'wav_scp', 'wst', 'model', 'hclg', 'trans', 'align']) + + +class CffiKaldiError(Exception): + def __init__(self, retcode): + self.retcode = retcode + + def __str__(self): + return 'CffiKaldi with return code: %s' % repr(self.retcode) + + +def mymkdir(path): + try: + os.makedirs(path) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise + + +def run_mfcc(ffi, mfcclib, mfccPar): + '''Settings and arguments based on /ha/work/people/oplatek/kaldi-trunk/egs/kaldi- + vystadial-recipe/s5/steps/make_mfcc.sh''' + mymkdir(mfccPar.mfcc_dir) + mfcc_args = ['mfcc_unused', '--verbose=2', + '--config=%s' % mfccPar.mfcc_config, + 'scp:%s' % mfccPar.wav_scp, + 'ark,scp:%(mfcc_ark)s,%(mfcc_scp)s' % mfccPar.__dict__] + + try: + mfcc_argkeepalive = [ffi.new("char[]", arg) for arg in mfcc_args] + mfcc_argv = ffi.new("char *[]", mfcc_argkeepalive) + retcode = mfcclib.compute_mfcc_feats_like_main( + len(mfcc_args), mfcc_argv) + if retcode != 0: + raise CffiKaldiError(retcode) + return mfccPar.mfcc_scp + except Exception as e: + 
print 'Failed running mfcc!'
+        print e
+        raise
+
+
+def run_decode(ffi, decodelib, latgenPar):
+    '''Settings and arguments based on /ha/work/people/oplatek/kaldi-trunk/egs/kaldi-
+    vystadial-recipe/s5/steps/decode.sh'''
+    mymkdir(latgenPar.decode_dir)
+    # feats for delta, not LDA
+    decode_args = ['decode_unused', '--max-active=%s' % latgenPar.max_active,
+                   '--beam=%s' % latgenPar.beam,
+                   '--lattice-beam=%s' % latgenPar.latbeam,
+                   '--acoustic-scale=%s' % latgenPar.acoustic_scale,
+                   '--allow-partial=true',
+                   '--word-symbol-table=%s' % latgenPar.wst,
+                   latgenPar.model,
+                   latgenPar.hclg,
+                   'ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:%(utt2spk)s scp:%(cmvn_scp)s scp:%(feats_scp)s ark:- | add-deltas ark:- ark:- |' % latgenPar.__dict__,
+                   'ark:|gzip -c > %s' % latgenPar.lattice_arch]
+
+    try:
+        decode_argkeepalive = [ffi.new("char[]", arg) for arg in decode_args]
+        decode_argv = ffi.new("char *[]", decode_argkeepalive)
+        retcode = decodelib.gmm_latgen_faster_like_main(
+            len(decode_args), decode_argv)
+        if retcode != 0:
+            raise CffiKaldiError(retcode)
+        print 'Running decode finished!'
+        return latgenPar.lattice_arch
+    except Exception as e:
+        print 'Failed running decode!'
+        print e
+        raise
+
+
+def run_bestpath(ffi, bestpathlib, bpPar):
+    ''' Settings and arguments based on /ha/work/people/oplatek/kaldi-trunk/egs/kaldi-
+    vystadial-recipe/s5/local/score.sh'''
+    bestpath_args = ['bestpath_unused', '--lm-scale=%s' % bpPar.lm_scale,
+                     '--word-symbol-table=%s' % bpPar.wst,
+                     'ark:gunzip -c %s|' % bpPar.lattice_arch,
+                     'ark,t:%s' % bpPar.trans]
+    try:
+        bestpath_argkeepalive = [ffi.new("char[]", arg)
+                                 for arg in bestpath_args]
+        bestpath_argv = ffi.new("char *[]", bestpath_argkeepalive)
+        retcode = bestpathlib.lattice_best_path_like_main(
+            len(bestpath_args), bestpath_argv)
+        if retcode != 0:
+            raise CffiKaldiError(retcode)
+        return bpPar.trans
+    except Exception as e:
+        print 'Failed running bestpath!'
+        print e
+        raise
+
+
+def computeWer(ffi, werlib, werPar):
+    '''Settings and arguments based on /ha/work/people/oplatek/kaldi-trunk/egs/kaldi-
+    vystadial-recipe/s5/local/score.sh
+    | compute-wer --text --mode=present ark:exp/tri2a/decode/scoring/test_filt.txt ark,p:- >&
+    exp/tri2a/decode/wer_15'''
+
+    wer_args = ['wer_unused', '--text',
+                '--mode=present',
+                'ark:%s' % werPar.reference,
+                'ark:%s' % werPar.hypothesis]
+    try:
+        wer_argkeepalive = [ffi.new("char[]", arg) for arg in wer_args]
+        wer_argv = ffi.new("char *[]", wer_argkeepalive)
+        retcode = werlib.compute_wer_like_main(len(wer_args), wer_argv)
+        if retcode != 0:
+            raise CffiKaldiError(retcode)
+    except Exception as e:
+        print 'Failed running compute_wer!'
+        print e
+        raise
+
+
+def buildReference(wav_scp, ref_path):
+    with open(ref_path, 'w') as w:
+        with open(wav_scp, 'r') as scp:
+            for line in scp:
+                name, wavpath = line.strip().split(' ', 1)
+                with open(wavpath + '.trn') as trn:
+                    trans = trn.read()
+                w.write('%s %s\n' % (name, trans))
+
+
+def int2txt(trans_path, trans_path_txt, wst, sym_OOV='<UNK>'):
+    ''' based on: cat exp/tri2a/decode/scoring/15.tra | utils/int2sym.pl -f 2-
+    exp/tri2a/graph/words.txt | sed s:\<UNK\>::g'''
+    with open(trans_path, 'rb') as r:
+        with open(trans_path_txt, 'wb') as w:
+            out = check_output(['utils/int2sym.pl', '-f', '2-', wst], stdin=r)
+            noUNK = out.replace(sym_OOV, '')
+            w.write(noUNK)
+
+
+def run_online(ffi, onlinelib, onlinePar):
+    ''' Based on kaldi-trunk/egs/voxforge/online_demo/run.sh'''
+    mymkdir(onlinePar.decode_dir)
+    online_args = ['online_unused',
+                   '--verbose=1',
+                   '--rt-min=%s' % onlinePar.rt_min,
+                   '--rt-max=%s' % onlinePar.rt_max,
+                   '--max-active=%s' % onlinePar.max_active,
+                   '--beam=%s' % onlinePar.beam,
+                   '--acoustic-scale=%s' % onlinePar.acoustic_scale,
+                   'scp:%s' % onlinePar.wav_scp,
+                   onlinePar.model, onlinePar.hclg,
+                   onlinePar.wst, '1:2:3:4:5',
+                   'ark,t:%s' % onlinePar.trans,
+                   'ark,t:%s' % onlinePar.align]
+    try:
+        online_argkeepalive = [ffi.new("char[]", arg) for arg in online_args]
+        online_argv = ffi.new("char *[]", online_argkeepalive)
+        retcode = onlinelib.online_wav_gmm_decode_faster_like_main(
+            len(online_args), online_argv)
+        if retcode != 0:
+            raise CffiKaldiError(retcode)
+        return onlinePar.trans
+    except Exception as e:
+        print 'Failed running online!'
+        print e
+        raise
+
+
+def compactHyp(hyp_path, comp_hyp_path):
+    d = DefaultOrderedDict(list)
+    with open(hyp_path, 'r') as hyp:
+        for line in hyp:
+            name_, align_dec = line.strip().split('wav_')
+            name, dec = name_ + 'wav', align_dec.strip().split()[1:]
+            d[name].extend(dec)
+    with open(comp_hyp_path, 'w') as w:
+        for wav, dec_list in d.iteritems():
+            w.write('%s %s\n' % (wav, ' '.join(dec_list)))
+
+if __name__ == '__main__':
+    ffi = FFI()
+
+    # FIXME check if preprocessor directives work in cffi
+    # with open('../base/kaldi-types.h', 'r') as r:
+    #     int_header = r.read()
+    # ffi.cdef(int_header)
+
+    header = '''
+    int compute_mfcc_feats_like_main(int argc, char **argv);
+    int gmm_latgen_faster_like_main(int argc, char **argv);
+    int lattice_best_path_like_main(int argc, char **argv);
+    int compute_wer_like_main(int argc, char **argv);
+    int online_wav_gmm_decode_faster_like_main(int argc, char *argv[]);
+    '''
+    ffi.cdef(header)
+    s5_dir = '../../egs/kaldi-vystadial-recipe/s5'
+    exp_dir = s5_dir + '/Results/exp_6_aa7263b3f5c151409a87e3d845d58e39335a4f0c'
+    data_dir = s5_dir + '/Results/data_6_aa7263b3f5c151409a87e3d845d58e39335a4f0c'
+    decodedir = cwd + '/decode'
+    try:
+        lib = ffi.dlopen('libkaldi-cffi.so')
+
+        mfccPar = MfccParams(
+            mfcc_dir='mfcc',
+            mfcc_config=s5_dir + '/conf/mfcc.conf',
+            wav_scp='little_wavs_data_void_en.scp',
+            mfcc_ark='mfcc/raw_mfcc.ark',
+            mfcc_scp='mfcc/raw_mfcc.scp')
+        run_mfcc(ffi, lib, mfccPar)
+        print 'running mfcc finished'
+
+        latgenPar = LatgenParams(
+            decode_dir=decodedir,
+            max_active='7000',
+            beam='13.0',
+            latbeam='6.0',
+            acoustic_scale='0.083333',
+            wst=exp_dir + '/tri2a/graph/words.txt',
+            model=exp_dir + '/tri2a/final.mdl',
+            hclg=exp_dir + '/tri2a/graph/HCLG.fst',
+            utt2spk=data_dir + '/test/utt2spk',
+            # TODO create the version of mfcc dir and change paths in cmvn!
+            cmvn_scp=data_dir + '/test/cmvn.scp',
+            feats_scp=mfccPar.mfcc_scp,
+            lattice_arch=decodedir + '/lat.gz')
+        run_decode(ffi, lib, latgenPar)
+        print 'running decode finished'
+
+        bpPar = BestPathParams(
+            lm_scale='15',
+            wst=latgenPar.wst,
+            lattice_arch=latgenPar.lattice_arch,
+            trans=latgenPar.decode_dir + '/trans')
+        run_bestpath(ffi, lib, bpPar)
+        print 'running bestpath finished'
+
+        onlinePar = OnlineParams(
+            decode_dir=decodedir,
+            rt_min='0.8',
+            rt_max='0.85',
+            max_active='4000',
+            beam='12.0',
+            acoustic_scale='0.0769',
+            wav_scp=mfccPar.wav_scp,
+            wst=latgenPar.wst,
+            model=latgenPar.model,
+            hclg=latgenPar.hclg,
+            trans=decodedir + '/online_trans',
+            align=decodedir + '/online_align')
+        run_online(ffi, lib, onlinePar)
+
+        ### Evaluating experiments
+        ref = decodedir + '/reference.txt'
+        buildReference(mfccPar.wav_scp, ref)
+
+        # Evaluate latgen decoding
+        lat_trans_text = bpPar.trans + '.txt'
+        int2txt(bpPar.trans, lat_trans_text, latgenPar.wst)
+        lat_werPar = WerParams(hypothesis=lat_trans_text, reference=ref)
+        computeWer(ffi, lib, lat_werPar)
+        print 'running WER for latgen finished'
+
+        # Evaluate online decoding
+        onl_transtxttmp, onl_transtxt = onlinePar.trans + '.tmp', onlinePar.trans + '.txt'
+        int2txt(onlinePar.trans, onl_transtxttmp, onlinePar.wst)
+        compactHyp(onl_transtxttmp, onl_transtxt)
+        onl_werPar = WerParams(hypothesis=onl_transtxt, reference=ref)
+        computeWer(ffi, lib, onl_werPar)
+        print 'running WER for online finished'
+    except OSError as e:
+        print 'Maybe you forgot to set LD_LIBRARY_PATH?'
+        print e
+        raise
diff --git a/src/python-kaldi-decoding/test_cffi_python_dyn.h b/src/python-kaldi-decoding/test_cffi_python_dyn.h
new file mode 100644
index 00000000000..668e6b0d590
--- /dev/null
+++ b/src/python-kaldi-decoding/test_cffi_python_dyn.h
@@ -0,0 +1,39 @@
+#ifndef TEST_CFFI_PYTHON_H
+#define TEST_CFFI_PYTHON_H
+#include <stdio.h>
+#include <dlfcn.h>
+
+
+/** Links ******
+ - http://www.isotton.com/devel/docs/C++-dlopen-mini-HOWTO/C++-dlopen-mini-HOWTO.html
+ - http://stackoverflow.com/questions/12762910/c-undefined-symbols-when-loading-shared-library-with-dlopen
+ - http://kaldi.sourceforge.net/matrixwrap.html  # see "Missing the ATLAS implementation of (parts of) CLAPACK"
+ - you have to choose lapack_atlas / lapack / clapack .. check the symbols
+
+**********************/
+
+typedef int (*f_t)(int c, char **ar);
+
+int testSharedLib(char *nameLib, char *nameFce, int argc, char **argv) {
+  void *lib = dlopen(nameLib, RTLD_NOW);
+  if (!lib) {
+    printf("Cannot open library: %s\n", dlerror());
+    return 1;
+  }
+
+  dlerror();  // reset errors
+  f_t f = (f_t)dlsym(lib, nameFce);
+  const char *dlsym_error = dlerror();
+  if (dlsym_error) {
+    printf("Cannot load symbol '%s', %s\n", nameFce, dlsym_error);
+    dlclose(lib);
+    return 1;
+  }
+
+  // using the function
+  int retval = f(argc, argv);
+
+  dlclose(lib);
+  return retval;
+}
+#endif  // #ifndef TEST_CFFI_PYTHON_H
diff --git a/src/vystadial-decoder/.ycm_extra_conf.py b/src/vystadial-decoder/.ycm_extra_conf.py
new file mode 100644
index 00000000000..8b835f3de42
--- /dev/null
+++ b/src/vystadial-decoder/.ycm_extra_conf.py
@@ -0,0 +1,145 @@
+# This file is NOT licensed under the GPLv3, which is the license for the rest
+# of YouCompleteMe.
+#
+# Here's the license text for this file:
+#
+# This is free and unencumbered software released into the public domain.
+# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# For more information, please refer to + +import os +import ycm_core +from clang_helpers import PrepareClangFlags + +# These are the compilation flags that will be used in case there's no +# compilation database set (by default, one is not set). +# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR. +flags = [ + '-Wall', + '-Wextra', + '-Werror', + '-Wc++98-compat', + '-Wno-long-long', + '-Wno-variadic-macros', + '-fexceptions', + '-DNDEBUG', + '-DUSE_CLANG_COMPLETER', + # THIS IS IMPORTANT! Without a "-std=" flag, clang won't know which + # language to use when compiling headers. So it will guess. Badly. So C++ + # headers will be compiled as C headers. You don't want that so ALWAYS specify + # a "-std=". + # For a C project, you would set this to something like 'c99' instead of + # 'c++11'. + '-std=c++11', + # ...and the same thing goes for the magic -x option which specifies the + # language that the files to be compiled are written in. This is mostly + # relevant for c++ headers. + # For a C project, you would set this to 'c' instead of 'c++'. + '-x', + 'c++', + # Set for Kaldi project where all sits side by side in src + '-I', + '..', + # Set for Kaldi external dependencies sitting in ../../tools + '-isystem', + '../../tools/openfst/include', + '-isystem', + '../../tools/ATLAS/include', + '-I', + '.', +] + +# Set this to the absolute path to the folder (NOT the file!) containing the +# compile_commands.json file to use that instead of 'flags'. See here for +# more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html +# +# Most projects will NOT need to set this to anything; you can just change the +# 'flags' list of compilation flags. Notice that YCM itself uses that approach. 
+compilation_database_folder = ''
+
+if compilation_database_folder:
+    database = ycm_core.CompilationDatabase(compilation_database_folder)
+else:
+    database = None
+
+
+def DirectoryOfThisScript():
+    return os.path.dirname(os.path.abspath(__file__))
+
+
+def MakeRelativePathsInFlagsAbsolute(flags, working_directory):
+    if not working_directory:
+        return flags
+    new_flags = []
+    make_next_absolute = False
+    path_flags = ['-isystem', '-I', '-iquote', '--sysroot=']
+    for flag in flags:
+        new_flag = flag
+
+        if make_next_absolute:
+            make_next_absolute = False
+            if not flag.startswith('/'):
+                new_flag = os.path.join(working_directory, flag)
+
+        for path_flag in path_flags:
+            if flag == path_flag:
+                make_next_absolute = True
+                break
+
+            if flag.startswith(path_flag):
+                path = flag[len(path_flag):]
+                new_flag = path_flag + os.path.join(working_directory, path)
+                break
+
+        if new_flag:
+            new_flags.append(new_flag)
+    return new_flags
+
+
+def FlagsForFile(filename):
+    if database:
+        # Bear in mind that compilation_info.compiler_flags_ does NOT return a
+        # python list, but a "list-like" StringVec object
+        compilation_info = database.GetCompilationInfoForFile(filename)
+        final_flags = PrepareClangFlags(
+            MakeRelativePathsInFlagsAbsolute(
+                compilation_info.compiler_flags_,
+                compilation_info.compiler_working_dir_),
+            filename)
+
+        # NOTE: This is just for YouCompleteMe; it's highly likely that your project
+        # does NOT need to remove the stdlib flag. DO NOT USE THIS IN YOUR
+        # ycm_extra_conf IF YOU'RE NOT 100% SURE YOU NEED IT.
+        try:
+            final_flags.remove('-stdlib=libc++')
+        except ValueError:
+            pass
+    else:
+        relative_to = DirectoryOfThisScript()
+        final_flags = MakeRelativePathsInFlagsAbsolute(flags, relative_to)
+
+    return {
+        'flags': final_flags,
+        'do_cache': True
+    }
diff --git a/src/vystadial-decoder/README.md b/src/vystadial-decoder/README.md
new file mode 100644
index 00000000000..43a45177cc1
--- /dev/null
+++ b/src/vystadial-decoder/README.md
@@ -0,0 +1,86 @@
+Intro
+-----
+This repository contains a first attempt to build an
+online Kaldi decoder that accepts raw audio packets.
+
+The decoder should have a simple interface,
+because the next step will be exposing the decoder's functionality to Python.
+
+Workflow of KALDI decoding in a few lines
+---------------------
+```cpp
+Classes:
+    OnlineFeInput
+    MfccOptions  // Usage: Mfcc mfcc(mfcc_opts);
+    Mfcc         // ?placeholder for mfcc features?
+
+    Mfcc mfcc(mfcc_opts);
+    FeInput fe_input(&au_src, &mfcc, ..)
+    OnlineCmnInput cmn_input(&fe_input, ..);
+    feat_transform = new OnlineLdaInput(&cmn_input, ..)
+    OnlineDecodableDiagGmmScaled decodable(feat_transform, ..)
+    while (1) {
+        OnlineFasterDecoder::DecodeState dstate = decoder.Decode(&decodable);
+        // different stuff for the online decoder: decoder.FinishTraceBack(&out_fst);
+```
+
+
+Classes for the online decoder
+--------------------------
+```cpp
+// in online/online-audio-source.h
+class OnlineVectorSource
+    int32 Read(VectorBase<BaseFloat> *data, uint32 *timeout = 0);
+
+// in online/online-decodable.h
+// A decodable, taking input from an OnlineFeatureInput object on-demand
+class OnlineDecodableDiagGmmScaled : public DecodableInterface
+    virtual BaseFloat LogLikelihood(int32 frame, int32 index);
+    virtual bool IsLastFrame(int32 frame);
+    virtual int32 NumIndices()  /// Indices are one-based!  This is for compatibility with OpenFst.
+
+// in online/online-feat-input.h
+class OnlineFeatInputItf
+    virtual bool Compute(Matrix<BaseFloat> *output, uint32 *timeout) = 0;
+
+// in online/online-feat-input.h
+class OnlineCmnInput
+    ApplyCmvn
+
+// in online/online-faster-decoder.h
+struct OnlineFasterDecoderOpts : public FasterDecoderOptions
+    void Register(ParseOptions *po, bool full)
+
+
+// in online/online-faster-decoder.h
+class OnlineFasterDecoder : public FasterDecoder
+    // Codes returned by Decode() to show the current state of the decoder
+    enum DecodeState {
+        kEndFeats = 1,  // No more scores are available from the Decodable
+        kEndUtt = 2,  // End of utterance, caused by e.g. a sufficiently long silence
+        kEndBatch = 4  // End of batch - end of utterance not reached yet
+    };
+    DecodeState Decode(DecodableInterface *decodable);
+
+    // Makes a linear graph, by tracing back from the last "immortal" token
+    // to the previous one
+    bool PartialTraceback(fst::MutableFst<LatticeArc> *out_fst);
+
+    // Makes a linear graph, by tracing back from the best currently active token
+    // to the last immortal token. This method is meant to be invoked at the end
+    // of an utterance in order to get the last chunk of the hypothesis
+    void FinishTraceBack(fst::MutableFst<LatticeArc> *fst_out);
+
+    // Returns "true" if the best current hypothesis ends with long enough silence
+    bool EndOfUtterance();
+
+    int32 frame() { return frame_; }
+
+```
+
+
+Reading the wav source
+----------------------
+```cpp
+    SequentialTableReader<WaveHolder> reader(std::string wav_rspecifier);
+```
diff --git a/tools/.gitignore b/tools/.gitignore
new file mode 100644
index 00000000000..2e3c19f3217
--- /dev/null
+++ b/tools/.gitignore
@@ -0,0 +1,7 @@
+ATLAS
+OpenBLAS
+openfst
+openfst-1.3.2
+portaudio
+sctk-2.4.0
+sph2pipe_v2.5
diff --git a/tools/Makefile b/tools/Makefile
index e2c57027d12..5c37e7bfd0c 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -47,7 +47,7 @@ openfst/lib:
 
 openfst-1.3.2/Makefile: openfst-1.3.2/.patched
 	cd openfst-1.3.2/; \
-	./configure --prefix=`pwd` --enable-static --disable-shared --enable-far --enable-ngram-fsts
+	./configure --prefix=`pwd` --enable-static --enable-shared --enable-far --enable-ngram-fsts
 
 .PHONY: openfst-1.3.2/.patched
diff --git a/tools/extras/install_portaudio.sh b/tools/extras/install_portaudio.sh
index b365af33d2f..07228025ac1 100755
--- a/tools/extras/install_portaudio.sh
+++ b/tools/extras/install_portaudio.sh
@@ -89,7 +89,7 @@ if [ -z "$MACOS" ]; then
     echo "${pa_patch}" | patch -p0 Makefile.in
 fi
 
-./configure --prefix=`pwd`/install
+./configure --prefix=`pwd`/install --with-pic
 
 if [ "$MACOS" != "" ]; then