Commit

Merge branch 'master' into patch-1
danpovey committed Nov 13, 2023
2 parents 8c3c0bc + 21ae411 commit 3675219
Showing 122 changed files with 2,835 additions and 349 deletions.
2 changes: 1 addition & 1 deletion cmake/gen_cmake_skeleton.py
@@ -269,7 +269,7 @@ def gen_code(self):

if len(self.depends) > 0:
ret.append("target_link_libraries(" + self.target_name + " PUBLIC")
-for d in self.depends:
+for d in self.depends + ['-lcblas', '-llapack']:
ret.append(" " + d)
ret.append(")\n")

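This change makes every generated target link against the system CBLAS and LAPACK, so both libraries must be visible to the linker at build time. A quick sanity check on a Debian/Ubuntu host (a sketch; package names vary by distribution):

ldconfig -p | grep -E 'libcblas|liblapack'   # both should be listed
# if missing, e.g.: sudo apt-get install libatlas-base-dev liblapack-dev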
46 changes: 46 additions & 0 deletions docker/ubuntu22.04-cuda12.2.0/Dockerfile
@@ -0,0 +1,46 @@
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
LABEL maintainer="williamhilton.works@gmail.com"

RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
g++ \
make \
automake \
bzip2 \
unzip \
wget \
sox \
libtool \
git \
subversion \
python2.7 \
python3 \
zlib1g-dev \
ca-certificates \
gfortran \
patch \
ffmpeg \
vim && \
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common && \
apt-add-repository multiverse && \
apt-get update && \
yes | DEBIAN_FRONTEND=noninteractive apt-get install -yqq --no-install-recommends \
intel-mkl && \
rm -rf /var/lib/apt/lists/*

RUN ln -s /usr/bin/python2.7 /usr/bin/python

RUN git clone --depth 1 https://github.com/kaldi-asr/kaldi.git /opt/kaldi && \
cd /opt/kaldi/tools && \
make -j $(nproc) && \
cd /opt/kaldi/src && \
./configure --shared --use-cuda && \
make depend -j $(nproc) && \
make -j $(nproc) && \
find /opt/kaldi -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
rm -rf /opt/kaldi/.git

WORKDIR /opt/kaldi/
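A minimal sketch for building and running this image (tag name assumed; --gpus requires the NVIDIA Container Toolkit):

docker build -t kaldi:cuda12.2.0 docker/ubuntu22.04-cuda12.2.0/
docker run --rm --gpus all -it kaldi:cuda12.2.0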
2 changes: 1 addition & 1 deletion egs/ami/s5/run_ihm.sh
@@ -17,7 +17,7 @@ set -euxo pipefail
# Path where AMI gets downloaded (or where locally available):
AMI_DIR=$PWD/wav_db # Default,
case $(hostname -d) in
-fit.vutbr.cz) AMI_DIR=/mnt/matylda5/iveselyk/KALDI_AMI_WAV ;; # BUT,
+fit.vutbr.cz) AMI_DIR=/mnt/matylda2/data/AMI_KALDI_DOWNLOAD ;; # BUT,
clsp.jhu.edu) AMI_DIR=/export/corpora4/ami/amicorpus ;; # JHU,
cstr.ed.ac.uk) AMI_DIR= ;; # Edinburgh,
esac
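The case statement keys on the host's DNS domain (hostname -d), so another site can pin its own local copy of the corpus by adding a branch; a sketch with a hypothetical domain and path:

case $(hostname -d) in
  example.edu) AMI_DIR=/data/corpora/amicorpus ;;  # hypothetical site
esac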
2 changes: 1 addition & 1 deletion egs/ami/s5/run_mdm.sh
@@ -10,7 +10,7 @@ mic=mdm$nmics
# Path where AMI gets downloaded (or where locally available):
AMI_DIR=$PWD/wav_db # Default,
case $(hostname -d) in
-fit.vutbr.cz) AMI_DIR=/mnt/matylda5/iveselyk/KALDI_AMI_WAV ;; # BUT,
+fit.vutbr.cz) AMI_DIR=/mnt/matylda2/data/AMI_KALDI_DOWNLOAD ;; # BUT,
clsp.jhu.edu) AMI_DIR=/export/corpora4/ami/amicorpus ;; # JHU,
cstr.ed.ac.uk) AMI_DIR= ;; # Edinburgh,
esac
2 changes: 1 addition & 1 deletion egs/ami/s5/run_sdm.sh
@@ -17,7 +17,7 @@ set -euxo pipefail
# Path where AMI gets downloaded (or where locally available):
AMI_DIR=$PWD/wav_db # Default,
case $(hostname -d) in
-fit.vutbr.cz) AMI_DIR=/mnt/matylda5/iveselyk/KALDI_AMI_WAV ;; # BUT,
+fit.vutbr.cz) AMI_DIR=/mnt/matylda2/data/AMI_KALDI_DOWNLOAD ;; # BUT,
clsp.jhu.edu) AMI_DIR=/export/corpora4/ami/amicorpus ;; # JHU,
cstr.ed.ac.uk) AMI_DIR= ;; # Edinburgh,
esac
2 changes: 1 addition & 1 deletion egs/ami/s5b/cmd.sh
@@ -15,7 +15,7 @@ export decode_cmd="queue.pl --mem 2G"
# the use of cuda_cmd is deprecated, used only in 'nnet1',
export cuda_cmd="queue.pl --gpu 1 --mem 20G"

-if [[ "$(hostname -f)" == "*.fit.vutbr.cz" ]]; then
+if [[ "$(hostname -d)" == "fit.vutbr.cz" ]]; then
queue_conf=$HOME/queue_conf/default.conf # see example /homes/kazi/iveselyk/queue_conf/default.conf,
export train_cmd="queue.pl --config $queue_conf --mem 2G --matylda 0.2"
export decode_cmd="queue.pl --config $queue_conf --mem 3G --matylda 0.1"
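This fix matters because inside [[ ... ]] a quoted right-hand side is compared literally rather than as a glob, so the old test against "*.fit.vutbr.cz" could never match a real FQDN. Comparing hostname -d (the domain part) with the exact string avoids pattern matching altogether. A small illustration:

host=server1.fit.vutbr.cz
[[ "$host" == "*.fit.vutbr.cz" ]] && echo match    # no output: quoted pattern is literal
[[ "$host" == *.fit.vutbr.cz ]] && echo match      # prints match: unquoted glob
[[ "${host#*.}" == "fit.vutbr.cz" ]] && echo match # prints match: compares the domain part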
50 changes: 50 additions & 0 deletions egs/ami/s5b/conf/ami_beamformit.cfg
@@ -0,0 +1,50 @@
#BeamformIt sample configuration file for AMI data (http://groups.inf.ed.ac.uk/ami/download/)

# scrolling size to compute the delays
scroll_size = 250

# cross correlation computation window size
window_size = 500

# maximum number of cross-correlation peaks taken into account
nbest_amount = 4

# flag whether to apply automatic noise thresholding
do_noise_threshold = 1

# percentage of frames with the lowest xcorr treated as noisy
noise_percent = 10

######## acoustic modelling parameters

#transition probabilities weight for multichannel decoding
trans_weight_multi = 25
trans_weight_nbest = 25

###

# flag whether to print the features after setting them, or not
print_features = 1

# flag whether to use the bad frames in the sum process
do_avoid_bad_frames = 1

#flag to use the best channel (SNR) as a reference
#defined from command line
do_compute_reference = 1

# flag whether to use a UEM file or not (otherwise process the whole file)
do_use_uem_file = 0

# flag whether to use an adaptive weights scheme or fixed weights
do_adapt_weights = 1

# flag whether to output the sph files or just run the system to create the auxiliary files
do_write_sph_files = 1

####directories where to store/retrieve info####
#channels_file = ./cfg-files/channels

# the show id normally needs to be passed as an argument; a default is given here just in case
#show_id = Ttmp

2 changes: 1 addition & 1 deletion egs/ami/s5b/run.sh
@@ -28,7 +28,7 @@ set -euo pipefail
# Path where AMI gets downloaded (or where locally available):
AMI_DIR=$PWD/wav_db # Default,
case $(hostname -d) in
-fit.vutbr.cz) AMI_DIR=/mnt/matylda5/iveselyk/KALDI_AMI_WAV ;; # BUT,
+fit.vutbr.cz) AMI_DIR=/mnt/matylda2/data/AMI_KALDI_DOWNLOAD ;; # BUT,
clsp.jhu.edu) AMI_DIR=/export/corpora4/ami/amicorpus ;; # JHU,
cstr.ed.ac.uk) AMI_DIR= ;; # Edinburgh,
esac
8 changes: 4 additions & 4 deletions egs/ami/s5c/run.sh
@@ -3,7 +3,7 @@
# Apache 2.0.
#
# This recipe performs diarization for the mix-headset data in the
-# AMI dataset. The x-vector extractor we use is trained on VoxCeleb v2
+# AMI dataset. The x-vector extractor we use is trained on VoxCeleb v2
# corpus with simulated RIRs. We use oracle SAD in this recipe.
# This recipe demonstrates the following:
# 1. Diarization using x-vector and clustering (AHC, VBx, spectral)
@@ -38,7 +38,7 @@ diarizer_type=spectral # must be one of (ahc, spectral, vbx)
# Path where AMI gets downloaded (or where locally available):
AMI_DIR=$PWD/wav_db # Default,
case $(hostname -d) in
-fit.vutbr.cz) AMI_DIR=/mnt/matylda5/iveselyk/KALDI_AMI_WAV ;; # BUT,
+fit.vutbr.cz) AMI_DIR=/mnt/matylda2/data/AMI_KALDI_DOWNLOAD ;; # BUT,
clsp.jhu.edu) AMI_DIR=/export/corpora5/amicorpus ;; # JHU,
cstr.ed.ac.uk) AMI_DIR= ;; # Edinburgh,
esac
@@ -57,7 +57,7 @@ if [ $stage -le 1 ]; then
local/ami_download.sh $mic $AMI_DIR
fi

-# Prepare data directories.
+# Prepare data directories.
if [ $stage -le 2 ]; then
# Download the data split and references from BUT's AMI setup
if ! [ -d AMI-diarization-setup ]; then
@@ -120,7 +120,7 @@ if [ $stage -le 6 ]; then
transform-vec $model_dir/xvectors_plda_train/transform.mat ark:- ark:- |\
ivector-normalize-length ark:- ark:- |" \
$model_dir/xvectors_plda_train/plda || exit 1;

cp $model_dir/xvectors_plda_train/plda $model_dir/
cp $model_dir/xvectors_plda_train/transform.mat $model_dir/
cp $model_dir/xvectors_plda_train/mean.vec $model_dir/
4 changes: 4 additions & 0 deletions egs/gop_speechocean762/s5/local/visualize_feats.py
@@ -8,6 +8,7 @@
import random
import kaldi_io
import seaborn as sns
+import numpy as np
from collections import Counter
from sklearn.manifold import TSNE
from utils import load_human_scores, load_phone_symbol_table
@@ -62,6 +63,9 @@ def main():
min(args.samples, len(lables)))
features, lables = list(zip(*sampled_paris))

+# Convert the tuple of arrays to a single 2D array
+features = np.vstack(features)

# Draw scatters
label_counter = Counter(lables)
colors = sns.color_palette("colorblind", len(label_counter))
2 changes: 1 addition & 1 deletion egs/wsj/s5/utils/fix_data_dir.sh
@@ -54,7 +54,7 @@ function check_sorted {
}

for x in utt2spk spk2utt feats.scp text segments wav.scp cmvn.scp vad.scp \
-reco2file_and_channel spk2gender utt2lang utt2uniq utt2dur reco2dur utt2num_frames; do
+reco2file_and_channel spk2gender utt2lang utt2uniq utt2dur reco2dur utt2num_frames $utt_extra_files $spk_extra_files; do
if [ -f $data/$x ]; then
cp $data/$x $data/.backup/$x
check_sorted $data/$x
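With this change, any files named in the script's --utt-extra-files and --spk-extra-files options are backed up and sort-checked alongside the standard files. A sketch of an invocation (the extra file name is assumed):

utils/fix_data_dir.sh --utt-extra-files "text.orig" data/train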
11 changes: 11 additions & 0 deletions egs/xbmu_amdo31/README.txt
@@ -0,0 +1,11 @@
About the XBMU-AMDO31 corpus

XBMU-AMDO31 is an open-source Amdo Tibetan speech corpus published by Northwest Minzu University.

The XBMU-AMDO31 dataset is a speech recognition corpus of the Amdo dialect of Tibetan. The open-source corpus contains 31 hours of speech data and resources for building speech recognition systems, including transcribed texts and a Tibetan pronunciation lexicon. (The lexicon is a Lhasa-dialect Tibetan lexicon, reused for the Amdo dialect because of the uniformity of written Tibetan.) The dataset can be used to train models for Amdo Tibetan automatic speech recognition (ASR).

The database can be downloaded from OpenSLR:
http://www.openslr.org/133/

For more details, please visit:
https://huggingface.co/datasets/syzym/xbmu_amdo31

This recipe trains several different ASR models on XBMU-AMDO31.
8 changes: 8 additions & 0 deletions egs/xbmu_amdo31/s5/RESULTS
@@ -0,0 +1,8 @@
%WER 46.16 [ 15522 / 33628, 380 ins, 2208 del, 12934 sub ] exp/mono/decode_test/wer_10_0.0
%WER 24.60 [ 8274 / 33628, 330 ins, 860 del, 7084 sub ] exp/tri1/decode_test/wer_13_0.0
%WER 24.42 [ 8213 / 33628, 323 ins, 847 del, 7043 sub ] exp/tri2/decode_test/wer_13_0.0
%WER 22.93 [ 7712 / 33628, 336 ins, 814 del, 6562 sub ] exp/tri3a/decode_test/wer_12_0.0
%WER 20.17 [ 6783 / 33628, 275 ins, 764 del, 5744 sub ] exp/tri4a/decode_test/wer_15_0.0
%WER 19.03 [ 6400 / 33628, 292 ins, 667 del, 5441 sub ] exp/tri5a/decode_test/wer_14_0.0
%WER 15.45 [ 5196 / 33628, 229 ins, 646 del, 4321 sub ] exp/nnet3/tdnn_sp/decode_test/wer_16_0.0
%WER 15.57 [ 5235 / 33628, 244 ins, 575 del, 4416 sub ] exp/chain/tdnn_1a_sp/decode_test/wer_11_0.0
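Each line follows Kaldi's scoring convention: %WER = 100 * (ins + del + sub) / N, where N is the number of reference words. For the mono system, for example, 100 * (380 + 2208 + 12934) / 33628 = 100 * 15522 / 33628 ≈ 46.16.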
15 changes: 15 additions & 0 deletions egs/xbmu_amdo31/s5/cmd.sh
@@ -0,0 +1,15 @@
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

export train_cmd="queue.pl --mem 2G"
export decode_cmd="queue.pl --mem 4G"
export mkgraph_cmd="queue.pl --mem 8G"
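On a single machine without a queueing system, the comments above suggest switching to run.pl; a minimal local setup would be:

export train_cmd=run.pl
export decode_cmd=run.pl
export mkgraph_cmd=run.pl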
5 changes: 5 additions & 0 deletions egs/xbmu_amdo31/s5/conf/decode.config
@@ -0,0 +1,5 @@
beam=11.0 # beam for decoding. Was 13.0 in the scripts.
first_beam=8.0 # beam for 1st-pass decoding in SAT.



2 changes: 2 additions & 0 deletions egs/xbmu_amdo31/s5/conf/mfcc.conf
@@ -0,0 +1,2 @@
--use-energy=false # only non-default option.
--sample-frequency=16000
10 changes: 10 additions & 0 deletions egs/xbmu_amdo31/s5/conf/mfcc_hires.conf
@@ -0,0 +1,10 @@
# config for high-resolution MFCC features, intended for neural network training.
# Note: we keep all cepstra, so it has the same info as filterbank features,
# but MFCC is more easily compressible (because less correlated) which is why
# we prefer this method.
--use-energy=false # use average of log energy, not energy.
--sample-frequency=16000 # the corpus is sampled at 16kHz
--num-mel-bins=40 # similar to Google's setup.
--num-ceps=40 # there is no dimensionality reduction.
--low-freq=40 # low cutoff frequency for mel bins
--high-freq=-200 # high cutoff frequency, relative to Nyquist of 8000 (=7800)
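To extract features with this configuration, a typical invocation of Kaldi's standard script (data and output directories assumed) looks like:

steps/make_mfcc.sh --nj 10 --mfcc-config conf/mfcc_hires.conf \
  data/train_hires exp/make_mfcc/train_hires mfcc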
1 change: 1 addition & 0 deletions egs/xbmu_amdo31/s5/conf/online_cmvn.conf
@@ -0,0 +1 @@
# configuration file for apply-cmvn-online, used when invoking online2-wav-nnet3-latgen-faster.
4 changes: 4 additions & 0 deletions egs/xbmu_amdo31/s5/conf/online_pitch.conf
@@ -0,0 +1,4 @@
--sample-frequency=16000
--simulate-first-pass-online=true
--normalization-right-context=25
--frames-per-chunk=10
1 change: 1 addition & 0 deletions egs/xbmu_amdo31/s5/conf/pitch.conf
@@ -0,0 +1 @@
--sample-frequency=16000
1 change: 1 addition & 0 deletions egs/xbmu_amdo31/s5/local/chain/run_tdnn.sh
