Skip to content
This repository has been archived by the owner on Oct 13, 2022. It is now read-only.

Use a modified ctc_topo. #209

Merged
merged 2 commits into from
Jun 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
46 changes: 29 additions & 17 deletions egs/librispeech/asr/simple_v1/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,23 +31,35 @@ fi

if [ $stage -le 4 ]; then
# Build G
python3 -m kaldilm \
--read-symbol-table="data/lang_nosp/words.txt" \
--disambig-symbol='#0' \
--max-order=1 \
data/local/lm/lm_tgmed.arpa >data/lang_nosp/G_uni.fst.txt

python3 -m kaldilm \
--read-symbol-table="data/lang_nosp/words.txt" \
--disambig-symbol='#0' \
--max-order=3 \
data/local/lm/lm_tgmed.arpa >data/lang_nosp/G.fst.txt

python3 -m kaldilm \
--read-symbol-table="data/lang_nosp/words.txt" \
--disambig-symbol='#0' \
--max-order=4 \
data/local/lm/lm_fglarge.arpa >data/lang_nosp/G_4_gram.fst.txt
if [ ! -f data/lang_nosp/G_uni.fst.txt ]; then
python3 -m kaldilm \
--read-symbol-table="data/lang_nosp/words.txt" \
--disambig-symbol='#0' \
--max-order=1 \
data/local/lm/lm_tgmed.arpa >data/lang_nosp/G_uni.fst.txt
else
echo "Skip generating data/lang_nosp/G_uni.fst.txt"
fi

if [ ! -f data/lang_nosp/G.fst.txt ]; then
python3 -m kaldilm \
--read-symbol-table="data/lang_nosp/words.txt" \
--disambig-symbol='#0' \
--max-order=3 \
data/local/lm/lm_tgmed.arpa >data/lang_nosp/G.fst.txt
else
echo "Skip generating data/lang_nosp/G.fst.txt"
fi

if [ ! -f data/lang_nosp/G_4_gram.fst.txt ]; then
python3 -m kaldilm \
--read-symbol-table="data/lang_nosp/words.txt" \
--disambig-symbol='#0' \
--max-order=4 \
data/local/lm/lm_fglarge.arpa >data/lang_nosp/G_4_gram.fst.txt
else
echo "Skip generating data/lang_nosp/G_4_gram.fst.txt"
fi

echo ""
echo "To load G:"
Expand Down
32 changes: 32 additions & 0 deletions snowfall/training/ctc_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,38 @@ def build_ctc_topo(tokens: List[int]) -> k2.Fsa:
return ans


def build_ctc_topo2(phones: List[int]):
    """Build the modified CTC topology as an arc-sorted k2 FSA.

    See https://github.com/k2-fsa/k2/issues/746#issuecomment-856421616

    The graph has one start state (0), one state per non-blank phone
    (numbered 1..N in the order the phones are given) and a final state
    (N + 1).  Each arc row is written as five integers; they are presumably
    ``src dst label aux_label score`` in k2's text format -- confirm against
    ``k2.Fsa.from_str``.  Under that reading:

      - the start state has a blank self-loop (0:0) and a -1:-1 arc to the
        final state;
      - for every non-blank phone ``p`` with state ``i``, the start state has
        a ``p:p`` self-loop and a ``p:p`` arc to state ``i``;
      - state ``i`` has a ``p:0`` self-loop and a ``p:0`` arc back to the
        start state.

    Args:
      phones:
        Phone IDs.  Must contain 0, which is taken to be the blank symbol.
        The list passed in is not modified.

    Returns:
      The result of ``k2.arc_sort`` applied to the FSA parsed from the
      generated text.
    """
    assert 0 in phones, 'We assume 0 is the ID of the blank symbol'

    # Work on a copy so the caller's list keeps its blank entry.
    non_blank = phones.copy()
    non_blank.remove(0)

    start_state = 0
    final_state = len(non_blank) + 1

    # Rows are collected already grouped by source state (start state first,
    # then phone states in increasing order, then the final-state line), so
    # no separate sort by source state is needed before serialising.
    rows_from_start = [
        (start_state, start_state, 0, 0, 0),
        (start_state, final_state, -1, -1, 0),
    ]
    rows_from_phone_states = []
    for state, phone in enumerate(non_blank, start=1):
        rows_from_start.append((start_state, start_state, phone, phone, 0))
        rows_from_start.append((start_state, state, phone, phone, 0))
        rows_from_phone_states.append((state, state, phone, 0, 0))
        rows_from_phone_states.append((state, start_state, phone, 0, 0))

    # The last line holds only the final state's ID.
    rows = rows_from_start + rows_from_phone_states + [(final_state,)]

    fsa_text = '\n'.join(' '.join(map(str, row)) for row in rows)
    return k2.arc_sort(k2.Fsa.from_str(fsa_text, False))


class CtcTrainingGraphCompiler(object):

def __init__(self,
Expand Down