/
wchmm.h
280 lines (255 loc) · 10.1 KB
/
wchmm.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
/**
* @file wchmm.h
*
* <JA>
* @brief 木構造化辞書の構造体定義.
*
* このファイルでは,第1パスで用いられる木構造化辞書(あるいは単語連結
* HMM (wchmm) とも呼ばれる)の構造体を定義しています. 起動時に,単語辞書の
* 前単語が並列に並べられ,ツリー上に結合されて木構造化辞書が構築されます.
* HMMの状態単位で構築され,各状態は,対応するHMM出力確率,ツリー内での遷移先
* のリスト,および探索のための様々な情報(言語スコアファクタリングのための
* successor word list や uni-gram 最大値,単語始終端マーカー,音素開始
* マーカーなど)を含みます.
* </JA>
*
* <EN>
* @brief Structure Definition of tree lexicon
*
* This file defines the structures for the word-conjunction HMM (wchmm),
* also known as the tree lexicon, used in the 1st recognition pass. Words
* in the dictionary are gathered to build the tree lexicon. The lexicon is
* built on a per-HMM-state basis; each state holds its HMM output
* probability, its transition arcs, and other information needed for
* search, such as successor word lists and maximum uni-gram scores for
* LM factoring, word head/tail markers, phoneme start markers, and so on.
* </EN>
*
* @author Akinobu Lee
* @date Sun Sep 18 21:31:32 2005
*
* $Revision: 1.8 $
*
*/
/*
* Copyright (c) 1991-2013 Kawahara Lab., Kyoto University
* Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
* Copyright (c) 2005-2013 Julius project team, Nagoya Institute of Technology
* All rights reserved
*/
#ifndef __J_WORD_CONJ_HMM_H__
#define __J_WORD_CONJ_HMM_H__
//#define MAXWCNSTEP 40000 ///< Number of states to be allocated at once
#ifdef PASS1_IWCD
/* Cross-word triphone handling */
/**
* State output probability data for the head phone of a word.
*
* The phoneme HMM to be used changes during search according to the last
* context word, so the most recently assigned model is cached here together
* with the ID of the context word it was assigned for.
* Pointed to by ACOUSTIC_SPEC::rset (AS_RSET).
*/
typedef struct {
HMM_Logical *hmm; ///< Original (logical) HMM given on the dictionary
short state_loc; ///< Location of this state within the phoneme (1-)
/* Context cache: result of the last model assignment */
boolean last_is_lset; ///< TRUE if the last assigned model was a context-dependent state set
union {
HTK_HMM_State *state; ///< Last assigned state (valid when last_is_lset = FALSE)
CD_State_Set *lset; ///< Last assigned state set (valid when last_is_lset = TRUE)
} cache;
WORD_ID lastwid_cache; ///< ID of the last context word the cache was built for
} RC_INFO;
/**
* State output probability data for a 1-phone word.
*
* The phoneme HMM to be used changes according to the last context word,
* so the most recently assigned model is cached here together with the
* ID and category ID of that context word.
* Pointed to by ACOUSTIC_SPEC::lrset (AS_LRSET).
*/
typedef struct {
HMM_Logical *hmm; ///< Original (logical) HMM given on the dictionary
short state_loc; ///< Location of this state within the phoneme (1-)
/* Context cache: result of the last model assignment */
boolean last_is_lset; ///< TRUE if the last assigned model was a context-dependent state set
WORD_ID category; ///< Category ID of the last context word
union {
HTK_HMM_State *state; ///< Last assigned state (presumably valid when last_is_lset = FALSE, as in RC_INFO)
CD_State_Set *lset; ///< Last assigned state set (presumably valid when last_is_lset = TRUE, as in RC_INFO)
} cache;
WORD_ID lastwid_cache; ///< ID of the last context word the cache was built for
} LRC_INFO;
/* For a word-tail phoneme, the pseudo phone on the dictionary is used
directly as the context-dependent state set. */
/**
* State output probability container on the lexicon tree. Each state
* has exactly one of these.
*
* This is a union: only one member is meaningful for a given state.
* Which one is indicated by the corresponding AS_Style value.
*/
typedef union {
HTK_HMM_State *state; ///< For AS_STATE (word-internal phone)
CD_State_Set *lset; ///< For AS_LSET (word-tail phone)
RC_INFO *rset; ///< For AS_RSET (word-head phone)
LRC_INFO *lrset; ///< For AS_LRSET (phone in a 1-phoneme word)
} ACOUSTIC_SPEC;
/**
* ID indicating which member of an ACOUSTIC_SPEC container holds the data.
*
*/
typedef enum {
AS_STATE, ///< State belongs to a word-internal phone (ACOUSTIC_SPEC::state)
AS_LSET, ///< State belongs to a word-tail phone (ACOUSTIC_SPEC::lset)
AS_RSET, ///< State belongs to a word-head phone (ACOUSTIC_SPEC::rset)
AS_LRSET ///< State belongs to a 1-phone word (ACOUSTIC_SPEC::lrset)
} AS_Style;
#endif
/*************************************************************************/
/**
* LM score cache for the 1st pass.
*
* Holds both the word-internal factoring cache (indexed by scid) and the
* cross-word factoring cache (indexed by last word).
*/
typedef struct {
/// Word-internal factoring cache indexed by scid, holding the last score
LOGPROB *probcache;
/// Word-internal factoring cache indexed by scid, holding the last N-gram entry ID
WORD_ID *lastwcache;
/**
* @brief Cross-word factoring cache holding last-word-dependent factoring
* scores toward word-head nodes.
*
* Cached values are stored as [last_nword][n], where n is the number of
* word-head nodes on which the last_nword-dependent N-gram factoring value
* has to be computed at a cross-word transition. In 1-gram factoring,
* n equals wchmm->isolatenum, the number of isolated (not shared)
* word-head nodes.
* In 2-gram factoring, n simply equals wchmm->startnum, the number of
* all word-head nodes.
*
* The cache area is allocated per previous word, at the time that word
* first appears during search.
* It is retained across the speech stream, so the cache area will grow
* to some extent as recognition is performed on many files.
*/
LOGPROB **iw_sc_cache;
/**
* Maximum size of the cross-word factoring cache @a iw_sc_cache per last word.
* The value is set in max_successor_cache_init().
*/
int iw_cache_num;
#ifdef HASH_CACHE_IW
WORD_ID *iw_lw_cache; ///< Maps hash cache id [x] to the corresponding last word
#endif
} LM_PROB_CACHE;
/*************************************************************************/
/**
* Number of transition arcs that a single arc cell (A_CELL2) can hold.
* Used as the fixed length of the A_CELL2::arc and A_CELL2::a arrays.
*
*/
#define A_CELL2_ALLOC_STEP 4
/**
* Transition arc holding cell.
*
* Each cell stores up to A_CELL2_ALLOC_STEP arcs; cells are chained
* through @a next to form a singly linked list.
*/
typedef struct __A_CELL2__ {
/**
* Number of arcs currently stored in this cell.
* When this reaches A_CELL2_ALLOC_STEP, the next cell will be allocated.
*
*/
unsigned short n;
int arc[A_CELL2_ALLOC_STEP]; ///< Transition destination node numbers
LOGPROB a[A_CELL2_ALLOC_STEP]; ///< Transition probabilities, parallel to @a arc
struct __A_CELL2__ *next; ///< Pointer to the next cell
} A_CELL2;
/**
* HMM state on the tree lexicon.
*
* The type of @a out depends on whether PASS1_IWCD is defined.
*/
typedef struct wchmm_state {
#ifdef PASS1_IWCD
ACOUSTIC_SPEC out; ///< State output probability container
/* The per-state "outstyle" field (unsigned char, one of AS_Style) that
   used to follow here was moved out of this struct into WCHMM_INFO
   (04/06/22 by ri). */
#else /* ~PASS1_IWCD */
HTK_HMM_State *out; ///< HMM state
#endif /* ~PASS1_IWCD */
/**
* LM factoring parameter:
* If scid > 0, it is an index into the successor list.
* If scid = 0, the node is not on a branch.
* If scid < 0, it points to the unigram factoring value index.
*/
int scid;
} WCHMM_STATE;
/**
* wchmm-specific work area.
*
* NOTE(review): the fields carry no documentation in this header; the
* descriptions below are inferred from the names and types only and
* should be confirmed against the code that uses them.
*/
typedef struct {
int *out_from; ///< presumably a list of source node IDs -- TODO confirm
int *out_from_next; ///< presumably the "next" counterpart of @a out_from -- TODO confirm
LOGPROB *out_a; ///< presumably transition probabilities paired with @a out_from -- TODO confirm
LOGPROB *out_a_next; ///< presumably the "next" counterpart of @a out_a -- TODO confirm
int out_from_len; ///< presumably the length of the arrays above -- TODO confirm
} WCHMM_WORK;
/**
* Whole lexicon tree structure holding all information.
*
* Groups the knowledge sources the lexicon was built from, the per-node
* arrays (indexed by node ID), the per-word arrays (indexed by word ID),
* the root node tables, the LM factoring data and the work areas used
* on the 1st pass.
*/
typedef struct wchmm_info {
int lmtype; ///< LM type
int lmvar; ///< LM variant
boolean category_tree; ///< TRUE if category_tree is used
HTK_HMM_INFO *hmminfo; ///< HMM definitions used to construct this lexicon
NGRAM_INFO *ngram; ///< N-gram used to construct this lexicon
DFA_INFO *dfa; ///< Grammar used to construct this lexicon
DFA_INFO *dfa_forward; ///< Grammar used to construct this lexicon (NOTE(review): same description as @a dfa; presumably a forward-direction variant -- confirm)
WORD_INFO *winfo; ///< Word dictionary used to construct this lexicon
boolean ccd_flag; ///< TRUE if handling context dependency
int maxwcn; ///< Maximum number of nodes for which memory is assigned
int n; ///< Number of nodes in this lexicon
WCHMM_STATE *state; ///< HMM state on tree lexicon [nodeID]
LOGPROB *self_a; ///< Transition probability to the self node
LOGPROB *next_a; ///< Transition probability to the next (now+1) node
A_CELL2 **ac; ///< Transition arc information other than self and next.
WORD_ID *stend; ///< Word ID that ends at the state [nodeID]
int **offset; ///< Node ID of a phone [wordID][0..phonelen-1]
int *wordend; ///< Node ID of word-end state [wordID]
int startnum; ///< Number of root nodes
int *startnode; ///< Root node index [0..startnum-1] -> node ID
int *wordbegin; ///< Node ID of word-beginning state [wordID] for multipath mode
int maxstartnum; ///< Allocated number of startnodes for multipath mode
WORD_ID *start2wid; ///< Root node index [0..startnum-1] -> word ID for multipath mode
#ifdef UNIGRAM_FACTORING
int *start2isolate; ///< Root node index -> isolated root node ID
int isolatenum; ///< Number of isolated root nodes
#endif
LOGPROB *wordend_a; ///< Transition prob. outside word [wordID] for non-multipath mode
#ifdef PASS1_IWCD
unsigned char *outstyle; ///< ID to indicate type of output probability container (one of AS_Style)
#endif
/* Successor lists on the tree are stored in a sequential list at @a sclist,
and each node has an index into that list */
/* sclist and sclen are used at 2-gram factoring only */
/* scword is used at 1-gram factoring only */
#ifdef UNIGRAM_FACTORING
WORD_ID *scword; ///< Successor word [scid]
LOGPROB *fscore; ///< List of 1-gram factoring scores [-scid]
int fsnum; ///< Number of @a fscore
#endif
WORD_ID **sclist; ///< List of successor lists [scid]
WORD_ID *sclen; ///< Length of each successor list [scid]
int scnum; ///< Total number of factoring nodes that have a successor list
BMALLOC_BASE *malloc_root; ///< Pointer for block memory allocation
#ifdef PASS1_IWCD
APATNODE *lcdset_category_root; ///< Index of lexicon-dependent category-aware pseudo phone set when used on Julian
BMALLOC_BASE *lcdset_mroot; ///< NOTE(review): undocumented; presumably the block memory allocation root for the lcdset data -- confirm
#endif /* PASS1_IWCD */
HMMWork *hmmwrk; ///< Work area for HMM computation in wchmm
LM_PROB_CACHE lmcache; ///< LM score cache for 1st pass
WCHMM_WORK wrk; ///< Other work area for 1st pass transition computation
int separated_word_count; ///< Number of words actually separated (linearized) from the tree
char lccbuf[MAX_HMMNAME_LEN+7]; ///< Work area for HMM name conversion
char lccbuf2[MAX_HMMNAME_LEN+7]; ///< Work area for HMM name conversion
/* user-defined functions, used when this->lmvar == LM_NGRAM_USER */
/* they are local copies from the parent Recog instance */
LOGPROB (*uni_prob_user)(WORD_INFO *, WORD_ID, LOGPROB); ///< Pointer to function returning word occurrence probability
LOGPROB (*bi_prob_user)(WORD_INFO *, WORD_ID, WORD_ID, LOGPROB); ///< Pointer to function returning a word probability given a word context (corresponds to bi-gram)
} WCHMM_INFO;
#endif /* __J_WORD_CONJ_HMM_H__ */