Skip to content
Permalink
Browse files
added dfa_forward container into multigram scheme
  • Loading branch information
nitslp-ri committed Apr 28, 2020
1 parent bff4dfe commit b98e619b95f675e0245aa41da531a5676f99f73d
Showing 9 changed files with 63 additions and 9 deletions.
@@ -59,6 +59,7 @@ if ($gramprefix eq "") {
$gramfile = "$ARGV[$#ARGV].grammar";
$vocafile = "$ARGV[$#ARGV].voca";
$dfafile = "$ARGV[$#ARGV].dfa";
$fdfafile = "$ARGV[$#ARGV].dfa.forward";
$dictfile = "$ARGV[$#ARGV].dict";
$termfile = "$ARGV[$#ARGV].term";
$tmpprefix = "$tmpdir/g$$";
@@ -140,20 +141,27 @@ if (! -x $minimizebin) {
print "Warning: no minimization performed\n";
if ($tmpprefix =~ /cygdrive/) {
$status = system("$mkfabin -e1 -fg `cygpath -w $rgramfile` -fv `cygpath -w $tmpvocafile` -fo `cygpath -w $dfafile` -fh `cygpath -w ${tmpprefix}.h`");
$status = system("$mkfabin -e1 -fg `cygpath -w $gramfile` -fv `cygpath -w $tmpvocafile` -fo `cygpath -w $fdfafile` -fh `cygpath -w ${tmpprefix}.h`");
} else {
$status = system("$mkfabin -e1 -fg $rgramfile -fv $tmpvocafile -fo $dfafile -fh ${tmpprefix}.h");
$status = system("$mkfabin -e1 -fg $gramfile -fv $tmpvocafile -fo $fdfafile -fh ${tmpprefix}.h");
}
} else {
# minimize DFA after generation
if ($tmpprefix =~ /cygdrive/) {
$status = system("$mkfabin -e1 -fg `cygpath -w $rgramfile` -fv `cygpath -w $tmpvocafile` -fo `cygpath -w ${dfafile}.tmp` -fh `cygpath -w ${tmpprefix}.h`");
system("$minimizebin `cygpath -w ${dfafile}.tmp` -o `cygpath -w $dfafile`");
$status = system("$mkfabin -e1 -fg `cygpath -w $gramfile` -fv `cygpath -w $tmpvocafile` -fo `cygpath -w ${dfafile}.tmp` -fh `cygpath -w ${tmpprefix}.h`");
system("$minimizebin `cygpath -w ${dfafile}.tmp` -o `cygpath -w $fdfafile`");
} else {
$status = system("$mkfabin -e1 -fg $rgramfile -fv $tmpvocafile -fo ${dfafile}.tmp -fh ${tmpprefix}.h");
system("$minimizebin ${dfafile}.tmp -o $dfafile");
$status = system("$mkfabin -e1 -fg $gramfile -fv $tmpvocafile -fo ${dfafile}.tmp -fh ${tmpprefix}.h");
system("$minimizebin ${dfafile}.tmp -o $fdfafile");
}
unlink("${dfafile}.tmp");
}

unlink("$rgramfile");
unlink("$tmpvocafile");
unlink("${tmpprefix}.h");
@@ -192,7 +200,7 @@ if ($make_dict == 1) {
close(DICT);
}

$gene = "$dfafile";
$gene = "$dfafile $fdfafile";
if ($make_term == 1) {
$gene .= " $termfile";
}
@@ -280,7 +280,7 @@ msock_exec_command(char *command, Recog *recog)
/* delete all existing grammars */
multigram_delete_all(cur->lm);
/* register the new grammar to multi-gram tree */
multigram_add(new_dfa, new_winfo, p, cur->lm);
multigram_add(new_dfa, new_winfo, p, cur->lm, NULL);
/* need to rebuild the global lexicon */
/* tell engine to update at requested timing */
schedule_grammar_update(recog);
@@ -312,7 +312,7 @@ msock_exec_command(char *command, Recog *recog)
} else {
if (cur->lmtype == LM_DFA) {
/* add it to multi-gram tree */
multigram_add(new_dfa, new_winfo, p, cur->lm);
multigram_add(new_dfa, new_winfo, p, cur->lm, NULL);
/* need to rebuild the global lexicon */
/* make sure this process will be activated */
cur->active = 1;
@@ -249,7 +249,7 @@ void jconf_set_default_values_search(JCONF_SEARCH *j);


/* multi-gram.c */
int multigram_add(DFA_INFO *dfa, WORD_INFO *winfo, char *name, PROCESS_LM *lm);
int multigram_add(DFA_INFO *dfa, WORD_INFO *winfo, char *name, PROCESS_LM *lm, DFA_INFO *dfa_forward);
boolean multigram_delete(int gid, PROCESS_LM *lm);
void multigram_delete_all(PROCESS_LM *lm);
boolean multigram_update(PROCESS_LM *lm);
@@ -33,6 +33,7 @@ typedef struct __multi_gram__ {
char name[MAXGRAMNAMELEN]; ///< Unique name given by user
unsigned short id; ///< Unique ID
DFA_INFO *dfa; ///< DFA describing syntax of this grammar
DFA_INFO *dfa_forward; ///< additional forward DFA describing syntax of this grammar
WORD_INFO *winfo; ///< Dictionary of this grammar
int hook; ///< Work area to store command hook
boolean newbie; ///< TRUE if just read and not yet configured
@@ -41,6 +42,7 @@ typedef struct __multi_gram__ {
int state_begin; ///< Location of DFA states in the global grammar
int cate_begin; ///< Location of category entries in the global grammar
int word_begin; ///< Location of words in the dictionary of global grammar
int state_begin_forward;
struct __multi_gram__ *next; ///< Link to the next grammar entry
} MULTIGRAM;

@@ -864,6 +864,12 @@ typedef struct __process_lm__ {
*/
DFA_INFO *dfa;

/**
* Global Forward DFA for recognition. This will be generated from @a grammars,
* concatenating each forward DFA into one.
*/
DFA_INFO *dfa_forward;

/**
* TRUE if modified in multigram_update()
*
@@ -215,6 +215,7 @@ typedef struct wchmm_info {
HTK_HMM_INFO *hmminfo; ///< HMM definitions used to construct this lexicon
NGRAM_INFO *ngram; ///< N-gram used to construct this lexicon
DFA_INFO *dfa; ///< Grammar used to construct this lexicon
DFA_INFO *dfa_forward; ///< Grammar used to construct this lexicon
WORD_INFO *winfo; ///< Word dictionary used to construct this lexicon
boolean ccd_flag; ///< TRUE if handling context dependency
int maxwcn; ///< Memory assigned maximum number of nodes
@@ -243,6 +243,7 @@ j_process_lm_free(PROCESS_LM *lm)
if (lm->ngram) ngram_info_free(lm->ngram);
if (lm->grammars) multigram_free_all(lm->grammars);
if (lm->dfa) dfa_info_free(lm->dfa);
if (lm->dfa_forward) dfa_info_free(lm->dfa_forward);
/* not free lm->jconf */
free(lm);
}
@@ -102,6 +102,7 @@ multigram_rebuild_wchmm(RecogProcess *r)
r->wchmm->hmmwrk = &(r->am->hmmwrk);
/* assign models */
r->wchmm->dfa = r->lm->dfa;
r->wchmm->dfa_forward = r->lm->dfa_forward;
r->wchmm->winfo = r->lm->winfo;
r->wchmm->hmminfo = r->am->hmminfo;
if (r->wchmm->category_tree) {
@@ -208,17 +209,20 @@ multigram_build(RecogProcess *r)
* </EN>
*/
static boolean
multigram_append_to_global(DFA_INFO *gdfa, WORD_INFO *gwinfo, MULTIGRAM *m)
multigram_append_to_global(DFA_INFO *gdfa, WORD_INFO *gwinfo, DFA_INFO *gdfa_forward, MULTIGRAM *m)
{
/* the new grammar 'm' will be appended to the end of gdfa and gwinfo
   (and to gdfa_forward, when 'm' carries a forward DFA) */
m->state_begin = gdfa->state_num; /* initial state ID */
m->cate_begin = gdfa->term_num; /* initial terminal ID */
m->word_begin = gwinfo->num; /* initial word ID */
m->state_begin_forward = gdfa_forward->state_num;

/* append category ID and node number of src DFA */
/* Julius allows multiple initial states, so connecting the initial
   nodes to each other is not necessary. */
dfa_append(gdfa, m->dfa, m->state_begin, m->cate_begin);
if (m->dfa_forward != NULL)
dfa_append(gdfa_forward, m->dfa_forward, m->state_begin_forward, m->cate_begin);
/* append words of src vocabulary to global winfo */
if (voca_append(gwinfo, m->winfo, m->cate_begin, m->word_begin) == FALSE) {
return FALSE;
@@ -274,7 +278,7 @@ multigram_append_to_global(DFA_INFO *gdfa, WORD_INFO *gwinfo, MULTIGRAM *m)
* @ingroup grammar
*/
int
multigram_add(DFA_INFO *dfa, WORD_INFO *winfo, char *name, PROCESS_LM *lm)
multigram_add(DFA_INFO *dfa, WORD_INFO *winfo, char *name, PROCESS_LM *lm, DFA_INFO *dfa_forward)
{
MULTIGRAM *new;

@@ -288,6 +292,7 @@ multigram_add(DFA_INFO *dfa, WORD_INFO *winfo, char *name, PROCESS_LM *lm)

new->id = lm->gram_maxid;
new->dfa = dfa;
new->dfa_forward = dfa_forward;
new->winfo = winfo;
/* will be setup and activated after multigram_update() is called once */
new->hook = MULTIGRAM_DEFAULT | MULTIGRAM_ACTIVATE;
@@ -402,6 +407,7 @@ multigram_exec_delete(PROCESS_LM *lm)
/* if any grammar is deleted, we need to rebuild lexicons etc. */
/* so tell it to the caller */
if (! m->newbie) ret_flag = TRUE;
if (m->dfa_forward) dfa_info_free(m->dfa_forward);
if (m->dfa) dfa_info_free(m->dfa);
word_info_free(m->winfo);
jlog("STAT: Gram #%d %s: purged\n", m->id, m->name);
@@ -673,6 +679,10 @@ multigram_update(PROCESS_LM *lm)
dfa_info_free(lm->dfa);
lm->dfa = NULL;
}
if (lm->dfa_forward != NULL) {
dfa_info_free(lm->dfa_forward);
lm->dfa_forward = NULL;
}
if (lm->winfo != NULL) {
word_info_free(lm->winfo);
lm->winfo = NULL;
@@ -683,6 +693,10 @@ multigram_update(PROCESS_LM *lm)
lm->dfa = dfa_info_new();
dfa_state_init(lm->dfa);
}
if (lm->lmvar == LM_DFA_GRAMMAR && lm->dfa_forward == NULL) {
lm->dfa_forward = dfa_info_new();
dfa_state_init(lm->dfa_forward);
}
if (lm->winfo == NULL) {
lm->winfo = word_info_new();
winfo_init(lm->winfo);
@@ -697,7 +711,7 @@ multigram_update(PROCESS_LM *lm)
m->hook |= MULTIGRAM_DELETE;
}
} else {
if (multigram_append_to_global(lm->dfa, lm->winfo, m) == FALSE) {
if (multigram_append_to_global(lm->dfa, lm->winfo, lm->dfa_forward, m) == FALSE) {
jlog("ERROR: multi-gram: failed to add grammar #%d to recognition network\n", m->id);
/* mark as delete */
m->hook |= MULTIGRAM_DELETE;
@@ -717,6 +731,10 @@ multigram_update(PROCESS_LM *lm)
lm->dfa = dfa_info_new();
dfa_state_init(lm->dfa);
}
if (lm->lmvar == LM_DFA_GRAMMAR && lm->dfa_forward == NULL) {
lm->dfa_forward = dfa_info_new();
dfa_state_init(lm->dfa_forward);
}
if (lm->winfo == NULL) {
lm->winfo = word_info_new();
winfo_init(lm->winfo);
@@ -731,7 +749,7 @@ multigram_update(PROCESS_LM *lm)
m->hook |= MULTIGRAM_DELETE;
}
} else {
if (multigram_append_to_global(lm->dfa, lm->winfo, m) == FALSE) {
if (multigram_append_to_global(lm->dfa, lm->winfo, lm->dfa_forward, m) == FALSE) {
jlog("ERROR: multi-gram: failed to add grammar #%d to recognition network\n", m->id);
/* mark as delete */
m->hook |= MULTIGRAM_DELETE;
@@ -754,6 +772,10 @@ multigram_update(PROCESS_LM *lm)
dfa_info_free(lm->dfa);
lm->dfa = NULL;
}
if (lm->dfa_forward != NULL) {
dfa_info_free(lm->dfa_forward);
lm->dfa_forward = NULL;
}
if (lm->winfo != NULL) {
word_info_free(lm->winfo);
lm->winfo = NULL;
@@ -793,6 +815,7 @@ multigram_read_file_and_add(char *dfa_file, char *dict_file, PROCESS_LM *lm)
{
WORD_INFO *new_winfo;
DFA_INFO *new_dfa;
DFA_INFO *new_dfa_forward;
char buf[MAXGRAMNAMELEN], *p, *q;
boolean ret;

@@ -841,6 +864,18 @@ multigram_read_file_and_add(char *dfa_file, char *dict_file, PROCESS_LM *lm)
dfa_info_free(new_dfa);
return FALSE;
}
/* read the additional forward DFA if it exists */
p = mymalloc(strlen(dfa_file) + 9);
strcpy(p, dfa_file);
strcat(p, ".forward");
new_dfa_forward = dfa_info_new();
if (init_dfa(new_dfa_forward, p) == TRUE) {
jlog("STAT: reading additional forward dfa [%s]\n", p);
} else {
dfa_info_free(new_dfa_forward);
new_dfa_forward = NULL;
}
free(p);
}

jlog("STAT: done\n");
@@ -860,7 +895,7 @@ multigram_read_file_and_add(char *dfa_file, char *dict_file, PROCESS_LM *lm)
buf[p-q] = '\0';

/* register the new grammar to multi-gram tree */
multigram_add(new_dfa, new_winfo, buf, lm);
multigram_add(new_dfa, new_winfo, buf, lm, new_dfa_forward);

return TRUE;

@@ -75,6 +75,7 @@ wchmm_new()
w->lmvar = LM_UNDEF;
w->ngram = NULL;
w->dfa = NULL;
w->dfa_forward = NULL;
w->winfo = NULL;
w->malloc_root = NULL;
#ifdef PASS1_IWCD

0 comments on commit b98e619

Please sign in to comment.