/
beam_search.h
205 lines (180 loc) · 7.41 KB
/
beam_search.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
#ifndef BEAM_SEARCH_H
#define BEAM_SEARCH_H
#include <fstream>
#include <string>

#include "tensorflow/core/util/ctc/ctc_beam_search.h"
#include "kenlm/lm/model.hh"

#include "alphabet.h"
#include "trie_node.h"
// Convenience alias for the KenLM probing-hash n-gram model used throughout
// this scorer.
using Model = lm::ngram::ProbingModel;
// Per-beam decoder state threaded through TensorFlow's CTC beam search.
// All members are value-initialized so a default-constructed state is safe
// even before KenLMBeamScorer::InitializeState runs.
struct KenLMBeamState {
  // Accumulated raw language-model log-probability, including any
  // valid-word bonuses (log domain).
  float language_model_score = 0.0f;
  // Current total LM-derived score of this beam (log domain).
  float score = 0.0f;
  // Difference between this state's score and its predecessor's; cached so
  // GetStateExpansionScore can return it cheaply.
  float delta_score = 0.0f;
  // Number of completed words decoded so far (used for length normalization
  // in ExpandStateEnd).
  int num_words = 0;
  // Characters of the word currently being built (since the last space).
  std::string incomplete_word;
  // Trie node matching incomplete_word; nullptr when the prefix is not in
  // the vocabulary. Non-owning — the scorer owns the trie.
  TrieNode *incomplete_word_trie_node = nullptr;
  // KenLM context after the most recently completed word.
  Model::State model_state;
};
// Beam scorer that augments CTC acoustic scores with a KenLM n-gram language
// model. Partial words are scored via a vocabulary prefix trie (minimum
// unigram score of any completion); completed words (on a space label) are
// scored against the full n-gram model.
class KenLMBeamScorer : public tensorflow::ctc::BaseBeamScorer<KenLMBeamState> {
 public:
  // Builds a scorer from a KenLM model file (|kenlm_path|), a serialized
  // vocabulary trie (|trie_path|) and a label alphabet (|alphabet_path|).
  // |lm_weight| scales the language-model delta mixed into each expansion;
  // |valid_word_count_weight| is a fixed bonus granted per completed
  // in-vocabulary word.
  KenLMBeamScorer(const std::string &kenlm_path, const std::string &trie_path,
                  const std::string &alphabet_path, float lm_weight,
                  float valid_word_count_weight)
    : model_(kenlm_path.c_str(), GetLMConfig())
    , alphabet_(alphabet_path.c_str())
    , lm_weight_(lm_weight)
    , valid_word_count_weight_(valid_word_count_weight)
  {
    std::ifstream in(trie_path, std::ios::in);
    // NOTE(review): |in| is not checked for a successful open before reading;
    // a missing trie file is consumed silently — confirm callers guarantee
    // the path exists.
    TrieNode::ReadFromStream(in, trieRoot_, alphabet_.GetSize());

    // Low log-probability assigned to out-of-vocabulary words/prefixes.
    oov_score_ = -10.0f;
  }

  virtual ~KenLMBeamScorer() {
    delete trieRoot_;
  }

  // trieRoot_ is an owning raw pointer: copying this scorer would double
  // delete it, so copy construction/assignment are disabled.
  KenLMBeamScorer(const KenLMBeamScorer&) = delete;
  KenLMBeamScorer& operator=(const KenLMBeamScorer&) = delete;

  // State initialization: zero all scores, clear the partial word and start
  // the KenLM context at the begin-of-sentence state.
  void InitializeState(KenLMBeamState* root) const override {
    root->language_model_score = 0.0f;
    root->score = 0.0f;
    root->delta_score = 0.0f;
    root->incomplete_word.clear();
    root->num_words = 0;
    root->incomplete_word_trie_node = trieRoot_;
    root->model_state = model_.BeginSentenceState();
  }

  // ExpandState is called when expanding a beam to one of its children.
  // Called at most once per child beam. A non-space label extends the
  // partial word and scores it by the trie's minimum unigram score; a space
  // label completes the word and scores it against the n-gram model.
  void ExpandState(const KenLMBeamState& from_state, int /*from_label*/,
                   KenLMBeamState* to_state, int to_label) const override {
    CopyState(from_state, to_state);

    if (!alphabet_.IsSpace(to_label)) {
      to_state->incomplete_word += alphabet_.StringFromLabel(to_label);
      TrieNode *trie_node = from_state.incomplete_word_trie_node;

      // If we have no valid prefix we assume a very low log probability.
      float min_unigram_score = oov_score_;
      // If the prefix does exist, descend one trie level for the new label.
      if (trie_node != nullptr) {
        trie_node = trie_node->GetChildAt(to_label);
        to_state->incomplete_word_trie_node = trie_node;

        if (trie_node != nullptr) {
          min_unigram_score = trie_node->GetMinUnigramScore();
        }
      }
      // TODO try two options
      // 1) unigram score added up to language model score
      // 2) language model score of (preceding_words + unigram_word)
      to_state->score = min_unigram_score + to_state->language_model_score;
      to_state->delta_score = to_state->score - from_state.score;
    } else {
      auto word_index = WordIndex(to_state->incomplete_word);
      float lm_score_delta = ScoreIncompleteWord(from_state.model_state,
                                                 word_index,
                                                 to_state->model_state);
      // Give a fixed bonus for every completed in-vocabulary word.
      if (!IsOOV(word_index)) {
        to_state->language_model_score += valid_word_count_weight_;
      }
      to_state->num_words += 1;
      UpdateWithLMScore(to_state, lm_score_delta);
      ResetIncompleteWord(to_state);
    }
  }

  // ExpandStateEnd is called after decoding has finished. Its purpose is to
  // allow a final scoring of the beam in its current state, before resorting
  // and retrieving the TopN requested candidates. Called at most once per
  // beam. Scores any trailing partial word, then the end-of-sentence token.
  void ExpandStateEnd(KenLMBeamState* state) const override {
    float lm_score_delta = 0.0f;
    Model::State out;
    if (state->incomplete_word.size() > 0) {
      lm_score_delta += ScoreIncompleteWord(state->model_state,
                                            WordIndex(state->incomplete_word),
                                            out);
      ResetIncompleteWord(state);
      state->model_state = out;
    }
    lm_score_delta += model_.FullScore(state->model_state,
                                       model_.GetVocabulary().EndSentence(),
                                       out).prob;
    UpdateWithLMScore(state, lm_score_delta);

    // This is a bit of a hack. In order to implement length normalization, we
    // compute the final state score here (and not in GetStateEndExpansionScore)
    // and then set the state delta score to the value that would normalize
    // the state score when added to it. This way, we can normalize the internal
    // scores in TensorFlow's CTC code when it adds the final state expansion
    // score to this beam's score.
    state->score += lm_weight_ * state->delta_score;
    if (state->num_words > 0) {
      float normalized_score = state->score / (float)state->num_words;
      state->delta_score = normalized_score - state->score;
    }
  }

  // GetStateExpansionScore should be an inexpensive method to retrieve the
  // (cached) expansion score computed within ExpandState. The score is
  // multiplied (log-addition) with the input score at the current step from
  // the network.
  //
  // The score returned should be a log-probability.
  float GetStateExpansionScore(const KenLMBeamState& state,
                               float previous_score) const override {
    return lm_weight_ * state.delta_score + previous_score;
  }

  // GetStateEndExpansionScore should be an inexpensive method to retrieve the
  // (cached) expansion score computed within ExpandStateEnd. The score is
  // multiplied (log-addition) with the final probability of the beam.
  //
  // The score returned should be a log-probability.
  float GetStateEndExpansionScore(const KenLMBeamState& state) const override {
    return state.delta_score;
  }

  // Updates the language-model mixing weight used for future expansions.
  void SetLMWeight(float lm_weight) {
    this->lm_weight_ = lm_weight;
  }

  // Updates the per-word bonus used for future expansions.
  void SetValidWordCountWeight(float valid_word_count_weight) {
    this->valid_word_count_weight_ = valid_word_count_weight;
  }

 private:
  Model model_;
  Alphabet alphabet_;
  // Owning pointer to the vocabulary prefix trie; released in the destructor.
  TrieNode *trieRoot_ = nullptr;
  float lm_weight_;
  float valid_word_count_weight_;
  // Log-probability assigned to out-of-vocabulary prefixes; set in the ctor.
  float oov_score_;

  // POPULATE_OR_READ eagerly loads the model into memory when possible.
  lm::ngram::Config GetLMConfig() const {
    lm::ngram::Config config;
    config.load_method = util::POPULATE_OR_READ;
    return config;
  }

  // Folds |lm_score_delta| into the state's LM score and recomputes the
  // cached total/delta scores.
  void UpdateWithLMScore(KenLMBeamState *state, float lm_score_delta) const {
    float previous_score = state->score;
    state->language_model_score += lm_score_delta;
    state->score = state->language_model_score;
    state->delta_score = state->language_model_score - previous_score;
  }

  // Clears the partial word and rewinds the trie cursor to the root.
  void ResetIncompleteWord(KenLMBeamState *state) const {
    state->incomplete_word.clear();
    state->incomplete_word_trie_node = trieRoot_;
  }

  // Maps a word string to its KenLM vocabulary index (NotFound for OOV).
  lm::WordIndex WordIndex(const std::string& word) const {
    return model_.GetVocabulary().Index(word);
  }

  // True when |word| is not in the language model's vocabulary.
  bool IsOOV(const lm::WordIndex& word) const {
    auto &vocabulary = model_.GetVocabulary();
    return word == vocabulary.NotFound();
  }

  // Returns the n-gram log-probability of |word| following |model_state|;
  // writes the advanced LM context into |out|.
  float ScoreIncompleteWord(const Model::State& model_state,
                            const lm::WordIndex& word,
                            Model::State& out) const {
    return model_.FullScore(model_state, word, out).prob;
  }

  // Field-by-field copy of the beam state (Model::State is copyable).
  void CopyState(const KenLMBeamState& from, KenLMBeamState* to) const {
    to->language_model_score = from.language_model_score;
    to->score = from.score;
    to->delta_score = from.delta_score;
    to->incomplete_word = from.incomplete_word;
    to->incomplete_word_trie_node = from.incomplete_word_trie_node;
    to->model_state = from.model_state;
  }
};
#endif /* BEAM_SEARCH_H */