forked from suggest-go/suggest
/
tokenizer.go
34 lines (29 loc) · 917 Bytes
/
tokenizer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
package suggest
import (
"github.com/teng231/suggest/pkg/alphabet"
"github.com/teng231/suggest/pkg/analysis"
)
// NewSuggestTokenizer creates a tokenizer for the suggester service.
// The produced tokenizer splits input into n-grams of d.NGramSize,
// normalizes each token against the configured alphabet (padding
// out-of-alphabet runes with d.Pad), and wraps the result with
// d.Wrap[0] at the head and d.Wrap[1] at the tail.
func NewSuggestTokenizer(d IndexDescription) analysis.Tokenizer {
	ngrams := analysis.NewNGramTokenizer(d.NGramSize)
	normalizer := analysis.NewNormalizerFilter(alphabet.CreateAlphabet(d.Alphabet), d.Pad)
	filtered := analysis.NewFilterTokenizer(ngrams, normalizer)

	return analysis.NewWrapTokenizer(filtered, d.Wrap[0], d.Wrap[1])
}
// NewAutocompleteTokenizer creates a tokenizer for the autocomplete service.
// Like the suggester tokenizer it produces normalized n-grams of d.NGramSize
// wrapped with d.Wrap[0] at the head, but it deliberately passes an empty
// tail wrap: an autocomplete query is an unfinished prefix, so no terminal
// wrap symbol is appended.
func NewAutocompleteTokenizer(d IndexDescription) analysis.Tokenizer {
	ngrams := analysis.NewNGramTokenizer(d.NGramSize)
	normalizer := analysis.NewNormalizerFilter(alphabet.CreateAlphabet(d.Alphabet), d.Pad)
	filtered := analysis.NewFilterTokenizer(ngrams, normalizer)

	// Head wrap only; the tail stays open for prefix matching.
	return analysis.NewWrapTokenizer(filtered, d.Wrap[0], "")
}