/
FullTextAnalyzer.java
74 lines (62 loc) · 2.18 KB
/
FullTextAnalyzer.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
/*
* Copyright 2008 Glencoe Software, Inc. All rights reserved.
* Use is subject to license terms supplied in LICENSE.txt
*/
package ome.services.fulltext;
import java.io.Reader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.LetterTokenizer;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.TokenStream;
/**
 * Full-text {@link Analyzer} modelled largely on {@link SimpleAnalyzer}, with
 * extensions intended for scientific and OS-type strings: digits are kept as
 * part of a token instead of acting as separators.
 *
 * <p>
 * The class is annotated {@link Deprecated}.
 *
 * @author Josh Moore, josh at glencoesoftware.com
 * @since 3.0-Beta3
 */
@Deprecated
public class FullTextAnalyzer extends Analyzer {

    private static final Logger log = LoggerFactory.getLogger(FullTextAnalyzer.class);

    // Emits a single INFO line when the class is initialized.
    static {
        log.info("Initialized FullTextAnalyzer");
    }

    /**
     * Creates a fresh
     * {@link ome.services.fulltext.FullTextAnalyzer.LowercaseAlphaNumericTokenizer}
     * over the given reader. The field name is not consulted.
     *
     * @param fieldName
     *            name of the field being analyzed; unused here
     * @param reader
     *            source of the characters to tokenize
     * @return a new tokenizer reading from {@code reader}
     */
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
        final TokenStream tokens = new LowercaseAlphaNumericTokenizer(reader);
        return tokens;
    }

    /**
     * Alphanumeric, lower-casing tokenizer. It follows the approach of
     * {@link LowerCaseTokenizer} (lower-casing is done by the tokenizer itself
     * through {@link #normalize(char)}), but since that class, like
     * {@link LetterTokenizer}, only accepts letters, this one extends
     * {@link CharTokenizer} directly in order to accept digits as well.
     */
    static class LowercaseAlphaNumericTokenizer extends CharTokenizer {

        /**
         * @param input
         *            reader handed straight to the {@link CharTokenizer}
         *            constructor
         */
        public LowercaseAlphaNumericTokenizer(Reader input) {
            super(input);
        }

        /**
         * Decides whether a character belongs to a token.
         *
         * @param c
         *            character under inspection
         * @return {@code true} when "c" is a letter or a digit, i.e. when
         *         {@link Character#isLetterOrDigit(char)} holds
         */
        @Override
        protected boolean isTokenChar(char c) {
            return Character.isLetterOrDigit(c);
        }

        /**
         * Maps a token character to its lower-case form.
         *
         * @param c
         *            character to normalize
         * @return the result of {@link Character#toLowerCase(char)}
         */
        @Override
        protected char normalize(char c) {
            final char lowered = Character.toLowerCase(c);
            return lowered;
        }
    }
}