Skip to content

Commit

Permalink
add classic analyzer for pre 3.1 email address matching
Browse files Browse the repository at this point in the history
  • Loading branch information
Robert Newson committed Oct 29, 2011
1 parent 0b94022 commit 2ea60bc
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 0 deletions.
Expand Up @@ -36,6 +36,7 @@
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
import org.apache.lucene.analysis.standard.ClassicAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.json.JSONException;
Expand All @@ -61,6 +62,12 @@ public Analyzer newAnalyzer(final String args) {
return new CJKAnalyzer(Constants.VERSION);
}
},
/**
 * Selects Lucene's {@link ClassicAnalyzer}.
 *
 * Added so that pre-3.1 style email address matching remains available:
 * the accompanying test expects "foo@bar.com" to come back as a single
 * token from this analyzer, whereas the standard analyzer splits it into
 * "foo" and "bar.com".
 */
CLASSIC {
@Override
public Analyzer newAnalyzer(final String args) {
// args is not consulted here; the analyzer is configured only by Constants.VERSION.
return new ClassicAnalyzer(Constants.VERSION);
}
},
CZECH {
@Override
public Analyzer newAnalyzer(final String args) {
Expand Down
Expand Up @@ -4,11 +4,17 @@
import static org.hamcrest.Matchers.containsString;
import static org.junit.Assert.assertThat;

import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.Test;

public class AnalyzersTest {
Expand Down Expand Up @@ -44,4 +50,21 @@ public void testPerFieldDefault() throws Exception {
assertThat(analyzer.toString(), containsString("default=org.apache.lucene.analysis.KeywordAnalyzer"));
}

/**
 * Checks how an email address is tokenized: the "standard" analyzer is
 * expected to split it at the '@', while the "classic" analyzer is expected
 * to keep the whole address as one token.
 */
@Test
public void testEmailAddresses() throws Exception {
    final String address = "foo@bar.com";

    // Standard analyzer: local part and domain come back as separate tokens.
    final String[] standardTokens = analyze("standard", address);
    assertThat(standardTokens, is(new String[] {"foo", "bar.com"}));

    // Classic analyzer: the address survives as a single token.
    final String[] classicTokens = analyze("classic", address);
    assertThat(classicTokens, is(new String[] {"foo@bar.com"}));
}

/**
 * Runs {@code text} through the analyzer registered under
 * {@code analyzerName} and collects the emitted terms.
 *
 * @param analyzerName name passed to {@code Analyzers.getAnalyzer}
 * @param text the text to tokenize
 * @return the term text of every token, in emission order (empty if no tokens)
 * @throws Exception if the analyzer lookup or the token stream fails
 */
private String[] analyze(final String analyzerName, final String text) throws Exception {
    final Analyzer analyzer = Analyzers.getAnalyzer(analyzerName);
    final TokenStream stream = analyzer.tokenStream("default", new StringReader(text));
    try {
        // Obtain the term attribute once, before reset(); the same instance is
        // updated in place on every incrementToken() call.
        final CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        final List<String> result = new ArrayList<String>();
        while (stream.incrementToken()) {
            result.add(term.toString());
        }
        // Complete the consumer workflow: signal end-of-stream before closing.
        stream.end();
        return result.toArray(new String[0]);
    } finally {
        // Always release the stream's resources, even if tokenization fails.
        stream.close();
    }
}
}

0 comments on commit 2ea60bc

Please sign in to comment.