From 2ea60bc26afe5482d4099799967a561e982688a1 Mon Sep 17 00:00:00 2001
From: Robert Newson
Date: Sat, 29 Oct 2011 18:09:26 +0100
Subject: [PATCH] add classic analyzer for pre 3.1 email address matching

---
Notes (not part of the commit message):

  * Analyzers.java gains a CLASSIC constant whose newAnalyzer() returns
    new ClassicAnalyzer(Constants.VERSION); the args parameter is unused,
    as in the neighbouring CJK constant.
  * AnalyzersTest.testEmailAddresses pins the behaviour this patch is
    about: "standard" analyzes foo@bar.com into the tokens "foo" and
    "bar.com", while "classic" keeps "foo@bar.com" as a single token.
  * AnalyzersTest.analyze collects the CharTermAttribute text of every
    token produced for the given analyzer name. The list is declared as
    List<String>: on a raw List, toArray(new String[0]) is typed Object[]
    and cannot be returned as String[].

 .../couchdb/lucene/util/Analyzers.java        |  7 ++++++
 .../couchdb/lucene/util/AnalyzersTest.java    | 23 +++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/src/main/java/com/github/rnewson/couchdb/lucene/util/Analyzers.java b/src/main/java/com/github/rnewson/couchdb/lucene/util/Analyzers.java
index eecce97c..f3d7c9c8 100644
--- a/src/main/java/com/github/rnewson/couchdb/lucene/util/Analyzers.java
+++ b/src/main/java/com/github/rnewson/couchdb/lucene/util/Analyzers.java
@@ -36,6 +36,7 @@
 import org.apache.lucene.analysis.nl.DutchAnalyzer;
 import org.apache.lucene.analysis.ru.RussianAnalyzer;
 import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
+import org.apache.lucene.analysis.standard.ClassicAnalyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.th.ThaiAnalyzer;
 import org.json.JSONException;
@@ -61,6 +62,12 @@ public Analyzer newAnalyzer(final String args) {
             return new CJKAnalyzer(Constants.VERSION);
         }
     },
+    CLASSIC {
+        @Override
+        public Analyzer newAnalyzer(final String args) {
+            return new ClassicAnalyzer(Constants.VERSION);
+        }
+    },
     CZECH {
         @Override
         public Analyzer newAnalyzer(final String args) {
diff --git a/src/test/java/com/github/rnewson/couchdb/lucene/util/AnalyzersTest.java b/src/test/java/com/github/rnewson/couchdb/lucene/util/AnalyzersTest.java
index d8bc2897..131debad 100644
--- a/src/test/java/com/github/rnewson/couchdb/lucene/util/AnalyzersTest.java
+++ b/src/test/java/com/github/rnewson/couchdb/lucene/util/AnalyzersTest.java
@@ -4,11 +4,17 @@
 import static org.hamcrest.Matchers.containsString;
 import static org.junit.Assert.assertThat;
 
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.fr.FrenchAnalyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.junit.Test;
 
 public class AnalyzersTest {
@@ -44,4 +50,21 @@ public void testPerFieldDefault() throws Exception {
         assertThat(analyzer.toString(), containsString("default=org.apache.lucene.analysis.KeywordAnalyzer"));
     }
 
+    @Test
+    public void testEmailAddresses() throws Exception {
+        assertThat(analyze("standard", "foo@bar.com"), is(new String[] {"foo", "bar.com"}));
+        assertThat(analyze("classic", "foo@bar.com"), is(new String[] {"foo@bar.com"}));
+    }
+
+    private String[] analyze(final String analyzerName, final String text) throws Exception {
+        final Analyzer analyzer = Analyzers.getAnalyzer(analyzerName);
+        final TokenStream stream = analyzer.tokenStream("default", new StringReader(text));
+        stream.reset();
+        final List<String> result = new ArrayList<String>();
+        while (stream.incrementToken()) {
+            final CharTermAttribute c = stream.getAttribute(CharTermAttribute.class);
+            result.add(c.toString());
+        }
+        return result.toArray(new String[0]);
+    }
 }