Permalink
Browse files

add perfield analyzer support

  • Loading branch information...
1 parent 2325a77 commit 76d290dddef8160b32ca7ec4f89dc6b7c80be64c Robert Newson committed Jan 8, 2010
Showing with 51 additions and 17 deletions.
  1. +13 −0 README.md
  2. +38 −17 src/main/java/com/github/rnewson/couchdb/lucene/util/Analyzers.java
View
@@ -156,6 +156,7 @@ Lucene has numerous ways of converting free-form text into tokens, these classes
<li>french</li>
<li>german</li>
<li>keyword</li>
+<li>perfield</li>
<li>porter</li>
<li>russian</li>
<li>simple</li>
@@ -165,6 +166,18 @@ Lucene has numerous ways of converting free-form text into tokens, these classes
Note: You must also supply analyzer=<analyzer_name> as a query parameter to ensure that queries are processed correctly.
+The "perfield" option lets you use a different analyzer for different fields and is configured as follows:
+
+<pre>
+perfield:{field_name:"analyzer_name"}
+</pre>
+
+Unless overridden, any field name not specified will be handled by the standard analyzer. To change the default, use the special _default field name:
+
+<pre>
+perfield:{_default:"keyword"}
+</pre>
+
<h3>The Document class</h3>
You may construct a new Document instance with;
@@ -2,9 +2,12 @@
import java.io.Reader;
+import net.sf.json.JSONObject;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.LowerCaseTokenizer;
+import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.TokenStream;
@@ -24,85 +27,100 @@
BRAZILIAN { // selected by getAnalyzer via the (case-insensitive) name "brazilian"
@Override
- public Analyzer newAnalyzer() {
+ public Analyzer newAnalyzer(final String args) { // args is not read by this constant
return new BrazilianAnalyzer(VERSION);
}
},
CHINESE {
@Override
- public Analyzer newAnalyzer() {
+ public Analyzer newAnalyzer(final String args) { // args is not read by this constant
return new ChineseAnalyzer(); // constructed without a version argument, unlike most siblings
}
},
CJK {
@Override
- public Analyzer newAnalyzer() {
+ public Analyzer newAnalyzer(final String args) { // args is ignored here
return new CJKAnalyzer(VERSION);
}
},
CZECH {
@Override
- public Analyzer newAnalyzer() {
+ public Analyzer newAnalyzer(final String args) { // args is ignored here
return new CzechAnalyzer(VERSION);
}
},
DUTCH {
@Override
- public Analyzer newAnalyzer() {
+ public Analyzer newAnalyzer(final String args) { // args is ignored here
return new DutchAnalyzer(VERSION);
}
},
ENGLISH {
@Override
- public Analyzer newAnalyzer() {
+ public Analyzer newAnalyzer(final String args) { // args is ignored here
return new StandardAnalyzer(VERSION); // same analyzer class as STANDARD, but constructed with VERSION
}
},
FRENCH {
@Override
- public Analyzer newAnalyzer() {
+ public Analyzer newAnalyzer(final String args) { // args is ignored here
return new FrenchAnalyzer(VERSION);
}
},
GERMAN {
@Override
- public Analyzer newAnalyzer() {
+ public Analyzer newAnalyzer(final String args) { // args is ignored here
return new GermanAnalyzer(VERSION);
}
},
KEYWORD {
@Override
- public Analyzer newAnalyzer() {
+ public Analyzer newAnalyzer(final String args) { // args is ignored here
return new KeywordAnalyzer(); // constructed without a version argument
}
},
+ PERFIELD { // builds a PerFieldAnalyzerWrapper: one default analyzer plus per-field overrides taken from a JSON object
+ @Override
+ public Analyzer newAnalyzer(final String args) { // args: JSON object text mapping field name -> analyzer spec
+ final JSONObject json = JSONObject.fromObject(args); // NOTE(review): getAnalyzer passes null when the spec has no ':' part — confirm fromObject(null) and the lookups below behave acceptably
+ final Analyzer defaultAnalyzer = Analyzers.getAnalyzer(json.optString("_default", "standard")); // "_default" key picks the fallback analyzer; "standard" when the key is absent
+ final PerFieldAnalyzerWrapper result = new PerFieldAnalyzerWrapper(defaultAnalyzer);
+ for (final Object obj : json.keySet()) { // every remaining key is treated as a field name
+ final String key = obj.toString();
+ if ("_default".equals(key)) // already consumed above as the fallback, not a real field
+ continue;
+ result.addAnalyzer(key, Analyzers.getAnalyzer(json.getString(key))); // value is resolved through getAnalyzer, so it is parsed with the same name[:args] rules
+ }
+ return result;
+ }
+ },
PORTER {
@Override
- public Analyzer newAnalyzer() {
+ public Analyzer newAnalyzer(final String args) { // args is ignored here
return new PorterStemAnalyzer(); // constructed without a version argument
}
},
RUSSIAN {
@Override
- public Analyzer newAnalyzer() {
+ public Analyzer newAnalyzer(final String args) { // args is ignored here
return new RussianAnalyzer(VERSION);
}
},
SIMPLE {
@Override
- public Analyzer newAnalyzer() {
+ public Analyzer newAnalyzer(final String args) { // args is ignored here
return new SimpleAnalyzer(); // constructed without a version argument
}
},
STANDARD { // also the fallback name PERFIELD uses when no "_default" key is given
@Override
- public Analyzer newAnalyzer() {
+ public Analyzer newAnalyzer(final String args) { // args is ignored here
return new StandardAnalyzer(Version.LUCENE_CURRENT); // NOTE(review): ENGLISH builds the same class with VERSION instead — confirm the difference is intentional
}
},
THAI { // last constant in the list (terminated by ';' below)
@Override
- public Analyzer newAnalyzer() {
+ public Analyzer newAnalyzer(final String args) { // args is ignored here
return new ThaiAnalyzer(VERSION);
}
};
@@ -116,10 +134,13 @@ public TokenStream tokenStream(final String fieldName, final Reader reader) {
}
}
- public static Analyzer getAnalyzer(final String name) {
- return Analyzers.valueOf(name.toUpperCase()).newAnalyzer();
+ public static Analyzer getAnalyzer(final String str) { // str is either "name" or "name:args"
+ final String[] parts = str.split(":", 2); // limit 2: only the first ':' separates name from args, so args may itself contain ':'
+ final String name = parts[0].toUpperCase(); // NOTE(review): upper-cases with the default locale; valueOf below needs an exact constant name
+ final String args = parts.length == 2 ? parts[1] : null; // null when the spec has no ':' part
+ return Analyzers.valueOf(name).newAnalyzer(args); // valueOf throws IllegalArgumentException for a name that is not a constant
}
- public abstract Analyzer newAnalyzer();
+ public abstract Analyzer newAnalyzer(final String args); // args: the text after the first ':' of the spec passed to getAnalyzer, or null if there was none
}

0 comments on commit 76d290d

Please sign in to comment.