Skip to content
Browse files

Merge pull request #158 from nesteffe/termvectors

Termvectors
  • Loading branch information...
2 parents 453c434 + 27ff576 commit 0a03bd3284a654d85592bc419e42d97c47d1932f Robert Newson committed Apr 26, 2012
View
3 README.md
@@ -423,6 +423,9 @@ The following parameters can be passed for more sophisticated searches;
<dt>force_json</dt><dd>Usually couchdb-lucene determines the Content-Type of its response based on the presence of the Accept header. If Accept contains "application/json", you get "application/json" in the response, otherwise you get "text/plain;charset=utf8". Some tools, like JSONView for Firefox, do not send the Accept header but do render "application/json" responses if received. Setting force_json=true forces all responses to "application/json" regardless of the Accept header.</dd>
<dt>include_docs</dt><dd>whether to include the source docs</dd>
<dt>include_fields</dt><dd>By default, <i>all</i> stored fields are returned with results. Use a comma-separated list of field names with this parameter to refine the response.</dd>
+<dt>include_termvectors</dt><dd>Return term vectors with the results. Default is <i>false</i>.</dd>
+<dt>highlights</dt><dd>Number of highlights to include with results. Default is <i>0</i>. This uses the <i>fast-vector-highlighter</i> plugin.</dd>
+<dt>highlight_length</dt><dd>Number of characters to include in a highlight row. Default and minimum is <i>18</i>.</dd>
<dt>limit</dt><dd>the maximum number of results to return</dd>
<dt>q</dt><dd>the query to run (e.g., subject:hello). If not specified, the default field is searched. Multiple queries can be supplied, separated by commas; the resulting JSON will be an array of responses.</dd>
<dt>skip</dt><dd>the number of results to skip</dd>
View
1 THANKS.md
@@ -2,3 +2,4 @@
* Thanks to Adam Lofts for the performance boosting JSONDocumentAdapter et al.
* Thanks to Santiago M. Mola for the termvector option.
* Thanks to Joe Hillenbrand for adding default result limit to config.
+* Thanks to Nate Steffen for adding highlighting.
View
5 pom.xml
@@ -73,6 +73,11 @@
<version>${tika-version}</version>
</dependency>
<dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-highlighter</artifactId>
+ <version>3.6.0</version>
+ </dependency>
+ <dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>${tika-version}</version>
View
22 src/main/java/com/github/rnewson/couchdb/lucene/DatabaseIndexer.java
@@ -47,9 +47,12 @@
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermFreqVector;
+import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.QueryParser.Operator;
+import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@@ -486,6 +489,7 @@ public void search(final HttpServletRequest req,
return;
final IndexSearcher searcher = state.borrowSearcher(isStaleOk(req));
final String etag = state.getEtag();
+ final FastVectorHighlighter fvh = new FastVectorHighlighter(true, true);
final JSONArray result = new JSONArray();
try {
if (state.notModified(req)) {
@@ -526,6 +530,9 @@ public void search(final HttpServletRequest req,
final boolean include_docs = getBooleanParameter(req,
"include_docs");
+ final int highlights = getIntParameter(req, "highlights", 0);
+ final int highlight_length = max(getIntParameter(req, "highlight_length", 18), 18); // min for fast term vector highlighter is 18
+ final boolean include_termvectors = getBooleanParameter(req, "include_termvectors");
final int limit = getIntParameter(req, "limit",
ini.getInt("lucene.limit", 25));
final Sort sort = CustomQueryParser.toSort(req
@@ -559,6 +566,7 @@ public void search(final HttpServletRequest req,
final JSONObject row = new JSONObject();
final JSONObject fields = new JSONObject();
+ final JSONObject highlight_rows = new JSONObject();
// Include stored fields.
for (final Fieldable f : doc.getFields()) {
@@ -590,6 +598,11 @@ public void search(final HttpServletRequest req,
((JSONArray) obj).put(value);
}
}
+
+ if (highlights > 0) {
+ String[] frags = fvh.getBestFragments(fvh.getFieldQuery(q), searcher.getIndexReader(), td.scoreDocs[i].doc, name, highlight_length, highlights);
+ highlight_rows.put(name, frags);
+ }
}
}
}
@@ -608,6 +621,15 @@ public void search(final HttpServletRequest req,
if (fields.length() > 0) {
row.put("fields", fields);
}
+ if (highlight_rows.length() > 0) {
+ row.put("highlights", highlight_rows);
+ }
+ if (include_termvectors) {
+ final JsonTermVectorMapper mapper = new JsonTermVectorMapper();
+ searcher.getIndexReader().getTermFreqVector(td.scoreDocs[i].doc, mapper);
+ row.put("termvectors", mapper.getObject());
+ }
+
rows.put(row);
}
// Fetch documents (if requested).
View
58 src/main/java/com/github/rnewson/couchdb/lucene/JsonTermVectorMapper.java
@@ -0,0 +1,58 @@
+package com.github.rnewson.couchdb.lucene;
+
+import org.apache.lucene.index.TermVectorMapper;
+import org.apache.lucene.index.TermVectorOffsetInfo;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+/**
+ * {@link TermVectorMapper} that renders the term vectors it is fed as JSON.
+ *
+ * <p>The resulting object is keyed by field name. Each field maps to an object
+ * keyed by term, and each term carries three entries: {@code freq} (the term
+ * frequency), {@code offsets} (an array of {@code [start, end]} pairs) and
+ * {@code positions} (an array of ints). When offsets or positions are not
+ * supplied, the corresponding entry is JSON {@code null}.</p>
+ *
+ * <p>Results accumulate in a single object for the lifetime of the mapper, so
+ * use a fresh instance for each document.</p>
+ */
+class JsonTermVectorMapper extends TermVectorMapper {
+
+    /** Accumulated output, keyed by field name. */
+    private final JSONObject result = new JSONObject();
+
+    /** Per-term object of the field most recently announced via {@link #setExpectations}. */
+    private JSONObject currentObj;
+
+    /**
+     * Starts a new field: subsequent {@link #map} calls are recorded under
+     * {@code field} in the result object.
+     *
+     * @param field          name of the field whose terms follow
+     * @param numTerms       unused
+     * @param storeOffsets   unused
+     * @param storePositions unused
+     */
+    @Override
+    public void setExpectations(String field, int numTerms,
+            boolean storeOffsets, boolean storePositions) {
+        currentObj = new JSONObject();
+        try {
+            result.put(field, currentObj);
+        } catch (JSONException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Records one term of the current field.
+     *
+     * @param term      the term text, used as the JSON key
+     * @param frequency number of occurrences of the term
+     * @param offsets   character offsets of each occurrence, or {@code null}
+     * @param positions token positions of each occurrence, or {@code null}
+     */
+    @Override
+    public void map(String term, int frequency, TermVectorOffsetInfo[] offsets,
+            int[] positions) {
+        try {
+            final JSONObject entry = new JSONObject();
+            entry.put("freq", frequency);
+            // Use a real JSON null (not the string "null") so the value is
+            // always either an array or null for consumers.
+            entry.put("offsets", offsets == null ? JSONObject.NULL : toJson(offsets));
+            entry.put("positions", positions == null ? JSONObject.NULL : toJson(positions));
+            currentObj.put(term, entry);
+        } catch (JSONException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Returns the accumulated term vectors.
+     *
+     * @return the JSON object built so far (field -> term -> details)
+     */
+    public JSONObject getObject() {
+        return result;
+    }
+
+    /** Converts offsets into an array of {@code [start, end]} pairs. */
+    private static JSONArray toJson(final TermVectorOffsetInfo[] offsets) {
+        final JSONArray pairs = new JSONArray();
+        for (final TermVectorOffsetInfo offset : offsets) {
+            final JSONArray pair = new JSONArray();
+            pair.put(offset.getStartOffset());
+            pair.put(offset.getEndOffset());
+            pairs.put(pair);
+        }
+        return pairs;
+    }
+
+    /** Converts positions into an explicit JSON array rather than storing a raw {@code int[]}. */
+    private static JSONArray toJson(final int[] positions) {
+        final JSONArray arr = new JSONArray();
+        for (final int position : positions) {
+            arr.put(position);
+        }
+        return arr;
+    }
+
+}

0 comments on commit 0a03bd3

Please sign in to comment.
Something went wrong with that request. Please try again.