Skip to content
Browse files

Merge branch 'master' into carrot2

Conflicts:
	TODO
  • Loading branch information...
2 parents 6d5a123 + d1f0147 commit fce8c140c238c5c2b8440293e1907d186da7a64c Robert Newson committed Mar 17, 2009
View
25 LICENSE
@@ -174,28 +174,3 @@
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
View
28 README.md
@@ -1,13 +1,6 @@
<h1>News</h1>
-I've merged the changes from the beta branch which brings many improvements. Notably;
-
-<ol>
-<li>Indexing is a separate process to searching and is triggered by update notifications.
-<li>Rhino integration has landed, user customization of indexing is now possible.
-</ol>
-
-You are advised to delete indexes created prior to this update.
+Issue tracking now available at <a href="http://rnewson.lighthouseapp.com/projects/27420-couchdb-lucene">lighthouseapp</a>.
<h1>Build couchdb-lucene</h1>
@@ -78,11 +71,11 @@ You can perform all types of queries using Lucene's default <a href="http://luce
<dl>
<dt>q<dd>the query to run (e.g, subject:hello)
-<dt>sort<dd>the comma-separated fields to sort on.
-<dt>asc<dd>sort ascending (true) or descending (false), only when sorting on a single field.
+<dt>sort<dd>the comma-separated fields to sort on. Prefix with / for ascending order and \ for descending order (ascending is the default if not specified).
<dt>limit<dd>the maximum number of results to return
<dt>skip<dd>the number of results to skip
<dt>include_docs<dd>whether to include the source docs
+<dt>stale=ok<dd>If you set the <i>stale</i> option to <i>ok</i>, couchdb-lucene may not perform any refreshing on the index. Searches may be faster as Lucene caches important data (especially for sorting). A query without stale=ok will use the latest data committed to the index.
<dt>debug<dd>if false, a normal application/json response with results appears. if true, a pretty-printed HTML blob is returned instead.
</dl>
@@ -92,7 +85,6 @@ You can perform all types of queries using Lucene's default <a href="http://luce
<dl>
<dt>_id<dd>The _id of the document.
-<dt>_rev<dd>The _rev of the document.
<dt>_db<dd>The source database of the document.
<dt>_body<dd>Any text extracted from any attachment.
</dl>
@@ -143,12 +135,10 @@ Here's an example of a JSON response without sorting;
"rows": [
{
"_id": "hain-m-all_documents-257.",
- "_rev": "3750319208",
"score": 1.601625680923462
},
{
"_id": "hain-m-notes_inbox-257.",
- "_rev": "2603032545",
"score": 1.601625680923462
}
]
@@ -179,7 +169,6 @@ And the same with sorting;
"rows": [
{
"_id": "shankman-j-inbox-105.",
- "_rev": "4289412378",
"score": 0.6131107211112976,
"sort_order": [
"enron",
@@ -188,7 +177,6 @@ And the same with sorting;
},
{
"_id": "shankman-j-inbox-8.",
- "_rev": "1417542355",
"score": 0.7492915391921997,
"sort_order": [
"enron",
@@ -197,7 +185,6 @@ And the same with sorting;
},
{
"_id": "shankman-j-inbox-30.",
- "_rev": "951793815",
"score": 0.507369875907898,
"sort_order": [
"enron",
@@ -250,3 +237,12 @@ fti=/usr/bin/java -D couchdb.lucene.dir=/tmp \
/home/rnewson/Source/couchdb-lucene/target/dependency\
org.apache.couchdb.lucene.Main
</pre>
+
+<h2>Basic Authentication</h2>
+
+If you put couchdb behind an authenticating proxy you can still configure couchdb-lucene to pull from it by specifying additional system properties. Currently only Basic authentication is supported.
+
+<dl>
+<dt>couchdb.user<dd>the user to authenticate as.
+<dt>couchdb.password<dd>the password to authenticate with.
+</dl>
View
15 TODO
@@ -1,17 +1,24 @@
+Search Performance
+
* sort cache can take too long to build, respond with keep-alive messages.
-* add a _status uri that lets you see index state/progress.
* add an _optimize option.
-* handle "create" and "delete" notifications explicitly.
+* add Cache-Control: max-age=(minimum number of seconds before we commit)
+* distributed scoring.
+
+Index Performance
+
* batch updates (no hair trigger).
-* reindex when transform function changes.
+
+Misc
+
* hit highlighting (lucene contrib).
* clustering (carrot2).
* distributed search.
-
_design/lucene
carrot2 -> {"title_field":"blah", "content_field":"blah"}
forces Store.YES
+* JSONQuery? (http://docs.persvr.org/documentation/jsonquery)
View
36 federation.rb
@@ -0,0 +1,36 @@
+#!/usr/bin/ruby
+
+require 'net/http'
+require 'json'
+
+servers = %w(localhost localhost)
+result = {}
+
+threads=[]
+for server in servers
+ threads << Thread.new(server) do |url|
+ h = Net::HTTP.new(url, 5984)
+ resp = h.get('/' + ARGV[0] + '/_fti?' + ARGV[1])
+ json = JSON.parse(resp.body)
+
+ if (!result.has_key?('q')) then
+ result = json
+ else
+ # Accounting
+ result['total_rows'] += json['total_rows']
+ # Merge in new rows
+ result['rows'].concat json['rows']
+ if json.has_key?('sort_order')
+ result['rows'].sort!{|a,b| a['sort_order'] <=> b['sort_order']}
+ else
+ result['rows'].sort!{|b,a| [a['score'],a['_id']] <=> [b['score'],b['_id']]}
+ end
+ # Drop extraneous rows.
+ result['rows'].slice!(result['limit']..result['rows'].size)
+ end
+ end
+end
+# Wait for all responses
+threads.each{|thr| thr.join}
+
+puts JSON.generate result
View
10 src/main/java/org/apache/couchdb/lucene/Config.java
@@ -16,8 +16,6 @@
static final String ID = "_id";
- static final String REV = "_rev";
-
static final String SEQ = "_seq";
static final String BODY = "_body";
@@ -33,7 +31,15 @@
static final int BATCH_SIZE = Integer.getInteger("couchdb.lucene.batch", 1000);
static final String DB_URL = System.getProperty("couchdb.url", "http://localhost:5984");
+
+ static final String DB_USER = System.getProperty("couchdb.user");
+
+ static final String DB_PASSWORD = System.getProperty("couchdb.password");
static final int MAX_LIMIT = Integer.getInteger("couchdb.lucene.max_fetch", 250);
+ static final int CHANGE_THRESHOLD = Integer.getInteger("couchdb.lucene.change_threshold", 100);
+
+ static final int TIME_THRESHOLD = Integer.getInteger("couchdb.lucene.time_threshold", 60);
+
}
View
36 src/main/java/org/apache/couchdb/lucene/Database.java
@@ -9,9 +9,12 @@
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
+import org.apache.commons.httpclient.Credentials;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethodBase;
+import org.apache.commons.httpclient.UsernamePasswordCredentials;
+import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
@@ -32,6 +35,12 @@
static {
CLIENT.getParams().setCookiePolicy(CookiePolicy.IGNORE_COOKIES);
+ if (Config.DB_USER != null && Config.DB_PASSWORD != null) {
+ CLIENT.getParams().setAuthenticationPreemptive(true);
+ final Credentials creds = new UsernamePasswordCredentials(Config.DB_USER, Config.DB_PASSWORD);
+ CLIENT.getState().setCredentials(AuthScope.ANY, creds);
+ Log.errlog("Authenticating to couchdb as '%s'.", Config.DB_USER);
+ }
}
private final String url;
@@ -47,17 +56,19 @@ public Database(final String url) {
return (String[]) JSONArray.fromObject(get("_all_dbs")).toArray(EMPTY_ARR);
}
- public JSONObject getAllDocsBySeq(final String dbname, final long from, final int limit) throws HttpException,
+ public JSONObject getAllDocsBySeq(final String dbname, final long startkey) throws HttpException, IOException {
+ return JSONObject.fromObject(get(String.format("%s/_all_docs_by_seq?startkey=%s&include_docs=true",
+ encode(dbname), startkey)));
+ }
+
+ public JSONObject getAllDocsBySeq(final String dbname, final long startkey, final int limit) throws HttpException,
IOException {
- return JSONObject.fromObject(get(String.format("%s/_all_docs_by_seq?startkey=%s&limit=%d&include_docs=true",
- encode(dbname), from, limit)));
+ return JSONObject.fromObject(get(String.format("%s/_all_docs_by_seq?startkey=%d&limit=%d&include_docs=true",
+ encode(dbname), startkey, limit)));
}
- public JSONObject getDoc(final String dbname, final String id, final String rev) throws HttpException, IOException {
- if (rev == null)
- return JSONObject.fromObject(get(String.format("%s/%s", encode(dbname), id)));
- else
- return JSONObject.fromObject(get(String.format("%s/%s?rev=%s", encode(dbname), id, rev)));
+ public JSONObject getDoc(final String dbname, final String id) throws HttpException, IOException {
+ return JSONObject.fromObject(get(String.format("%s/%s", encode(dbname), id)));
}
public JSONObject getDocs(final String dbname, final String... ids) throws HttpException, IOException {
@@ -68,7 +79,8 @@ public JSONObject getDocs(final String dbname, final String... ids) throws HttpE
final JSONObject req = new JSONObject();
req.element("keys", keys);
- return JSONObject.fromObject(post(String.format("%s/_all_docs?include_docs=true", encode(dbname)), req.toString()));
+ return JSONObject.fromObject(post(String.format("%s/_all_docs?include_docs=true", encode(dbname)), req
+ .toString()));
}
public JSONObject getInfo(final String dbname) throws HttpException, IOException {
@@ -99,7 +111,11 @@ private String post(final String path, final String body) throws HttpException,
private synchronized String execute(final HttpMethodBase method) throws HttpException, IOException {
try {
- CLIENT.executeMethod(method);
+
+ final int sc = CLIENT.executeMethod(method);
+ if (sc == 401) {
+ throw new HttpException("Unauthorized.");
+ }
final InputStream in = method.getResponseBodyAsStream();
try {
final StringWriter writer = new StringWriter(2048);
View
165 src/main/java/org/apache/couchdb/lucene/Index.java
@@ -7,19 +7,23 @@
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Arrays;
-import java.util.HashMap;
+import java.util.Date;
import java.util.Iterator;
-import java.util.Map;
import java.util.Scanner;
+import java.util.Timer;
+import java.util.TimerTask;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
@@ -31,22 +35,33 @@
*/
public final class Index {
- private static final DateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss z");
+ private static final DateFormat DATE_FORMAT = new SimpleDateFormat("EEE MMM dd yyyy HH:mm:ss 'GMT'Z '('z')'");
private static final Database DB = new Database(Config.DB_URL);
private static final Tika TIKA = new Tika();
private static final Object MUTEX = new Object();
- private static final Map<String, Long> updates = new HashMap<String, Long>();
+ private static final Timer TIMER = new Timer("Timer", true);
+
+ private static class CheckpointTask extends TimerTask {
+
+ @Override
+ public void run() {
+ wakeupIndexer();
+ }
+
+ }
private static class Indexer implements Runnable {
private Directory dir;
private boolean running = true;
+ private long lastCommit = System.currentTimeMillis();
+
public void run() {
try {
this.dir = FSDirectory.getDirectory(Config.INDEX_DIR);
@@ -59,7 +74,7 @@ public void run() {
}
}
- private void updateIndex() throws IOException {
+ private synchronized void updateIndex() throws IOException {
if (IndexWriter.isLocked(dir)) {
Log.errlog("Forcibly unlocking locked index at startup.");
IndexWriter.unlock(dir);
@@ -71,27 +86,28 @@ private void updateIndex() throws IOException {
Rhino rhino = null;
boolean commit = false;
- boolean expunge = false;
final IndexWriter writer = newWriter();
- Progress progress = null;
+ final Progress progress = new Progress();
try {
- // Delete all documents in non-extant databases.
final IndexReader reader = IndexReader.open(dir);
try {
+ // Load status.
+ progress.load(reader);
+
+ // Remove documents from deleted databases.
final TermEnum terms = reader.terms(new Term(Config.DB, ""));
try {
- while (terms.next()) {
+ do {
final Term term = terms.term();
- if (!term.field().equals(Config.DB))
+ if (term == null || Config.DB.equals(term.field()) == false)
break;
if (Arrays.binarySearch(dbnames, term.text()) < 0) {
Log.errlog("Database '%s' has been deleted," + " removing all documents from index.",
term.text());
- delete(writer, term.text());
+ delete(term.text(), writer);
commit = true;
- expunge = true;
}
- }
+ } while (terms.next());
} finally {
terms.close();
}
@@ -100,11 +116,9 @@ private void updateIndex() throws IOException {
}
// Update all extant databases.
- progress = new Progress(dir);
- progress.load();
for (final String dbname : dbnames) {
// Database might supply a transformation function.
- final JSONObject designDoc = DB.getDoc(dbname, "_design/lucene", null);
+ final JSONObject designDoc = DB.getDoc(dbname, "_design/lucene");
if (designDoc != null && designDoc.containsKey("transform")) {
String transform = designDoc.getString("transform");
// Strip start and end double quotes.
@@ -116,17 +130,22 @@ private void updateIndex() throws IOException {
}
commit |= updateDatabase(writer, dbname, progress, rhino);
}
- } catch (final IOException e) {
+ } catch (final Exception e) {
Log.errlog(e);
commit = false;
} finally {
if (commit) {
- if (expunge) {
- writer.expungeDeletes();
- }
+ progress.save(writer);
writer.close();
- Log.errlog("Committed changes to index.");
- progress.save();
+ lastCommit = System.currentTimeMillis();
+
+ final IndexReader reader = IndexReader.open(dir);
+ try {
+ Log.errlog("Committed changes to index (%,d documents in index, %,d deletes).", reader
+ .numDocs(), reader.numDeletedDocs());
+ } finally {
+ reader.close();
+ }
} else {
writer.rollback();
}
@@ -145,68 +164,86 @@ private void waitForUpdateNotification() {
private IndexWriter newWriter() throws IOException {
final IndexWriter result = new IndexWriter(dir, Config.ANALYZER, MaxFieldLength.UNLIMITED);
+
+ // Customize merge policy.
+ final LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
+ mp.setMergeFactor(5);
+ mp.setMaxMergeMB(1000);
+ result.setMergePolicy(mp);
+
+ // Customize other settings.
result.setUseCompoundFile(false);
result.setRAMBufferSizeMB(Config.RAM_BUF);
- result.setMergeFactor(5);
- result.setMaxMergeDocs(1 * 1000 * 1000);
+
return result;
}
private boolean updateDatabase(final IndexWriter writer, final String dbname, final Progress progress,
final Rhino rhino) throws HttpException, IOException {
- final JSONObject info = DB.getInfo(dbname);
- final long update_seq = info.getLong("update_seq");
+ final long cur_seq = progress.getSeq(dbname);
+ final long target_seq = DB.getInfo(dbname).getLong("update_seq");
+
+ final boolean time_threshold_passed = (System.currentTimeMillis() - lastCommit) >= Config.TIME_THRESHOLD * 1000;
+ final boolean change_threshold_passed = (target_seq - cur_seq) >= Config.CHANGE_THRESHOLD;
+
+ if (!(time_threshold_passed || change_threshold_passed)) {
+ return false;
+ }
- long from = progress.getProgress(dbname);
- long start = from;
+ final String cur_sig = progress.getSignature(dbname);
+ final String new_sig = rhino == null ? Progress.NO_SIGNATURE : rhino.getSignature();
- if (from > update_seq) {
- start = from = -1;
- progress.setProgress(dbname, -1);
- }
+ boolean result = false;
- if (from == -1) {
+ // Reindex the database if sequence is 0 or signature changed.
+ if (progress.getSeq(dbname) == 0 || cur_sig.equals(new_sig) == false) {
Log.errlog("Indexing '%s' from scratch.", dbname);
- delete(writer, dbname);
+ delete(dbname, writer);
+ progress.update(dbname, new_sig, 0);
+ result = true;
}
- boolean changed = false;
- while (from < update_seq) {
- final JSONObject obj = DB.getAllDocsBySeq(dbname, from, Config.BATCH_SIZE);
+ long update_seq = progress.getSeq(dbname);
+ while (update_seq < target_seq) {
+ final JSONObject obj = DB.getAllDocsBySeq(dbname, update_seq, Config.BATCH_SIZE);
+
if (!obj.has("rows")) {
Log.errlog("no rows found (%s).", obj);
return false;
}
+
+ // Process all rows
final JSONArray rows = obj.getJSONArray("rows");
for (int i = 0, max = rows.size(); i < max; i++) {
final JSONObject row = rows.getJSONObject(i);
final JSONObject value = row.optJSONObject("value");
final JSONObject doc = row.optJSONObject("doc");
+ // New or updated document.
if (doc != null) {
updateDocument(writer, dbname, rows.getJSONObject(i), rhino);
- changed = true;
+ result = true;
}
+
+ // Deleted document.
if (value != null && value.optBoolean("deleted")) {
writer.deleteDocuments(new Term(Config.ID, row.getString("id")));
- changed = true;
+ result = true;
}
+
+ update_seq = row.getLong("key");
}
- from += Config.BATCH_SIZE;
}
- progress.setProgress(dbname, update_seq);
- if (changed) {
- synchronized (MUTEX) {
- updates.remove(dbname);
- }
- Log.errlog("%s: index caught up from %,d to %,d.", dbname, start, update_seq);
+ if (result) {
+ progress.update(dbname, new_sig, update_seq);
+ Log.errlog("%s: index caught up to %,d.", dbname, update_seq);
}
- return changed;
+ return result;
}
- private void delete(final IndexWriter writer, final String dbname) throws IOException {
+ private void delete(final String dbname, final IndexWriter writer) throws IOException {
writer.deleteDocuments(new Term(Config.DB, dbname));
}
@@ -230,11 +267,11 @@ private void updateDocument(final IndexWriter writer, final String dbname, final
// Standard properties.
doc.add(token(Config.DB, dbname, false));
final String id = (String) json.remove(Config.ID);
- final String rev = (String) json.remove(Config.REV);
+ // Discard _rev
+ json.remove("_rev");
// Index _id and _rev as tokens.
doc.add(token(Config.ID, id, true));
- doc.add(token(Config.REV, rev, true));
// Index all attributes.
add(doc, null, json, true);
@@ -273,8 +310,9 @@ private void add(final Document out, final String key, final Object value, final
}
} else if (value instanceof String) {
try {
- DATE_FORMAT.parse((String) value);
- out.add(token(key, (String) value, store));
+ final Date date = DATE_FORMAT.parse((String) value);
+ out.add(new Field(key, (String) value, Store.YES, Field.Index.NO));
+ out.add(new Field(key, Long.toString(date.getTime()), Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
} catch (final java.text.ParseException e) {
out.add(text(key, (String) value, store));
}
@@ -304,22 +342,29 @@ private void add(final Document out, final String key, final Object value, final
* type can be created, updated or deleted.
*/
public static void main(final String[] args) {
- final Runnable indexer = new Indexer();
- final Thread indexerThread = new Thread(indexer, "indexer");
- indexerThread.setDaemon(true);
- indexerThread.start();
+ start("indexer", new Indexer());
+ TIMER.schedule(new CheckpointTask(), Config.TIME_THRESHOLD * 1000, Config.TIME_THRESHOLD * 1000);
final Scanner scanner = new Scanner(System.in);
while (scanner.hasNextLine()) {
final String line = scanner.nextLine();
final JSONObject obj = JSONObject.fromObject(line);
if (obj.has("type") && obj.has("db")) {
- synchronized (MUTEX) {
- updates.put(obj.getString("db"), System.nanoTime());
- MUTEX.notify();
- }
+ wakeupIndexer();
}
}
}
+ private static void wakeupIndexer() {
+ synchronized (MUTEX) {
+ MUTEX.notify();
+ }
+ }
+
+ private static void start(final String name, final Runnable runnable) {
+ final Thread thread = new Thread(runnable, name);
+ thread.setDaemon(true);
+ thread.start();
+ }
+
}
View
2 src/main/java/org/apache/couchdb/lucene/Log.java
@@ -15,10 +15,12 @@ public static void errlog(final String fmt, final Object... args) {
public static void outlog(final Exception e) {
outlog("%s", e.getMessage());
+ e.printStackTrace(System.out);
}
public static void errlog(final Exception e) {
errlog("%s", e.getMessage());
+ e.printStackTrace();
}
}
View
104 src/main/java/org/apache/couchdb/lucene/Progress.java
@@ -1,73 +1,83 @@
package org.apache.couchdb.lucene;
import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Map.Entry;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermDocs;
public final class Progress {
- private static final String FILENAME = "couchdb.status";
+ public static final String NO_SIGNATURE = "";
- private final Directory dir;
+ private static final String PROGRESS_KEY = "_couchdb";
- private final Map<String, Long> progress = new HashMap<String, Long>();
+ private static final String PROGRESS_VALUE = "status";
- public Progress(final Directory dir) {
- this.dir = dir;
+ private static final Term PROGRESS_TERM = new Term(PROGRESS_KEY, PROGRESS_VALUE);
+
+ private Document progress = newDocument();
+
+ public Progress() {
}
- public long getProgress(final String dbname) {
- final Long result = progress.get(dbname);
- return result == null ? -1 : result;
+ public long getSeq(final String dbname) {
+ final Field field = progress.getField(seqField(dbname));
+ return field == null ? 0 : Long.parseLong(field.stringValue());
}
- public void load() throws IOException {
- if (dir.fileExists(FILENAME) == false) {
- progress.clear();
- return;
- }
+ public String getSignature(final String dbname) {
+ final Field field = progress.getField(sigField(dbname));
+ return field == null ? NO_SIGNATURE : field.stringValue();
+ }
- final IndexInput in = dir.openInput(FILENAME);
+ public void load(final IndexReader reader) throws IOException {
+ progress = newDocument();
+
+ final TermDocs termDocs = reader.termDocs(PROGRESS_TERM);
try {
- progress.clear();
- final int size = in.readVInt();
- for (int i = 0; i < size; i++) {
- final String dbname = in.readString();
- final long update_seq = in.readVLong();
- setProgress(dbname, update_seq);
+ while (termDocs.next()) {
+ final int doc = termDocs.doc();
+ if (!reader.isDeleted(doc)) {
+ progress = reader.document(doc);
+ }
}
} finally {
- in.close();
+ termDocs.close();
}
}
- public void save() throws IOException {
- final String tmp = "couchdb.new";
- final IndexOutput out = dir.createOutput(tmp);
- try {
- out.writeVInt(progress.size());
- for (final Entry<String, Long> entry : progress.entrySet()) {
- out.writeString(entry.getKey());
- out.writeVLong(entry.getValue());
- }
- out.close();
- dir.sync(tmp);
- dir.renameFile(tmp, FILENAME);
- } catch (final IOException e) {
- dir.deleteFile(tmp);
- throw e;
- } finally {
- out.close();
- }
+ public void save(final IndexWriter writer) throws IOException {
+ writer.updateDocument(PROGRESS_TERM, progress);
+ }
+
+ public void update(final String dbname, final String sig, final long seq) {
+ // Update seq.
+ progress.removeFields(seqField(dbname));
+ progress.add(new Field(seqField(dbname), Long.toString(seq), Store.YES, Field.Index.NO));
+
+ // Update sig.
+ progress.removeFields(sigField(dbname));
+ progress.add(new Field(sigField(dbname), sig, Store.YES, Field.Index.NO));
+ }
+
+ private Document newDocument() {
+ final Document result = new Document();
+ // Add unique identifier.
+ result.add(new Field(PROGRESS_KEY, PROGRESS_VALUE, Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
+ return result;
+ }
+
+ private String seqField(final String dbname) {
+ return dbname + "-seq";
}
- public void setProgress(final String dbname, final long progress) {
- this.progress.put(dbname, progress);
+ private String sigField(final String dbname) {
+ return dbname + "-sig";
}
}
View
4 src/main/java/org/apache/couchdb/lucene/Rhino.java
@@ -54,6 +54,10 @@ public String parse(final String doc) {
return (String) systemFun.call(context, scope, null, new Object[] { doc, userFun });
}
+ public String getSignature() {
+ return Utils.digest(fun);
+ }
+
public void close() {
Context.exit();
}
View
145 src/main/java/org/apache/couchdb/lucene/Search.java
@@ -16,83 +16,94 @@
*/
public final class Search {
- public static void main(final String[] args) throws Exception {
- IndexReader reader = null;
- IndexSearcher searcher = null;
-
- final Scanner scanner = new Scanner(System.in);
- while (scanner.hasNextLine()) {
- if (reader == null) {
- // Open a reader and searcher if index exists.
- if (IndexReader.indexExists(Config.INDEX_DIR)) {
- reader = IndexReader.open(NIOFSDirectory.getDirectory(Config.INDEX_DIR), true);
- searcher = new IndexSearcher(reader);
+ public static void main(final String[] args) {
+ try {
+ IndexReader reader = null;
+ IndexSearcher searcher = null;
+
+ final Scanner scanner = new Scanner(System.in);
+ while (scanner.hasNextLine()) {
+ if (reader == null) {
+ // Open a reader and searcher if index exists.
+ if (IndexReader.indexExists(Config.INDEX_DIR)) {
+ reader = IndexReader.open(NIOFSDirectory.getDirectory(Config.INDEX_DIR), true);
+ searcher = new IndexSearcher(reader);
+ }
}
- } else {
- // Refresh reader and searcher if necessary.
- final IndexReader newReader = reader.reopen();
- if (reader != newReader) {
- Log.outlog("Lucene index was updated, reopening searcher.");
- final IndexReader oldReader = reader;
- reader = newReader;
- searcher = new IndexSearcher(reader);
- oldReader.close();
- }
- }
- final String line = scanner.nextLine();
+ final String line = scanner.nextLine();
- // Process search request if index exists.
- if (searcher == null) {
- System.out.println(Utils.error(503, "couchdb-lucene not available."));
- continue;
- }
+ // Process search request if index exists.
+ if (searcher == null) {
+ System.out.println(Utils.error(503, "couchdb-lucene not available."));
+ continue;
+ }
- final JSONObject obj;
- try {
- obj = JSONObject.fromObject(line);
- } catch (final JSONException e) {
- System.out.println(Utils.error(400, "invalid JSON."));
- continue;
- }
+ final JSONObject obj;
+ try {
+ obj = JSONObject.fromObject(line);
+ } catch (final JSONException e) {
+ System.out.println(Utils.error(400, "invalid JSON."));
+ continue;
+ }
- if (!obj.has("query")) {
- System.out.println(Utils.error(400, "No query found in request."));
- continue;
- }
+ if (!obj.has("query")) {
+ System.out.println(Utils.error(400, "No query found in request."));
+ continue;
+ }
- final JSONObject query = obj.getJSONObject("query");
+ final JSONObject query = obj.getJSONObject("query");
- try {
- // A query.
- if (query.has("q")) {
- final SearchRequest request = new SearchRequest(obj);
- final String result = request.execute(searcher);
- System.out.println(result);
- continue;
+ final boolean reopen = !"ok".equals(query.optString("stale", "not-ok"));
+
+ // Refresh reader and searcher if necessary.
+ if (reader != null && reopen) {
+ final IndexReader newReader = reader.reopen();
+ if (reader != newReader) {
+ Log.outlog("Lucene index was updated, reopening searcher.");
+ final IndexReader oldReader = reader;
+ reader = newReader;
+ searcher = new IndexSearcher(reader);
+ oldReader.close();
+ }
}
- // info.
- if (query.keySet().isEmpty()) {
- final JSONObject json = new JSONObject();
- json.put("doc_count", reader.numDocs());
- json.put("doc_del_count", reader.numDeletedDocs());
- json.put("disk_size", size(reader.directory()));
- reader.directory();
-
- final JSONObject info = new JSONObject();
- info.put("code", 200);
- info.put("json", json);
-
- System.out.println(info);
+
+ try {
+ // A query.
+ if (query.has("q")) {
+ final SearchRequest request = new SearchRequest(obj);
+ final String result = request.execute(searcher);
+ System.out.println(result);
+ continue;
+ }
+ // info.
+ if (query.keySet().isEmpty()) {
+ final JSONObject json = new JSONObject();
+ json.put("doc_count", reader.numDocs());
+ json.put("doc_del_count", reader.numDeletedDocs());
+ json.put("disk_size", size(reader.directory()));
+ reader.directory();
+
+ final JSONObject info = new JSONObject();
+ info.put("code", 200);
+ info.put("json", json);
+ final JSONObject headers = new JSONObject();
+ headers.put("Content-Type", "text/plain");
+ info.put("headers", headers);
+
+ System.out.println(info);
+ }
+ } catch (final Exception e) {
+ System.out.println(Utils.error(400, e.getMessage()));
}
- } catch (final Exception e) {
- System.out.println(Utils.error(400, e.getMessage()));
- }
- System.out.println(Utils.error(400, "Bad request."));
- }
- if (reader != null) {
- reader.close();
+ System.out.println(Utils.error(400, "Bad request."));
+ }
+ if (reader != null) {
+ reader.close();
+ }
+ } catch (final Exception e) {
+ System.out.println(Utils.error(500, e.getMessage()));
}
}
View
54 src/main/java/org/apache/couchdb/lucene/SearchRequest.java
@@ -26,7 +26,7 @@
public final class SearchRequest {
- private static final FieldSelector FS = new MapFieldSelector(new String[] { Config.ID, Config.REV });
+ private static final FieldSelector FS = new MapFieldSelector(new String[] { Config.ID });
private static final Database DB = new Database(Config.DB_URL);
@@ -69,10 +69,27 @@ public SearchRequest(final JSONObject obj) throws ParseException {
if (sort == null) {
this.sort = null;
} else {
- if (sort.indexOf(",") != -1) {
- this.sort = new Sort(sort.split(","));
+ final String[] split = sort.split(",");
+ final SortField[] sort_fields = new SortField[split.length];
+ for (int i = 0; i < split.length; i++) {
+ switch (split[i].charAt(0)) {
+ case '/':
+ sort_fields[i] = new SortField(split[i].substring(1));
+ break;
+ case '\\':
+ sort_fields[i] = new SortField(split[i].substring(1), true);
+ break;
+ default:
+ sort_fields[i] = new SortField(split[i]);
+ break;
+ }
+ }
+
+ if (sort_fields.length == 1) {
+ // Let Lucene add doc as secondary sort order.
+ this.sort = new Sort(sort_fields[0].getField(), sort_fields[0].getReverse());
} else {
- this.sort = new Sort(sort, !query.optBoolean("asc", true));
+ this.sort = new Sort(sort_fields);
}
}
}
@@ -98,30 +115,38 @@ public String execute(final IndexSearcher searcher) throws IOException {
}
stopWatch.lap("search");
// Fetch matches (if any).
- final int max = min(td.totalHits, limit);
+ final int max = min(td.totalHits - skip, limit);
final JSONArray rows = new JSONArray();
+ final String[] fetch_ids = new String[max];
for (int i = skip; i < skip + max; i++) {
final Document doc = searcher.doc(td.scoreDocs[i].doc, FS);
final JSONObject obj = new JSONObject();
// Include basic details.
- obj.element("_id", doc.get(Config.ID));
- obj.element("_rev", doc.get(Config.REV));
- obj.element("score", td.scoreDocs[i].score);
+ obj.put("_id", doc.get(Config.ID));
+ obj.put("score", td.scoreDocs[i].score);
// Include sort order (if any).
if (td instanceof TopFieldDocs) {
final FieldDoc fd = (FieldDoc) ((TopFieldDocs) td).scoreDocs[i];
- obj.element("sort_order", fd.fields);
+ obj.put("sort_order", fd.fields);
}
// Fetch document (if requested).
if (include_docs) {
- obj.element("doc", DB.getDoc(dbname, obj.getString("_id"), obj.getString("_rev")));
+ fetch_ids[i - skip] = doc.get(Config.ID);
}
rows.add(obj);
}
+ // Fetch documents (if requested).
+ if (include_docs) {
+ final JSONArray fetched_docs = DB.getDocs(dbname, fetch_ids).getJSONArray("rows");
+ for (int i = 0; i < max; i++) {
+ rows.getJSONObject(i).put("doc", fetched_docs.get(i));
+ }
+ }
stopWatch.lap("fetch");
final JSONObject json = new JSONObject();
json.put("q", q.toString(Config.DEFAULT_FIELD));
+ json.put("etag", etag);
json.put("skip", skip);
json.put("limit", limit);
json.put("total_rows", td.totalHits);
@@ -136,9 +161,10 @@ public String execute(final IndexSearcher searcher) throws IOException {
final JSONObject result = new JSONObject();
result.put("code", 200);
- // Results can't change unless the IndexReader does.
final JSONObject headers = new JSONObject();
- // TODO make a per-db etag (md5(dbname + update_seq)?).
+ // Cache for 5 minutes.
+ headers.put("Cache-Control", "max-age=300");
+ // Results can't change unless the IndexReader does.
headers.put("ETag", etag);
result.put("headers", headers);
@@ -155,10 +181,6 @@ private String getETag(final IndexSearcher searcher) {
return Long.toHexString(searcher.getIndexReader().getVersion());
}
- private String toString(final Sort sort) {
- return toString(sort.getSort());
- }
-
private String toString(final SortField[] sortFields) {
final JSONArray result = new JSONArray();
for (final SortField field : sortFields) {
View
5 src/main/java/org/apache/couchdb/lucene/Utils.java
@@ -2,6 +2,7 @@
import net.sf.json.JSONObject;
+import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
@@ -21,6 +22,10 @@ public static String error(final String txt) {
return error(500, txt);
}
+ public static String digest(final String data) {
+ return DigestUtils.md5Hex(data);
+ }
+
public static String error(final int code, final String txt) {
return new JSONObject().element("code", code).element("body", StringEscapeUtils.escapeHtml(txt)).toString();
}

0 comments on commit fce8c14

Please sign in to comment.
Something went wrong with that request. Please try again.