Allow POST queries to Lucene, permitting queries that exceed the GET URL max length #113

Closed
wants to merge 28 commits into
from
Commits
Jump to file or symbol
Failed to load files and symbols.
+204 −35
Diff settings

Always

Just for now

View
@@ -35,7 +35,7 @@ def main():
if len(args):
parser.error("Unrecognized arguments: %s" % ' '.join(args))
for req in requests():
- res = httplib.HTTPConnection(opts.remote_host, opts.remote_port)
+ res = httplib.HTTPConnection(opts.remote_host, opts.remote_port,timeout=300)
try:
resp = respond(res, req, opts.key)
except Exception, e:
@@ -78,7 +78,10 @@ def respond(res, req, key):
else:
method = req["verb"]
- res.request(method, path, headers=req_headers)
+ if method == "POST":
+ res.request(method, path,req.get("body").encode("utf-8"),req_headers)
+ else:
+ res.request(method, path, headers=req_headers)
resp = res.getresponse()
resp_headers = {}
@@ -20,6 +20,11 @@
<fileMode>755</fileMode>
<outputDirectory>/bin</outputDirectory>
</file>
+ <file>
+ <source>${project.basedir}/src/main/bin/run.bat</source>
+ <fileMode>755</fileMode>
+ <outputDirectory>/bin</outputDirectory>
+ </file>
<file>
<source>${project.basedir}/src/main/bin/kill_ppid</source>
<fileMode>755</fileMode>
View
@@ -0,0 +1,19 @@
#!/bin/sh
#
# Launches couchdb-lucene. If PIDFILE is set in the environment, the JVM is
# started in the background and its pid written to that file; otherwise the
# JVM replaces this shell via exec.

# Resolve the installation directory from the script's location unless the
# caller supplied CL_BASEDIR explicitly. Quoting protects paths with spaces.
[ -z "$CL_BASEDIR" ] && CL_BASEDIR=`dirname "$0"`
cd "$CL_BASEDIR/.." || exit 1

JAVA_OPTS="-server -Xmx1g -XX:OnOutOfMemoryError=bin/kill_ppid -Xdebug -Xrunjdwp:transport=dt_socket,address=8000,server=y,suspend=n"
CLASS=com.github.rnewson.couchdb.lucene.Main

# Build the classpath from every jar in lib/. Using the glob directly avoids
# the word-splitting pitfalls of parsing `ls` output.
CLASSPATH="conf"
for JAR in lib/*.jar
do
    CLASSPATH="$CLASSPATH:$JAR"
done

if [ -z "$PIDFILE" ]; then
    exec java $JAVA_OPTS -cp "$CLASSPATH" $CLASS
else
    java $JAVA_OPTS -cp "$CLASSPATH" $CLASS &
    echo $! > "$PIDFILE"
fi
View
@@ -0,0 +1,20 @@
@ECHO OFF
REM Launches couchdb-lucene on Windows with every jar in lib\ on the classpath.
SETLOCAL

SET JAVA_OPTS=-server -Xmx1g
SET CLASS=com.github.rnewson.couchdb.lucene.Main
SET CLASSPATH=conf

REM FOR expands each match with the lib\ prefix already included, so the
REM subroutine must NOT prepend lib\ again (the original produced broken
REM lib/lib/x.jar entries). %~1 strips the surrounding quotes.
FOR %%G IN (lib\*.jar) DO (
    CALL :addclasspath "%%G"
)
GOTO :done

:addclasspath
SET CLASSPATH=%CLASSPATH%;%~1
GOTO :eof

:done

java %JAVA_OPTS% -cp "%CLASSPATH%" %CLASS%

ENDLOCAL
@@ -74,9 +74,12 @@
import com.github.rnewson.couchdb.lucene.util.ServletUtils;
import com.github.rnewson.couchdb.lucene.util.StopWatch;
import com.github.rnewson.couchdb.lucene.util.Utils;
+import org.apache.lucene.search.BooleanQuery;
public final class DatabaseIndexer implements Runnable, ResponseHandler<Void> {
+
+
private class IndexState {
private final DocumentConverter converter;
@@ -185,7 +188,7 @@ private void blockForLatest(final boolean staleOk) throws IOException, JSONExcep
wait(timeout);
timeout -= (System.currentTimeMillis() - start);
if (timeout <= 0) {
- throw new IOException("Search timed out.");
+ throw new IOException(String.format("Search timed out while waiting for %s (currently at: %s)",latest,pending_seq));
}
} catch (final InterruptedException e) {
throw new IOException("Search timed out.");
@@ -335,24 +338,31 @@ public Void handleResponse(final HttpResponse response)
}
final UpdateSequence seq = UpdateSequence.parseUpdateSequence(json.getString("seq"));
+
final String id = json.getString("id");
- CouchDocument doc;
+ CouchDocument doc = null;
+ boolean isDeleted = false;
if (!json.isNull("doc")) {
doc = new CouchDocument(json.getJSONObject("doc"));
+ isDeleted = doc.isDeleted();
} else {
- // include_docs=true doesn't work prior to 0.11.
- try {
- doc = database.getDocument(id);
- } catch (final HttpResponseException e) {
- switch (e.getStatusCode()) {
- case HttpStatus.SC_NOT_FOUND:
- doc = CouchDocument.deletedDocument(id);
- break;
- default:
- logger.warn("Failed to fetch " + id);
- break loop;
- }
- }
+ isDeleted = json.has("deleted") && json.getBoolean("deleted");
+ if (!isDeleted) {
+ logger.warn("Doc was null:" + id);
+ // include_docs=true doesn't work prior to 0.11.
+ try {
+ doc = database.getDocument(id);
+ } catch (final HttpResponseException e) {
+ switch (e.getStatusCode()) {
+ case HttpStatus.SC_NOT_FOUND:
+ doc = CouchDocument.deletedDocument(id);
+ break;
+ default:
+ logger.warn("Failed to fetch " + id);
+ break loop;
+ }
+ }
+ }
}
if (id.startsWith("_design")) {
@@ -362,7 +372,7 @@ public Void handleResponse(final HttpResponse response)
}
}
- if (doc.isDeleted()) {
+ if (isDeleted) {
for (final IndexState state : states.values()) {
state.writer.deleteDocuments(new Term("_id", id));
state.setPendingSequence(seq);
@@ -470,8 +480,12 @@ public void run() {
close();
}
}
+ public void search(final HttpServletRequest req,
+ final HttpServletResponse resp) throws IOException, JSONException {
+ search(req.getParameter(LuceneServlet.QUERY_PARM), req, resp);
+ }
- public void search(final HttpServletRequest req,
+ public void search(final String query,final HttpServletRequest req,
final HttpServletResponse resp) throws IOException, JSONException {
final IndexState state = getState(req, resp);
if (state == null)
@@ -484,7 +498,7 @@ public void search(final HttpServletRequest req,
resp.setStatus(304);
return;
}
- for (final String queryString : getQueryStrings(req)) {
+ for (final String queryString : getQueryStrings(query)) {
final Analyzer analyzer = state.analyzer(req.getParameter("analyzer"));
final Operator operator = "and".equalsIgnoreCase(req.getParameter("default_operator"))
? Operator.AND : Operator.OR;
@@ -608,10 +622,7 @@ public void search(final HttpServletRequest req,
.getDocuments(fetch_ids);
for (int j = 0; j < max; j++) {
final CouchDocument doc = fetched_docs.get(j);
- final JSONObject row = doc == null ?
- new JSONObject("{\"error\":\"not_found\"}") :
- doc.asJson();
- rows.getJSONObject(j).put("doc", row);
+ rows.getJSONObject(j).put("doc",doc!=null?doc.asJson():null);
}
}
stopWatch.lap("fetch");
@@ -670,7 +681,10 @@ public void search(final HttpServletRequest req,
}
private String[] getQueryStrings(final HttpServletRequest req) {
- return Utils.splitOnCommas(req.getParameter("q"));
+ return getQueryStrings(req.getParameter("q"));
+ }
+ private String[] getQueryStrings(final String query) {
+ return Utils.splitOnCommas(query);
}
private void close() {
@@ -817,10 +831,12 @@ private IndexWriter newWriter(final Directory dir) throws IOException {
final LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
mergePolicy.setMergeFactor(ini.getInt("lucene.mergeFactor", 10));
- mergePolicy.setUseCompoundFile(ini.getBoolean("lucene.useCompoundFile",
- false));
+ mergePolicy.setUseCompoundFile(ini.getBoolean("lucene.useCompoundFile",false));
+
+ BooleanQuery.setMaxClauseCount(ini.getInt("lucene.maxBooleanClauseCount", 10000));
+
config.setMergePolicy(mergePolicy);
-
+ config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
config.setRAMBufferSizeMB(ini.getDouble("lucene.ramBufferSizeMB",
IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB));
@@ -45,8 +45,13 @@ public void handle(String target, HttpServletRequest request, HttpServletRespons
ServletUtils.sendJsonError(request, response, connection.getResponse().getStatus(),
new JSONObject(reason));
} else {
- ServletUtils.sendJsonError(request, response, connection.getResponse().getStatus(),
- reason);
+ if (reason != null)
+ ServletUtils.sendJsonError(request, response, connection.getResponse().getStatus(),
+ reason);
+ else
+ ServletUtils.sendJsonError(request, response, connection.getResponse().getStatus(),
+ "Unknown");
+
}
} catch (final JSONException e) {
response.sendError(500);
@@ -16,6 +16,7 @@
* limitations under the License.
*/
+import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
@@ -46,8 +47,12 @@
import com.github.rnewson.couchdb.lucene.couchdb.DesignDocument;
import com.github.rnewson.couchdb.lucene.couchdb.View;
import com.github.rnewson.couchdb.lucene.util.ServletUtils;
+import java.io.StringWriter;
+import org.apache.log4j.Level;
+import org.apache.log4j.Priority;
public final class LuceneServlet extends HttpServlet {
+ public static final String QUERY_PARM = "q";
private static final Logger LOG = Logger.getLogger(LuceneServlet.class);
@@ -175,8 +180,15 @@ protected void doGet(final HttpServletRequest req,
try {
doGetInternal(req, resp);
} catch (final JSONException e) {
+ LOG.log(Level.FATAL, resp, e);
resp.sendError(500);
- }
+ } catch(final IOException e) {
+ LOG.log(Level.FATAL, resp, e);
+ throw e;
+ } catch(final Throwable e) {
+ LOG.log(Level.FATAL, resp, e);
+ resp.sendError(500);
+ }
}
private void doGetInternal(final HttpServletRequest req, final HttpServletResponse resp)
@@ -192,7 +204,7 @@ private void doGetInternal(final HttpServletRequest req, final HttpServletRespon
return;
}
- if (req.getParameter("q") == null) {
+ if (req.getParameter(LuceneServlet.QUERY_PARM) == null) {
indexer.info(req, resp);
} else {
indexer.search(req, resp);
@@ -210,22 +222,59 @@ protected void doPost(final HttpServletRequest req,
try {
doPostInternal(req, resp);
} catch (final JSONException e) {
+ LOG.log(Level.FATAL, resp, e);
resp.sendError(500);
- }
+ } catch(final IOException e) {
+ LOG.log(Level.FATAL, resp, e);
+ throw e;
+ } catch(final Throwable e) {
+ LOG.log(Level.FATAL, resp, e);
+ resp.sendError(500);
+ }
}
private void doPostInternal(final HttpServletRequest req, final HttpServletResponse resp)
throws IOException, JSONException {
+ DatabaseIndexer indexer;
switch (StringUtils.countMatches(req.getRequestURI(), "/")) {
case 3:
if (req.getPathInfo().endsWith("/_cleanup")) {
cleanup(req, resp);
return;
}
break;
+ case 5:
+ indexer = getIndexer(req);
+ if (indexer == null) {
+ ServletUtils.sendJsonError(req, resp, 500, "error_creating_index");
+ return;
+ }
+
+ BufferedReader reader = req.getReader();
+ StringWriter writer = new StringWriter();
+
+ char[] buffer = new char[1024];
+ try {
+ int n;
+ while ((n = reader.read(buffer)) != -1) {
+ writer.write(buffer, 0, n);
+ }
+ } catch(Exception ex) {
+ log("Could not read input", ex);
+ ServletUtils.sendJsonError(req, resp, 500, "could not read input");
+ return;
+ } finally {
+ reader.close();
+ }
+ String query = writer.toString();
+
+ indexer.search(query,req, resp);
+
+ return;
case 6:
- final DatabaseIndexer indexer = getIndexer(req);
- indexer.admin(req, resp);
+ indexer = getIndexer(req);
+ if (indexer != null)
+ indexer.admin(req, resp);
return;
}
ServletUtils.sendJsonError(req, resp, 400, "bad_request");
@@ -0,0 +1,20 @@
+[lucene]
+# The output directory for Lucene indexes.
+dir=indexes
+
+# The local host name that couchdb-lucene binds to
+host=localhost
+
+# The port that couchdb-lucene binds to.
+port=5985
+
+# Timeout for requests in milliseconds.
+timeout=10000
+
+# Default limit for search results
+limit=25
+
+# CouchDB server mappings: each section below names a server and its URL.
+
+[local]
+url = http://localhost:5984/
View
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
+<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
+
+ <!-- Output to screen -->
+ <appender name="CONSOLE" class="org.apache.log4j.ConsoleAppender">
+ <layout class="org.apache.log4j.PatternLayout">
+ <param name="ConversionPattern" value="%-5p %c %x | %d{HH:mm:ss} | %m%n"/>
+ </layout>
+ </appender>
+
+ <!-- Output to file -->
+ <appender name="FILE" class="org.apache.log4j.RollingFileAppender">
+ <param name="file" value="logs/couchdb-lucene.log"/>
+ <param name="MaxFileSize" value="100KB"/>
+ <!-- Keep one backup file -->
+ <param name="MaxBackupIndex" value="1"/>
+ <layout class="org.apache.log4j.PatternLayout">
+ <param name="ConversionPattern" value="%d{ISO8601} %p [%c{1}] %m%n"/>
+ </layout>
+ </appender>
+
+ <logger name="com.github">
+ <level value="INFO"/>
+ </logger>
+
+ <root>
+ <priority value="WARN"/>
+ <appender-ref ref="CONSOLE"/>
+ <appender-ref ref="FILE"/>
+ </root>
+</log4j:configuration>