Permalink
Browse files

most of index cleanup work

  • Loading branch information...
1 parent c47c494 commit 6e66766f0bac23148150eb3ffdb2fa84ae1b159c Robert Newson committed Jan 31, 2010
View
@@ -540,3 +540,9 @@ If you just want to expunge pending deletes, then call;
<pre>
curl -X POST http://localhost:5984/&lt;db>/_fti/&lt;ddoc>/&lt;index>/_expunge
</pre>
+
+If you recreate databases or frequently change your fulltext functions, you will probably have old indexes lying around on disk. To remove all of them, call:
+
+<pre>
+curl -X POST http://localhost:5984/&lt;db>/_fti/_cleanup
+</pre>
View
@@ -69,10 +69,8 @@ def respond(res, req, key):
path = '/'.join(['', 'search', key] + path)
params = urllib.urlencode(dict([k, v.encode('utf-8')] for k, v in req["query"].items()))
path = '?'.join([path, params])
- elif len(path) == 4:
- path = '/'.join(['', 'admin', key] + path)
else:
- return mkresp(400, "Invalid path\n" + str(len(req)), {"Content-Type":"text/plain"})
+ path = '/'.join(['', 'admin', key] + path)
req_headers = {}
for h in req.get("headers", []):
@@ -84,7 +82,7 @@ def respond(res, req, key):
method = req["method"]
else:
method = req["verb"]
-
+ sys.stderr.write(path)
res.request(method, path, headers=req_headers)
resp = res.getresponse()
@@ -16,19 +16,27 @@
* limitations under the License.
*/
+import java.io.File;
import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
+import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.apache.commons.configuration.HierarchicalINIConfiguration;
+import org.apache.commons.io.FileUtils;
+import org.apache.http.client.HttpClient;
import org.apache.lucene.index.IndexWriter;
import com.github.rnewson.couchdb.lucene.Lucene.WriterCallback;
+import com.github.rnewson.couchdb.lucene.couchdb.Couch;
+import com.github.rnewson.couchdb.lucene.couchdb.Database;
import com.github.rnewson.couchdb.lucene.util.IndexPath;
import com.github.rnewson.couchdb.lucene.util.ServletUtils;
import com.github.rnewson.couchdb.lucene.util.Utils;
@@ -67,15 +75,25 @@ public void setConfiguration(final HierarchicalINIConfiguration configuration) {
@Override
protected void doPost(final HttpServletRequest req, final HttpServletResponse resp) throws ServletException, IOException {
final IndexPath path = IndexPath.parse(configuration, req);
+ String command = req.getPathInfo();
+ command = command.substring(command.lastIndexOf("/") + 1);
if (path == null) {
- ServletUtils.sendJSONError(req, resp, 400, "Bad path");
+ // generalize path handling.
+ final String[] parts = IndexPath.parts(req);
+ if (parts.length == 2) {
+ if ("_cleanup".equals(command)) {
+ cleanup(parts[0]);
+ resp.setStatus(202);
+ Utils.writeJSON(resp, JSON_SUCCESS);
+ }
+ } else {
+ ServletUtils.sendJSONError(req, resp, 400, "Bad path");
+ }
return;
}
- lucene.startIndexing(path, true);
- String command = req.getPathInfo();
- command = command.substring(command.lastIndexOf("/") + 1);
+ lucene.startIndexing(path, true);
if ("_expunge".equals(command)) {
lucene.withWriter(path, new WriterCallback() {
@@ -114,4 +132,44 @@ public void onMissing() throws IOException {
resp.sendError(400, "Bad request");
}
+ /**
+  * Deletes on-disk index directories that no longer match anything on the
+  * CouchDB instance configured under {@code key}.
+  *
+  * <p>For each database the server lists, the digest of every entry in every
+  * design document's "fulltext" member is collected; any directory beneath
+  * that database's uuid directory whose name is not one of those digests is
+  * removed. Afterwards, any directory beneath the index root whose name is
+  * not the uuid of a listed database is removed.
+  *
+  * @param key name of the configuration section whose "url" entry locates
+  *            the CouchDB instance.
+  * @throws IOException propagated from the CouchDB calls or from directory
+  *                     deletion; directories handled before the failure
+  *                     stay deleted.
+  */
+ private void cleanup(final String key) throws IOException {
+     // TODO tidy this.
+     final HttpClient client = HttpClientFactory.getInstance();
+     final Couch couch = Couch.getInstance(client, IndexPath.url(configuration, key));
+
+     final Set<String> dbKeep = new HashSet<String>();
+
+     for (final String dbname : couch.getAllDatabases()) {
+         final Database db = couch.getDatabase(dbname);
+         // Fetch the uuid once: each getUuid() call reads _local/lucene
+         // from the server, and one value must drive both the keep-set
+         // entry and the directory lookup.
+         final String uuidName = db.getUuid().toString();
+         dbKeep.add(uuidName);
+
+         // TODO create DesignDocument, Fulltext, View classes.
+
+         final JSONArray arr = db.getAllDesignDocuments();
+         final Set<String> viewKeep = new HashSet<String>();
+         for (int i = 0; i < arr.size(); i++) {
+             final JSONObject ddoc = arr.getJSONObject(i).getJSONObject("doc");
+             if (ddoc.has("fulltext")) {
+                 final JSONObject fulltext = ddoc.getJSONObject("fulltext");
+                 for (final Object name : fulltext.keySet()) {
+                     final JSONObject view = fulltext.getJSONObject((String) name);
+                     viewKeep.add(Lucene.digest(view));
+                 }
+             }
+         }
+         // Delete all indexes except the keepers. The uuid directory is
+         // the one named after the uuid directly beneath the index root.
+         deleteUnlisted(new File(lucene.getRootDir(), uuidName), viewKeep);
+     }
+
+     // Delete all directories except the keepers.
+     deleteUnlisted(lucene.getRootDir(), dbKeep);
+ }
+
+ /**
+  * Deletes every sub-directory of {@code parent} whose name is not in
+  * {@code keep}.
+  *
+  * @param parent directory to scan; may be missing.
+  * @param keep names of the sub-directories to leave in place.
+  * @throws IOException if a directory cannot be deleted.
+  */
+ private static void deleteUnlisted(final File parent, final Set<String> keep) throws IOException {
+     final File[] children = parent.listFiles();
+     if (children == null) {
+         // listFiles() yields null when parent is not a directory (for
+         // example a database that was never indexed) or cannot be read.
+         return;
+     }
+     for (final File child : children) {
+         // Plain files are left alone; deleteDirectory is only meant for
+         // directories.
+         if (child.isDirectory() && !keep.contains(child.getName())) {
+             FileUtils.deleteDirectory(child);
+         }
+     }
+ }
}
@@ -170,7 +170,7 @@ public void withWriter(final IndexPath path, final WriterCallback callback) thro
public void createWriter(final IndexPath path, final UUID uuid, final JSONObject view) throws IOException {
final String digest = digest(view);
- final File dir = new File(new File(root, uuid.toString()), digest);
+ final File dir = new File(getUuidDir(uuid), digest);
dir.mkdirs();
synchronized (map) {
@@ -184,6 +184,14 @@ public void createWriter(final IndexPath path, final UUID uuid, final JSONObject
}
}
+ /**
+  * Returns the root directory; {@link #getUuidDir(UUID)} resolves each
+  * database's directory directly beneath it.
+  *
+  * @return the value of the {@code root} field.
+  */
+ public File getRootDir() {
+     return this.root;
+ }
+
+ /**
+  * Returns the directory named after {@code uuid} directly beneath the root
+  * directory. The directory is not created here.
+  *
+  * @param uuid the uuid whose string form names the directory.
+  * @return the (possibly non-existent) directory for that uuid.
+  */
+ public File getUuidDir(final UUID uuid) {
+     final String dirName = uuid.toString();
+     return new File(getRootDir(), dirName);
+ }
+
public void close() {
executor.shutdownNow();
}
@@ -368,30 +368,15 @@ private JSONObject extractView(final JSONObject ddoc) {
return fulltext.getJSONObject(path.getViewName());
}
- private UUID getDatabaseUuid() throws IOException {
+ private void index() throws IOException {
+ UUID uuid = null;
try {
- final JSONObject local = database.getDocument("_local/lucene");
- final UUID uuid = UUID.fromString(local.getString("uuid"));
- logger.trace("Database has uuid " + uuid);
- return uuid;
- } catch (final HttpResponseException e) {
- switch (e.getStatusCode()) {
- case HttpStatus.SC_NOT_FOUND:
- final JSONObject err = JSONObject.fromObject(e.getMessage());
- if ("no_db_file".equals(err.getString("reason"))) {
- throw e;
- }
- final UUID uuid = UUID.randomUUID();
- database.saveDocument("_local/lucene", String.format("{\"uuid\":\"%s\"}", uuid));
- return getDatabaseUuid();
- default:
- throw e;
- }
+ uuid = database.getUuid();
+ } catch (final IOException e) {
+ database.createUuid();
+ uuid = database.getUuid();
}
- }
- private void index() throws IOException {
- final UUID uuid = getDatabaseUuid();
final JSONObject ddoc = database.getDocument("_design/" + path.getDesignDocumentName());
final JSONObject view = extractView(ddoc);
if (view == null) {
@@ -17,6 +17,7 @@
*/
import java.io.IOException;
+import java.util.UUID;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
@@ -91,4 +92,14 @@ public boolean saveDocument(final String id, final String body) throws IOExcepti
return HttpUtils.put(httpClient, url + Utils.urlEncode(id), body) == 201;
}
+ /**
+  * Reads the "uuid" member of this database's {@code _local/lucene}
+  * document and parses it as a {@link UUID}.
+  *
+  * @return the uuid stored in the document.
+  * @throws IOException propagated from {@code getDocument}.
+  */
+ public UUID getUuid() throws IOException {
+     final String stored = getDocument("_local/lucene").getString("uuid");
+     return UUID.fromString(stored);
+ }
+
+ /**
+  * Generates a random uuid and saves it as the "uuid" member of this
+  * database's {@code _local/lucene} document.
+  *
+  * <p>The boolean result of {@code saveDocument} is not inspected, so an
+  * unsuccessful save is not reported by this method.
+  *
+  * @throws IOException propagated from {@code saveDocument}.
+  */
+ public void createUuid() throws IOException {
+     final String body = String.format("{\"uuid\":\"%s\"}", UUID.randomUUID());
+     saveDocument("_local/lucene", body);
+ }
+
}
@@ -24,13 +24,22 @@
public final class IndexPath {
public static IndexPath parse(final HierarchicalINIConfiguration configuration, final HttpServletRequest req) {
- final String uri = req.getRequestURI().replaceFirst("^/\\w+/", "");
- final String[] parts = uri.split("/");
+ final String[] parts = parts(req);
if (parts.length < 4) {
return null;
}
- final Configuration section = configuration.getSection(parts[0]);
- return section.containsKey("url") ? new IndexPath(section.getString("url"), parts[1], parts[2], parts[3]) : null;
+ final String url = url(configuration, parts[0]);
+ return url == null ? null : new IndexPath(url, parts[1], parts[2], parts[3]);
+ }
+
+ /**
+  * Splits the request URI into its "/"-separated segments after removing
+  * the first segment when it matches {@code ^/\w+/}.
+  *
+  * @param req the incoming request.
+  * @return the remaining path segments, in order.
+  */
+ public static String[] parts(final HttpServletRequest req) {
+     return req.getRequestURI().replaceFirst("^/\\w+/", "").split("/");
+ }
+
+ /**
+  * Looks up the "url" entry of the configuration section named
+  * {@code key}.
+  *
+  * @param configuration the INI configuration to read.
+  * @param key the section name.
+  * @return the section's "url" value, or {@code null} when the section has
+  *         no such entry.
+  */
+ public static String url(final HierarchicalINIConfiguration configuration, final String key) {
+     final Configuration section = configuration.getSection(key);
+     if (!section.containsKey("url")) {
+         return null;
+     }
+     return section.getString("url");
}
private final String database;

0 comments on commit 6e66766

Please sign in to comment.