From 043e43277428be99bba236bdeab0423a26940ac0 Mon Sep 17 00:00:00 2001 From: John Wang Date: Mon, 28 Nov 2011 18:40:53 -0800 Subject: [PATCH 1/3] minor bug fix --- .../java/proj/zoie/perf/indexing/LinedFileDataProvider.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/zoie-perf/src/main/java/proj/zoie/perf/indexing/LinedFileDataProvider.java b/zoie-perf/src/main/java/proj/zoie/perf/indexing/LinedFileDataProvider.java index ded05ac4..c72d2062 100644 --- a/zoie-perf/src/main/java/proj/zoie/perf/indexing/LinedFileDataProvider.java +++ b/zoie-perf/src/main/java/proj/zoie/perf/indexing/LinedFileDataProvider.java @@ -27,7 +27,7 @@ public class LinedFileDataProvider extends StreamDataProvider { public LinedFileDataProvider(File file,long startingOffset){ - super(ZoieConfig.DEFAULT_VERSION_COMPARATOR); + super(ZoiePerfVersion.COMPARATOR); _file = file; _rad = null; _startingOffset = startingOffset; @@ -43,7 +43,8 @@ public DataEvent next() { if (line == null) return null; String version = ZoiePerfVersion.toString(_count,_offset); - _offset+=version.length(); + _offset+=line.length(); + _count++; event = new DataEvent(line,version); } @@ -51,7 +52,6 @@ public DataEvent next() { logger.error(ioe.getMessage(),ioe); } } - _count++; return event; } From dc6fa2ca5d8f7718a68b98207fe56af903063a06 Mon Sep 17 00:00:00 2001 From: John Wang Date: Fri, 9 Dec 2011 09:28:33 -0800 Subject: [PATCH 2/3] added flush before shutting down --- .../main/java/proj/zoie/impl/indexing/StreamDataProvider.java | 1 + 1 file changed, 1 insertion(+) diff --git a/zoie-core/src/main/java/proj/zoie/impl/indexing/StreamDataProvider.java b/zoie-core/src/main/java/proj/zoie/impl/indexing/StreamDataProvider.java index 456c0c28..00d66f06 100644 --- a/zoie-core/src/main/java/proj/zoie/impl/indexing/StreamDataProvider.java +++ b/zoie-core/src/main/java/proj/zoie/impl/indexing/StreamDataProvider.java @@ -382,6 +382,7 @@ public void run() } } } + flush(); } private long getEventCount() From d233bfadc8a32fd57dc686d3426c704124ece1fa Mon Sep 17 00:00:00 2001 From: John Wang Date: Tue, 13 Dec 2011 19:14:08 -0800 Subject: [PATCH 3/3] fixed purge filter --- .../internal/LuceneIndexDataLoader.java | 15 +++-- .../test/java/proj/zoie/test/ZoieTest.java | 56 ++++++++++++++++++- 2 files changed, 64 insertions(+), 7 deletions(-) diff --git a/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/LuceneIndexDataLoader.java b/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/LuceneIndexDataLoader.java index eaf2a16c..fbcebfd3 100644 --- a/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/LuceneIndexDataLoader.java +++ b/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/LuceneIndexDataLoader.java @@ -33,6 +33,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; import org.apache.lucene.search.Similarity; @@ -40,6 +41,7 @@ import proj.zoie.api.DataConsumer; import proj.zoie.api.ZoieException; import proj.zoie.api.ZoieHealth; +import proj.zoie.api.ZoieIndexReader; import proj.zoie.api.ZoieSegmentReader; import proj.zoie.api.indexing.AbstractZoieIndexable; import proj.zoie.api.indexing.IndexingEventListener; @@ -78,26 +80,29 @@ public void setPurgeFilter(Filter purgeFilter){ private final void purgeDocuments(){ if (_purgeFilter!=null){ BaseSearchIndex idx = getSearchIndex(); - IndexReader reader = null; + IndexReader writeReader = null; log.info("purging docs started..."); int count = 0; long start = System.currentTimeMillis(); try{ - reader = idx.openIndexReaderForDelete(); + writeReader = idx.openIndexReaderForDelete(); + + ZoieIndexReader reader = idx.openIndexReader(); DocIdSetIterator iter = _purgeFilter.getDocIdSet(reader).iterator(); + int doc; while((doc = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){ count++; - reader.deleteDocument(doc); + writeReader.deleteDocument(doc); } } catch(Throwable th){ log.error("problem creating purge filter: "+th.getMessage(),th); } finally{ - if (reader!=null){ + if (writeReader!=null){ try{ - reader.close(); + writeReader.close(); } catch(IOException ioe){ ZoieHealth.setFatal(); diff --git a/zoie-core/src/test/java/proj/zoie/test/ZoieTest.java b/zoie-core/src/test/java/proj/zoie/test/ZoieTest.java index 828b408f..be45ef20 100644 --- a/zoie-core/src/test/java/proj/zoie/test/ZoieTest.java +++ b/zoie-core/src/test/java/proj/zoie/test/ZoieTest.java @@ -26,7 +26,9 @@ import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Filter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; @@ -259,13 +261,63 @@ public void testRealtime2() throws ZoieException { } } + private static class EvenIDPurgeFilter extends Filter{ + + @Override + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + if (reader instanceof ZoieIndexReader){ + final ZoieIndexReader zoieReader = (ZoieIndexReader)reader; + return new DocIdSet(){ + + @Override + public DocIdSetIterator iterator() throws IOException { + return new DocIdSetIterator(){ + + int doc=-1; + int maxdoc = zoieReader.maxDoc(); + + @Override + public int advance(int target) throws IOException { + doc = target-1; + return nextDoc(); + } + + @Override + public int docID() { + return doc; + } + + @Override + public int nextDoc() throws IOException { + while(doc idxSystem = createZoie( idxDir, true, ZoieConfig.DEFAULT_VERSION_COMPARATOR,true); - idxSystem.setPurgeFilter(new QueryWrapperFilter(new MatchAllDocsQuery())); + idxSystem.setPurgeFilter(new EvenIDPurgeFilter()); idxSystem.start(); MemoryStreamDataProvider memoryProvider = new MemoryStreamDataProvider(ZoieConfig.DEFAULT_VERSION_COMPARATOR); @@ -321,7 +373,7 @@ public void testPurgeFilter() throws Exception { numDocs = multiReader.numDocs(); log.info("new numdocs: "+numDocs); - TestCase.assertTrue(numDocs==0); + TestCase.assertTrue(numDocs==5); idxSystem.returnIndexReaders(readers);