diff --git a/zoie-core/resource/log4j.properties b/zoie-core/resource/log4j.properties new file mode 100644 index 00000000..5ac1279d --- /dev/null +++ b/zoie-core/resource/log4j.properties @@ -0,0 +1,5 @@ +log4j.rootLogger=INFO, console1 + +log4j.appender.console1=org.apache.log4j.ConsoleAppender +log4j.appender.console1.layout=org.apache.log4j.PatternLayout +log4j.appender.console1.layout.ConversionPattern=%d{yyyy/MM/dd HH:mm:ss.SSS} %p [%c] [%x] %m%n diff --git a/zoie-core/src/main/java/proj/zoie/impl/indexing/ZoieSystem.java b/zoie-core/src/main/java/proj/zoie/impl/indexing/ZoieSystem.java index 2c27d038..5954cb0f 100644 --- a/zoie-core/src/main/java/proj/zoie/impl/indexing/ZoieSystem.java +++ b/zoie-core/src/main/java/proj/zoie/impl/indexing/ZoieSystem.java @@ -458,7 +458,7 @@ public IndexingReq[] buildIndexingReqs() { super.setBatchSize(Math.max(1, batchSize)); // realtime memory batch size _diskLoader = new DiskLuceneIndexDataLoader(_analyzer, _similarity, - _searchIdxMgr,versionComparator); + _searchIdxMgr,versionComparator,_lsnrList); _diskLoader.setOptimizeScheduler(new DefaultOptimizeScheduler( getAdminMBean())); // note that the ZoieSystemAdminMBean zoieAdmin // parameter for DefaultOptimizeScheduler is not diff --git a/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/DiskLuceneIndexDataLoader.java b/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/DiskLuceneIndexDataLoader.java index 83f5131e..918889b9 100644 --- a/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/DiskLuceneIndexDataLoader.java +++ b/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/DiskLuceneIndexDataLoader.java @@ -23,6 +23,7 @@ import java.nio.channels.WritableByteChannel; import java.util.Collection; import java.util.Comparator; +import java.util.Queue; import org.apache.log4j.Logger; import org.apache.lucene.analysis.Analyzer; @@ -32,6 +33,7 @@ import proj.zoie.api.ZoieException; import proj.zoie.api.ZoieHealth; +import proj.zoie.api.indexing.IndexingEventListener; import proj.zoie.api.indexing.OptimizeScheduler; import proj.zoie.api.indexing.OptimizeScheduler.OptimizeType; import proj.zoie.api.indexing.ZoieIndexable; @@ -45,8 +47,8 @@ public class DiskLuceneIndexDataLoader extends LuceneInde private Object _optimizeMonitor; private volatile OptimizeScheduler _optScheduler; - public DiskLuceneIndexDataLoader(Analyzer analyzer, Similarity similarity,SearchIndexManager idxMgr,Comparator comparator) { - super(analyzer, similarity, idxMgr,comparator); + public DiskLuceneIndexDataLoader(Analyzer analyzer, Similarity similarity,SearchIndexManager idxMgr,Comparator comparator,Queue lsnrList) { + super(analyzer, similarity, idxMgr,comparator,lsnrList); _lastTimeOptimized=System.currentTimeMillis(); _optimizeMonitor = new Object(); } diff --git a/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/LuceneIndexDataLoader.java b/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/LuceneIndexDataLoader.java index e83b806a..713c23b0 100644 --- a/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/LuceneIndexDataLoader.java +++ b/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/LuceneIndexDataLoader.java @@ -26,6 +26,7 @@ import java.util.Comparator; import java.util.LinkedList; import java.util.List; +import java.util.Queue; import org.apache.log4j.Logger; import org.apache.lucene.analysis.Analyzer; @@ -41,6 +42,7 @@ import proj.zoie.api.ZoieHealth; import proj.zoie.api.ZoieSegmentReader; import proj.zoie.api.indexing.AbstractZoieIndexable; +import proj.zoie.api.indexing.IndexingEventListener; import proj.zoie.api.indexing.ZoieIndexable; import proj.zoie.api.indexing.ZoieIndexable.IndexingReq; @@ -53,12 +55,15 @@ public abstract class LuceneIndexDataLoader implements Da protected final Comparator _versionComparator; private Filter _purgeFilter; - protected LuceneIndexDataLoader(Analyzer analyzer, Similarity similarity,SearchIndexManager idxMgr,Comparator versionComparator) { + private final Queue _lsnrList; + + protected LuceneIndexDataLoader(Analyzer analyzer, Similarity similarity,SearchIndexManager idxMgr,Comparator versionComparator,Queue lsnrList) { _analyzer = analyzer; _similarity = similarity; _idxMgr=idxMgr; _versionComparator = versionComparator; _purgeFilter = null; + _lsnrList = lsnrList; } public void setPurgeFilter(Filter purgeFilter){ @@ -187,8 +192,9 @@ public void consume(Collection> events) throws ZoieExce for (List tmpList : addList.values()) { docList.addAll(tmpList); } - idx.updateIndex(delSet, docList, _analyzer,_similarity); + purgeDocuments(); + idx.updateIndex(delSet, docList, _analyzer,_similarity); propagateDeletes(delSet); synchronized(_idxMgr) { @@ -228,6 +234,7 @@ public void loadFromIndex(RAMSearchIndex ramIndex) throws ZoieException idx.clearDeletes(); // clear old deletes as deletes are written to the lucene index // hao: update the disk idx reader idx.refresh(); // load the index reader + purgeDocuments(); idx.markDeletes(ramIndex.getDelDocs()); // inherit deletes idx.commitDeletes(); idx.incrementEventCount(ramIndex.getEventsHandled()); diff --git a/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/RAMLuceneIndexDataLoader.java b/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/RAMLuceneIndexDataLoader.java index 30ed2db4..ae22f491 100644 --- a/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/RAMLuceneIndexDataLoader.java +++ b/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/RAMLuceneIndexDataLoader.java @@ -19,17 +19,20 @@ import java.io.IOException; import java.util.Comparator; +import java.util.Queue; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Similarity; +import proj.zoie.api.indexing.IndexingEventListener; + public class RAMLuceneIndexDataLoader extends LuceneIndexDataLoader { - public RAMLuceneIndexDataLoader(Analyzer analyzer, Similarity similarity,SearchIndexManager idxMgr,Comparator comparator) + public RAMLuceneIndexDataLoader(Analyzer analyzer, Similarity similarity,SearchIndexManager idxMgr,Comparator comparator,Queue lsnrList) { - super(analyzer, similarity,idxMgr,comparator); + super(analyzer, similarity,idxMgr,comparator,lsnrList); } @Override diff --git a/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/RealtimeIndexDataLoader.java b/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/RealtimeIndexDataLoader.java index d5b0bf27..520f2a15 100644 --- a/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/RealtimeIndexDataLoader.java +++ b/zoie-core/src/main/java/proj/zoie/impl/indexing/internal/RealtimeIndexDataLoader.java @@ -64,7 +64,7 @@ public RealtimeIndexDataLoader(DiskLuceneIndexDataLoader dataLoader, int batc _analyzer = analyzer; _similarity = similarity; _currentBatchSize = 0; - _ramConsumer = new RAMLuceneIndexDataLoader(_analyzer, _similarity, _idxMgr,comparator); + _ramConsumer = new RAMLuceneIndexDataLoader(_analyzer, _similarity, _idxMgr,comparator,lsnrList); _luceneDataLoader = dataLoader; } diff --git a/zoie-core/src/test/java/proj/zoie/test/ZoieTest.java b/zoie-core/src/test/java/proj/zoie/test/ZoieTest.java index 8bf34a19..96b8e551 100644 --- a/zoie-core/src/test/java/proj/zoie/test/ZoieTest.java +++ b/zoie-core/src/test/java/proj/zoie/test/ZoieTest.java @@ -28,7 +28,9 @@ import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryWrapperFilter; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Searcher; import org.apache.lucene.search.TermQuery; @@ -36,21 +38,20 @@ import org.apache.lucene.util.Version; import org.junit.Test; +import proj.zoie.api.DataConsumer.DataEvent; import proj.zoie.api.DefaultDirectoryManager; import proj.zoie.api.DirectoryManager; import proj.zoie.api.DocIDMapper; +import proj.zoie.api.DocIDMapper.DocIDArray; import proj.zoie.api.UIDDocIdSet; import proj.zoie.api.ZoieException; import proj.zoie.api.ZoieIndexReader; -import proj.zoie.api.DataConsumer.DataEvent; -import proj.zoie.api.DocIDMapper.DocIDArray; - import proj.zoie.api.impl.DocIDMapperImpl; import proj.zoie.api.impl.InRangeDocIDMapperFactory; import proj.zoie.impl.indexing.AsyncDataConsumer; import proj.zoie.impl.indexing.MemoryStreamDataProvider; -import proj.zoie.impl.indexing.ZoieSystem; import proj.zoie.impl.indexing.ZoieConfig; +import proj.zoie.impl.indexing.ZoieSystem; import proj.zoie.impl.indexing.internal.IndexSignature; import proj.zoie.test.data.DataForTests; import proj.zoie.test.mock.MockDataLoader; @@ -96,7 +97,7 @@ public void testIndexWithAnalyzer() throws ZoieException, IOException { File idxDir = getIdxDir(); ZoieSystem idxSystem = createZoie( idxDir, true, 20, new WhitespaceAnalyzer(), null, - ZoieConfig.DEFAULT_VERSION_COMPARATOR); + ZoieConfig.DEFAULT_VERSION_COMPARATOR,false); idxSystem.start(); MemoryStreamDataProvider memoryProvider = new MemoryStreamDataProvider(ZoieConfig.DEFAULT_VERSION_COMPARATOR); @@ -258,6 +259,81 @@ public void testRealtime2() throws ZoieException { } } + @Test + public void testPurgeFilter() throws Exception { + File idxDir = getIdxDir(); + ZoieSystem idxSystem = createZoie( + idxDir, true, ZoieConfig.DEFAULT_VERSION_COMPARATOR,true); + + idxSystem.setPurgeFilter(new QueryWrapperFilter(new MatchAllDocsQuery())); + idxSystem.start(); + + MemoryStreamDataProvider memoryProvider = new MemoryStreamDataProvider(ZoieConfig.DEFAULT_VERSION_COMPARATOR); + memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE); + memoryProvider.setDataConsumer(idxSystem); + memoryProvider.start(); + + try { + int count = DataForTests.testdata.length; + List> list = new ArrayList>( + count); + for (int i = 0; i < count; ++i) { + list.add(new DataEvent( + DataForTests.testdata[i], ""+i)); + } + memoryProvider.addEvents(list); + memoryProvider.flush(); + + + idxSystem.flushEvents(10000); + + List> readers = idxSystem + .getIndexReaders(); + + MultiReader multiReader = new MultiReader(readers.toArray(new IndexReader[0]),false); + + IndexSearcher searcher = new IndexSearcher(multiReader); + + int numDocs = searcher.search(new MatchAllDocsQuery(), 10).totalHits; + + searcher.close(); + log.info("numdocs: "+numDocs); + TestCase.assertTrue(numDocs>0); + + idxSystem.returnIndexReaders(readers); + + idxSystem.getAdminMBean().flushToDiskIndex(); + + + idxSystem.refreshDiskReader(); + readers = idxSystem + .getIndexReaders(); + + + multiReader = new MultiReader(readers.toArray(new IndexReader[0]),false); + + searcher = new IndexSearcher(multiReader); + + numDocs = searcher.search(new MatchAllDocsQuery(), 10).totalHits; + + searcher.close(); + + numDocs = multiReader.numDocs(); + + log.info("new numdocs: "+numDocs); + TestCase.assertTrue(numDocs==0); + + idxSystem.returnIndexReaders(readers); + + } catch (IOException ioe) { + throw new ZoieException(ioe.getMessage()); + } finally { + memoryProvider.stop(); + idxSystem.shutdown(); + deleteDirectory(idxDir); + } + } + @Test public void testStore() throws ZoieException { File idxDir = getIdxDir(); diff --git a/zoie-core/src/test/java/proj/zoie/test/ZoieTestCaseBase.java b/zoie-core/src/test/java/proj/zoie/test/ZoieTestCaseBase.java index 757d6e04..b637aa13 100644 --- a/zoie-core/src/test/java/proj/zoie/test/ZoieTestCaseBase.java +++ b/zoie-core/src/test/java/proj/zoie/test/ZoieTestCaseBase.java @@ -20,6 +20,7 @@ import proj.zoie.api.ZoieIndexReader; import proj.zoie.api.impl.InRangeDocIDMapperFactory; import proj.zoie.api.indexing.IndexReaderDecorator; +import proj.zoie.impl.indexing.ReaderCacheFactory; import proj.zoie.impl.indexing.SimpleReaderCache; import proj.zoie.impl.indexing.ZoieConfig; import proj.zoie.impl.indexing.ZoieSystem; @@ -109,6 +110,18 @@ protected static ZoieSystem createZoie(File idxDir,boolean r return createZoie(idxDir, realtime, 20, versionComparator); } + + protected static ZoieSystem createZoie(File idxDir,boolean realtime, Comparator versionComparator,boolean immediateRefresh) + { + return createZoie(idxDir, realtime, 20, versionComparator,immediateRefresh); + } + + protected static ZoieSystem createZoie(File idxDir,boolean realtime, long delay, Comparator versionComparator,boolean immediateRefresh) + { + return createZoie(idxDir,realtime,delay,null,null,versionComparator,immediateRefresh); + } + + /** * @param idxDir * @param realtime @@ -118,12 +131,12 @@ protected static ZoieSystem createZoie(File idxDir,boolean r */ protected static ZoieSystem createZoie(File idxDir,boolean realtime, long delay, Comparator versionComparator) { - return createZoie(idxDir,realtime,delay,null,null,versionComparator); + return createZoie(idxDir,realtime,delay,null,null,versionComparator,false); } protected static ZoieSystem createZoie(File idxDir,boolean realtime,DocIDMapperFactory docidMapperFactory, Comparator versionComparator) { - return createZoie(idxDir, realtime, 2,null,docidMapperFactory, versionComparator); + return createZoie(idxDir, realtime, 2,null,docidMapperFactory, versionComparator,false); } /** @@ -135,7 +148,7 @@ protected static ZoieSystem createZoie(File idxDir,boolean r * @param zoieVersionFactory * @return */ - protected static ZoieSystem createZoie(File idxDir,boolean realtime, long delay,Analyzer analyzer,DocIDMapperFactory docidMapperFactory, Comparator versionComparator) + protected static ZoieSystem createZoie(File idxDir,boolean realtime, long delay,Analyzer analyzer,DocIDMapperFactory docidMapperFactory, Comparator versionComparator,boolean immediateRefresh) { ZoieConfig config = new ZoieConfig(); config.setDocidMapperFactory(docidMapperFactory); @@ -145,8 +158,9 @@ protected static ZoieSystem createZoie(File idxDir,boolean r config.setVersionComparator(versionComparator); config.setSimilarity(null); config.setAnalyzer(null); -// config.setReadercachefactory(SimpleReaderCache.FACTORY); - + if (immediateRefresh){ + config.setReadercachefactory(SimpleReaderCache.FACTORY); + } ZoieSystem idxSystem=new ZoieSystem(idxDir,new DataInterpreterForTests(delay,analyzer), new TestIndexReaderDecorator(),config); return idxSystem;