From 702867412a0fc06aea3ae1ee46174623fceb73db Mon Sep 17 00:00:00 2001 From: Paul Horn Date: Mon, 17 Aug 2015 16:02:28 +0200 Subject: [PATCH] Replace repeated search in Scan Store with continuous DocValues read --- .../api/impl/index/BitmapDocumentFormat.java | 49 ++++- .../api/impl/index/DocValuesAccess.java | 38 ++++ .../api/impl/index/DocValuesCollector.java | 188 ++++++++++++++++++ .../impl/index/HitsPrimitiveLongIterator.java | 57 ------ .../api/impl/index/LongValuesIterator.java | 165 +++++++++++++++ .../LuceneAllEntriesLabelScanReader.java | 26 ++- .../impl/index/LuceneDocumentStructure.java | 2 + .../impl/index/LuceneIndexAccessorReader.java | 7 +- .../api/impl/index/LuceneLabelScanWriter.java | 8 +- .../api/impl/index/PageOfRangesIterator.java | 62 +++--- ...AbstractLuceneIndexAccessorReaderTest.java | 2 +- .../api/impl/index/IndexReaderStub.java | 45 ++++- .../kernel/api/impl/index/LuceneIndexIT.java | 2 +- .../index/LuceneLabelScanStoreWriterTest.java | 71 ++++++- ...eDocumentLabelScanStorageStrategyTest.java | 57 ++++-- .../impl/index/PageOfRangesIteratorTest.java | 73 ++++--- 16 files changed, 703 insertions(+), 149 deletions(-) create mode 100644 community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/DocValuesAccess.java create mode 100644 community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/DocValuesCollector.java delete mode 100644 community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/HitsPrimitiveLongIterator.java create mode 100644 community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LongValuesIterator.java diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/BitmapDocumentFormat.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/BitmapDocumentFormat.java index 3988b9411592..3925913fe88f 100644 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/BitmapDocumentFormat.java +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/BitmapDocumentFormat.java @@ -23,6 +23,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.IntField; import org.apache.lucene.document.LongField; +import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; @@ -43,7 +44,16 @@ protected Field createLabelField( String label, long bitmap ) { assert (bitmap & 0xFFFFFFFF00000000L) == 0 : "Tried to store a bitmap as int, but which had values outside int limits"; - return new IntField( label, (int) bitmap, Field.Store.YES ); + return new NumericDocValuesField( label, bitmap ); + } + + @Override + protected void addLabelFields( Document document, String label, long bitmap ) + { + assert (bitmap & 0xFFFFFFFF00000000L) == 0 : + "Tried to store a bitmap as int, but which had values outside int limits"; + document.add( new NumericDocValuesField( label, bitmap ) ); + document.add( new IntField( label, (int) bitmap, Field.Store.YES ) ); } }, _64( BitmapFormat._64 ) @@ -51,7 +61,14 @@ protected Field createLabelField( String label, long bitmap ) @Override protected Field createLabelField( String label, long bitmap ) { - return new LongField( label, bitmap, Field.Store.YES ); + return new NumericDocValuesField( label, bitmap ); + } + + @Override + protected void addLabelFields( Document document, String label, long bitmap ) + { + document.add( new NumericDocValuesField( label, bitmap ) ); + document.add( new LongField( label, bitmap, Field.Store.YES ) ); } }; @@ -96,7 +113,7 @@ public Query labelQuery( long labelId ) public Query rangeQuery( long range ) { - return new TermQuery( new Term( RANGE, Long.toString( range) ) ); + return new TermQuery( new Term( RANGE, Long.toString( range ) ) ); } public IndexableField rangeField( long range ) @@ -104,6 +121,12 @@ public IndexableField rangeField( long range ) return new StringField( RANGE, Long.toString( range ), Field.Store.YES ); } + public void addRangeValuesField( Document doc, long range ) + { + doc.add( rangeField( range ) ); + doc.add( new NumericDocValuesField( RANGE, range ) ); + } + public IndexableField labelField( long key, long bitmap ) { return createLabelField( label( key ), bitmap ); @@ -111,15 +134,17 @@ public IndexableField labelField( long key, long bitmap ) protected abstract Field createLabelField( String label, long bitmap ); - public void addLabelField( Document document, long label, Bitmap bitmap ) + protected abstract void addLabelFields( Document document, String label, long bitmap ); + + public void addLabelAndSearchFields( Document document, long label, Bitmap bitmap ) { - document.add( labelField( label, bitmap ) ); + addLabelFields( document, label( label ), bitmap.bitmap() ); document.add( labelSearchField( label ) ); } public IndexableField labelSearchField( long label ) { - return new StringField( LABEL, Long.toString( label ), Field.Store.NO ); + return new StringField( LABEL, Long.toString( label ), Field.Store.YES ); } public IndexableField labelField( long key, Bitmap value ) @@ -147,12 +172,22 @@ public Term rangeTerm( Document document ) return new Term( RANGE, document.get( RANGE ) ); } - public boolean isRangeField( IndexableField field ) + public boolean isRangeOrLabelField( IndexableField field ) { String fieldName = field.name(); return RANGE.equals( fieldName ) || LABEL.equals( fieldName ); } + public boolean isRangeField( IndexableField field ) + { + return RANGE.equals( field.name() ); + } + + public boolean isLabelBitmapField( IndexableField field ) + { + return !isRangeOrLabelField( field ); + } + public Bitmap readBitmap( IndexableField field ) { return new Bitmap( bitmap( field ) ); diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/DocValuesAccess.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/DocValuesAccess.java new file mode 100644 index 000000000000..6a96b1081b08 --- /dev/null +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/DocValuesAccess.java @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2002-2015 "Neo Technology," + * Network Engine for Objects in Lund AB [http://neotechnology.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.kernel.api.impl.index; + +/** + * Represents a point-in-time view on a set of numeric values + * that are read from a {@code NumericDocValues} field. + */ +interface DocValuesAccess +{ + /** + * @return the current value of the main field that is driving the values. + */ + long current(); + + /** + * @return the value of an additional sidecar field. + * @throws IllegalStateException if no such field is indexed. + */ + long getValue( String field ); +} diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/DocValuesCollector.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/DocValuesCollector.java new file mode 100644 index 000000000000..5ebe82f21c35 --- /dev/null +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/DocValuesCollector.java @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2002-2015 "Neo Technology," + * Network Engine for Objects in Lund AB [http://neotechnology.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.kernel.api.impl.index; + +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.SimpleCollector; +import org.apache.lucene.util.RoaringDocIdSet; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * Collector to record per-segment {@code DocIdSet}s and {@code LeafReaderContext}s for every + * segment that contains a hit. Those items can be later used to read {@code DocValues} fields + * and iterate over the matched {@code DocIdSet}s. This collector is different from + * {@code org.apache.lucene.search.CachingCollector} in that the later focuses on predictable RAM usage + * and feeding other collectors while this collector focuses on exposing the required per-segment data structures + * to the user. + */ +public final class DocValuesCollector extends SimpleCollector +{ + + private LeafReaderContext context; + private int segmentHits; + private int totalHits; + private final List matchingDocs = new ArrayList<>(); + private Docs docs; + + /** + * @return the documents matched by the query, one {@link MatchingDocs} per visited segment that contains a hit. + */ + public List getMatchingDocs() + { + if ( docs != null && segmentHits > 0 ) + { + matchingDocs.add( new MatchingDocs( this.context, docs.getDocIdSet(), segmentHits ) ); + docs = null; + context = null; + } + + return Collections.unmodifiableList( matchingDocs ); + } + + /** + * @return the total number of hits across all segments. + */ + public int getTotalHits() + { + return totalHits; + } + + @Override + public final void collect( int doc ) throws IOException + { + docs.addDoc( doc ); + segmentHits++; + totalHits++; + } + + @Override + public boolean needsScores() + { + return false; + } + + @Override + protected void doSetNextReader( LeafReaderContext context ) throws IOException + { + if ( docs != null && segmentHits > 0 ) + { + matchingDocs.add( new MatchingDocs( this.context, docs.getDocIdSet(), segmentHits ) ); + } + docs = createDocs( context.reader().maxDoc() ); + segmentHits = 0; + this.context = context; + } + + /** + * @return a new {@link Docs} to record hits. + */ + private Docs createDocs( final int maxDoc ) + { + return new Docs( maxDoc ); + } + + /** + * Holds the documents that were matched per segment. + */ + public final static class MatchingDocs + { + + /** The {@code LeafReaderContext} for this segment. */ + public final LeafReaderContext context; + + /** Which documents were seen. */ + public final DocIdSet docIdSet; + + /** Total number of hits */ + public final int totalHits; + + public MatchingDocs( LeafReaderContext context, DocIdSet docIdSet, int totalHits ) + { + this.context = context; + this.docIdSet = docIdSet; + this.totalHits = totalHits; + } + + /** + * @return the {@code NumericDocValues} for a given field + * @throws IllegalArgumentException if this field is not indexed with numeric doc values + */ + public NumericDocValues readDocValues( String field ) + { + try + { + NumericDocValues dv = context.reader().getNumericDocValues( field ); + if ( dv == null ) + { + FieldInfo fi = context.reader().getFieldInfos().fieldInfo( field ); + DocValuesType actual = null; + if ( fi != null ) + { + actual = fi.getDocValuesType(); + } + throw new IllegalStateException( + "The field '" + field + "' is not indexed properly, expected NumericDV, but got '" + + actual + "'" ); + } + return dv; + } + catch ( IOException e ) + { + throw new RuntimeException( e ); + } + } + } + + /** + * Used during collection to record matching docs and then return a + * {@see DocIdSet} that contains them. + */ + private static final class Docs + { + private final RoaringDocIdSet.Builder bits; + + public Docs( int maxDoc ) + { + bits = new RoaringDocIdSet.Builder( maxDoc ); + } + + /** Record the given document. */ + public void addDoc( int docId ) + { + bits.add( docId ); + } + + ; + + /** Return the {@see DocIdSet} which contains all the recorded docs. */ + public DocIdSet getDocIdSet() + { + return bits.build(); + } + } +} diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/HitsPrimitiveLongIterator.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/HitsPrimitiveLongIterator.java deleted file mode 100644 index 223fb0c772e1..000000000000 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/HitsPrimitiveLongIterator.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2002-2015 "Neo Technology," - * Network Engine for Objects in Lund AB [http://neotechnology.com] - * - * This file is part of Neo4j. - * - * Neo4j is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package org.neo4j.kernel.api.impl.index; - -import java.io.IOException; - -import org.neo4j.collection.primitive.PrimitiveLongCollections.PrimitiveLongBaseIterator; -import org.neo4j.index.impl.lucene.Hits; - -public class HitsPrimitiveLongIterator extends PrimitiveLongBaseIterator -{ - private final Hits hits; - private final int size; - private int index; - private final LuceneDocumentStructure documentStructure; - - public HitsPrimitiveLongIterator( Hits hits, LuceneDocumentStructure documentStructure ) - { - this.hits = hits; - this.documentStructure = documentStructure; - size = hits.length(); - } - - @Override - protected boolean fetchNext() - { - if ( index < size ) - { - try - { - return next( documentStructure.getNodeId( hits.doc( index++ ) ) ); - } - catch ( IOException e ) - { - throw new RuntimeException( e ); - } - } - return false; - } -} diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LongValuesIterator.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LongValuesIterator.java new file mode 100644 index 000000000000..94797eb1e2eb --- /dev/null +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LongValuesIterator.java @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2002-2015 "Neo Technology," + * Network Engine for Objects in Lund AB [http://neotechnology.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.kernel.api.impl.index; + +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.search.DocIdSetIterator; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import org.neo4j.collection.primitive.PrimitiveLongCollections.PrimitiveLongBaseIterator; + +/** + * Iterates over all per-segment {@link DocValuesCollector.MatchingDocs}. Supports two kinds of lookups. + * One, iterate over all long values of the given field (constructor argument). + * Two, lookup a value for the current doc in a sidecar {@code NumericDocValues} field. + * That is, this iterator has a main field, that drives the iteration and allow for lookups + * in other, secondary fields based on the current document of the main iteration. + * + * Lookups from this class are not thread-safe. Races can happen when the segment barrier + * is crossed; one thread might think it is reading from one segment while another thread has + * already advanced this Iterator to the next segment, having raced the first thread. + */ +public class LongValuesIterator extends PrimitiveLongBaseIterator implements DocValuesAccess +{ + private final Iterator matchingDocs; + private final String field; + private final int size; + private DocIdSetIterator currentDisi; + private NumericDocValues currentDocValues; + private DocValuesCollector.MatchingDocs currentDocs; + private final Map docValuesCache; + + private int index = 0; + + /** + * @param allMatchingDocs all {@link DocValuesCollector.MatchingDocs} across all segments + * @param totalHits the total number of hits across all segments + * @param field the main field, whose values drive the iteration + */ + public LongValuesIterator( Iterable allMatchingDocs, int totalHits, String field ) + { + this.size = totalHits; + this.field = field; + matchingDocs = allMatchingDocs.iterator(); + docValuesCache = new HashMap<>(); + } + + /** + * @return the number of docs left in this iterator. + */ + public int remaining() + { + return size - index; + } + + @Override + public long current() + { + return next; + } + + @Override + public long getValue( String field ) + { + if ( ensureValidDisi() ) + { + if ( docValuesCache.containsKey( field ) ) + { + return docValuesCache.get( field ).get( currentDisi.docID() ); + } + + NumericDocValues docValues = currentDocs.readDocValues( field ); + docValuesCache.put( field, docValues ); + + return docValues.get( currentDisi.docID() ); + } + else + { + // same as DocValues.emptyNumeric()#get + // which means, getValue carries over the semantics of NDV + // -1 would also be a possibility here. + return 0; + } + } + + @Override + protected boolean fetchNext() + { + try + { + if ( ensureValidDisi() ) + { + int nextDoc = currentDisi.nextDoc(); + if ( nextDoc != DocIdSetIterator.NO_MORE_DOCS ) + { + index++; + return next( currentDocValues.get( nextDoc ) ); + } + else + { + currentDisi = null; + return fetchNext(); + } + } + } + catch ( IOException e ) + { + throw new RuntimeException( e ); + } + + return false; + } + + /** + * @return true if it was able to make sure, that currentDisi is valid + */ + private boolean ensureValidDisi() + { + try + { + while ( currentDisi == null ) + { + if ( matchingDocs.hasNext() ) + { + currentDocs = matchingDocs.next(); + currentDisi = currentDocs.docIdSet.iterator(); + if ( currentDisi != null ) + { + docValuesCache.clear(); + currentDocValues = currentDocs.readDocValues( field ); + } + } + else + { + return false; + } + } + return true; + } + catch ( IOException e ) + { + throw new RuntimeException( e ); + } + } +} diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LuceneAllEntriesLabelScanReader.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LuceneAllEntriesLabelScanReader.java index bd95cf84fb25..eb1935c9a637 100644 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LuceneAllEntriesLabelScanReader.java +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LuceneAllEntriesLabelScanReader.java @@ -19,6 +19,7 @@ */ package org.neo4j.kernel.api.impl.index; +import java.util.Arrays; import java.util.Iterator; import java.util.List; @@ -82,8 +83,9 @@ private LuceneNodeLabelRange parse( int id, Document document ) { List fields = document.getFields(); - long[] labelIds = new long[fields.size() - 1]; - Bitmap[] bitmaps = new Bitmap[fields.size() - 1]; + int expectedLabelFields = fields.size() - 1; + long[] scratchLabelIds = new long[expectedLabelFields]; + Bitmap[] scratchBitmaps = new Bitmap[expectedLabelFields]; int i = 0; long rangeId = -1; @@ -93,14 +95,28 @@ private LuceneNodeLabelRange parse( int id, Document document ) { rangeId = format.rangeOf( field ); } - else + else if ( format.isLabelBitmapField( field ) ) { - labelIds[i] = format.labelId( field ); - bitmaps[i] = format.readBitmap( field ); + scratchLabelIds[i] = format.labelId( field ); + scratchBitmaps[i] = format.readBitmap( field ); i++; } } assert (rangeId >= 0); + + final long[] labelIds; + final Bitmap[] bitmaps; + if (i < expectedLabelFields) + { + labelIds = Arrays.copyOf( scratchLabelIds, i ); + bitmaps = Arrays.copyOf( scratchBitmaps, i ); + } + else + { + labelIds = scratchLabelIds; + bitmaps = scratchBitmaps; + } + return LuceneNodeLabelRange.fromBitmapStructure( id, labelIds, getLongs( bitmaps, rangeId ) ); } diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LuceneDocumentStructure.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LuceneDocumentStructure.java index bddcc49918fb..05ff96cd9143 100644 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LuceneDocumentStructure.java +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LuceneDocumentStructure.java @@ -22,6 +22,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.DoubleField; import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; @@ -48,6 +49,7 @@ Document newDocument( long nodeId ) { Document document = new Document(); document.add( field( NODE_ID_KEY, "" + nodeId, YES ) ); + document.add( new NumericDocValuesField( NODE_ID_KEY, nodeId ) ); return document; } diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LuceneIndexAccessorReader.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LuceneIndexAccessorReader.java index f90a4a207eb6..c80cc3134067 100644 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LuceneIndexAccessorReader.java +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LuceneIndexAccessorReader.java @@ -39,7 +39,6 @@ import org.neo4j.collection.primitive.PrimitiveLongIterator; import org.neo4j.helpers.CancellationRequest; -import org.neo4j.index.impl.lucene.Hits; import org.neo4j.kernel.api.exceptions.index.IndexNotFoundKernelException; import org.neo4j.kernel.api.impl.index.LuceneDocumentStructure.ValueEncoding; import org.neo4j.kernel.api.index.IndexReader; @@ -221,8 +220,10 @@ protected PrimitiveLongIterator query( Query query ) { try { - Hits hits = new Hits( searcher, query, null ); - return new HitsPrimitiveLongIterator( hits, documentLogic ); + DocValuesCollector docValuesCollector = new DocValuesCollector(); + searcher.search( query, docValuesCollector ); + return new LongValuesIterator( + docValuesCollector.getMatchingDocs(), docValuesCollector.getTotalHits(), NODE_ID_KEY ); } catch ( IOException e ) { diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LuceneLabelScanWriter.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LuceneLabelScanWriter.java index 5627422f5f3a..51cad99f33dc 100644 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LuceneLabelScanWriter.java +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/LuceneLabelScanWriter.java @@ -119,7 +119,7 @@ public void close() throws IOException Document document = searcher.doc( hitCollector.getMatchedDoc() ); for ( IndexableField field : document.getFields() ) { - if ( !format.isRangeField( field ) ) + if ( !format.isRangeOrLabelField( field ) ) { Long label = Long.valueOf( field.name() ); fields.put( label, format.readBitmap( field ) ); @@ -140,7 +140,7 @@ private void flush() throws IOException, IndexCapacityExceededException updateFields( updates, fields ); Document document = new Document(); - document.add( format.rangeField( currentRange ) ); + format.addRangeValuesField( document, currentRange ); for ( Map.Entry field : fields.entrySet() ) { @@ -148,7 +148,7 @@ private void flush() throws IOException, IndexCapacityExceededException Bitmap value = field.getValue(); if ( value.hasContent() ) { - format.addLabelField( document, field.getKey(), value ); + format.addLabelAndSearchFields( document, field.getKey(), value ); } } @@ -167,7 +167,7 @@ private boolean isEmpty( Document document ) { for ( IndexableField fieldable : document.getFields() ) { - if ( !format.isRangeField( fieldable ) ) + if ( !format.isRangeOrLabelField( fieldable ) ) { return false; } diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/PageOfRangesIterator.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/PageOfRangesIterator.java index 85419cebcb22..da3a1beed06a 100644 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/PageOfRangesIterator.java +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/PageOfRangesIterator.java @@ -21,11 +21,8 @@ import java.io.IOException; -import org.apache.lucene.document.Document; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TopDocs; import org.neo4j.collection.primitive.PrimitiveLongIterator; import org.neo4j.helpers.collection.PrefetchingIterator; @@ -39,7 +36,7 @@ class PageOfRangesIterator extends PrefetchingIterator private final BitmapDocumentFormat format; private final int rangesPerPage; private final int[] labels; - private ScoreDoc lastDoc; + private LongValuesIterator rangesIterator; PageOfRangesIterator( BitmapDocumentFormat format, IndexSearcher searcher, int rangesPerPage, Query query, int... labels ) @@ -62,42 +59,51 @@ protected PrimitiveLongIterator fetchNextOrNull() { return null; // we are done searching with this iterator } + + LongValuesIterator ranges = getRanges(); + int pageSize = Math.min( ranges.remaining(), rangesPerPage ); + long[] rangeMap = new long[pageSize * 2]; + + for ( int i = 0; i < pageSize; i++ ) + { + long range = ranges.next(); + rangeMap[i * 2] = range; + rangeMap[i * 2 + 1] = labeledBitmap( ranges ); + } + + if ( pageSize < rangesPerPage ) // not a full page => this is the last page (optimization) + { + searcher = null; // avoid searching again + } + return new LongPageIterator( new BitmapExtractor( format.bitmapFormat(), rangeMap ) ); + } + + private LongValuesIterator getRanges() { + if ( rangesIterator != null ) + { + return rangesIterator; + } try { - TopDocs docs = searcher.searchAfter( lastDoc, query, rangesPerPage ); - lastDoc = null; - int docCount = docs != null ? docs.scoreDocs.length : 0; - if ( docCount == 0 ) - { - searcher = null; // avoid searching again - return null; - } - lastDoc = docs.scoreDocs[docCount - 1]; - long[] rangeMap = new long[docCount * 2]; - for ( int i = 0; i < docCount; i++ ) - { - Document doc = searcher.doc( docs.scoreDocs[i].doc ); - rangeMap[i * 2] = format.rangeOf( doc ); - rangeMap[i * 2 + 1] = labeledBitmap( doc ); - } - if ( docCount < rangesPerPage ) // not a full page => this is the last page (optimization) - { - searcher = null; // avoid searching again - } - return new LongPageIterator( new BitmapExtractor( format.bitmapFormat(), rangeMap ) ); + DocValuesCollector docValuesCollector = new DocValuesCollector(); + searcher.search( query, docValuesCollector ); + rangesIterator = new LongValuesIterator( + docValuesCollector.getMatchingDocs(), docValuesCollector.getTotalHits(), BitmapDocumentFormat + .RANGE ); + return rangesIterator; } catch ( IOException e ) { - throw new RuntimeException( e ); // TODO: something better? + throw new RuntimeException( e ); } } - private long labeledBitmap( Document doc ) + private long labeledBitmap( DocValuesAccess doc ) { long bitmap = -1; for ( int label : labels ) { - bitmap &= format.mapOf( doc, label ); + bitmap &= doc.getValue( format.label( label ) ); } return bitmap; } diff --git a/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/AbstractLuceneIndexAccessorReaderTest.java b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/AbstractLuceneIndexAccessorReaderTest.java index cfebd282a8de..e99188bdacdb 100644 --- a/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/AbstractLuceneIndexAccessorReaderTest.java +++ b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/AbstractLuceneIndexAccessorReaderTest.java @@ -129,7 +129,7 @@ public void shouldUseCorrectLuceneQueryForRangeSeekByPrefixQuery() throws Except public void shouldUseCorrectLuceneQueryForScanQuery() throws Exception { // Given - when( documentLogic.newScanQuery() ).thenReturn( mock( MatchAllDocsQuery.class) ); + when( documentLogic.newScanQuery() ).thenCallRealMethod(); // When accessor.scan(); diff --git a/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/IndexReaderStub.java b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/IndexReaderStub.java index 05539d5c0d30..70f97464a269 100644 --- a/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/IndexReaderStub.java +++ b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/IndexReaderStub.java @@ -32,13 +32,24 @@ import org.apache.lucene.util.Bits; import java.io.IOException; +import java.util.Map; + +import org.neo4j.function.Function; public class IndexReaderStub extends LeafReader { private Fields fields; private boolean allDeleted; - private String[] elements; + private String[] elements = new String[0]; + private Function ndvs = new Function() + { + @Override + public NumericDocValues apply( String s ) + { + return DocValues.emptyNumeric(); + } + }; private IOException throwOnFields; @@ -51,7 +62,35 @@ public IndexReaderStub( boolean allDeleted, final String... elements ) public IndexReaderStub( Fields fields ) { this.fields = fields; - this.elements = new String[0]; + } + + public IndexReaderStub( final NumericDocValues ndv ) + { + this.ndvs = new Function() + { + @Override + public NumericDocValues apply( String s ) + { + return ndv; + } + }; + } + + public IndexReaderStub( final Map ndvs ) + { + this.ndvs = new Function() + { + @Override + public NumericDocValues apply( String s ) + { + NumericDocValues dv = ndvs.get( s ); + if ( dv == null ) + { + return DocValues.emptyNumeric(); + } + return dv; + } + }; } public void throwOnNextFieldsAccess( IOException throwOnFields ) @@ -91,7 +130,7 @@ public Fields fields() throws IOException @Override public NumericDocValues getNumericDocValues( String field ) throws IOException { - return DocValues.emptyNumeric(); + return ndvs.apply( field ); } @Override diff --git a/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/LuceneIndexIT.java b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/LuceneIndexIT.java index 9e342e6fe32f..80d206541a6f 100644 --- a/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/LuceneIndexIT.java +++ b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/LuceneIndexIT.java @@ -60,7 +60,7 @@ public void shouldProvideStoreSnapshot() throws Exception // When & Then try ( ResourceIterator snapshot = accessor.snapshotFiles() ) { - assertThat( asUniqueSetOfNames( snapshot ), equalTo( asSet( "_0.cfs", "segments_1" ) ) ); + assertThat( asUniqueSetOfNames( snapshot ), equalTo( asSet( "_0.cfe", "_0.cfs", "_0.si", "segments_1" ) ) ); } } diff --git a/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/LuceneLabelScanStoreWriterTest.java b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/LuceneLabelScanStoreWriterTest.java index 632f75891957..7fe9948da78c 100644 --- a/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/LuceneLabelScanStoreWriterTest.java +++ b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/LuceneLabelScanStoreWriterTest.java @@ -20,11 +20,16 @@ package org.neo4j.kernel.api.impl.index; import org.apache.lucene.document.Document; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.search.CollectionTerminatedException; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.junit.Test; @@ -211,6 +216,27 @@ public IndexSearcher acquireSearcher() { private final Map docIds = new HashMap<>(); + @Override + public void search( Query query, Collector results ) throws IOException + { + Document document = storage.get( ((TermQuery) query).getTerm() ); + if ( document == null ) + { + return; + } + + final int docId = new Random().nextInt(); + docIds.put( docId, document ); + try { + LeafCollector leafCollector = results.getLeafCollector( leafContexts.get( 0 ) ); + Scorer scorer = new ConstantScoreScorer( null, 10f, new OneDocIdIterator( docId ) ); + leafCollector.setScorer( scorer ); + leafCollector.collect( docId ); + } catch ( CollectionTerminatedException swallow ) { + // swallow + } + } + @Override public TopDocs search( Query query, int n ) { @@ -249,4 +275,47 @@ public Document getDocument( Term term ) return storage.get( term ); } } + + private final class OneDocIdIterator extends DocIdSetIterator { + private final int target; + private int currentDoc = -1; + private boolean exhausted; + + public OneDocIdIterator(int docId) + { + target = docId; + } + + @Override + public int docID() + { + return currentDoc; + } + + @Override + public int nextDoc() throws IOException + { + return advance( currentDoc + 1 ); + } + + @Override + public int advance( int target ) throws IOException + { + if ( exhausted || target > this.target ) + { + return currentDoc = DocIdSetIterator.NO_MORE_DOCS; + } + else + { + exhausted = true; + return currentDoc = this.target; + } + } + + @Override + public long cost() + { + return 1L; + } + } } diff --git a/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/NodeRangeDocumentLabelScanStorageStrategyTest.java b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/NodeRangeDocumentLabelScanStorageStrategyTest.java index a92d67e24352..c067289c3f3e 100644 --- a/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/NodeRangeDocumentLabelScanStorageStrategyTest.java +++ b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/NodeRangeDocumentLabelScanStorageStrategyTest.java @@ -21,7 +21,10 @@ import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.CollectionTerminatedException; +import org.apache.lucene.search.Collector; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.hamcrest.Description; @@ -29,6 +32,8 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; import java.util.ArrayList; import java.util.Arrays; @@ -37,13 +42,16 @@ import java.util.Map; import java.util.concurrent.locks.Lock; +import org.neo4j.kernel.api.impl.index.bitmaps.Bitmap; + +import static org.mockito.Matchers.any; import static org.mockito.Matchers.argThat; import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoMoreInteractions; import static org.mockito.Mockito.when; - import static org.neo4j.kernel.api.impl.index.PageOfRangesIteratorTest.docs; import static org.neo4j.kernel.api.impl.index.PageOfRangesIteratorTest.document; import static org.neo4j.kernel.api.labelscan.NodeLabelUpdate.labelChanges; @@ -114,10 +122,10 @@ public void shouldCreateNewDocumentsForNewlyLabeledNodes() throws Exception public void shouldUpdateDocumentsForReLabeledNodes() throws Exception { // given - LabelScanStorageStrategy.StorageService storage = storage( - document( format.rangeField( 0 ), - format.labelField( 7, 0x70 ) ) - ); + Document givenDoc = new Document(); + format.addRangeValuesField( givenDoc, 0 ); + format.addLabelFields( givenDoc, "7", 0x70L ); + LabelScanStorageStrategy.StorageService storage = storage(givenDoc); LuceneLabelScanWriter writer = new LuceneLabelScanWriter( storage, format, mock( Lock.class ) ); @@ -126,11 +134,11 @@ public void shouldUpdateDocumentsForReLabeledNodes() throws Exception writer.close(); // then - verify( storage ).updateDocument( eq( format.rangeTerm( 0 ) ), - match( document( format.rangeField( 0 ), - format.labelField( 7, 0x71 ), - format.labelField( 8, 0x01 ), - format.labelSearchField( 8 ) ) ) ); + Document thenDoc = new Document(); + format.addRangeValuesField( thenDoc, 0 ); + format.addLabelFields( thenDoc, "7", 0x71L ); + format.addLabelAndSearchFields( thenDoc, 8, new Bitmap( 0x01L ) ); + verify( storage ).updateDocument( eq( format.rangeTerm( 0 ) ), match( thenDoc ) ); } @Test @@ -212,10 +220,11 @@ public void shouldStoreAnyNodeIdInRange() throws Exception writer.close(); // then + Document document = new Document(); + format.addRangeValuesField( document, 0 ); + format.addLabelAndSearchFields( document, 7, new Bitmap( 1L << i ) ); verify( storage ).updateDocument( eq( format.rangeTerm( 0 ) ), - match( document( format.rangeField( 0 ), - format.labelField( 7, 1L << i ), - format.labelSearchField( 7 ) ) ) ); + match( document ) ); } } @@ -241,6 +250,28 @@ private LabelScanStorageStrategy.StorageService storage( Document... documents ) when( storage.acquireSearcher() ).thenReturn( searcher ); for ( int i = 0; i < documents.length; i++ ) { + final int docId = i; + doAnswer( new Answer() + { + @Override + public Void answer( InvocationOnMock invocation ) throws Throwable + { + Collector collector = (Collector) invocation.getArguments()[1]; + LeafCollector leafCollector = collector.getLeafCollector( null ); + try + { + leafCollector.collect( docId ); + } + catch ( CollectionTerminatedException swallow ) + { + // swallow + } + return null; + } + } ).when( searcher ).search( + eq( new TermQuery( format.rangeTerm( documents[i] ) ) ), + any( Collector.class ) + ); when( searcher.search( new TermQuery( format.rangeTerm( documents[i] ) ), 1 ) ) .thenReturn( docs( new ScoreDoc( i, 0.0f ) ) ); when( searcher.doc( i ) ).thenReturn( documents[i] ); diff --git a/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/PageOfRangesIteratorTest.java b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/PageOfRangesIteratorTest.java index 1685fcaa04ab..4913b87d3974 100644 --- a/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/PageOfRangesIteratorTest.java +++ b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/index/PageOfRangesIteratorTest.java @@ -19,11 +19,10 @@ */ package org.neo4j.kernel.api.impl.index; -import java.util.ArrayList; -import java.util.List; - import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; @@ -31,23 +30,27 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import org.mockito.ArgumentCaptor; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; import org.neo4j.collection.primitive.PrimitiveLongIterator; +import org.neo4j.helpers.collection.MapUtil; import static java.util.Arrays.asList; - import static org.junit.Assert.assertEquals; import static org.mockito.Matchers.any; import static org.mockito.Matchers.anyInt; -import static org.mockito.Matchers.eq; import static org.mockito.Matchers.same; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoMoreInteractions; import static org.mockito.Mockito.when; - import static org.neo4j.collection.primitive.PrimitiveLongCollections.concat; import static org.neo4j.helpers.collection.IteratorUtil.primitivesList; @@ -56,10 +59,10 @@ * * @see NodeRangeDocumentLabelScanStorageStrategyTest for tests for writing. */ -@RunWith(Parameterized.class) +@RunWith( Parameterized.class ) public class PageOfRangesIteratorTest { - @Parameterized.Parameters(name = "{0} bits") + @Parameterized.Parameters( name = "{0} bits" ) public static List formats() { ArrayList parameters = new ArrayList<>(); @@ -85,19 +88,36 @@ public void shouldReadPagesOfDocumentsFromSearcher() throws Exception // given Query query = mock( Query.class ); IndexSearcher searcher = mock( IndexSearcher.class ); - ScoreDoc doc1 = new ScoreDoc( 37, 0.0f ); - ScoreDoc doc2 = new ScoreDoc( 16, 0.0f ); - ScoreDoc doc3 = new ScoreDoc( 11, 0.0f ); - when( searcher.searchAfter( any( ScoreDoc.class ), same( query ), anyInt() ) ).thenReturn( - docs( doc1, doc2 ), // page1 - docs( doc3 ) // page2 - ); - when( searcher.doc( 37 ) ).thenReturn( document( format.rangeField( 0x1 ), - format.labelField( labelId, 0x01 ) ) ); - when( searcher.doc( 16 ) ).thenReturn( document( format.rangeField( 0x2 ), - format.labelField( labelId, 0x03 ) ) ); - when( searcher.doc( 11 ) ).thenReturn( document( format.rangeField( 0x3 ), - format.labelField( labelId, 0x30 ) ) ); + + NumericDocValues rangeNDV = mock( NumericDocValues.class ); + when( rangeNDV.get( 11 ) ).thenReturn( 0x1L ); + when( rangeNDV.get( 16 ) ).thenReturn( 0x2L ); + when( rangeNDV.get( 37 ) ).thenReturn( 0x3L ); + + NumericDocValues labelNDV = mock( NumericDocValues.class ); + when( labelNDV.get( 11 ) ).thenReturn( 0x01L ); + when( labelNDV.get( 16 ) ).thenReturn( 0x03L ); + when( labelNDV.get( 37 ) ).thenReturn( 0x30L ); + + Map docValues = + MapUtil.genericMap( "range", rangeNDV, "7", labelNDV ); + IndexReaderStub reader = new IndexReaderStub( docValues ); + reader.setElements( new String[]{"11", "16", "37"} ); + final LeafReaderContext context = reader.getContext(); + + doAnswer( new Answer() + { + @Override + public Void answer( InvocationOnMock invocation ) throws Throwable + { + DocValuesCollector collector = (DocValuesCollector) invocation.getArguments()[1]; + collector.doSetNextReader( context ); + collector.collect( 11 ); + collector.collect( 16 ); + collector.collect( 37 ); + return null; + } + } ).when( searcher ).search( same( query ), any( DocValuesCollector.class ) ); PrimitiveLongIterator iterator = concat( new PageOfRangesIterator( format, searcher, pageSize, query, labelId ) ); @@ -112,12 +132,13 @@ public void shouldReadPagesOfDocumentsFromSearcher() throws Exception /*doc3:*/(3L << format.bitmapFormat().shift) + 4, (3L << format.bitmapFormat().shift) + 5 ), longs ); - ArgumentCaptor prefixCollector = ArgumentCaptor.forClass( ScoreDoc.class ); - verify( searcher, times( 2 ) ).searchAfter( prefixCollector.capture(), same( query ), eq( 2 ) ); - assertEquals( asList( null, doc2 ), prefixCollector.getAllValues() ); + verify( searcher, times( 1 ) ).search( same( query ), any( DocValuesCollector.class ) ); + verify( rangeNDV, times( 3 ) ).get( anyInt() ); + verify( labelNDV, times( 3 ) ).get( anyInt() ); - verify( searcher, times( 3 ) ).doc( anyInt() ); verifyNoMoreInteractions( searcher ); + verifyNoMoreInteractions( labelNDV ); + verifyNoMoreInteractions( rangeNDV ); } static TopDocs docs( ScoreDoc... docs )