Replace repeated search in Scan Store with continuous DocValues read

neo4j · Dec 5, 2015 · 7028674 · 7028674
1 parent 0934095
commit 7028674
Show file tree

Hide file tree

Showing 16 changed files with 703 additions and 149 deletions.
diff --git a/...nity/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/BitmapDocumentFormat.java b/...nity/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/BitmapDocumentFormat.java
@@ -23,6 +23,7 @@
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.IntField;
 import org.apache.lucene.document.LongField;
+import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.Term;
@@ -43,15 +44,31 @@ protected Field createLabelField( String label, long bitmap )
         {
             assert (bitmap & 0xFFFFFFFF00000000L) == 0 :
                 "Tried to store a bitmap as int, but which had values outside int limits";
-            return new IntField( label, (int) bitmap, Field.Store.YES );
+            return new NumericDocValuesField( label, bitmap );
+        }
+
+        @Override
+        protected void addLabelFields( Document document, String label, long bitmap )
+        {
+            assert (bitmap & 0xFFFFFFFF00000000L) == 0 :
+                    "Tried to store a bitmap as int, but which had values outside int limits";
+            document.add( new NumericDocValuesField( label, bitmap ) );
+            document.add( new IntField( label, (int) bitmap, Field.Store.YES ) );
         }
     },
     _64( BitmapFormat._64 )
     {
         @Override
         protected Field createLabelField( String label, long bitmap )
         {
-            return new LongField( label, bitmap, Field.Store.YES );
+            return new NumericDocValuesField( label, bitmap );
+        }
+
+        @Override
+        protected void addLabelFields( Document document, String label, long bitmap )
+        {
+            document.add( new NumericDocValuesField( label, bitmap ) );
+            document.add( new LongField( label, bitmap, Field.Store.YES ) );
         }
     };
 
@@ -96,30 +113,38 @@ public Query labelQuery( long labelId )
 
     public Query rangeQuery( long range )
     {
-        return new TermQuery( new Term( RANGE, Long.toString( range) ) );
+        return new TermQuery( new Term( RANGE, Long.toString( range ) ) );
     }
 
     public IndexableField rangeField( long range )
     {
         return new StringField( RANGE, Long.toString( range ), Field.Store.YES );
     }
 
+    public void addRangeValuesField( Document doc, long range )
+    {
+        doc.add( rangeField( range ) );
+        doc.add( new NumericDocValuesField( RANGE, range ) );
+    }
+
     public IndexableField labelField( long key, long bitmap )
     {
         return createLabelField( label( key ), bitmap );
     }
 
     protected abstract Field createLabelField( String label, long bitmap );
 
-    public void addLabelField( Document document, long label, Bitmap bitmap )
+    protected abstract void addLabelFields( Document document, String label, long bitmap );
+
+    public void addLabelAndSearchFields( Document document, long label, Bitmap bitmap )
     {
-        document.add( labelField( label, bitmap ) );
+        addLabelFields( document, label( label ), bitmap.bitmap() );
         document.add( labelSearchField( label ) );
     }
 
     public IndexableField labelSearchField( long label )
     {
-        return new StringField( LABEL, Long.toString( label ), Field.Store.NO );
+        return new StringField( LABEL, Long.toString( label ), Field.Store.YES );
     }
 
     public IndexableField labelField( long key, Bitmap value )
@@ -147,12 +172,22 @@ public Term rangeTerm( Document document )
         return new Term( RANGE, document.get( RANGE ) );
     }
 
-    public boolean isRangeField( IndexableField field )
+    public boolean isRangeOrLabelField( IndexableField field )
     {
         String fieldName = field.name();
         return RANGE.equals( fieldName ) || LABEL.equals( fieldName );
     }
 
+    public boolean isRangeField( IndexableField field )
+    {
+        return RANGE.equals( field.name() );
+    }
+
+    public boolean isLabelBitmapField( IndexableField field )
+    {
+        return !isRangeOrLabelField( field );
+    }
+
     public Bitmap readBitmap( IndexableField field )
     {
         return new Bitmap( bitmap( field ) );

diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/DocValuesAccess.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/DocValuesAccess.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2002-2015 "Neo Technology,"
+ * Network Engine for Objects in Lund AB [http://neotechnology.com]
+ *
+ * This file is part of Neo4j.
+ *
+ * Neo4j is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.neo4j.kernel.api.impl.index;
+
+/**
+ * Represents a point-in-time view on a set of numeric values
+ * that are read from a {@code NumericDocValues} field.
+ */
+interface DocValuesAccess
+{
+    /**
+     * @return the current value of the main field that is driving the values.
+     */
+    long current();
+
+    /**
+     * @return the value of an additional sidecar field.
+     * @throws IllegalStateException if no such field is indexed.
+     */
+    long getValue( String field );
+}
diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/DocValuesCollector.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/DocValuesCollector.java
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2002-2015 "Neo Technology,"
+ * Network Engine for Objects in Lund AB [http://neotechnology.com]
+ *
+ * This file is part of Neo4j.
+ *
+ * Neo4j is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.neo4j.kernel.api.impl.index;
+
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.SimpleCollector;
+import org.apache.lucene.util.RoaringDocIdSet;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Collector to record per-segment {@code DocIdSet}s and {@code LeafReaderContext}s for every
+ * segment that contains a hit. Those items can be later used to read {@code DocValues} fields
+ * and iterate over the matched {@code DocIdSet}s. This collector is different from
+ * {@code org.apache.lucene.search.CachingCollector} in that the later focuses on predictable RAM usage
+ * and feeding other collectors while this collector focuses on exposing the required per-segment data structures
+ * to the user.
+ */
+public final class DocValuesCollector extends SimpleCollector
+{
+
+    private LeafReaderContext context;
+    private int segmentHits;
+    private int totalHits;
+    private final List<MatchingDocs> matchingDocs = new ArrayList<>();
+    private Docs docs;
+
+    /**
+     * @return the documents matched by the query, one {@link MatchingDocs} per visited segment that contains a hit.
+     */
+    public List<MatchingDocs> getMatchingDocs()
+    {
+        if ( docs != null && segmentHits > 0 )
+        {
+            matchingDocs.add( new MatchingDocs( this.context, docs.getDocIdSet(), segmentHits ) );
+            docs = null;
+            context = null;
+        }
+
+        return Collections.unmodifiableList( matchingDocs );
+    }
+
+    /**
+     * @return the total number of hits across all segments.
+     */
+    public int getTotalHits()
+    {
+        return totalHits;
+    }
+
+    @Override
+    public final void collect( int doc ) throws IOException
+    {
+        docs.addDoc( doc );
+        segmentHits++;
+        totalHits++;
+    }
+
+    @Override
+    public boolean needsScores()
+    {
+        return false;
+    }
+
+    @Override
+    protected void doSetNextReader( LeafReaderContext context ) throws IOException
+    {
+        if ( docs != null && segmentHits > 0 )
+        {
+            matchingDocs.add( new MatchingDocs( this.context, docs.getDocIdSet(), segmentHits ) );
+        }
+        docs = createDocs( context.reader().maxDoc() );
+        segmentHits = 0;
+        this.context = context;
+    }
+
+    /**
+     * @return a new {@link Docs} to record hits.
+     */
+    private Docs createDocs( final int maxDoc )
+    {
+        return new Docs( maxDoc );
+    }
+
+    /**
+     * Holds the documents that were matched per segment.
+     */
+    public final static class MatchingDocs
+    {
+
+        /** The {@code LeafReaderContext} for this segment. */
+        public final LeafReaderContext context;
+
+        /** Which documents were seen. */
+        public final DocIdSet docIdSet;
+
+        /** Total number of hits */
+        public final int totalHits;
+
+        public MatchingDocs( LeafReaderContext context, DocIdSet docIdSet, int totalHits )
+        {
+            this.context = context;
+            this.docIdSet = docIdSet;
+            this.totalHits = totalHits;
+        }
+
+        /**
+         * @return the {@code NumericDocValues} for a given field
+         * @throws IllegalArgumentException if this field is not indexed with numeric doc values
+         */
+        public NumericDocValues readDocValues( String field )
+        {
+            try
+            {
+                NumericDocValues dv = context.reader().getNumericDocValues( field );
+                if ( dv == null )
+                {
+                    FieldInfo fi = context.reader().getFieldInfos().fieldInfo( field );
+                    DocValuesType actual = null;
+                    if ( fi != null )
+                    {
+                        actual = fi.getDocValuesType();
+                    }
+                    throw new IllegalStateException(
+                            "The field '" + field + "' is not indexed properly, expected NumericDV, but got '" +
+                            actual + "'" );
+                }
+                return dv;
+            }
+            catch ( IOException e )
+            {
+                throw new RuntimeException( e );
+            }
+        }
+    }
+
+    /**
+     * Used during collection to record matching docs and then return a
+     * {@see DocIdSet} that contains them.
+     */
+    private static final class Docs
+    {
+        private final RoaringDocIdSet.Builder bits;
+
+        public Docs( int maxDoc )
+        {
+            bits = new RoaringDocIdSet.Builder( maxDoc );
+        }
+
+        /** Record the given document. */
+        public void addDoc( int docId )
+        {
+            bits.add( docId );
+        }
+
+        ;
+
+        /** Return the {@see DocIdSet} which contains all the recorded docs. */
+        public DocIdSet getDocIdSet()
+        {
+            return bits.build();
+        }
+    }
+}
diff --git a/...lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/HitsPrimitiveLongIterator.java b/...lucene-index/src/main/java/org/neo4j/kernel/api/impl/index/HitsPrimitiveLongIterator.java