IndexPopulator for native schema number index

Implemented using GBPTree. For the time being only IndexPopulator is implemented; online IndexAccessor and SchemaIndexProvider will come later. Both unique and non-unique layouts are supported, as are all types of Numbers. There is a small difference between the unique and non-unique layouts: the entity ID is in the key for non-unique, but in the value for unique. This makes the keys smaller for the unique layout, which will make the overall index size smaller since internal tree nodes do not contain values. Each layout contains both the comparison value (a coerced value type which all numbers are converted into for comparison) and the original value. This means that no additional entity property lookup is needed for filtering those results higher up in the stack.
neo4j · May 30, 2017 · 22d97b2 · 22d97b2
1 parent 1171dc5
commit 22d97b2
Show file tree

Hide file tree

Showing 31 changed files with 2,883 additions and 56 deletions.
diff --git a/community/common/src/test/java/org/neo4j/test/Randoms.java b/community/common/src/test/java/org/neo4j/test/Randoms.java
@@ -155,11 +155,16 @@ public char character( int characterSets )
             {
                 switch ( bit )
                 {
-                case CS_LOWERCASE_LETTERS: return (char) intBetween( 'a', 'z' );
+                case CS_LOWERCASE_LETTERS:
-                case CS_UPPERCASE_LETTERS: return (char) intBetween( 'A', 'Z' );
+                    return (char) intBetween( 'a', 'z' );
-                case CS_DIGITS: return (char) intBetween( '0', '9' );
+                case CS_UPPERCASE_LETTERS:
-                case CS_SYMBOLS: return symbol();
+                    return (char) intBetween( 'A', 'Z' );
-                default: throw new IllegalArgumentException( "Unknown character set " + bit );
+                case CS_DIGITS:
+                    return (char) intBetween( '0', '9' );
+                case CS_SYMBOLS:
+                    return symbol();
+                default:
+                    throw new IllegalArgumentException( "Unknown character set " + bit );
                 }
             }
         }
@@ -211,6 +216,28 @@ public <T> T among( T[] among )
         return among[random.nextInt( among.length )];
     }
 
+    /**
+     * Picks one of six numeric kinds at random (byte, short, int, long, float or double) and returns
+     * a value of that kind obtained from the underlying {@code random} source.
+     *
+     * @return a randomly generated {@link Number}.
+     * @throws IllegalArgumentException if the selected kind is outside the six handled ones.
+     */
+    public Number numberPropertyValue()
+    {
+        int kind = random.nextInt( 6 );
+        if ( kind == 0 )
+        {
+            return (byte) random.nextInt();
+        }
+        if ( kind == 1 )
+        {
+            return (short) random.nextInt();
+        }
+        if ( kind == 2 )
+        {
+            return random.nextInt();
+        }
+        if ( kind == 3 )
+        {
+            return random.nextLong();
+        }
+        if ( kind == 4 )
+        {
+            return random.nextFloat();
+        }
+        if ( kind == 5 )
+        {
+            return random.nextDouble();
+        }
+        throw new IllegalArgumentException( "Unknown value type " + kind );
+    }
+
     public Object propertyValue()
     {
         return propertyValue( propertyType( true ) );

diff --git a/community/common/src/test/java/org/neo4j/test/rule/RandomRule.java b/community/common/src/test/java/org/neo4j/test/rule/RandomRule.java
@@ -177,6 +177,11 @@ public <T> T among( T[] among )
         return randoms.among( among );
     }
 
+    /**
+     * Delegates to {@code numberPropertyValue()} of the wrapped {@code randoms} instance.
+     *
+     * @return the {@link Number} produced by {@code randoms}.
+     */
+    public Number numberPropertyValue()
+    {
+        return randoms.numberPropertyValue();
+    }
+
     public Object propertyValue()
     {
         return randoms.propertyValue();

diff --git a/community/index/src/main/java/org/neo4j/index/internal/gbptree/CleanupJob.java b/community/index/src/main/java/org/neo4j/index/internal/gbptree/CleanupJob.java
@@ -42,6 +42,9 @@ public interface CleanupJob extends Runnable
      */
     Exception getCause();
 
+    /**
+     * A {@link CleanupJob} that doesn't need cleaning, i.e. it's already clean.
+     */
     CleanupJob CLEAN = new CleanupJob()
     {
         @Override

diff --git a/community/index/src/main/java/org/neo4j/index/internal/gbptree/GBPTree.java b/community/index/src/main/java/org/neo4j/index/internal/gbptree/GBPTree.java
@@ -203,7 +203,7 @@ public void startupState( boolean clean )
     /**
      * No-op header reader.
      */
-    static final Header.Reader NO_HEADER = ( cursor, length ) ->
+    public static final Header.Reader NO_HEADER = ( cursor, length ) ->
     {
     };
 
@@ -409,7 +409,7 @@ public GBPTree( PageCache pageCache, File indexFile, Layout<KEY,VALUE> layout, i
             {
                 close();
             }
-            catch ( IOException e )
+            catch ( Throwable e )
             {
                 t.addSuppressed( e );
             }
@@ -946,7 +946,15 @@ void printTree() throws IOException
     }
 
     // Utility method
-    void printTree( boolean printValues, boolean printPosition, boolean printState ) throws IOException
+    /**
+     * Prints the contents of the tree to System.out.
+     *
+     * @param printValues whether or not to print values in the leaf nodes.
+     * @param printPosition whether or not to print position for each key.
+     * @param printState whether or not to print the tree state.
+     * @throws IOException on I/O error.
+     */
+    public void printTree( boolean printValues, boolean printPosition, boolean printState ) throws IOException
     {
         try ( PageCursor cursor = openRootCursor( PagedFile.PF_SHARED_READ_LOCK ) )
         {

diff --git a/.../src/main/java/org/neo4j/index/internal/gbptree/GroupingRecoveryCleanupWorkCollector.java b/.../src/main/java/org/neo4j/index/internal/gbptree/GroupingRecoveryCleanupWorkCollector.java
@@ -36,6 +36,9 @@ public class GroupingRecoveryCleanupWorkCollector implements RecoveryCleanupWork
     private final Queue<CleanupJob> jobs;
     private final JobScheduler jobScheduler;
 
+    /**
+     * @param jobScheduler {@link JobScheduler} to queue {@link CleanupJob} into.
+     */
     public GroupingRecoveryCleanupWorkCollector( JobScheduler jobScheduler )
     {
         this.jobScheduler = jobScheduler;

diff --git a/community/index/src/main/java/org/neo4j/index/internal/gbptree/Header.java b/community/index/src/main/java/org/neo4j/index/internal/gbptree/Header.java
@@ -38,7 +38,6 @@ public interface Writer
         /**
          * Writes header data into {@code to} with previous valid header data found in {@code from} of {@code length}
          * bytes in size.
-         *
          * @param from {@link PageCursor} positioned at the header data written in the previous check point.
          * @param length size in bytes of the previous header data.
          * @param to {@link PageCursor} to write new header into.

diff --git a/...ty/index/src/main/java/org/neo4j/index/internal/gbptree/RecoveryCleanupWorkCollector.java b/...ty/index/src/main/java/org/neo4j/index/internal/gbptree/RecoveryCleanupWorkCollector.java
@@ -45,6 +45,10 @@ public interface RecoveryCleanupWorkCollector extends Lifecycle
      */
     RecoveryCleanupWorkCollector IMMEDIATE = new ImmediateRecoveryCleanupWorkCollector();
 
+    /**
+     * {@link RecoveryCleanupWorkCollector} which runs added {@link CleanupJob} as part of the {@link #add(CleanupJob)}
+     * call in the caller thread.
+     */
     class ImmediateRecoveryCleanupWorkCollector extends LifecycleAdapter implements RecoveryCleanupWorkCollector
     {
         @Override

diff --git a/.../kernel/src/main/java/org/neo4j/kernel/impl/api/index/sampling/NonUniqueIndexSampler.java b/.../kernel/src/main/java/org/neo4j/kernel/impl/api/index/sampling/NonUniqueIndexSampler.java
@@ -40,4 +40,27 @@ public interface NonUniqueIndexSampler
     IndexSample result();
 
     IndexSample result( int numDocs );
+
+    /**
+     * Convenience base class which implements every {@code include} and {@code exclude} variant as a no-op.
+     * The class is abstract and does not implement the {@code result} methods, which are left to subclasses.
+     */
+    abstract class Adapter implements NonUniqueIndexSampler
+    {
+        @Override
+        public void include( String value )
+        {   // no-op
+        }
+
+        @Override
+        public void include( String value, long increment )
+        {   // no-op
+        }
+
+        @Override
+        public void exclude( String value )
+        {   // no-op
+        }
+
+        @Override
+        public void exclude( String value, long decrement )
+        {   // no-op
+        }
+    }
 }
diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/GBPTreeUtil.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/GBPTreeUtil.java
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2002-2017 "Neo Technology,"
+ * Network Engine for Objects in Lund AB [http://neotechnology.com]
+ *
+ * This file is part of Neo4j.
+ *
+ * Neo4j is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.neo4j.kernel.impl.index;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.NoSuchFileException;
+
+import org.neo4j.index.internal.gbptree.GBPTree;
+import org.neo4j.io.fs.FileHandle;
+import org.neo4j.io.pagecache.PageCache;
+
+/**
+ * Utilities for common operations around a {@link GBPTree}.
+ */
+public class GBPTreeUtil
+{
+    /**
+     * Deletes store file backing a {@link GBPTree}.
+     *
+     * @param pageCache {@link PageCache} which manages the file.
+     * @param storeFile the {@link File} to delete.
+     * @throws NoSuchFileException if the {@code storeFile} doesn't exist according to the {@code pageCache}.
+     * @throws IOException on failure to delete existing {@code storeFile}.
+     */
+    public static void delete( PageCache pageCache, File storeFile ) throws IOException
+    {
+        FileHandle fileHandle = storeFileHandle( pageCache, storeFile );
+        fileHandle.delete();
+    }
+
+    /**
+     * Deletes store file backing a {@link GBPTree}, if it exists according to the {@code pageCache}.
+     *
+     * @param pageCache {@link PageCache} which manages the file.
+     * @param storeFile the {@link File} to delete.
+     * @throws IOException on failure to delete existing {@code storeFile}.
+     */
+    public static void deleteIfPresent( PageCache pageCache, File storeFile ) throws IOException
+    {
+        try
+        {
+            delete( pageCache, storeFile );
+        }
+        catch ( NoSuchFileException e )
+        {
+            // File does not exist, we don't need to delete
+        }
+    }
+
+    /**
+     * Checks whether or not {@code storeFile} exists according to {@code pageCache}.
+     *
+     * @param pageCache {@link PageCache} which manages the file.
+     * @param storeFile the {@link File} to check for existence.
+     * @return {@code true} if {@code storeFile} exists according to {@code pageCache}, otherwise {@code false}.
+     * Any {@link IOException} raised while looking for the file is also reported as {@code false}.
+     */
+    public static boolean storeFileExists( PageCache pageCache, File storeFile )
+    {
+        try
+        {
+            // Ask the Optional directly instead of relying on an exception to signal absence
+            return pageCache.getCachedFileSystem().streamFilesRecursive( storeFile ).findFirst().isPresent();
+        }
+        catch ( IOException e )
+        {
+            return false;
+        }
+    }
+
+    /**
+     * Looks up the {@link FileHandle} for {@code storeFile} through the file system of the {@code pageCache}.
+     *
+     * @param pageCache {@link PageCache} which manages the file.
+     * @param storeFile the {@link File} to get a handle for.
+     * @return the first {@link FileHandle} found for {@code storeFile}.
+     * @throws NoSuchFileException if no file was found for {@code storeFile}.
+     * @throws IOException on failure to list files.
+     */
+    private static FileHandle storeFileHandle( PageCache pageCache, File storeFile ) throws IOException
+    {
+        // An empty result must surface as NoSuchFileException, which is what callers document and catch,
+        // not as the unchecked NoSuchElementException that Optional.get() would throw.
+        return pageCache.getCachedFileSystem().streamFilesRecursive( storeFile ).findFirst()
+                .orElseThrow( () -> new NoSuchFileException( storeFile.getPath() ) );
+    }
+}
diff --git a/...nity/kernel/src/main/java/org/neo4j/kernel/impl/index/labelscan/NativeLabelScanStore.java b/...nity/kernel/src/main/java/org/neo4j/kernel/impl/index/labelscan/NativeLabelScanStore.java
@@ -26,26 +26,25 @@
 import java.io.UncheckedIOException;
 import java.nio.file.NoSuchFileException;
 import java.util.function.Consumer;
-import java.util.Optional;
 import java.util.function.IntFunction;
 
 import org.neo4j.cursor.RawCursor;
 import org.neo4j.graphdb.ResourceIterator;
 import org.neo4j.graphdb.factory.GraphDatabaseSettings;
-import org.neo4j.index.internal.gbptree.RecoveryCleanupWorkCollector;
 import org.neo4j.index.internal.gbptree.GBPTree;
 import org.neo4j.index.internal.gbptree.Header;
 import org.neo4j.index.internal.gbptree.Hit;
 import org.neo4j.index.internal.gbptree.Layout;
 import org.neo4j.index.internal.gbptree.MetadataMismatchException;
-import org.neo4j.io.fs.FileHandle;
+import org.neo4j.index.internal.gbptree.RecoveryCleanupWorkCollector;
 import org.neo4j.io.pagecache.IOLimiter;
 import org.neo4j.io.pagecache.PageCache;
 import org.neo4j.io.pagecache.PageCursor;
 import org.neo4j.kernel.api.labelscan.AllEntriesLabelScanReader;
 import org.neo4j.kernel.api.labelscan.LabelScanStore;
 import org.neo4j.kernel.api.labelscan.LabelScanWriter;
 import org.neo4j.kernel.impl.api.scan.FullStoreChangeStream;
+import org.neo4j.kernel.impl.index.GBPTreeUtil;
 import org.neo4j.kernel.impl.store.UnderlyingStorageException;
 import org.neo4j.kernel.monitoring.Monitors;
 import org.neo4j.storageengine.api.schema.LabelScanReader;
@@ -337,19 +336,7 @@ public void init() throws IOException
     @Override
     public boolean hasStore() throws IOException
     {
-        try
+        return GBPTreeUtil.storeFileExists( pageCache, storeFile );
-        {
-            return storeFileHandle().isPresent();
-        }
-        catch ( NoSuchFileException e )
-        {
-            return false;
-        }
-    }
-
-    private Optional<FileHandle> storeFileHandle() throws IOException
-    {
-        return  pageCache.getCachedFileSystem().streamFilesRecursive( storeFile ).findFirst() ;
     }
 
     /**
@@ -403,11 +390,7 @@ private void dropStrict() throws IOException
             index.close();
             index = null;
         }
-        Optional<FileHandle> fileHandle = storeFileHandle();
+        GBPTreeUtil.delete( pageCache, storeFile );
-        if ( fileHandle.isPresent() )
-        {
-            fileHandle.get().delete();
-        }
     }
 
     /**

diff --git a/...kernel/src/main/java/org/neo4j/kernel/impl/index/schema/ConflictDetectingValueMerger.java b/...kernel/src/main/java/org/neo4j/kernel/impl/index/schema/ConflictDetectingValueMerger.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2002-2017 "Neo Technology,"
+ * Network Engine for Objects in Lund AB [http://neotechnology.com]
+ *
+ * This file is part of Neo4j.
+ *
+ * Neo4j is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.neo4j.kernel.impl.index.schema;
+
+import org.neo4j.index.internal.gbptree.ValueMerger;
+import org.neo4j.index.internal.gbptree.Writer;
+
+/**
+ * {@link ValueMerger} which only detects conflicts, i.e. merges where the key already exists, and never
+ * produces a merged value. After this merger has been used in a call to
+ * {@link Writer#merge(Object, Object, ValueMerger)}, {@link #wasConflict()} tells whether or not that call
+ * conflicted with an existing key. Calling {@link #wasConflict()} also resets the conflict flag.
+ *
+ * @param <VALUE> type of values being merged.
+ */
+class ConflictDetectingValueMerger<VALUE extends SchemaNumberValue> implements ValueMerger<VALUE>
+{
+    private boolean conflict;
+    private long existingNodeId;
+    private long addedNodeId;
+
+    /**
+     * Flags a conflict and remembers the entity ids of both values involved.
+     *
+     * @param existingValue value already present for the key.
+     * @param newValue value which was about to be merged in.
+     * @return always {@code null}.
+     */
+    @Override
+    public VALUE merge( VALUE existingValue, VALUE newValue )
+    {
+        this.conflict = true;
+        this.existingNodeId = existingValue.getEntityId();
+        this.addedNodeId = newValue.getEntityId();
+        return null;
+    }
+
+    /**
+     * @return whether or not merge conflicted with an existing key. This call also clears the conflict flag.
+     */
+    boolean wasConflict()
+    {
+        boolean hadConflict = conflict;
+        conflict = false;
+        return hadConflict;
+    }
+
+    /**
+     * @return entity id of the existing value, as recorded by the most recent call to {@code merge}.
+     */
+    long existingNodeId()
+    {
+        return existingNodeId;
+    }
+
+    /**
+     * @return entity id of the new value, as recorded by the most recent call to {@code merge}.
+     */
+    long addedNodeId()
+    {
+        return addedNodeId;
+    }
+}