From e77d7ec170809d3e2b3d5b903ef24377edd937c8 Mon Sep 17 00:00:00 2001 From: Anton Persson Date: Mon, 14 May 2018 10:50:00 +0200 Subject: [PATCH] Store only minimal splitter in internal nodes in GBPTree Keys in internal nodes only act as guide posts for search and thus we only need to store just enough to separate the sub ranges from each other. For keys with dynamic layout such as strings this can be used to save space within internal nodes and instead of storing full string we may only need to store the first few characters depending on the nature of the strings. --- .../neo4j/index/internal/gbptree/Layout.java | 13 ++++++ .../internal/gbptree/TreeNodeDynamicSize.java | 26 +++++++++++- .../InternalTreeLogicDynamicSizeTest.java | 26 ++++++++++++ .../gbptree/InternalTreeLogicTestBase.java | 41 ++++++++++--------- .../gbptree/SimpleByteArrayLayout.java | 16 +++++++- .../impl/index/schema/StringLayout.java | 17 ++++++++ .../impl/index/schema/StringSchemaKey.java | 9 +++- .../schema/NativeSchemaIndexTestUtil.java | 4 +- .../index/schema/NativeStringIndexingIT.java | 4 +- 9 files changed, 128 insertions(+), 28 deletions(-) diff --git a/community/index/src/main/java/org/neo4j/index/internal/gbptree/Layout.java b/community/index/src/main/java/org/neo4j/index/internal/gbptree/Layout.java index de3d293114b67..714d5c60a759c 100644 --- a/community/index/src/main/java/org/neo4j/index/internal/gbptree/Layout.java +++ b/community/index/src/main/java/org/neo4j/index/internal/gbptree/Layout.java @@ -109,6 +109,19 @@ public interface Layout extends Comparator * @return true if keys and values are fixed size, otherwise true. */ boolean fixedSize(); + + /** + * Find the shortest key (best effort) that separates left from right in sort order + * and initialize into with the result. + * @param left key that is less than right. + * @param right key that is greater than left. + * @param into will be initialized with the result. 
+ */ + default void minimalSplitter( KEY left, KEY right, KEY into ) + { + copyKey( right, into ); + } + /** * Used as verification when loading an index after creation, to verify that the same layout is used, * as the one it was initially created with. diff --git a/community/index/src/main/java/org/neo4j/index/internal/gbptree/TreeNodeDynamicSize.java b/community/index/src/main/java/org/neo4j/index/internal/gbptree/TreeNodeDynamicSize.java index a22f1c1a4fd58..1be1624eca0e7 100644 --- a/community/index/src/main/java/org/neo4j/index/internal/gbptree/TreeNodeDynamicSize.java +++ b/community/index/src/main/java/org/neo4j/index/internal/gbptree/TreeNodeDynamicSize.java @@ -100,6 +100,8 @@ public class TreeNodeDynamicSize extends TreeNode private final int[] newOffset = new int[maxKeyCount]; private final int totalSpace; private final int halfSpace; + private final KEY tmpKeyLeft; + private final KEY tmpKeyRight; TreeNodeDynamicSize( int pageSize, Layout layout ) { @@ -115,6 +117,9 @@ public class TreeNodeDynamicSize extends TreeNode "with current page size of %dB. We require this cap to be at least %dB.", LEAST_NUMBER_OF_ENTRIES_PER_PAGE, keyValueSizeCap, pageSize, Long.SIZE ); } + + tmpKeyLeft = layout.newKey(); + tmpKeyRight = layout.newKey(); } @Override @@ -633,14 +638,31 @@ void doSplitLeaf( PageCursor leftCursor, int leftKeyCount, PageCursor rightCurso int keyCountAfterInsert = leftKeyCount + 1; int middlePos = middleLeaf( leftCursor, insertPos, newKey, newValue, keyCountAfterInsert ); + KEY leftInSplit; + KEY rightInSplit; if ( middlePos == insertPos ) { - layout.copyKey( newKey, newSplitter ); + leftInSplit = keyAt( leftCursor, tmpKeyLeft, middlePos - 1, LEAF ); + rightInSplit = newKey; + } else { - keyAt( leftCursor, newSplitter, insertPos < middlePos ? middlePos - 1 : middlePos, LEAF ); + int rightPos = insertPos < middlePos ? 
middlePos - 1 : middlePos; + rightInSplit = keyAt( leftCursor, tmpKeyRight, rightPos, LEAF ); + + if ( rightPos == insertPos ) + { + leftInSplit = newKey; + } + else + { + int leftPos = rightPos - 1; + leftInSplit = keyAt( leftCursor, tmpKeyLeft, leftPos, LEAF ); + } } + layout.minimalSplitter( leftInSplit, rightInSplit, newSplitter ); + int rightKeyCount = keyCountAfterInsert - middlePos; if ( insertPos < middlePos ) diff --git a/community/index/src/test/java/org/neo4j/index/internal/gbptree/InternalTreeLogicDynamicSizeTest.java b/community/index/src/test/java/org/neo4j/index/internal/gbptree/InternalTreeLogicDynamicSizeTest.java index 3beb2e94a06ba..5513c0201876f 100644 --- a/community/index/src/test/java/org/neo4j/index/internal/gbptree/InternalTreeLogicDynamicSizeTest.java +++ b/community/index/src/test/java/org/neo4j/index/internal/gbptree/InternalTreeLogicDynamicSizeTest.java @@ -19,6 +19,13 @@ */ package org.neo4j.index.internal.gbptree; +import org.junit.Test; + +import java.io.IOException; + +import static org.junit.Assert.assertEquals; +import static org.neo4j.index.internal.gbptree.TreeNode.Type.INTERNAL; + public class InternalTreeLogicDynamicSizeTest extends InternalTreeLogicTestBase { private SimpleByteArrayLayout layout = new SimpleByteArrayLayout(); @@ -45,4 +52,23 @@ protected TestLayout getLayout() { return layout; } + + @Test + public void storeOnlyMinimalKeyDividerInInternal() throws IOException + { + // given + initialize(); + long key = 0; + while ( numberOfRootSplits == 0 ) + { + insert( key( key ), value( key ) ); + key++; + } + + // when + RawBytes rawBytes = keyAt( rootId, 0, INTERNAL ); + + // then + assertEquals( "expected no tail on internal key but was " + rawBytes.toString(), Long.BYTES, rawBytes.bytes.length ); + } } diff --git a/community/index/src/test/java/org/neo4j/index/internal/gbptree/InternalTreeLogicTestBase.java b/community/index/src/test/java/org/neo4j/index/internal/gbptree/InternalTreeLogicTestBase.java index 
f7f651328ec21..52f2b5bd28feb 100644 --- a/community/index/src/test/java/org/neo4j/index/internal/gbptree/InternalTreeLogicTestBase.java +++ b/community/index/src/test/java/org/neo4j/index/internal/gbptree/InternalTreeLogicTestBase.java @@ -94,9 +94,9 @@ public static Collection generators() @Rule public RandomRule random = new RandomRule(); - private long rootId; + long rootId; + int numberOfRootSplits; private long rootGeneration; - private int numberOfRootSplits; private int numberOfRootSuccessors; @Before @@ -512,15 +512,16 @@ public void modifierMustRemoveFromRightChildButNotFromInternalWithHitOnInternalS insert( key( i ), value( i ) ); // And one more to avoid rebalance // when key to remove exists in internal - KEY keyToRemove = structurePropagation.rightKey; + KEY internalKey = structurePropagation.rightKey; goTo( readCursor, rootId ); - assertEqualsKey( keyAt( 0, INTERNAL ), keyToRemove ); + assertEqualsKey( keyAt( 0, INTERNAL ), internalKey ); // and as first key in right child long rightChild = structurePropagation.rightChild; goTo( readCursor, rightChild ); int keyCountInRightChild = keyCount(); - assertEqualsKey( keyAt( 0, LEAF ), keyToRemove ); + KEY keyToRemove = keyAt( 0, LEAF ); + assertEquals( "expected same seed", getSeed( keyToRemove ), getSeed( internalKey ) ); // and we remove it generationManager.checkpoint(); @@ -529,7 +530,7 @@ public void modifierMustRemoveFromRightChildButNotFromInternalWithHitOnInternalS // then we should still find it in internal goTo( readCursor, rootId ); assertThat( keyCount(), is( 1 ) ); - assertEqualsKey( keyAt( 0, INTERNAL ), keyToRemove ); + assertEquals( "expected same seed", getSeed( keyAt( 0, INTERNAL ) ), getSeed( keyToRemove ) ); // but not in right leaf rightChild = childAt( readCursor, 1, stableGeneration, unstableGeneration ); @@ -581,14 +582,14 @@ public void modifierMustNotRemoveWhenKeyOnlyExistInInternal() throws Exception insert( key( i ), value( i ) ); // And an extra to not cause rebalance // when 
key to remove exists in internal - KEY keyToRemove = structurePropagation.rightKey; - assertEqualsKey( keyAt( rootId, 0, INTERNAL ), keyToRemove ); + long currentRightChild = structurePropagation.rightChild; + KEY keyToRemove = keyAt( currentRightChild, 0, LEAF ); + assertEquals( getSeed( keyAt( rootId, 0, INTERNAL ) ), getSeed( keyToRemove ) ); // and as first key in right child - long currentRightChild = structurePropagation.rightChild; goTo( readCursor, currentRightChild ); int keyCountInRightChild = keyCount(); - assertEqualsKey( keyAt( 0, LEAF ), keyToRemove ); + assertEquals( "same seed", getSeed( keyToRemove ), getSeed( keyAt( 0, LEAF ) ) ); // and we remove it generationManager.checkpoint(); @@ -598,12 +599,12 @@ public void modifierMustNotRemoveWhenKeyOnlyExistInInternal() throws Exception // then we should still find it in internal assertThat( keyCount(), is( 1 ) ); - assertEqualsKey( keyAt( 0, INTERNAL ), keyToRemove ); + assertEquals( "same seed", getSeed( keyAt( 0, INTERNAL ) ), getSeed( keyToRemove ) ); // but not in right leaf goTo( readCursor, currentRightChild ); assertThat( keyCount(), is( keyCountInRightChild - 1 ) ); - assertEqualsKey( keyAt( 0, LEAF ), key( getSeed( keyToRemove ) + 1 ) ); + assertEquals( "same seed", getSeed( keyAt( 0, LEAF ) ), getSeed( key( getSeed( keyToRemove ) + 1 ) ) ); // and when we remove same key again, nothing should change assertNull( remove( keyToRemove, dontCare ) ); @@ -655,7 +656,7 @@ public void mustNotRebalanceFromRightToLeft() throws Exception // ... 
no keys should have moved from right sibling int actualKeyCount = TreeNode.keyCount( readCursor ); assertEquals( "actualKeyCount=" + actualKeyCount + ", expectedKeyCount=" + expectedKeyCount, expectedKeyCount, actualKeyCount ); - assertEqualsKey( keyAt( 0, LEAF ), primKey ); + assertEquals( "same seed", getSeed( primKey ), getSeed( keyAt( 0, LEAF ) ) ); } @Test @@ -745,6 +746,7 @@ public void mustPropagateStructureOnMergeFromLeft() throws Exception } goTo( readCursor, rootId ); assertEquals( 2, keyCount() ); + long oldRootId = readCursor.getCurrentPageId(); long oldLeftChild = childAt( readCursor, 0, stableGeneration, unstableGeneration ); long oldMiddleChild = childAt( readCursor, 1, stableGeneration, unstableGeneration ); long oldRightChild = childAt( readCursor, 2, stableGeneration, unstableGeneration ); @@ -752,12 +754,13 @@ public void mustPropagateStructureOnMergeFromLeft() throws Exception // WHEN generationManager.checkpoint(); - KEY middleKey = keyAt( 0, INTERNAL ); // Should be located in middle leaf + KEY middleKey = keyAt( oldMiddleChild,0, LEAF ); // Should be located in middle leaf remove( middleKey, dontCare ); allKeys.remove( middleKey ); // THEN // old root should still have 2 keys + goTo( readCursor, oldRootId ); assertEquals( 2, keyCount() ); // new root should have only 1 key @@ -1593,7 +1596,7 @@ private int keyCount() return TreeNode.keyCount( readCursor ); } - private void initialize() + void initialize() { node.initializeLeaf( cursor, stableGeneration, unstableGeneration ); updateRoot(); @@ -1659,12 +1662,12 @@ private void printTree() throws IOException cursor.next( currentPageId ); } - private KEY key( long seed ) + KEY key( long seed ) { return layout.key( seed ); } - private VALUE value( long seed ) + VALUE value( long seed ) { return layout.value( seed ); } @@ -1707,7 +1710,7 @@ private void assertSiblingOrderAndPointers( long... 
children ) throws IOExceptio goTo( readCursor, currentPageId ); } - private KEY keyAt( long nodeId, int pos, TreeNode.Type type ) + KEY keyAt( long nodeId, int pos, TreeNode.Type type ) { KEY readKey = layout.newKey(); long prevId = readCursor.getCurrentPageId(); @@ -1732,7 +1735,7 @@ private VALUE valueAt( int pos ) return node.valueAt( readCursor, layout.newValue(), pos ); } - private void insert( KEY key, VALUE value ) throws IOException + void insert( KEY key, VALUE value ) throws IOException { insert( key, value, overwrite() ); } diff --git a/community/index/src/test/java/org/neo4j/index/internal/gbptree/SimpleByteArrayLayout.java b/community/index/src/test/java/org/neo4j/index/internal/gbptree/SimpleByteArrayLayout.java index d45885b52d2b0..f63b894edd1d5 100644 --- a/community/index/src/test/java/org/neo4j/index/internal/gbptree/SimpleByteArrayLayout.java +++ b/community/index/src/test/java/org/neo4j/index/internal/gbptree/SimpleByteArrayLayout.java @@ -33,10 +33,15 @@ public RawBytes newKey() @Override public RawBytes copyKey( RawBytes rawBytes, RawBytes into ) + { + return copyKey( rawBytes, into, rawBytes.bytes.length ); + } + + private RawBytes copyKey( RawBytes rawBytes, RawBytes into, int length ) { byte[] src = rawBytes.bytes; - byte[] target = new byte[src.length]; - System.arraycopy( src, 0, target, 0, src.length ); + byte[] target = new byte[length]; + System.arraycopy( src, 0, target, 0, length ); into.bytes = target; return into; } @@ -99,6 +104,13 @@ public boolean fixedSize() return false; } + @Override + public void minimalSplitter( RawBytes left, RawBytes right, RawBytes into ) + { + // Minimal splitter will always be the first 8B + copyKey( right, into, Long.BYTES ); + } + @Override public long identifier() { diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/StringLayout.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/StringLayout.java index 3cb36bdc69a58..d9d970a32c5c0 100644 --- 
a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/StringLayout.java
+++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/StringLayout.java
@@ -82,6 +82,23 @@ public boolean fixedSize()
         return false;
     }
 
+    @Override
+    public void minimalSplitter( StringSchemaKey left, StringSchemaKey right, StringSchemaKey into )
+    {
+        // Count the leading bytes that left and right have in common.
+        int maxLength = Math.min( left.bytesLength, right.bytesLength );
+        int commonLength = 0;
+        while ( commonLength < maxLength && left.bytes[commonLength] == right.bytes[commonLength] )
+        {
+            commonLength++;
+        }
+        // Keep one byte of right beyond the common prefix so the splitter differs from left even when
+        // left is a strict prefix of right. Cap at right's length, which is hit when the bytes are
+        // identical, in which case into becomes a full copy of right.
+        int targetLength = Math.min( right.bytesLength, commonLength + 1 );
+        into.copyFrom( right, targetLength );
+    }
+
     @Override
     public String toString()
     {
diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/StringSchemaKey.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/StringSchemaKey.java
index c8740fe431938..31ae0394c5296 100644
--- a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/StringSchemaKey.java
+++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/StringSchemaKey.java
@@ -189,8 +189,13 @@ public void writeString( char value )
 
     void copyFrom( StringSchemaKey key )
     {
-        setBytesLength( key.bytesLength );
-        System.arraycopy( key.bytes, 0, bytes, 0, key.bytesLength );
+        copyFrom( key, key.bytesLength );
+    }
+
+    void copyFrom( StringSchemaKey key, int targetLength )
+    {
+        setBytesLength( targetLength );
+        System.arraycopy( key.bytes, 0, bytes, 0, targetLength );
         setEntityId( key.getEntityId() );
         setCompareId( key.getCompareId() );
     }
diff --git a/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/NativeSchemaIndexTestUtil.java b/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/NativeSchemaIndexTestUtil.java
index 75e5b82dea5c9..d3029fc4e6166 100644
--- a/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/NativeSchemaIndexTestUtil.java
+++ 
b/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/NativeSchemaIndexTestUtil.java @@ -144,8 +144,8 @@ private void assertSameHits( Hit[] expectedHits, Hit[] a { Arrays.sort( expectedHits, comparator ); Arrays.sort( actualHits, comparator ); - assertEquals( format( "Array length differ%nExpected:%s%nActual:%s", - Arrays.toString( expectedHits ), Arrays.toString( actualHits ) ), + assertEquals( format( "Array length differ%nExpected:%d, Actual:%d", + expectedHits.length, actualHits.length ), expectedHits.length, actualHits.length ); for ( int i = 0; i < expectedHits.length; i++ ) diff --git a/community/neo4j/src/test/java/org/neo4j/kernel/impl/index/schema/NativeStringIndexingIT.java b/community/neo4j/src/test/java/org/neo4j/kernel/impl/index/schema/NativeStringIndexingIT.java index 4d0a40101e00e..f4c6d6a8fce3b 100644 --- a/community/neo4j/src/test/java/org/neo4j/kernel/impl/index/schema/NativeStringIndexingIT.java +++ b/community/neo4j/src/test/java/org/neo4j/kernel/impl/index/schema/NativeStringIndexingIT.java @@ -28,6 +28,7 @@ import org.neo4j.graphdb.Label; import org.neo4j.graphdb.Node; import org.neo4j.graphdb.Transaction; +import org.neo4j.graphdb.factory.GraphDatabaseSettings; import org.neo4j.graphdb.schema.IndexCreator; import org.neo4j.internal.kernel.api.IndexOrder; import org.neo4j.internal.kernel.api.IndexQuery; @@ -56,7 +57,8 @@ public class NativeStringIndexingIT private static final String KEY2 = "key2"; @Rule - public final DatabaseRule db = new EmbeddedDatabaseRule(); + public final DatabaseRule db = new EmbeddedDatabaseRule() + .withSetting( GraphDatabaseSettings.default_schema_provider, GraphDatabaseSettings.SchemaIndex.NATIVE20.providerName() ); @Rule public final RandomRule random = new RandomRule();