Skip to content

Commit

Permalink
Store only minimal splitter in internal nodes in GBPTree
Browse files Browse the repository at this point in the history
Keys in internal nodes act only as guide posts for search, so we need to
store only enough of each key to separate the sub-ranges from each
other.

For keys with a dynamic layout, such as strings, this saves space within
internal nodes: instead of storing the full string we may only need to
store the first few characters, depending on the nature of the
strings.
  • Loading branch information
burqen committed Jun 12, 2018
1 parent 2285cbe commit e77d7ec
Show file tree
Hide file tree
Showing 9 changed files with 128 additions and 28 deletions.
Expand Up @@ -109,6 +109,19 @@ public interface Layout<KEY, VALUE> extends Comparator<KEY>
* @return true if keys and values are fixed size, otherwise false.
*/
boolean fixedSize();

/**
* Find the shortest key (best effort) that separates left from right in sort order
* and initialize into with the result.
* <p>
* This default implementation does not try to shorten anything: it copies right into into
* by calling {@code copyKey( right, into )}. Layouts with dynamically sized keys can override
* this method to produce a shorter splitter.
* @param left key that is less than right.
* @param right key that is greater than left.
* @param into will be initialized with the result.
*/
default void minimalSplitter( KEY left, KEY right, KEY into )
{
copyKey( right, into );
}

/**
* Used as verification when loading an index after creation, to verify that the same layout is used,
* as the one it was initially created with.
Expand Down
Expand Up @@ -100,6 +100,8 @@ public class TreeNodeDynamicSize<KEY, VALUE> extends TreeNode<KEY,VALUE>
private final int[] newOffset = new int[maxKeyCount];
private final int totalSpace;
private final int halfSpace;
private final KEY tmpKeyLeft;
private final KEY tmpKeyRight;

TreeNodeDynamicSize( int pageSize, Layout<KEY,VALUE> layout )
{
Expand All @@ -115,6 +117,9 @@ public class TreeNodeDynamicSize<KEY, VALUE> extends TreeNode<KEY,VALUE>
"with current page size of %dB. We require this cap to be at least %dB.",
LEAST_NUMBER_OF_ENTRIES_PER_PAGE, keyValueSizeCap, pageSize, Long.SIZE );
}

tmpKeyLeft = layout.newKey();
tmpKeyRight = layout.newKey();
}

@Override
Expand Down Expand Up @@ -633,14 +638,31 @@ void doSplitLeaf( PageCursor leftCursor, int leftKeyCount, PageCursor rightCurso
int keyCountAfterInsert = leftKeyCount + 1;
int middlePos = middleLeaf( leftCursor, insertPos, newKey, newValue, keyCountAfterInsert );

KEY leftInSplit;
KEY rightInSplit;
if ( middlePos == insertPos )
{
layout.copyKey( newKey, newSplitter );
leftInSplit = keyAt( leftCursor, tmpKeyLeft, middlePos - 1, LEAF );
rightInSplit = newKey;

}
else
{
keyAt( leftCursor, newSplitter, insertPos < middlePos ? middlePos - 1 : middlePos, LEAF );
int rightPos = insertPos < middlePos ? middlePos - 1 : middlePos;
rightInSplit = keyAt( leftCursor, tmpKeyRight, rightPos, LEAF );

if ( rightPos == insertPos )
{
leftInSplit = newKey;
}
else
{
int leftPos = rightPos - 1;
leftInSplit = keyAt( leftCursor, tmpKeyLeft, leftPos, LEAF );
}
}
layout.minimalSplitter( leftInSplit, rightInSplit, newSplitter );

int rightKeyCount = keyCountAfterInsert - middlePos;

if ( insertPos < middlePos )
Expand Down
Expand Up @@ -19,6 +19,13 @@
*/
package org.neo4j.index.internal.gbptree;

import org.junit.Test;

import java.io.IOException;

import static org.junit.Assert.assertEquals;
import static org.neo4j.index.internal.gbptree.TreeNode.Type.INTERNAL;

public class InternalTreeLogicDynamicSizeTest extends InternalTreeLogicTestBase<RawBytes,RawBytes>
{
private SimpleByteArrayLayout layout = new SimpleByteArrayLayout();
Expand All @@ -45,4 +52,23 @@ protected TestLayout<RawBytes,RawBytes> getLayout()
{
return layout;
}

/**
 * Inserts keys with increasing seeds until the root has split once and then verifies that the
 * first key stored in the new root is exactly {@link Long#BYTES} bytes long.
 */
@Test
public void storeOnlyMinimalKeyDividerInInternal() throws IOException
{
    // given
    initialize();
    for ( long seed = 0; numberOfRootSplits == 0; seed++ )
    {
        insert( key( seed ), value( seed ) );
    }

    // when
    RawBytes splitter = keyAt( rootId, 0, INTERNAL );

    // then
    assertEquals( "expected no tail on internal key but was " + splitter.toString(), Long.BYTES, splitter.bytes.length );
}
}
Expand Up @@ -94,9 +94,9 @@ public static Collection<Object[]> generators()
@Rule
public RandomRule random = new RandomRule();

private long rootId;
long rootId;
int numberOfRootSplits;
private long rootGeneration;
private int numberOfRootSplits;
private int numberOfRootSuccessors;

@Before
Expand Down Expand Up @@ -512,15 +512,16 @@ public void modifierMustRemoveFromRightChildButNotFromInternalWithHitOnInternalS
insert( key( i ), value( i ) ); // And one more to avoid rebalance

// when key to remove exists in internal
KEY keyToRemove = structurePropagation.rightKey;
KEY internalKey = structurePropagation.rightKey;
goTo( readCursor, rootId );
assertEqualsKey( keyAt( 0, INTERNAL ), keyToRemove );
assertEqualsKey( keyAt( 0, INTERNAL ), internalKey );

// and as first key in right child
long rightChild = structurePropagation.rightChild;
goTo( readCursor, rightChild );
int keyCountInRightChild = keyCount();
assertEqualsKey( keyAt( 0, LEAF ), keyToRemove );
KEY keyToRemove = keyAt( 0, LEAF );
assertEquals( "expected same seed", getSeed( keyToRemove ), getSeed( internalKey ) );

// and we remove it
generationManager.checkpoint();
Expand All @@ -529,7 +530,7 @@ public void modifierMustRemoveFromRightChildButNotFromInternalWithHitOnInternalS
// then we should still find it in internal
goTo( readCursor, rootId );
assertThat( keyCount(), is( 1 ) );
assertEqualsKey( keyAt( 0, INTERNAL ), keyToRemove );
assertEquals( "expected same seed", getSeed( keyAt( 0, INTERNAL ) ), getSeed( keyToRemove ) );

// but not in right leaf
rightChild = childAt( readCursor, 1, stableGeneration, unstableGeneration );
Expand Down Expand Up @@ -581,14 +582,14 @@ public void modifierMustNotRemoveWhenKeyOnlyExistInInternal() throws Exception
insert( key( i ), value( i ) ); // And an extra to not cause rebalance

// when key to remove exists in internal
KEY keyToRemove = structurePropagation.rightKey;
assertEqualsKey( keyAt( rootId, 0, INTERNAL ), keyToRemove );
long currentRightChild = structurePropagation.rightChild;
KEY keyToRemove = keyAt( currentRightChild, 0, LEAF );
assertEquals( getSeed( keyAt( rootId, 0, INTERNAL ) ), getSeed( keyToRemove ) );

// and as first key in right child
long currentRightChild = structurePropagation.rightChild;
goTo( readCursor, currentRightChild );
int keyCountInRightChild = keyCount();
assertEqualsKey( keyAt( 0, LEAF ), keyToRemove );
assertEquals( "same seed", getSeed( keyToRemove ), getSeed( keyAt( 0, LEAF ) ) );

// and we remove it
generationManager.checkpoint();
Expand All @@ -598,12 +599,12 @@ public void modifierMustNotRemoveWhenKeyOnlyExistInInternal() throws Exception

// then we should still find it in internal
assertThat( keyCount(), is( 1 ) );
assertEqualsKey( keyAt( 0, INTERNAL ), keyToRemove );
assertEquals( "same seed", getSeed( keyAt( 0, INTERNAL ) ), getSeed( keyToRemove ) );

// but not in right leaf
goTo( readCursor, currentRightChild );
assertThat( keyCount(), is( keyCountInRightChild - 1 ) );
assertEqualsKey( keyAt( 0, LEAF ), key( getSeed( keyToRemove ) + 1 ) );
assertEquals( "same seed", getSeed( keyAt( 0, LEAF ) ), getSeed( key( getSeed( keyToRemove ) + 1 ) ) );

// and when we remove same key again, nothing should change
assertNull( remove( keyToRemove, dontCare ) );
Expand Down Expand Up @@ -655,7 +656,7 @@ public void mustNotRebalanceFromRightToLeft() throws Exception
// ... no keys should have moved from right sibling
int actualKeyCount = TreeNode.keyCount( readCursor );
assertEquals( "actualKeyCount=" + actualKeyCount + ", expectedKeyCount=" + expectedKeyCount, expectedKeyCount, actualKeyCount );
assertEqualsKey( keyAt( 0, LEAF ), primKey );
assertEquals( "same seed", getSeed( primKey ), getSeed( keyAt( 0, LEAF ) ) );
}

@Test
Expand Down Expand Up @@ -745,19 +746,21 @@ public void mustPropagateStructureOnMergeFromLeft() throws Exception
}
goTo( readCursor, rootId );
assertEquals( 2, keyCount() );
long oldRootId = readCursor.getCurrentPageId();
long oldLeftChild = childAt( readCursor, 0, stableGeneration, unstableGeneration );
long oldMiddleChild = childAt( readCursor, 1, stableGeneration, unstableGeneration );
long oldRightChild = childAt( readCursor, 2, stableGeneration, unstableGeneration );
assertSiblings( oldLeftChild, oldMiddleChild, oldRightChild );

// WHEN
generationManager.checkpoint();
KEY middleKey = keyAt( 0, INTERNAL ); // Should be located in middle leaf
KEY middleKey = keyAt( oldMiddleChild,0, LEAF ); // Should be located in middle leaf
remove( middleKey, dontCare );
allKeys.remove( middleKey );

// THEN
// old root should still have 2 keys
goTo( readCursor, oldRootId );
assertEquals( 2, keyCount() );

// new root should have only 1 key
Expand Down Expand Up @@ -1593,7 +1596,7 @@ private int keyCount()
return TreeNode.keyCount( readCursor );
}

private void initialize()
void initialize()
{
node.initializeLeaf( cursor, stableGeneration, unstableGeneration );
updateRoot();
Expand Down Expand Up @@ -1659,12 +1662,12 @@ private void printTree() throws IOException
cursor.next( currentPageId );
}

private KEY key( long seed )
KEY key( long seed )
{
return layout.key( seed );
}

private VALUE value( long seed )
VALUE value( long seed )
{
return layout.value( seed );
}
Expand Down Expand Up @@ -1707,7 +1710,7 @@ private void assertSiblingOrderAndPointers( long... children ) throws IOExceptio
goTo( readCursor, currentPageId );
}

private KEY keyAt( long nodeId, int pos, TreeNode.Type type )
KEY keyAt( long nodeId, int pos, TreeNode.Type type )
{
KEY readKey = layout.newKey();
long prevId = readCursor.getCurrentPageId();
Expand All @@ -1732,7 +1735,7 @@ private VALUE valueAt( int pos )
return node.valueAt( readCursor, layout.newValue(), pos );
}

private void insert( KEY key, VALUE value ) throws IOException
void insert( KEY key, VALUE value ) throws IOException
{
insert( key, value, overwrite() );
}
Expand Down
Expand Up @@ -33,10 +33,15 @@ public RawBytes newKey()

/**
 * Copies the complete byte content of {@code rawBytes} into {@code into} by delegating to the
 * length-bounded {@code copyKey} overload with the full array length.
 *
 * @param rawBytes key to copy from.
 * @param into key to copy into.
 * @return the result of the length-bounded {@code copyKey} overload.
 */
@Override
public RawBytes copyKey( RawBytes rawBytes, RawBytes into )
{
    int fullLength = rawBytes.bytes.length;
    return copyKey( rawBytes, into, fullLength );
}

private RawBytes copyKey( RawBytes rawBytes, RawBytes into, int length )
{
byte[] src = rawBytes.bytes;
byte[] target = new byte[src.length];
System.arraycopy( src, 0, target, 0, src.length );
byte[] target = new byte[length];
System.arraycopy( src, 0, target, 0, length );
into.bytes = target;
return into;
}
Expand Down Expand Up @@ -99,6 +104,13 @@ public boolean fixedSize()
return false;
}

/**
* Initializes into with a copy of the first {@link Long#BYTES} bytes of right. The left key is
* not inspected.
* NOTE(review): this relies on every key holding at least 8 bytes and on those leading bytes
* being enough to order keys; presumably they hold the seed of this test layout. Confirm.
* @param left key that is less than right, unused here.
* @param right key that is greater than left, source of the splitter bytes.
* @param into will be initialized with the result.
*/
@Override
public void minimalSplitter( RawBytes left, RawBytes right, RawBytes into )
{
// Minimal splitter will always be the first 8B
copyKey( right, into, Long.BYTES );
}

@Override
public long identifier()
{
Expand Down
Expand Up @@ -82,6 +82,23 @@ public boolean fixedSize()
return false;
}

/**
 * Initializes {@code into} with the shortest leading part of {@code right} (best effort) that
 * still separates {@code left} from {@code right}, so that internal tree nodes do not need to
 * store the complete string.
 * <p>
 * The result ends right after the first byte at which the two keys differ. If no byte differs
 * within the common length because {@code left} is a strict prefix of {@code right}, one more
 * byte of {@code right} is kept. Without that extra byte the result would contain exactly the
 * bytes of {@code left} while carrying the ids of {@code right}, and would no longer be
 * guaranteed to sort after {@code left}.
 * NOTE(review): this assumes keys are ordered byte-wise with the ids as tie-breaker; confirm
 * against the key comparison.
 *
 * @param left key that is less than right.
 * @param right key that is greater than left.
 * @param into will be initialized with the result; its ids are copied from right.
 */
@Override
public void minimalSplitter( StringSchemaKey left, StringSchemaKey right, StringSchemaKey into )
{
    int maxLength = Math.min( left.bytesLength, right.bytesLength );
    int firstIndexToDiffer = 0;
    while ( firstIndexToDiffer < maxLength && left.bytes[firstIndexToDiffer] == right.bytes[firstIndexToDiffer] )
    {
        firstIndexToDiffer++;
    }
    // Convert from array index to length, but never ask for more bytes than right actually has.
    int targetLength = Math.min( firstIndexToDiffer + 1, right.bytesLength );
    into.copyFrom( right, targetLength );
}

@Override
public String toString()
{
Expand Down
Expand Up @@ -189,8 +189,13 @@ public void writeString( char value )

void copyFrom( StringSchemaKey key )
{
setBytesLength( key.bytesLength );
System.arraycopy( key.bytes, 0, bytes, 0, key.bytesLength );
copyFrom( key, key.bytesLength );
}

/**
* Makes this key a copy of the given key, limited to its first targetLength bytes.
* Entity id and compare id are taken over from the given key unchanged.
*
* @param key key to copy from.
* @param targetLength number of bytes to copy from the start of the byte array of key.
* NOTE(review): not validated here; presumably callers guarantee
* 0 <= targetLength <= key.bytesLength. Confirm.
*/
void copyFrom( StringSchemaKey key, int targetLength )
{
// Length is set before copying; presumably this also makes sure bytes can hold targetLength bytes. Confirm.
setBytesLength( targetLength );
System.arraycopy( key.bytes, 0, bytes, 0, targetLength );
setEntityId( key.getEntityId() );
setCompareId( key.getCompareId() );
}
Expand Down
Expand Up @@ -144,8 +144,8 @@ private void assertSameHits( Hit<KEY, VALUE>[] expectedHits, Hit<KEY, VALUE>[] a
{
Arrays.sort( expectedHits, comparator );
Arrays.sort( actualHits, comparator );
assertEquals( format( "Array length differ%nExpected:%s%nActual:%s",
Arrays.toString( expectedHits ), Arrays.toString( actualHits ) ),
assertEquals( format( "Array length differ%nExpected:%d, Actual:%d",
expectedHits.length, actualHits.length ),
expectedHits.length, actualHits.length );

for ( int i = 0; i < expectedHits.length; i++ )
Expand Down
Expand Up @@ -28,6 +28,7 @@
import org.neo4j.graphdb.Label;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Transaction;
import org.neo4j.graphdb.factory.GraphDatabaseSettings;
import org.neo4j.graphdb.schema.IndexCreator;
import org.neo4j.internal.kernel.api.IndexOrder;
import org.neo4j.internal.kernel.api.IndexQuery;
Expand Down Expand Up @@ -56,7 +57,8 @@ public class NativeStringIndexingIT
private static final String KEY2 = "key2";

@Rule
public final DatabaseRule db = new EmbeddedDatabaseRule();
public final DatabaseRule db = new EmbeddedDatabaseRule()
.withSetting( GraphDatabaseSettings.default_schema_provider, GraphDatabaseSettings.SchemaIndex.NATIVE20.providerName() );
@Rule
public final RandomRule random = new RandomRule();

Expand Down

0 comments on commit e77d7ec

Please sign in to comment.