From 3d4bc3ed95318c038d91c5038e4f36262aedafe7 Mon Sep 17 00:00:00 2001 From: Anton Persson Date: Mon, 16 Jan 2017 15:19:46 +0100 Subject: [PATCH] GBPTree - Rebalance implementation When removing keys from a leaf, an underflow can occur. If the left sibling and the current node combined have more keys than can fit in a single node, a rebalance occurs. A rebalance moves keys from the left sibling to the current node to even out the number of keys between them. This makes the keys more evenly distributed among the leaves. Because we change how the keys are split among the two nodes, the keys splitting the ranges further up in the tree need to be replaced. This is communicated through StructurePropagation. A new version of the left sibling and/or the current node is created as necessary, and this is also communicated up the tree through StructurePropagation. --- .../internal/gbptree/InternalTreeLogic.java | 152 ++++++++++++++++-- .../gbptree/StructurePropagation.java | 15 ++ .../index/internal/gbptree/TreeNode.java | 6 + 3 files changed, 161 insertions(+), 12 deletions(-) diff --git a/community/index/src/main/java/org/neo4j/index/internal/gbptree/InternalTreeLogic.java b/community/index/src/main/java/org/neo4j/index/internal/gbptree/InternalTreeLogic.java index ea6c79fc54a00..425fdb89f01e0 100644 --- a/community/index/src/main/java/org/neo4j/index/internal/gbptree/InternalTreeLogic.java +++ b/community/index/src/main/java/org/neo4j/index/internal/gbptree/InternalTreeLogic.java @@ -405,7 +405,8 @@ private void insertInInternal( PageCursor cursor, StructurePropagation stru KEY primKey, long rightChild, long stableGeneration, long unstableGeneration ) throws IOException { - createUnstableVersionIfNeeded( cursor, structurePropagation, stableGeneration, unstableGeneration ); + createUnstableVersionIfNeeded( cursor, structurePropagation, StructurePropagation.UPDATE_MID_CHILD, + stableGeneration, unstableGeneration ); if ( keyCount < bTreeNode.internalMaxKeyCount() ) { // No overflow @@ -607,14 
+608,16 @@ private void insertInLeaf( PageCursor cursor, StructurePropagation structur VALUE mergedValue = valueMerger.merge( readValue, value ); if ( mergedValue != null ) { - createUnstableVersionIfNeeded( cursor, structurePropagation, stableGeneration, unstableGeneration ); + createUnstableVersionIfNeeded( cursor, structurePropagation, StructurePropagation.UPDATE_MID_CHILD, + stableGeneration, unstableGeneration ); // simple, just write the merged value right in there bTreeNode.setValueAt( cursor, mergedValue, pos ); } return; // No split has occurred } - createUnstableVersionIfNeeded( cursor, structurePropagation, stableGeneration, unstableGeneration ); + createUnstableVersionIfNeeded( cursor, structurePropagation, StructurePropagation.UPDATE_MID_CHILD, + stableGeneration, unstableGeneration ); if ( keyCount < bTreeNode.leafMaxKeyCount() ) { @@ -834,7 +837,9 @@ VALUE remove( PageCursor cursor, StructurePropagation structurePropagation, return null; } - while ( structurePropagation.hasMidChildUpdate ) + while ( structurePropagation.hasMidChildUpdate || + structurePropagation.hasLeftKeyReplace || + structurePropagation.hasLeftChildUpdate ) { int pos = levels[currentLevel].childPos; if ( !popLevel( cursor ) ) @@ -849,11 +854,55 @@ VALUE remove( PageCursor cursor, StructurePropagation structurePropagation, bTreeNode.setChildAt( cursor, structurePropagation.midChild, pos, stableGeneration, unstableGeneration ); } + if ( structurePropagation.hasLeftKeyReplace && levels[currentLevel].covers( structurePropagation.leftKey ) ) + { + structurePropagation.hasLeftKeyReplace = false; + replaceKeyInInternal( cursor, structurePropagation, structurePropagation.leftKey, pos - 1, + stableGeneration, unstableGeneration ); + } + if ( structurePropagation.hasLeftChildUpdate ) + { + structurePropagation.hasLeftChildUpdate = false; + if ( pos == 0 ) + { + updateRightMostChildInLeftSibling( cursor, structurePropagation.leftChild, + stableGeneration, unstableGeneration ); + } + else + { 
+ bTreeNode.setChildAt( cursor, structurePropagation.leftChild, pos - 1, + stableGeneration, unstableGeneration ); + } + } } return into; } + private void updateRightMostChildInLeftSibling( PageCursor cursor, long child, long stableGeneration, + long unstableGeneration ) throws IOException + { + long currentPageId = cursor.getCurrentPageId(); + long leftSibling = bTreeNode.leftSibling( cursor, stableGeneration, unstableGeneration ); + // Left sibling is not allowed to be NO_NODE here because that means there is a child node with no parent + PointerChecking.checkPointer( leftSibling, false ); + + bTreeNode.goTo( cursor, "left sibling", leftSibling ); + int keyCount = bTreeNode.keyCount( cursor ); + bTreeNode.setChildAt( cursor, child, keyCount, stableGeneration, unstableGeneration ); + + bTreeNode.goTo( cursor, "back to current from left sibling", currentPageId ); + } + + private void replaceKeyInInternal( PageCursor cursor, StructurePropagation structurePropagation, KEY key, + int keyPosition, long stableGeneration, long unstableGeneration ) throws IOException + { + createUnstableVersionIfNeeded( cursor, structurePropagation, StructurePropagation.UPDATE_MID_CHILD, + stableGeneration, unstableGeneration ); + + bTreeNode.setKeyAt( cursor, key, keyPosition ); + } + /** * Remove given {@code key} and associated value from tree if it exists. The removed value will be stored in * provided {@code into} which will be returned for convenience. 
@@ -876,7 +925,6 @@ private boolean removeFromLeaf( PageCursor cursor, StructurePropagation str { int keyCount = bTreeNode.keyCount( cursor ); - // No overflow, insert key and value int search = search( cursor, key, readKey, keyCount ); int pos = positionOf( search ); boolean hit = isHit( search ); @@ -885,16 +933,94 @@ private boolean removeFromLeaf( PageCursor cursor, StructurePropagation str return false; } - // Remove key/value - createUnstableVersionIfNeeded( cursor, structurePropagation, stableGeneration, unstableGeneration ); + createUnstableVersionIfNeeded( cursor, structurePropagation, StructurePropagation.UPDATE_MID_CHILD, + stableGeneration, unstableGeneration ); + keyCount = simplyRemoveFromLeaf( cursor, into, keyCount, pos ); + + if ( keyCount < (bTreeNode.leafMaxKeyCount() + 1) / 2 ) + { + // Underflow + underflowInLeaf( cursor, structurePropagation, keyCount, stableGeneration, unstableGeneration ); + } + return true; + } + + private void underflowInLeaf( PageCursor cursor, StructurePropagation structurePropagation, int keyCount, + long stableGeneration, long unstableGeneration ) throws IOException + { + long leftSibling = bTreeNode.leftSibling( cursor, stableGeneration, unstableGeneration ); + PointerChecking.checkPointer( leftSibling, true ); + + if ( TreeNode.isNode( leftSibling ) ) + { + // Go to left sibling and read stuff + try ( PageCursor leftSiblingCursor = cursor.openLinkedCursor( GenSafePointerPair.pointer( leftSibling ) ) ) + { + leftSiblingCursor.next(); + int leftSiblingKeyCount = bTreeNode.keyCount( leftSiblingCursor ); + + if ( keyCount + leftSiblingKeyCount >= bTreeNode.leafMaxKeyCount() ) + { + createUnstableVersionIfNeeded( leftSiblingCursor, structurePropagation, + StructurePropagation.UPDATE_LEFT_CHILD, stableGeneration, unstableGeneration ); + rebalanceLeaf( cursor, leftSiblingCursor, structurePropagation, keyCount, leftSiblingKeyCount ); + } + } + } + } + + private void rebalanceLeaf( PageCursor cursor, PageCursor 
leftSiblingCursor, + StructurePropagation structurePropagation, int keyCount, int leftSiblingKeyCount ) + { + int totalKeyCount = keyCount + leftSiblingKeyCount; + int keyCountInLeftSiblingAfterRebalance = totalKeyCount / 2; + int numberOfKeysToMove = leftSiblingKeyCount - keyCountInLeftSiblingAfterRebalance; + + // Push keys and values in right sibling to the right to make room at position 0 + bTreeNode.insertKeySlotsAt( cursor, 0, numberOfKeysToMove, keyCount ); + bTreeNode.insertValueSlotsAt( cursor, 0, numberOfKeysToMove, keyCount ); + + // Move keys from left sibling to right sibling + int sourceOffsetKey = bTreeNode.keyOffset( keyCountInLeftSiblingAfterRebalance ); + int targetOffsetKey = bTreeNode.keyOffset( 0 ); + int bytesToCopyKey = numberOfKeysToMove * bTreeNode.keySize(); + leftSiblingCursor.copyTo( sourceOffsetKey, cursor, targetOffsetKey, bytesToCopyKey ); + bTreeNode.setKeyCount( cursor, keyCount + numberOfKeysToMove ); + + // Move values from left sibling to right sibling, byte count is based on value size (not key size) + int sourceOffsetValue = bTreeNode.valueOffset( keyCountInLeftSiblingAfterRebalance ); + int targetOffsetValue = bTreeNode.valueOffset( 0 ); + int bytesToCopyValue = numberOfKeysToMove * bTreeNode.valueSize(); + leftSiblingCursor.copyTo( sourceOffsetValue, cursor, targetOffsetValue, bytesToCopyValue ); + bTreeNode.setKeyCount( leftSiblingCursor, leftSiblingKeyCount - numberOfKeysToMove ); + + // Propagate change: first key of current node is the new split key to use in parent + structurePropagation.hasLeftKeyReplace = true; + bTreeNode.keyAt( cursor, structurePropagation.leftKey, 0 ); + } + + /** + * Remove key and value on given position and decrement key count. Deleted value is stored in {@code into}. + * Key count after remove is returned. 
+ * @param cursor Cursor pinned to node from which to remove + * @param into VALUE in which to store removed value + * @param keyCount Key count of node before remove + * @param pos Position to remove from + * @return keyCount after remove + */ + private int simplyRemoveFromLeaf( PageCursor cursor, VALUE into, int keyCount, int pos ) + { + // Remove key/value bTreeNode.removeKeyAt( cursor, pos, keyCount ); bTreeNode.valueAt( cursor, into, pos ); bTreeNode.removeValueAt( cursor, pos, keyCount ); // Decrease key count - bTreeNode.setKeyCount( cursor, keyCount - 1 ); - return true; + int newKeyCount = keyCount - 1; + bTreeNode.setKeyCount( cursor, newKeyCount ); + return newKeyCount; } /** @@ -910,12 +1036,15 @@ private boolean removeFromLeaf( PageCursor cursor, StructurePropagation str * * @param cursor {@link PageCursor} pinned to page containing node to potentially create a new version of * @param structurePropagation {@link StructurePropagation} used to report structure changes between tree levels. + * @param structureUpdate {@link StructurePropagation.StructureUpdate} defines how to update structurePropagation + * if a new unstable version is created * @param stableGeneration stable generation, i.e. generations <= this generation are considered stable. * @param unstableGeneration unstable generation, i.e. generation which is under development right now. 
* @throws IOException on cursor failure */ private void createUnstableVersionIfNeeded( PageCursor cursor, StructurePropagation structurePropagation, - long stableGeneration, long unstableGeneration ) throws IOException + StructurePropagation.StructureUpdate structureUpdate, long stableGeneration, long unstableGeneration ) + throws IOException { long oldGenId = cursor.getCurrentPageId(); long nodeGen = bTreeNode.gen( cursor ); @@ -970,8 +1099,7 @@ private void createUnstableVersionIfNeeded( PageCursor cursor, StructurePropagat bTreeNode.goTo( cursor, "new gen", newGenId ); // Propagate structure change - structurePropagation.hasMidChildUpdate = true; - structurePropagation.midChild = newGenId; + structureUpdate.update( structurePropagation, newGenId ); idProvider.releaseId( stableGeneration, unstableGeneration, oldGenId ); } diff --git a/community/index/src/main/java/org/neo4j/index/internal/gbptree/StructurePropagation.java b/community/index/src/main/java/org/neo4j/index/internal/gbptree/StructurePropagation.java index 200fc20eae7d9..1498c9e39390c 100644 --- a/community/index/src/main/java/org/neo4j/index/internal/gbptree/StructurePropagation.java +++ b/community/index/src/main/java/org/neo4j/index/internal/gbptree/StructurePropagation.java @@ -85,4 +85,19 @@ void clear() hasRightKeyInsert = false; hasLeftKeyReplace = false; } + + interface StructureUpdate + { + void update( StructurePropagation structurePropagation, long childId ); + } + + static final StructureUpdate UPDATE_LEFT_CHILD = ( sp, childId ) -> { + sp.hasLeftChildUpdate = true; + sp.leftChild = childId; + }; + + static final StructureUpdate UPDATE_MID_CHILD = ( sp, childId ) -> { + sp.hasMidChildUpdate = true; + sp.midChild = childId; + }; } diff --git a/community/index/src/main/java/org/neo4j/index/internal/gbptree/TreeNode.java b/community/index/src/main/java/org/neo4j/index/internal/gbptree/TreeNode.java index daa172cc8a958..4d2005d4ca38e 100644 --- 
a/community/index/src/main/java/org/neo4j/index/internal/gbptree/TreeNode.java +++ b/community/index/src/main/java/org/neo4j/index/internal/gbptree/TreeNode.java @@ -256,6 +256,12 @@ private void removeSlotAt( PageCursor cursor, int pos, int keyCount, int baseOff } } + void setKeyAt( PageCursor cursor, KEY key, int pos ) + { + cursor.setOffset( keyOffset( pos ) ); + layout.writeKey( cursor, key ); + } + VALUE valueAt( PageCursor cursor, VALUE value, int pos ) { cursor.setOffset( valueOffset( pos ) );