diff --git a/community/index/src/main/java/org/neo4j/index/gbptree/GBPTree.java b/community/index/src/main/java/org/neo4j/index/gbptree/GBPTree.java index e058a2ac81e8a..626fa76c79ad8 100644 --- a/community/index/src/main/java/org/neo4j/index/gbptree/GBPTree.java +++ b/community/index/src/main/java/org/neo4j/index/gbptree/GBPTree.java @@ -417,12 +417,14 @@ private void checkOutOfBounds( PageCursor cursor ) private class SingleIndexWriter implements IndexWriter { private final InternalTreeLogic treeLogic; + private final StructurePropagation structurePropagation; private PageCursor cursor; private IndexWriter.Options options; private final byte[] tmp = new byte[0]; SingleIndexWriter( InternalTreeLogic treeLogic ) { + this.structurePropagation = new StructurePropagation<>( layout.newKey() ); this.treeLogic = treeLogic; } @@ -444,11 +446,12 @@ public void merge( KEY key, VALUE value, ValueMerger valueMerger ) throws { goToRoot( cursor ); - SplitResult split = treeLogic.insert( cursor, key, value, valueMerger, options, + treeLogic.insert( cursor, structurePropagation, key, value, valueMerger, options, stableGeneration, unstableGeneration ); - if ( split != null ) + if ( structurePropagation.hasSplit ) { + structurePropagation.hasSplit = false; // New root long newRootId = acquireNewId(); if ( !cursor.next( newRootId ) ) @@ -457,12 +460,17 @@ public void merge( KEY key, VALUE value, ValueMerger valueMerger ) throws } bTreeNode.initializeInternal( cursor, stableGeneration, unstableGeneration ); - bTreeNode.insertKeyAt( cursor, split.primKey, 0, 0, tmp ); + bTreeNode.insertKeyAt( cursor, structurePropagation.primKey, 0, 0, tmp ); bTreeNode.setKeyCount( cursor, 1 ); - bTreeNode.setChildAt( cursor, split.left, 0, stableGeneration, unstableGeneration ); - bTreeNode.setChildAt( cursor, split.right, 1, stableGeneration, unstableGeneration ); + bTreeNode.setChildAt( cursor, structurePropagation.left, 0, stableGeneration, unstableGeneration ); + bTreeNode.setChildAt( cursor, 
structurePropagation.right, 1, stableGeneration, unstableGeneration ); rootId = newRootId; } + else if ( structurePropagation.hasNewGen ) + { + rootId = structurePropagation.left; + } + structurePropagation.hasNewGen = false; checkOutOfBounds( cursor ); } @@ -472,7 +480,13 @@ public VALUE remove( KEY key ) throws IOException { goToRoot( cursor ); - VALUE result = treeLogic.remove( cursor, key, layout.newValue(), stableGeneration, unstableGeneration ); + VALUE result = treeLogic.remove( cursor, structurePropagation, key, layout.newValue(), + stableGeneration, unstableGeneration ); + if ( structurePropagation.hasNewGen ) + { + structurePropagation.hasNewGen = false; + rootId = structurePropagation.left; + } checkOutOfBounds( cursor ); return result; diff --git a/community/index/src/main/java/org/neo4j/index/gbptree/InternalTreeLogic.java b/community/index/src/main/java/org/neo4j/index/gbptree/InternalTreeLogic.java index 10f56f337f9a4..2fba9f41471fa 100644 --- a/community/index/src/main/java/org/neo4j/index/gbptree/InternalTreeLogic.java +++ b/community/index/src/main/java/org/neo4j/index/gbptree/InternalTreeLogic.java @@ -27,7 +27,6 @@ import static java.lang.Integer.max; import static java.lang.Integer.min; - import static org.neo4j.index.gbptree.KeySearch.isHit; import static org.neo4j.index.gbptree.KeySearch.positionOf; import static org.neo4j.index.gbptree.KeySearch.search; @@ -58,7 +57,7 @@ * During this split, reader could see this state: * L[E1,E2,E4,E5] -> R[E3,E4,E5] * ^ ^ x x - * Reader will need to ignore lower keys already seen (TODO how to do in non-unique index?) + * Reader will need to ignore lower keys already seen, assuming unique keys * * SCENARIO2 (new key ends up in left leaf) *
@@ -82,8 +81,6 @@ class InternalTreeLogic
     private final byte[] tmpForKeys;
     private final byte[] tmpForValues;
     private final byte[] tmpForChildren;
-    private final SplitResult internalSplitResult = new SplitResult<>();
-    private final SplitResult leafSplitResult = new SplitResult<>();
     private final Layout layout;
     private final KEY readKey;
     private final VALUE readValue;
@@ -97,30 +94,49 @@ class InternalTreeLogic
         this.tmpForKeys = new byte[(maxKeyCount + 1) * layout.keySize()];
         this.tmpForValues = new byte[(maxKeyCount + 1) * layout.valueSize()];
         this.tmpForChildren = new byte[(maxKeyCount + 2) * bTreeNode.childSize()];
-        this.internalSplitResult.primKey = layout.newKey();
-        this.leafSplitResult.primKey = layout.newKey();
         this.readKey = layout.newKey();
         this.readValue = layout.newValue();
     }
 
     /**
+     * Insert {@code key} and associate it with {@code value} if {@code key} does not already exist in
+     * tree.
+     * 

+ * If {@code key} already exists in tree, {@code valueMerger} will be used to decide how to merge existing value + * with {@code value}. + *

+ * Insert may cause structural changes in the tree in the form of splits and/or new generations of nodes being created. + * Note that a split in a leaf can propagate all the way up to the root node. + *

+ * Structural changes in tree that need to propagate to the level above will be reported through the provided + * {@link StructurePropagation} by overwriting state. This is safe because structural changes happen one level + * at a time. + * {@link StructurePropagation} is provided from outside to minimize garbage. + *

+ * When this method returns, {@code structurePropagation} will be populated with information about split or new + * gen version of root. This needs to be handled by caller. + *

* Leaves cursor at same page as when called. No guarantees on offset. - * @param cursor {@link org.neo4j.io.pagecache.PageCursor} pinned to page where insertion is to be done. - * @param key key to be inserted - * @param value value to be associated with key - * @param valueMerger {@link ValueMerger} for deciding what to do with existing keys - * @param options options for this insert + * + * @param cursor {@link PageCursor} pinned to page where insertion is to be done. + * @param structurePropagation {@link StructurePropagation} used to report structure changes between tree levels. + * @param key key to be inserted + * @param value value to be associated with key + * @param valueMerger {@link ValueMerger} for deciding what to do with existing keys + * @param options options for this insert * @param stableGeneration stable generation, i.e. generations <= this generation are considered stable. * @param unstableGeneration unstable generation, i.e. generation which is under development right now. - * @return {@link SplitResult} from insert to be used caller. 
- * @throws IOException on cursor failure + * @throws IOException on cursor failure */ - public SplitResult insert( PageCursor cursor, KEY key, VALUE value, ValueMerger valueMerger, - IndexWriter.Options options, long stableGeneration, long unstableGeneration ) throws IOException + void insert( PageCursor cursor, StructurePropagation structurePropagation, KEY key, VALUE value, + ValueMerger valueMerger, IndexWriter.Options options, + long stableGeneration, long unstableGeneration ) throws IOException { if ( bTreeNode.isLeaf( cursor ) ) { - return insertInLeaf( cursor, key, value, valueMerger, options, stableGeneration, unstableGeneration ); + insertInLeaf( cursor, structurePropagation, key, value, valueMerger, options, + stableGeneration, unstableGeneration ); + return; } int keyCount = bTreeNode.keyCount( cursor ); @@ -135,40 +151,45 @@ public SplitResult insert( PageCursor cursor, KEY key, VALUE value, ValueMe long childId = bTreeNode.childAt( cursor, pos, stableGeneration, unstableGeneration ); PointerChecking.checkChildPointer( childId ); - goTo( cursor, childId ); + goTo( cursor, childId, stableGeneration, unstableGeneration ); - SplitResult split = insert( cursor, key, value, valueMerger, options, stableGeneration, - unstableGeneration ); + insert( cursor, structurePropagation, key, value, valueMerger, options, stableGeneration, unstableGeneration ); - goTo( cursor, currentId ); + goTo( cursor, currentId, stableGeneration, unstableGeneration ); - if ( split != null ) + if ( structurePropagation.hasNewGen ) { - return insertInInternal( cursor, currentId, keyCount, split.primKey, split.right, options, - stableGeneration, unstableGeneration ); + structurePropagation.hasNewGen = false; + bTreeNode.setChildAt( cursor, structurePropagation.left, pos, stableGeneration, unstableGeneration ); + } + if ( structurePropagation.hasSplit ) + { + structurePropagation.hasSplit = false; + insertInInternal( cursor, structurePropagation, currentId, keyCount, 
structurePropagation.primKey, + structurePropagation.right, options, stableGeneration, unstableGeneration ); } - return null; } /** * Leaves cursor at same page as when called. No guarantees on offset. - * + *

* Insertion in internal is always triggered by a split in child. * The result of a split is a primary key that is sent upwards in the b+tree and the newly created right child. * - * @param cursor {@link org.neo4j.io.pagecache.PageCursor} pinned to page containing internal node, - * current node - * @param nodeId id of current node - * @param keyCount the key count of current node - * @param primKey the primary key to be inserted - * @param rightChild the right child of primKey - * @return {@link SplitResult} from insert to be used caller. - * @throws IOException on cursor failure + * @param cursor {@link PageCursor} pinned to page containing internal node, + * current node + * @param structurePropagation {@link StructurePropagation} used to report structure changes between tree levels. + * @param nodeId id of current node + * @param keyCount the key count of current node + * @param primKey the primary key to be inserted + * @param rightChild the right child of primKey + * @throws IOException on cursor failure */ - private SplitResult insertInInternal( PageCursor cursor, long nodeId, int keyCount, - KEY primKey, long rightChild, IndexWriter.Options options, long stableGeneration, long unstableGeneration ) - throws IOException + private void insertInInternal( PageCursor cursor, StructurePropagation structurePropagation, + long nodeId, int keyCount, KEY primKey, long rightChild, IndexWriter.Options options, + long stableGeneration, long unstableGeneration ) throws IOException { + createUnstableVersionIfNeeded( cursor, structurePropagation, stableGeneration, unstableGeneration ); if ( keyCount < bTreeNode.internalMaxKeyCount() ) { // No overflow @@ -183,34 +204,32 @@ private SplitResult insertInInternal( PageCursor cursor, long nodeId, int k // Increase key count bTreeNode.setKeyCount( cursor, keyCount + 1 ); - return null; + return; } // Overflow - return splitInternal( cursor, nodeId, primKey, rightChild, keyCount, options, + splitInternal( cursor, 
structurePropagation, nodeId, primKey, rightChild, keyCount, options, stableGeneration, unstableGeneration ); } /** - * * Leaves cursor at same page as when called. No guarantees on offset. - * + *

* Split in internal node caused by an insertion of primKey and newRightChild * - * @param cursor {@link org.neo4j.io.pagecache.PageCursor} pinned to page containing internal node, fullNode. - * @param fullNode id of node to be split. - * @param primKey primary key to be inserted, causing the split + * @param cursor {@link PageCursor} pinned to page containing internal node, fullNode. + * @param structurePropagation {@link StructurePropagation} used to report structure changes between tree levels. + * @param fullNode id of node to be split. + * @param primKey primary key to be inserted, causing the split * @param newRightChild right child of primKey - * @param keyCount key count for fullNode - * @return {@link SplitResult} from insert to be used caller. - * @throws IOException on cursor failure + * @param keyCount key count for fullNode + * @throws IOException on cursor failure */ - private SplitResult splitInternal( PageCursor cursor, long fullNode, KEY primKey, long newRightChild, - int keyCount, IndexWriter.Options options, long stableGeneration, long unstableGeneration ) - throws IOException + private void splitInternal( PageCursor cursor, StructurePropagation structurePropagation, + long fullNode, KEY primKey, long newRightChild, int keyCount, IndexWriter.Options options, + long stableGeneration, long unstableGeneration ) throws IOException { long current = cursor.getCurrentPageId(); - long newLeft = current; long oldRight = bTreeNode.rightSibling( cursor, stableGeneration, unstableGeneration ); long newRight = idProvider.acquireNewId(); @@ -227,16 +246,16 @@ private SplitResult splitInternal( PageCursor cursor, long fullNode, KEY pr int keyCountAfterInsert = keyCount + 1; int middlePos = middle( keyCountAfterInsert, options.splitRetentionFactor() ); - SplitResult split = internalSplitResult; - split.left = newLeft; - split.right = newRight; + structurePropagation.hasSplit = true; + structurePropagation.left = current; + structurePropagation.right = newRight; { 
// Update new right // NOTE: don't include middle - goTo( cursor, newRight ); + goTo( cursor, newRight, stableGeneration, unstableGeneration ); bTreeNode.initializeInternal( cursor, stableGeneration, unstableGeneration ); bTreeNode.setRightSibling( cursor, oldRight, stableGeneration, unstableGeneration ); - bTreeNode.setLeftSibling( cursor, newLeft, stableGeneration, unstableGeneration ); + bTreeNode.setLeftSibling( cursor, current, stableGeneration, unstableGeneration ); bTreeNode.writeKeys( cursor, tmpForKeys, middlePos + 1, 0, keyCountAfterInsert - (middlePos + 1) ); bTreeNode.writeChildren( cursor, tmpForChildren, middlePos + 1, 0, keyCountAfterInsert - middlePos /*there's one more child than key to copy*/ ); @@ -247,19 +266,19 @@ private SplitResult splitInternal( PageCursor cursor, long fullNode, KEY pr PageCursor buffer = ByteArrayPageCursor.wrap( tmpForKeys, middleOffset, bTreeNode.keySize() ); // Populate split result - layout.readKey( buffer, split.primKey ); + layout.readKey( buffer, structurePropagation.primKey ); } // Update old right with new left sibling (newRight) if ( oldRight != TreeNode.NO_NODE_FLAG ) { - goTo( cursor, oldRight ); + goTo( cursor, oldRight, stableGeneration, unstableGeneration ); bTreeNode.setLeftSibling( cursor, newRight, stableGeneration, unstableGeneration ); } // Update left node // Move cursor back to left - goTo( cursor, fullNode ); + goTo( cursor, fullNode, stableGeneration, unstableGeneration ); bTreeNode.setKeyCount( cursor, middlePos ); if ( pos < middlePos ) { @@ -274,8 +293,6 @@ private SplitResult splitInternal( PageCursor cursor, long fullNode, KEY pr } bTreeNode.setRightSibling( cursor, newRight, stableGeneration, unstableGeneration ); - - return split; } private static int middle( int keyCountAfterInsert, float splitLeftChildSize ) @@ -288,19 +305,20 @@ private static int middle( int keyCountAfterInsert, float splitLeftChildSize ) /** * Leaves cursor at same page as when called. No guarantees on offset. - * + *

* Split in leaf node caused by an insertion of key and value * - * @param cursor {@link org.neo4j.io.pagecache.PageCursor} pinned to page containing leaf node targeted for - * insertion. - * @param key key to be inserted - * @param value value to be associated with key - * @param valueMerger {@link ValueMerger} for deciding what to do with existing keys - * @param options options for this insert - * @return {@link SplitResult} from insert to be used caller. - * @throws IOException on cursor failure + * @param cursor {@link PageCursor} pinned to page containing leaf node targeted for + * insertion. + * @param structurePropagation {@link StructurePropagation} used to report structure changes between tree levels. + * @param key key to be inserted + * @param value value to be associated with key + * @param valueMerger {@link ValueMerger} for deciding what to do with existing keys + * @param options options for this insert + * @throws IOException on cursor failure */ - private SplitResult insertInLeaf( PageCursor cursor, KEY key, VALUE value, ValueMerger valueMerger, + private void insertInLeaf( PageCursor cursor, StructurePropagation structurePropagation, + KEY key, VALUE value, ValueMerger valueMerger, IndexWriter.Options options, long stableGeneration, long unstableGeneration ) throws IOException { int keyCount = bTreeNode.keyCount( cursor ); @@ -313,12 +331,15 @@ private SplitResult insertInLeaf( PageCursor cursor, KEY key, VALUE value, VALUE mergedValue = valueMerger.merge( readValue, value ); if ( mergedValue != null ) { + createUnstableVersionIfNeeded( cursor, structurePropagation, stableGeneration, unstableGeneration ); // simple, just write the merged value right in there bTreeNode.setValueAt( cursor, mergedValue, pos ); } - return null; // No split has occurred + return; // No split has occurred } + createUnstableVersionIfNeeded( cursor, structurePropagation, stableGeneration, unstableGeneration ); + if ( keyCount < bTreeNode.leafMaxKeyCount() ) { // No 
overflow, insert key and value @@ -326,26 +347,27 @@ private SplitResult insertInLeaf( PageCursor cursor, KEY key, VALUE value, bTreeNode.insertValueAt( cursor, value, pos, keyCount, tmpForValues ); bTreeNode.setKeyCount( cursor, keyCount + 1 ); - return null; // No split has occurred + return; // No split has occurred } - // Overflow, split leaf - return splitLeaf( cursor, key, value, keyCount, options, stableGeneration, unstableGeneration ); + splitLeaf( cursor, structurePropagation, key, value, keyCount, options, stableGeneration, unstableGeneration ); } /** * Leaves cursor at same page as when called. No guarantees on offset. * Cursor is expected to be pointing to full leaf. - * @param cursor cursor pointing into full (left) leaf that should be split in two. - * @param newKey key to be inserted - * @param newValue value to be inserted (in association with key) - * @param keyCount number of keys in this leaf (it was already read anyway) - * @param options options for this insert - * @return {@link SplitResult} with necessary information to inform parent - * @throws IOException if cursor.next( newRight ) fails + * + * @param cursor cursor pointing into full (left) leaf that should be split in two. + * @param structurePropagation {@link StructurePropagation} used to report structure changes between tree levels. 
+ * @param newKey key to be inserted + * @param newValue value to be inserted (in association with key) + * @param keyCount number of keys in this leaf (it was already read anyway) + * @param options options for this insert + * @throws IOException if cursor.next( newRight ) fails */ - private SplitResult splitLeaf( PageCursor cursor, KEY newKey, VALUE newValue, int keyCount, - IndexWriter.Options options, long stableGeneration, long unstableGeneration ) throws IOException + private void splitLeaf( PageCursor cursor, StructurePropagation structurePropagation, + KEY newKey, VALUE newValue, int keyCount, IndexWriter.Options options, + long stableGeneration, long unstableGeneration ) throws IOException { // To avoid moving cursor between pages we do all operations on left node first. // Save data that needs transferring and then add it to right node. @@ -363,7 +385,6 @@ private SplitResult splitLeaf( PageCursor cursor, KEY newKey, VALUE newValu // long current = cursor.getCurrentPageId(); - long newLeft = current; long oldRight = bTreeNode.rightSibling( cursor, stableGeneration, unstableGeneration ); long newRight = idProvider.acquireNewId(); @@ -425,24 +446,24 @@ private SplitResult splitLeaf( PageCursor cursor, KEY newKey, VALUE newValu // We now have everything we need to start working on newRight // and everything that needs to be updated in left has been so. - SplitResult split = leafSplitResult; - split.left = newLeft; - split.right = newRight; + structurePropagation.hasSplit = true; + structurePropagation.left = current; + structurePropagation.right = newRight; if ( middlePos == pos ) { - layout.copyKey( newKey, split.primKey ); + layout.copyKey( newKey, structurePropagation.primKey ); } else { - bTreeNode.keyAt( cursor, split.primKey, pos < middlePos ? middlePos - 1 : middlePos ); + bTreeNode.keyAt( cursor, structurePropagation.primKey, pos < middlePos ? 
middlePos - 1 : middlePos ); } { // Update new right - goTo( cursor, newRight ); + goTo( cursor, newRight, stableGeneration, unstableGeneration ); bTreeNode.initializeLeaf( cursor, stableGeneration, unstableGeneration ); bTreeNode.setRightSibling( cursor, oldRight, stableGeneration, unstableGeneration ); - bTreeNode.setLeftSibling( cursor, newLeft, stableGeneration, unstableGeneration ); + bTreeNode.setLeftSibling( cursor, current, stableGeneration, unstableGeneration ); bTreeNode.writeKeys( cursor, tmpForKeys, middlePos, 0, keyCountAfterInsert - middlePos ); bTreeNode.writeValues( cursor, tmpForValues, middlePos, 0, keyCountAfterInsert - middlePos ); bTreeNode.setKeyCount( cursor, keyCountAfterInsert - middlePos ); @@ -451,12 +472,12 @@ private SplitResult splitLeaf( PageCursor cursor, KEY newKey, VALUE newValu // Update old right with new left sibling (newRight) if ( oldRight != TreeNode.NO_NODE_FLAG ) { - goTo( cursor, oldRight ); + goTo( cursor, oldRight, stableGeneration, unstableGeneration ); bTreeNode.setLeftSibling( cursor, newRight, stableGeneration, unstableGeneration ); } // Update left child - goTo( cursor, current ); + goTo( cursor, current, stableGeneration, unstableGeneration ); bTreeNode.setKeyCount( cursor, middlePos ); // If pos < middle. Write shifted values to left node. Else, don't write anything. if ( pos < middlePos ) @@ -465,16 +486,37 @@ private SplitResult splitLeaf( PageCursor cursor, KEY newKey, VALUE newValu bTreeNode.writeValues( cursor, tmpForValues, pos, pos, middlePos - pos ); } bTreeNode.setRightSibling( cursor, newRight, stableGeneration, unstableGeneration ); - - return split; } - public VALUE remove( PageCursor cursor, KEY key, VALUE into, long stableGeneration, long unstableGeneration ) - throws IOException + /** + * Remove given {@code key} and associated value from tree if it exists. The removed value will be stored in + * provided {@code into} which will be returned for convenience. + *

+ * If the given {@code key} does not exist in tree, return {@code null}. + *

+ * Structural changes in tree that need to propagate to the level above will be reported through the provided + * {@link StructurePropagation} by overwriting state. This is safe because structural changes happen one level + * at a time. + * {@link StructurePropagation} is provided from outside to minimize garbage. + *

+ * Leaves cursor at same page as when called. No guarantees on offset. + * + * @param cursor {@link PageCursor} pinned to page where remove should traversing tree from. + * @param structurePropagation {@link StructurePropagation} used to report structure changes between tree levels. + * @param key key to be removed + * @param into {@code VALUE} instance to write removed value to + * @param stableGeneration stable generation, i.e. generations <= this generation are considered stable. + * @param unstableGeneration unstable generation, i.e. generation which is under development right now. + * @return Provided {@code into}, populated with removed value for convenience if {@code key} was removed. + * Otherwise {@code null}. + * @throws IOException on cursor failure + */ + VALUE remove( PageCursor cursor, StructurePropagation structurePropagation, KEY key, VALUE into, + long stableGeneration, long unstableGeneration ) throws IOException { if ( bTreeNode.isLeaf( cursor ) ) { - return removeFromLeaf( cursor, key, into ); + return removeFromLeaf( cursor, structurePropagation, key, into, stableGeneration, unstableGeneration ); } int keyCount = bTreeNode.keyCount( cursor ); @@ -486,16 +528,41 @@ public VALUE remove( PageCursor cursor, KEY key, VALUE into, long stableGenerati } long currentId = cursor.getCurrentPageId(); - goTo( cursor, bTreeNode.childAt( cursor, pos, stableGeneration, unstableGeneration ) ); + long childId = bTreeNode.childAt( cursor, pos, stableGeneration, unstableGeneration ); + goTo( cursor, childId, stableGeneration, unstableGeneration ); - VALUE result = remove( cursor, key, into, stableGeneration, unstableGeneration ); + VALUE result = remove( cursor, structurePropagation, key, into, stableGeneration, unstableGeneration ); - goTo( cursor, currentId ); + goTo( cursor, currentId, stableGeneration, unstableGeneration ); + if ( structurePropagation.hasNewGen ) + { + structurePropagation.hasNewGen = false; + bTreeNode.setChildAt( cursor, 
structurePropagation.left, pos, stableGeneration, unstableGeneration ); + } return result; } - private VALUE removeFromLeaf( PageCursor cursor, KEY key, VALUE into ) + /** + * Remove given {@code key} and associated value from tree if it exists. The removed value will be stored in + * provided {@code into} which will be returned for convenience. + *

+ * If the given {@code key} does not exist in tree, return {@code null}. + *

+ * Leaves cursor at same page as when called. No guarantees on offset. + * + * @param cursor {@link PageCursor} pinned to page where remove is to be done. + * @param structurePropagation {@link StructurePropagation} used to report structure changes between tree levels. + * @param key key to be removed + * @param into {@code VALUE} instance to write removed value to + * @param stableGeneration stable generation, i.e. generations <= this generation are considered stable. + * @param unstableGeneration unstable generation, i.e. generation which is under development right now. + * @return Provided {@code into}, populated with removed value for convenience if {@code key} was removed. + * Otherwise {@code null}. + * @throws IOException on cursor failure + */ + private VALUE removeFromLeaf( PageCursor cursor, StructurePropagation structurePropagation, + KEY key, VALUE into, long stableGeneration, long unstableGeneration ) throws IOException { int keyCount = bTreeNode.keyCount( cursor ); @@ -509,6 +576,8 @@ private VALUE removeFromLeaf( PageCursor cursor, KEY key, VALUE into ) } // Remove key/value + createUnstableVersionIfNeeded( cursor, structurePropagation, stableGeneration, unstableGeneration ); + bTreeNode.removeKeyAt( cursor, pos, keyCount, tmpForKeys ); bTreeNode.valueAt( cursor, into, pos ); bTreeNode.removeValueAt( cursor, pos, keyCount, tmpForValues ); @@ -519,11 +588,95 @@ private VALUE removeFromLeaf( PageCursor cursor, KEY key, VALUE into ) return into; } - private void goTo( PageCursor cursor, long childId ) throws IOException + /** + * Create a new node and copy content from current node (where {@code cursor} sits) if current node is not already + * of {@code unstableGeneration}. + *

+ * Neighbouring nodes' sibling pointers will be updated to point to new node. + *

+ * Current node will be updated with new gen pointer to new node. + *

+ * {@code structurePropagation} will be updated with information about this new node so that it can report to + * level above. + * + * @param cursor {@link PageCursor} pinned to page containing node to potentially create a new version of + * @param structurePropagation {@link StructurePropagation} used to report structure changes between tree levels. + * @param stableGeneration stable generation, i.e. generations <= this generation are considered stable. + * @param unstableGeneration unstable generation, i.e. generation which is under development right now. + * @throws IOException on cursor failure + */ + private void createUnstableVersionIfNeeded( PageCursor cursor, StructurePropagation structurePropagation, + long stableGeneration, long unstableGeneration ) throws IOException + { + long nodeGen = bTreeNode.gen( cursor ); + if ( nodeGen == unstableGeneration ) + { + // Don't copy + return; + } + + // Do copy + long newGenId = idProvider.acquireNewId(); + try ( PageCursor newGenCursor = cursor.openLinkedCursor( newGenId ) ) + { + cursor.copyTo( 0, newGenCursor, 0, cursor.getCurrentPageSize() ); + bTreeNode.setGen( newGenCursor, unstableGeneration ); + } + + // Insert new gen pointer in old stable version + // (stableNode) + // | + // [newgen] + // | + // v + // (newUnstableNode) + bTreeNode.setNewGen( cursor, newGenId, stableGeneration, unstableGeneration ); + + // Redirect sibling pointers + // ---------[leftSibling]---------(stableNode)----------[rightSibling]--------- + // | | | + // | [newgen] | + // | | | + // v v v + // (leftSiblingOfStableNode) -[rightSibling]-> (newUnstableNode) <-[leftSibling]- (rightSiblingOfStableNode) + long leftSibling = bTreeNode.leftSibling( cursor, stableGeneration, unstableGeneration ); + long rightSibling = bTreeNode.rightSibling( cursor, stableGeneration, unstableGeneration ); + if ( leftSibling != TreeNode.NO_NODE_FLAG ) + { + goTo( cursor, leftSibling, stableGeneration, unstableGeneration ); + bTreeNode.setRightSibling( 
cursor, newGenId, stableGeneration, unstableGeneration ); + } + if ( rightSibling != TreeNode.NO_NODE_FLAG ) + { + goTo( cursor, rightSibling, stableGeneration, unstableGeneration ); + bTreeNode.setLeftSibling( cursor, newGenId, stableGeneration, unstableGeneration ); + } + + // Leave cursor at new tree node + goTo( cursor, newGenId, stableGeneration, unstableGeneration ); + + // Propagate structure change + structurePropagation.hasNewGen = true; + structurePropagation.left = newGenId; + } + + private void goTo( PageCursor cursor, long childId, long stableGeneration, long unstableGeneration ) + throws IOException { if ( !cursor.next( childId ) ) { throw new IllegalStateException( "Could not go to " + childId ); } + verifyGen( cursor, stableGeneration, unstableGeneration ); + } + + private void verifyGen( PageCursor cursor, long stableGeneration, long unstableGeneration ) + { + long gen = bTreeNode.gen( cursor ); + if ( ( gen > stableGeneration && gen < unstableGeneration ) || gen > unstableGeneration ) + { + throw new IllegalStateException( "Reached a node with generation=" + gen + + ", stableGeneration=" + stableGeneration + ", unstableGeneration=" + unstableGeneration ); + } } } diff --git a/community/index/src/main/java/org/neo4j/index/gbptree/SplitResult.java b/community/index/src/main/java/org/neo4j/index/gbptree/StructurePropagation.java similarity index 85% rename from community/index/src/main/java/org/neo4j/index/gbptree/SplitResult.java rename to community/index/src/main/java/org/neo4j/index/gbptree/StructurePropagation.java index 347900b39ab49..71b3d579fa1de 100644 --- a/community/index/src/main/java/org/neo4j/index/gbptree/SplitResult.java +++ b/community/index/src/main/java/org/neo4j/index/gbptree/StructurePropagation.java @@ -25,9 +25,16 @@ * * @param type of key. 
*/ -class SplitResult +class StructurePropagation { - KEY primKey; + boolean hasNewGen; + boolean hasSplit; + final KEY primKey; long left; long right; + + StructurePropagation( KEY primKey ) + { + this.primKey = primKey; + } } diff --git a/community/index/src/main/java/org/neo4j/index/gbptree/TreeNode.java b/community/index/src/main/java/org/neo4j/index/gbptree/TreeNode.java index 6d1ca8d0a1e91..e32d2ee75ced1 100644 --- a/community/index/src/main/java/org/neo4j/index/gbptree/TreeNode.java +++ b/community/index/src/main/java/org/neo4j/index/gbptree/TreeNode.java @@ -187,10 +187,10 @@ void setLeftSibling( PageCursor cursor, long leftSiblingId, long stableGeneratio GenSafePointerPair.write( cursor, leftSiblingId, stableGeneration, unstableGeneration ); } - void setNewGen( PageCursor cursor, long pageId, long stableGeneration, long unstableGeneration ) + void setNewGen( PageCursor cursor, long newGenId, long stableGeneration, long unstableGeneration ) { cursor.setOffset( BYTE_POS_NEWGEN ); - GenSafePointerPair.write( cursor, pageId, stableGeneration, unstableGeneration ); + GenSafePointerPair.write( cursor, newGenId, stableGeneration, unstableGeneration ); } // BODY METHODS diff --git a/community/index/src/test/java/org/neo4j/index/gbptree/InternalTreeLogicTest.java b/community/index/src/test/java/org/neo4j/index/gbptree/InternalTreeLogicTest.java index 0d7d19a29b75f..56ba705cf4dc1 100644 --- a/community/index/src/test/java/org/neo4j/index/gbptree/InternalTreeLogicTest.java +++ b/community/index/src/test/java/org/neo4j/index/gbptree/InternalTreeLogicTest.java @@ -32,7 +32,7 @@ import static org.hamcrest.CoreMatchers.is; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; @@ -63,6 +63,7 @@ public class InternalTreeLogicTest private final MutableLong readKey = 
new MutableLong(); private final MutableLong readValue = new MutableLong(); private final byte[] tmp = new byte[pageSize]; + private final StructurePropagation structurePropagation = new StructurePropagation<>( layout.newKey() ); @Rule public RandomRule random = new RandomRule(); @@ -149,7 +150,8 @@ public void modifierMustSplitWhenInsertingMiddleOfFullLeaf() throws Exception // then long middle = maxKeyCount; - assertNotNull( insert( middle, middle ) ); + insert( middle, middle ); + assertTrue( structurePropagation.hasSplit ); } @Test @@ -159,12 +161,14 @@ public void modifierMustSplitWhenInsertingLastInFullLeaf() throws Exception long key = 0; while ( key < maxKeyCount ) { - assertNull( insert( key, key ) ); + insert( key, key ); + assertFalse( structurePropagation.hasSplit ); key++; } // then - assertNotNull( insert( key, key ) ); // Should cause a split + insert( key, key ); + assertTrue( structurePropagation.hasSplit ); // Should cause a split } @Test @@ -174,11 +178,13 @@ public void modifierMustSplitWhenInsertingFirstInFullLeaf() throws Exception for ( int i = 0; i < maxKeyCount; i++ ) { long key = i + 1; - assertNull( insert( key, key ) ); + insert( key, key ); + assertFalse( structurePropagation.hasSplit ); } // then - assertNotNull( insert( 0L, 0L ) ); + insert( 0L, 0L ); + assertTrue( structurePropagation.hasSplit ); } @Test @@ -189,12 +195,14 @@ public void modifierMustLeaveCursorOnSamePageAfterSplitInLeaf() throws Exception long key = 0; while ( key < maxKeyCount ) { - assertNull( insert( key, key ) ); + insert( key, key ); + assertFalse( structurePropagation.hasSplit ); key++; } // when - assertNotNull( insert( key, key ) ); // Should cause a split + insert( key, key ); + assertTrue( structurePropagation.hasSplit ); // Should cause a split // then assertThat( cursor.getCurrentPageId(), is( pageId ) ); @@ -213,9 +221,9 @@ public void modifierMustUpdatePointersInSiblingsToSplit() throws Exception } // First split - SplitResult split = insert( 
someLargeNumber - i, i ); + insert( someLargeNumber - i, i ); i++; - newRootFromSplit( split ); + newRootFromSplit( structurePropagation ); // Assert child pointers and sibling pointers are intact after split in root long child0 = node.childAt( cursor, 0, STABLE_GENERATION, UNSTABLE_GENERATION ); @@ -326,21 +334,20 @@ public void modifierMustRemoveLastInFullLeaf() throws Exception public void modifierMustRemoveFromLeftChild() throws Exception { // given - SplitResult split = null; - for ( int i = 0; split == null; i++ ) + for ( int i = 0; !structurePropagation.hasSplit; i++ ) { - split = insert( i, i ); + insert( i, i ); } - long rootId = newRootFromSplit( split ); + long rootId = newRootFromSplit( structurePropagation ); // when - cursor.next( split.left ); + cursor.next( structurePropagation.left ); assertThat( keyAt( 0 ), is( 0L ) ); cursor.next( rootId ); remove( 0, readValue ); // then - cursor.next( split.left ); + cursor.next( structurePropagation.left ); assertThat( keyAt( 0 ), is( 1L ) ); } @@ -348,19 +355,18 @@ public void modifierMustRemoveFromLeftChild() throws Exception public void modifierMustRemoveFromRightChildButNotFromInternalWithHitOnInternalSearch() throws Exception { // given - SplitResult split = null; - for ( int i = 0; split == null; i++ ) + for ( int i = 0; !structurePropagation.hasSplit; i++ ) { - split = insert( i, i ); + insert( i, i ); } - long rootId = newRootFromSplit( split ); + long rootId = newRootFromSplit( structurePropagation ); // when key to remove exists in internal - Long keyToRemove = split.primKey.getValue(); + Long keyToRemove = structurePropagation.primKey.getValue(); assertThat( keyAt( 0 ), is( keyToRemove ) ); // and as first key in right child - cursor.next( split.right ); + cursor.next( structurePropagation.right ); int keyCountInRightChild = node.keyCount( cursor ); assertThat( keyAt( 0 ), is( keyToRemove ) ); @@ -373,7 +379,7 @@ public void modifierMustRemoveFromRightChildButNotFromInternalWithHitOnInternalS 
assertThat( keyAt( 0 ), is( keyToRemove ) ); // but not in right leaf - cursor.next( split.right ); + cursor.next( structurePropagation.right ); assertThat( node.keyCount( cursor ), is( keyCountInRightChild - 1 ) ); assertThat( keyAt( 0 ), is( keyToRemove + 1 ) ); } @@ -382,16 +388,15 @@ public void modifierMustRemoveFromRightChildButNotFromInternalWithHitOnInternalS public void modifierMustLeaveCursorOnInitialPageAfterRemove() throws Exception { // given - SplitResult split = null; - for ( int i = 0; split == null; i++ ) + for ( int i = 0; !structurePropagation.hasSplit; i++ ) { - split = insert( i, i ); + insert( i, i ); } - long rootId = newRootFromSplit( split ); + long rootId = newRootFromSplit( structurePropagation ); // when assertThat( cursor.getCurrentPageId(), is( rootId) ); - remove( split.primKey.getValue(), readValue ); + remove( structurePropagation.primKey.getValue(), readValue ); // then assertThat( cursor.getCurrentPageId(), is( rootId ) ); @@ -422,19 +427,18 @@ public void modifierMustNotRemoveWhenKeyDoesNotExist() throws Exception public void modifierMustNotRemoveWhenKeyOnlyExistInInternal() throws Exception { // given - SplitResult split = null; - for ( int i = 0; split == null; i++ ) + for ( int i = 0; !structurePropagation.hasSplit; i++ ) { - split = insert( i, i ); + insert( i, i ); } - long rootId = newRootFromSplit( split ); + long rootId = newRootFromSplit( structurePropagation ); // when key to remove exists in internal - Long keyToRemove = split.primKey.getValue(); + Long keyToRemove = structurePropagation.primKey.getValue(); assertThat( keyAt( 0 ), is( keyToRemove ) ); // and as first key in right child - cursor.next( split.right ); + cursor.next( structurePropagation.right ); int keyCountInRightChild = node.keyCount( cursor ); assertThat( keyAt( 0 ), is( keyToRemove ) ); @@ -447,7 +451,7 @@ public void modifierMustNotRemoveWhenKeyOnlyExistInInternal() throws Exception assertThat( keyAt( 0 ), is( keyToRemove ) ); // but not in right 
leaf - cursor.next( split.right ); + cursor.next( structurePropagation.right ); assertThat( node.keyCount( cursor ), is( keyCountInRightChild - 1 ) ); assertThat( keyAt( 0 ), is( keyToRemove + 1 ) ); @@ -465,10 +469,10 @@ public void modifierMustProduceConsistentTreeWithRandomInserts() throws Exceptio int numberOfEntries = 100_000; for ( int i = 0; i < numberOfEntries; i++ ) { - SplitResult split = insert( random.nextLong(), random.nextLong() ); - if ( split != null ) + insert( random.nextLong(), random.nextLong() ); + if ( structurePropagation.hasSplit ) { - newRootFromSplit( split ); + newRootFromSplit( structurePropagation ); } } @@ -542,12 +546,11 @@ public void shouldMergeValueInRootLeaf() throws Exception public void shouldMergeValueInLeafLeftOfParentKey() throws Exception { // GIVEN - SplitResult split = null; - for ( int i = 0; split == null; i++ ) + for ( int i = 0; !structurePropagation.hasSplit; i++ ) { - split = insert( i, i ); + insert( i, i ); } - newRootFromSplit( split ); + newRootFromSplit( structurePropagation ); // WHEN long key = 1; @@ -556,7 +559,7 @@ public void shouldMergeValueInLeafLeftOfParentKey() throws Exception insert( key, toAdd, ADDER ); // THEN - cursor.next( split.left ); + cursor.next( structurePropagation.left ); int searchResult = KeySearch.search( cursor, node, key( key ), new MutableLong(), node.keyCount( cursor ) ); assertTrue( KeySearch.isHit( searchResult ) ); int pos = KeySearch.positionOf( searchResult ); @@ -569,21 +572,20 @@ public void shouldMergeValueInLeafLeftOfParentKey() throws Exception public void shouldMergeValueInLeafAtParentKey() throws Exception { // GIVEN - SplitResult split = null; - for ( int i = 0; split == null; i++ ) + for ( int i = 0; !structurePropagation.hasSplit; i++ ) { - split = insert( i, i ); + insert( i, i ); } - newRootFromSplit( split ); + newRootFromSplit( structurePropagation ); // WHEN - long key = split.primKey.longValue(); + long key = structurePropagation.primKey.longValue(); long 
baseValue = key; int toAdd = 5; insert( key, toAdd, ADDER ); // THEN - cursor.next( split.right ); + cursor.next( structurePropagation.right ); int searchResult = KeySearch.search( cursor, node, key( key ), new MutableLong(), node.keyCount( cursor ) ); assertTrue( KeySearch.isHit( searchResult ) ); int pos = KeySearch.positionOf( searchResult ); @@ -601,12 +603,12 @@ public void shouldMergeValueInLeafBetweenTwoParentKeys() throws Exception long firstSplitPrimKey = -1; for ( int i = 0; rootId == -1 || node.keyCount( cursor ) == 1; i++ ) { - SplitResult split = insert( i, i ); - if ( split != null ) + insert( i, i ); + if ( structurePropagation.hasSplit ) { - rootId = newRootFromSplit( split ); - middle = split.right; - firstSplitPrimKey = split.primKey.longValue(); + rootId = newRootFromSplit( structurePropagation ); + middle = structurePropagation.right; + firstSplitPrimKey = structurePropagation.primKey.longValue(); } } @@ -637,8 +639,9 @@ private MutableLong key( long key ) return new MutableLong( key ); } - private long newRootFromSplit( SplitResult split ) throws IOException + private long newRootFromSplit( StructurePropagation split ) throws IOException { + assertTrue( split.hasSplit ); long rootId = id.acquireNewId(); cursor.next( rootId ); node.initializeInternal( cursor, STABLE_GENERATION, UNSTABLE_GENERATION ); @@ -673,24 +676,25 @@ private Long valueAt( int pos ) return node.valueAt( cursor, readValue, pos ).getValue(); } - private SplitResult insert( long key, long value ) throws IOException + private void insert( long key, long value ) throws IOException { - return insert( key, value, overwrite() ); + insert( key, value, overwrite() ); } - private SplitResult insert( long key, long value, ValueMerger valueMerger ) - throws IOException + private void insert( long key, long value, ValueMerger valueMerger ) throws IOException { + structurePropagation.hasSplit = false; + structurePropagation.hasNewGen = false; insertKey.setValue( key ); 
insertValue.setValue( value ); - return treeLogic.insert( cursor, insertKey, insertValue, valueMerger, DEFAULTS, + treeLogic.insert( cursor, structurePropagation, insertKey, insertValue, valueMerger, DEFAULTS, STABLE_GENERATION, UNSTABLE_GENERATION ); } private MutableLong remove( long key, MutableLong into ) throws IOException { insertKey.setValue( key ); - return treeLogic.remove( cursor, insertKey, into, STABLE_GENERATION, UNSTABLE_GENERATION ); + return treeLogic.remove( cursor, structurePropagation, insertKey, into, STABLE_GENERATION, UNSTABLE_GENERATION ); } private static class SimpleIdProvider implements IdProvider