diff --git a/community/common/src/test/java/org/neo4j/test/rule/RandomRule.java b/community/common/src/test/java/org/neo4j/test/rule/RandomRule.java
index 0833caf30010..4834d58f3fbd 100644
--- a/community/common/src/test/java/org/neo4j/test/rule/RandomRule.java
+++ b/community/common/src/test/java/org/neo4j/test/rule/RandomRule.java
@@ -138,6 +138,11 @@ public long nextLong( long n )
return Math.abs( nextLong() ) % n;
}
+ public long nextLong( long origin, long bound )
+ {
+ return nextLong( (bound - origin) + 1L ) + origin;
+ }
+
// ============================
// Methods from Randoms
// ============================
diff --git a/community/index/src/main/java/org/neo4j/index/gbptree/ConsistencyChecker.java b/community/index/src/main/java/org/neo4j/index/gbptree/ConsistencyChecker.java
index f43700d3bb8f..ca12bdf92093 100644
--- a/community/index/src/main/java/org/neo4j/index/gbptree/ConsistencyChecker.java
+++ b/community/index/src/main/java/org/neo4j/index/gbptree/ConsistencyChecker.java
@@ -28,6 +28,8 @@
import static java.lang.String.format;
+import static org.neo4j.index.gbptree.GenSafePointerPair.pointer;
+
/**
*
* Checks:
@@ -57,11 +59,11 @@ class ConsistencyChecker
this.unstableGeneration = unstableGeneration;
}
- public boolean check( PageCursor cursor ) throws IOException
+ public boolean check( PageCursor cursor, long expectedGen ) throws IOException
{
assertOnTreeNode( cursor );
KeyRange openRange = new KeyRange<>( comparator, null, null, layout, null );
- boolean result = checkSubtree( cursor, openRange, 0 );
+ boolean result = checkSubtree( cursor, openRange, expectedGen, 0 );
// Assert that rightmost node on each level has empty right sibling.
rightmostPerLevel.forEach( RightmostInChain::assertLast );
@@ -77,7 +79,8 @@ private void assertOnTreeNode( PageCursor cursor )
}
}
- private boolean checkSubtree( PageCursor cursor, KeyRange range, int level ) throws IOException
+ private boolean checkSubtree( PageCursor cursor, KeyRange range, long expectedGen, int level )
+ throws IOException
{
// check header pointers
assertNoCrashOrBrokenPointerInGSPP(
@@ -89,9 +92,13 @@ private boolean checkSubtree( PageCursor cursor, KeyRange range, int level
long pageId = assertSiblings( cursor, level );
+ checkNewGenPointerGen( cursor );
+
+ assertPointerGenMatchesGen( cursor, expectedGen );
+
if ( node.isInternal( cursor ) )
{
- assertKeyOrderAndSubtrees( cursor, pageId, range, level );
+ assertKeyOrderAndSubtrees( cursor, pageId, range, expectedGen, level );
}
else if ( node.isLeaf( cursor ) )
{
@@ -105,6 +112,34 @@ else if ( node.isLeaf( cursor ) )
return true;
}
+ private void assertPointerGenMatchesGen( PageCursor cursor, long expectedGen )
+ {
+ long nodeGen = node.gen( cursor );
+ assert nodeGen <= expectedGen : "Expected node:" + cursor.getCurrentPageId() + " gen:" + nodeGen +
+ " to be ≤ pointer gen:" + expectedGen;
+ }
+
+ private void checkNewGenPointerGen( PageCursor cursor ) throws IOException
+ {
+ long newGen = node.newGen( cursor, stableGeneration, unstableGeneration );
+ if ( TreeNode.isNode( newGen ) )
+ {
+ System.err.println( "WARNING: we ended up on an old generation " + cursor.getCurrentPageId() +
+ " which had newGen:" + pointer( newGen ) );
+ long newGenGen = node.pointerGen( cursor, newGen );
+ long origin = cursor.getCurrentPageId();
+ node.goTo( cursor, "newGen", newGen );
+ try
+ {
+ assertPointerGenMatchesGen( cursor, newGenGen );
+ }
+ finally
+ {
+ node.goTo( cursor, "back", origin );
+ }
+ }
+ }
+
// Assumption: We traverse the tree from left to right on every level
private long assertSiblings( PageCursor cursor, int level )
{
@@ -119,7 +154,8 @@ private long assertSiblings( PageCursor cursor, int level )
return rightmost.assertNext( cursor );
}
- private void assertKeyOrderAndSubtrees( PageCursor cursor, long pageId, KeyRange range, int level )
+ private void assertKeyOrderAndSubtrees( PageCursor cursor, long pageId, KeyRange range, long expectedGen,
+ int level )
throws IOException
{
int keyCount = node.keyCount( cursor );
@@ -138,7 +174,8 @@ private void assertKeyOrderAndSubtrees( PageCursor cursor, long pageId, KeyRange
}
long child = childAt( cursor, pos );
- node.goTo( cursor, "child at pos " + pos, child, stableGeneration, unstableGeneration );
+ long childGen = node.pointerGen( cursor, child );
+ node.goTo( cursor, "child at pos " + pos, child );
if ( pos == 0 )
{
childRange = range.restrictRight( readKey );
@@ -147,8 +184,8 @@ private void assertKeyOrderAndSubtrees( PageCursor cursor, long pageId, KeyRange
{
childRange = range.restrictLeft( prev ).restrictRight( readKey );
}
- checkSubtree( cursor, childRange, level + 1 );
- node.goTo( cursor, "parent", pageId, stableGeneration, unstableGeneration );
+ checkSubtree( cursor, childRange, childGen, level + 1 );
+ node.goTo( cursor, "parent", pageId );
layout.copyKey( readKey, prev );
pos++;
@@ -156,10 +193,11 @@ private void assertKeyOrderAndSubtrees( PageCursor cursor, long pageId, KeyRange
// Check last child
long child = childAt( cursor, pos );
- node.goTo( cursor, "child at pos " + pos, child, stableGeneration, unstableGeneration );
+ long childGen = node.pointerGen( cursor, child );
+ node.goTo( cursor, "child at pos " + pos, child );
childRange = range.restrictLeft( prev );
- checkSubtree( cursor, childRange, level + 1 );
- node.goTo( cursor, "parent", pageId, stableGeneration, unstableGeneration );
+ checkSubtree( cursor, childRange, childGen, level + 1 );
+ node.goTo( cursor, "parent", pageId );
}
private long childAt( PageCursor cursor, int pos )
diff --git a/community/index/src/main/java/org/neo4j/index/gbptree/GBPTree.java b/community/index/src/main/java/org/neo4j/index/gbptree/GBPTree.java
index 6bc964d82bf2..312e75850d11 100644
--- a/community/index/src/main/java/org/neo4j/index/gbptree/GBPTree.java
+++ b/community/index/src/main/java/org/neo4j/index/gbptree/GBPTree.java
@@ -42,7 +42,6 @@
import org.neo4j.io.pagecache.PagedFile;
import static java.lang.String.format;
-
import static org.neo4j.index.gbptree.Generation.generation;
import static org.neo4j.index.gbptree.Generation.stableGeneration;
import static org.neo4j.index.gbptree.Generation.unstableGeneration;
@@ -134,6 +133,11 @@ default void checkpointCompleted()
}
}
+ interface RootCatchup
+ {
+ long goTo( PageCursor cursor ) throws IOException;
+ }
+
/**
* No-op {@link Monitor}.
*/
@@ -213,10 +217,23 @@ default void checkpointCompleted()
*/
private volatile long generation;
+ /**
+ * Guard for being able to read rootId and rootGen atomically. Read by looping.
+ *
+ * Odd -> being updated
+ * Even -> stable
+ */
+ private volatile int rootUpdateGuard;
+
/**
* Current page id which contains the root of the tree.
*/
- private volatile long rootId = IdSpace.MIN_TREE_NODE_ID;
+ private long rootId = IdSpace.MIN_TREE_NODE_ID;
+
+ /**
+ * Generation of current {@link #rootId}.
+ */
+ private long rootGen;
/**
* Supplier of generation to readers. This supplier will actually very rarely be used, because normally
@@ -226,6 +243,8 @@ default void checkpointCompleted()
*/
private final LongSupplier generationSupplier = () -> generation;
+ private final RootCatchup rootCatchup = this::goToRootAndGetGeneration;
+
/**
* Called on certain events.
*/
@@ -255,6 +274,7 @@ public GBPTree( PageCache pageCache, File indexFile, Layout layout, i
this.indexFile = indexFile;
this.monitor = monitor;
this.generation = Generation.generation( GenSafePointer.MIN_GENERATION, GenSafePointer.MIN_GENERATION + 1 );
+ this.rootGen = Generation.unstableGeneration( generation );
this.layout = layout;
this.pagedFile = openOrCreate( pageCache, indexFile, tentativePageSize, layout );
this.bTreeNode = new TreeNode<>( pageSize, layout );
@@ -343,7 +363,7 @@ private void loadState( PagedFile pagedFile ) throws IOException
Pair states = readStatePages( pagedFile );
TreeState state = TreeStatePair.selectNewestValidState( states );
generation = Generation.generation( state.stableGeneration(), state.unstableGeneration() );
- rootId = state.rootId();
+ setRoot( state.rootId(), state.rootGen() );
long lastId = state.lastId();
long freeListWritePageId = state.freeListWritePageId();
@@ -361,7 +381,7 @@ private void writeState( PagedFile pagedFile ) throws IOException
try ( PageCursor cursor = pagedFile.io( pageToOverwrite, PagedFile.PF_SHARED_WRITE_LOCK ) )
{
PageCursorUtil.goTo( cursor, "state page", pageToOverwrite );
- TreeState.write( cursor, stableGeneration( generation ), unstableGeneration( generation ), rootId,
+ TreeState.write( cursor, stableGeneration( generation ), unstableGeneration( generation ), rootId, rootGen,
freeList.lastId(), freeList.writePageId(), freeList.readPageId(),
freeList.writePos(), freeList.readPos() );
checkOutOfBounds( cursor );
@@ -459,17 +479,36 @@ private PagedFile mapWithCorrectPageSize( PageCache pageCache, File indexFile, P
@Override
public RawCursor,IOException> seek( KEY fromInclusive, KEY toExclusive ) throws IOException
{
- PageCursor cursor = pagedFile.io( rootId, PagedFile.PF_SHARED_READ_LOCK );
KEY key = layout.newKey();
VALUE value = layout.newValue();
long generation = this.generation;
long stableGeneration = stableGeneration( generation );
long unstableGeneration = unstableGeneration( generation );
- goToRoot( cursor, stableGeneration, unstableGeneration );
+
+ PageCursor cursor = pagedFile.io( 0L /*ignored*/, PagedFile.PF_SHARED_READ_LOCK );
+ long rootGen = goToRootAndGetGeneration( cursor );
// Returns cursor which is now initiated with left-most leaf node for the specified range
return new SeekCursor<>( cursor, key, value, bTreeNode, fromInclusive, toExclusive, layout,
- stableGeneration, unstableGeneration, generationSupplier );
+ stableGeneration, unstableGeneration, generationSupplier, rootCatchup, rootGen );
+ }
+
+ private long goToRootAndGetGeneration( PageCursor cursor ) throws IOException
+ {
+ int rootUpdateGuard;
+ long rootId;
+ long rootGen;
+ // Atomically read rootId and rootGen with this little loop
+ do
+ {
+ rootUpdateGuard = this.rootUpdateGuard;
+ rootId = this.rootId;
+ rootGen = this.rootGen;
+ }
+ while ( rootUpdateGuard != this.rootUpdateGuard || rootUpdateGuard % 2 == 1 );
+ long generation = this.generation;
+ goToRoot( cursor, rootId, stableGeneration( generation ), unstableGeneration( generation ) );
+ return rootGen;
}
@Override
@@ -560,7 +599,13 @@ private void releaseWriter()
private void goToRoot( PageCursor cursor, long stableGeneration, long unstableGeneration ) throws IOException
{
- bTreeNode.goTo( cursor, "root", rootId, stableGeneration, unstableGeneration );
+ goToRoot( cursor, rootId, stableGeneration, unstableGeneration );
+ }
+
+ private void goToRoot( PageCursor cursor, long rootId, long stableGeneration, long unstableGeneration )
+ throws IOException
+ {
+ bTreeNode.goTo( cursor, "root", rootId );
}
/**
@@ -579,14 +624,19 @@ public void prepareForRecovery() throws IOException
pagedFile.flushAndForce();
}
- // Utility method
void printTree() throws IOException
+ {
+ printTree( true );
+ }
+
+ // Utility method
+ void printTree( boolean printValues ) throws IOException
{
try ( PageCursor cursor = pagedFile.io( rootId, PagedFile.PF_SHARED_READ_LOCK ) )
{
cursor.next();
TreePrinter.printTree( cursor, bTreeNode, layout,
- stableGeneration( generation ), unstableGeneration( generation ), System.out );
+ stableGeneration( generation ), unstableGeneration( generation ), System.out, printValues );
}
}
@@ -598,7 +648,7 @@ boolean consistencyCheck() throws IOException
cursor.next();
return new ConsistencyChecker<>( bTreeNode, layout,
stableGeneration( generation ), unstableGeneration( generation ) )
- .check( cursor );
+ .check( cursor, rootGen );
}
}
@@ -663,11 +713,11 @@ public void merge( KEY key, VALUE value, ValueMerger valueMerger ) throws
bTreeNode.setKeyCount( cursor, 1 );
bTreeNode.setChildAt( cursor, structurePropagation.left, 0, stableGeneration, unstableGeneration );
bTreeNode.setChildAt( cursor, structurePropagation.right, 1, stableGeneration, unstableGeneration );
- rootId = newRootId;
+ setRoot( newRootId, unstableGeneration );
}
else if ( structurePropagation.hasNewGen )
{
- rootId = structurePropagation.left;
+ setRoot( structurePropagation.left, unstableGeneration );
}
structurePropagation.clear();
@@ -683,7 +733,7 @@ public VALUE remove( KEY key ) throws IOException
stableGeneration, unstableGeneration );
if ( structurePropagation.hasNewGen )
{
- rootId = structurePropagation.left;
+ setRoot( structurePropagation.left, unstableGeneration );
}
structurePropagation.clear();
@@ -704,4 +754,12 @@ public void close() throws IOException
releaseWriter();
}
}
+
+ void setRoot( long newRootId, long generation )
+ {
+ rootUpdateGuard++; // is now odd
+ rootId = newRootId;
+ rootGen = generation;
+ rootUpdateGuard++; // is now even
+ }
}
diff --git a/community/index/src/main/java/org/neo4j/index/gbptree/InternalTreeLogic.java b/community/index/src/main/java/org/neo4j/index/gbptree/InternalTreeLogic.java
index d4e8df20b115..1a8544ebb346 100644
--- a/community/index/src/main/java/org/neo4j/index/gbptree/InternalTreeLogic.java
+++ b/community/index/src/main/java/org/neo4j/index/gbptree/InternalTreeLogic.java
@@ -152,11 +152,11 @@ void insert( PageCursor cursor, StructurePropagation structurePropagation,
long childId = bTreeNode.childAt( cursor, pos, stableGeneration, unstableGeneration );
PointerChecking.checkPointer( childId, false );
- bTreeNode.goTo( cursor, "child", childId, stableGeneration, unstableGeneration );
+ bTreeNode.goTo( cursor, "child", childId );
insert( cursor, structurePropagation, key, value, valueMerger, options, stableGeneration, unstableGeneration );
- bTreeNode.goTo( cursor, "parent", currentId, stableGeneration, unstableGeneration );
+ bTreeNode.goTo( cursor, "parent", currentId );
if ( structurePropagation.hasNewGen )
{
@@ -272,13 +272,13 @@ private void splitInternal( PageCursor cursor, StructurePropagation structu
// Update old right with new left sibling (newRight)
if ( TreeNode.isNode( oldRight ) )
{
- bTreeNode.goTo( cursor, "old right sibling", oldRight, stableGeneration, unstableGeneration );
+ bTreeNode.goTo( cursor, "old right sibling", oldRight );
bTreeNode.setLeftSibling( cursor, newRight, stableGeneration, unstableGeneration );
}
// Update left node
// Move cursor back to left
- bTreeNode.goTo( cursor, "left", current, stableGeneration, unstableGeneration );
+ bTreeNode.goTo( cursor, "left", current );
bTreeNode.setKeyCount( cursor, middlePos );
if ( pos < middlePos )
{
@@ -473,12 +473,12 @@ private void splitLeaf( PageCursor cursor, StructurePropagation structurePr
// Update old right with new left sibling (newRight)
if ( TreeNode.isNode( oldRight ) )
{
- bTreeNode.goTo( cursor, "old right sibling", oldRight, stableGeneration, unstableGeneration );
+ bTreeNode.goTo( cursor, "old right sibling", oldRight );
bTreeNode.setLeftSibling( cursor, newRight, stableGeneration, unstableGeneration );
}
// Update left child
- bTreeNode.goTo( cursor, "left", current, stableGeneration, unstableGeneration );
+ bTreeNode.goTo( cursor, "left", current );
bTreeNode.setKeyCount( cursor, middlePos );
// If pos < middle. Write shifted values to left node. Else, don't write anything.
if ( pos < middlePos )
@@ -531,11 +531,11 @@ VALUE remove( PageCursor cursor, StructurePropagation structurePropagation,
long currentId = cursor.getCurrentPageId();
long childId = bTreeNode.childAt( cursor, pos, stableGeneration, unstableGeneration );
PointerChecking.checkPointer( childId, false );
- bTreeNode.goTo( cursor, "child", childId, stableGeneration, unstableGeneration );
+ bTreeNode.goTo( cursor, "child", childId );
VALUE result = remove( cursor, structurePropagation, key, into, stableGeneration, unstableGeneration );
- bTreeNode.goTo( cursor, "parent", currentId, stableGeneration, unstableGeneration );
+ bTreeNode.goTo( cursor, "parent", currentId );
if ( structurePropagation.hasNewGen )
{
structurePropagation.hasNewGen = false;
@@ -650,17 +650,17 @@ private void createUnstableVersionIfNeeded( PageCursor cursor, StructurePropagat
PointerChecking.checkPointer( rightSibling, true );
if ( TreeNode.isNode( leftSibling ) )
{
- bTreeNode.goTo( cursor, "left sibling in split", leftSibling, stableGeneration, unstableGeneration );
+ bTreeNode.goTo( cursor, "left sibling in split", leftSibling );
bTreeNode.setRightSibling( cursor, newGenId, stableGeneration, unstableGeneration );
}
if ( TreeNode.isNode( rightSibling ) )
{
- bTreeNode.goTo( cursor, "right sibling in split", rightSibling, stableGeneration, unstableGeneration );
+ bTreeNode.goTo( cursor, "right sibling in split", rightSibling );
bTreeNode.setLeftSibling( cursor, newGenId, stableGeneration, unstableGeneration );
}
// Leave cursor at new tree node
- bTreeNode.goTo( cursor, "new gen", newGenId, stableGeneration, unstableGeneration );
+ bTreeNode.goTo( cursor, "new gen", newGenId );
// Propagate structure change
structurePropagation.hasNewGen = true;
diff --git a/community/index/src/main/java/org/neo4j/index/gbptree/RightmostInChain.java b/community/index/src/main/java/org/neo4j/index/gbptree/RightmostInChain.java
index c9667beea5c3..f9747a6b75d6 100644
--- a/community/index/src/main/java/org/neo4j/index/gbptree/RightmostInChain.java
+++ b/community/index/src/main/java/org/neo4j/index/gbptree/RightmostInChain.java
@@ -39,6 +39,8 @@ class RightmostInChain
private long currentRightmost;
private long expectedNextRightmost;
+ private long expectedNextRightmostGen;
+ private long currentRightmostGen;
RightmostInChain( TreeNode node, long stableGeneration, long unstableGeneration )
{
@@ -53,21 +55,34 @@ long assertNext( PageCursor cursor )
{
long pageId = cursor.getCurrentPageId();
- long leftSibling = pointer( node.leftSibling( cursor, stableGeneration, unstableGeneration ) );
- long rightSibling = pointer( node.rightSibling( cursor, stableGeneration, unstableGeneration ) );
+ long leftSibling = node.leftSibling( cursor, stableGeneration, unstableGeneration );
+ long rightSibling = node.rightSibling( cursor, stableGeneration, unstableGeneration );
+ long leftSiblingGen = node.pointerGen( cursor, leftSibling );
+ long rightSiblingGen = node.pointerGen( cursor, rightSibling );
+ leftSibling = pointer( leftSibling );
+ rightSibling = pointer( rightSibling );
+ long gen = node.gen( cursor );
// Assert we have reached expected node and that we agree about being siblings
assert leftSibling == currentRightmost :
"Sibling pointer does align with tree structure. Expected left sibling to be " +
currentRightmost + " but was " + leftSibling;
+ assert leftSiblingGen <= currentRightmostGen || currentRightmost == NO_NODE_FLAG:
+ "Sibling pointer gen differs from expected. Expected left sigling gen to be " +
+ currentRightmostGen + ", but was " + leftSiblingGen;
assert pageId == expectedNextRightmost ||
(expectedNextRightmost == NO_NODE_FLAG && currentRightmost == NO_NODE_FLAG) :
"Sibling pointer does not align with tree structure. Expected right sibling to be " +
expectedNextRightmost + " but was " + rightSibling;
+ assert gen <= expectedNextRightmostGen || expectedNextRightmost == NO_NODE_FLAG:
+ "Sibling pointer gen differs from expected. Expected right sigling gen to be " +
+ expectedNextRightmostGen + ", but was " + gen;
// Update currentRightmost = pageId;
currentRightmost = pageId;
+ currentRightmostGen = gen;
expectedNextRightmost = rightSibling;
+ expectedNextRightmostGen = rightSiblingGen;
return pageId;
}
@@ -76,5 +91,4 @@ void assertLast()
assert expectedNextRightmost == NO_NODE_FLAG : "Expected rightmost right sibling to be " +
NO_NODE_FLAG + " but was " + expectedNextRightmost;
}
-
}
diff --git a/community/index/src/main/java/org/neo4j/index/gbptree/SeekCursor.java b/community/index/src/main/java/org/neo4j/index/gbptree/SeekCursor.java
index 6c1f36b36d37..88d9f26a4f14 100644
--- a/community/index/src/main/java/org/neo4j/index/gbptree/SeekCursor.java
+++ b/community/index/src/main/java/org/neo4j/index/gbptree/SeekCursor.java
@@ -44,28 +44,117 @@
*/
class SeekCursor implements RawCursor,IOException>, Hit
{
+ /**
+ * Cursor for reading from tree nodes and also will be moved around when following pointers.
+ */
private final PageCursor cursor;
+
+ /**
+ * Key instance to use for reading keys from current node.
+ */
private final KEY mutableKey;
+
+ /**
+ * Value instances to use for reading values from current node.
+ */
private final VALUE mutableValue;
+
+ /**
+ * Provided when constructing the {@link SeekCursor}, marks the low end of the key range to seek.
+ */
private final KEY fromInclusive;
+
+ /**
+ * Provided when constructing the {@link SeekCursor}, marks the high end (exclusive) of the key range to seek.
+ */
private final KEY toExclusive;
+
+ /**
+ * {@link Layout} instance used to perform some functions around keys, like copying and comparing.
+ */
private final Layout layout;
+
+ /**
+ * Logic for reading data from tree nodes.
+ */
private final TreeNode bTreeNode;
+
+ /**
+ * Contains the highest returned key, i.e. from the last call to {@link #next()} returning {@code true}.
+ */
private final KEY prevKey;
+
+ /**
+ * Retrieves latest generation, only used when noticing that reading given a stale generation.
+ */
private final LongSupplier generationSupplier;
+
+ /**
+ * Retrieves latest root id and generation, moving the {@link PageCursor} to the root id and returning
+ * the root generation. This is used when a query is re-traversing from the root, due to e.g. ending up
+ * on a reused tree node and not knowing how to proceed from there.
+ */
+ private final GBPTree.RootCatchup rootCatchup;
+
+ /**
+ * Whether or not some result has been found, i.e. if {@code true} if there have been no call to
+ * {@link #next()} returning {@code true}, otherwise {@code false}. If {@code false} then value in
+ * {@link #prevKey} can be used and trusted.
+ */
private boolean first = true;
+
+ /**
+ * Current stable generation from this seek cursor's POV. Can be refreshed using {@link #generationSupplier}.
+ */
private long stableGeneration;
+
+ /**
+ * Current stable generation from this seek cursor's POV. Can be refreshed using {@link #generationSupplier}.
+ */
private long unstableGeneration;
- // data structures for the current b-tree node
+ // *** Data structures for the current b-tree node ***
+
+ /**
+ * Position in current node, this is used when scanning the values of a leaf, each call to {@link #next()}
+ * incrementing this position and reading the next key/value.
+ */
private int pos;
+
+ /**
+ * Number of keys in the current leaf, this value is cached and only re-read every time there's
+ * a {@link PageCursor#shouldRetry() retry due to concurrent write}.
+ */
private int keyCount;
- private boolean rereadHeader;
+
+ /**
+ * Set if the position of the last returned key need to be found again.
+ */
private boolean rediscoverKeyPosition;
+ /**
+ * {@link TreeNode#gen(PageCursor) generation} of the current leaf node, read every call to {@link #next()}.
+ */
+ private long currentNodeGen;
+
+ /**
+ * Generation of the pointer which was last followed, either a
+ * {@link TreeNode#rightSibling(PageCursor, long, long) sibling} during scan or otherwise following
+ * {@link TreeNode#newGen(PageCursor, long, long) newGen} or
+ * {@link TreeNode#childAt(PageCursor, int, long, long) child}.
+ */
+ private long lastFollowedPointerGen;
+
+ /**
+ * Cached {@link TreeNode#gen(PageCursor) generation} of the current leaf node, read every time a pointer
+ * is followed to a new node. Used to ensure that a node hasn't been reused between two calls to {@link #next()}.
+ */
+ private long expectedCurrentNodeGen;
+
SeekCursor( PageCursor leafCursor, KEY mutableKey, VALUE mutableValue, TreeNode bTreeNode,
KEY fromInclusive, KEY toExclusive, Layout layout,
- long stableGeneration, long unstableGeneration, LongSupplier generationSupplier ) throws IOException
+ long stableGeneration, long unstableGeneration, LongSupplier generationSupplier,
+ GBPTree.RootCatchup rootCatchup, long lastFollowedPointerGen ) throws IOException
{
this.cursor = leafCursor;
this.mutableKey = mutableKey;
@@ -77,6 +166,8 @@ class SeekCursor implements RawCursor,IOException>, Hi
this.unstableGeneration = unstableGeneration;
this.generationSupplier = generationSupplier;
this.bTreeNode = bTreeNode;
+ this.rootCatchup = rootCatchup;
+ this.lastFollowedPointerGen = lastFollowedPointerGen;
this.prevKey = layout.newKey();
traverseDownToFirstLeaf();
@@ -87,13 +178,21 @@ private void traverseDownToFirstLeaf() throws IOException
long newGen;
boolean isInternal;
int keyCount;
- long childId = 0; // initialized to satisfy compiler
+ long childId = -1; // initialized to satisfy compiler
int pos;
+ long newGenGen = -1; // initialized to satisfy compiler
+ long childIdGen = -1; // initialized to satisfy compiler
do
{
do
{
+ currentNodeGen = bTreeNode.gen( cursor );
+
newGen = bTreeNode.newGen( cursor, stableGeneration, unstableGeneration );
+ if ( GenSafePointerPair.isSuccess( newGen ) )
+ {
+ newGenGen = bTreeNode.pointerGen( cursor, newGen );
+ }
isInternal = bTreeNode.isInternal( cursor );
// Find the left-most key within from-range
keyCount = bTreeNode.keyCount( cursor );
@@ -109,18 +208,32 @@ private void traverseDownToFirstLeaf() throws IOException
if ( isInternal )
{
childId = bTreeNode.childAt( cursor, pos, stableGeneration, unstableGeneration );
+ if ( GenSafePointerPair.isSuccess( childId ) )
+ {
+ childIdGen = bTreeNode.pointerGen( cursor, childId );
+ }
}
}
while ( cursor.shouldRetry() );
checkOutOfBounds( cursor );
+ if ( !verifyNodeGenInvariants() )
+ {
+ generationCatchup();
+ lastFollowedPointerGen = rootCatchup.goTo( cursor );
+ isInternal = true;
+ continue;
+ }
+
if ( pointerCheckingWithGenerationCatchup( newGen, true ) )
{
continue;
}
else if ( TreeNode.isNode( newGen ) )
{
- bTreeNode.goTo( cursor, "new gen", newGen, stableGeneration, unstableGeneration );
+ // JUMP
+ bTreeNode.goTo( cursor, "new gen", newGen );
+ lastFollowedPointerGen = newGenGen;
continue;
}
@@ -131,7 +244,9 @@ else if ( TreeNode.isNode( newGen ) )
continue;
}
- bTreeNode.goTo( cursor, "child", childId, stableGeneration, unstableGeneration );
+ // JUMP
+ bTreeNode.goTo( cursor, "child", childId );
+ lastFollowedPointerGen = childIdGen;
}
}
while ( isInternal && keyCount > 0 );
@@ -155,13 +270,19 @@ public boolean next() throws IOException
pos++;
long newGen;
long rightSibling = -1; // initialized to satisfy the compiler
+ long newGenGen = -1; // initialized to satisfy the compiler
+ long rightSiblingGen = -1; // initialized to satisfy the compiler
do
{
+ currentNodeGen = bTreeNode.gen( cursor );
newGen = bTreeNode.newGen( cursor, stableGeneration, unstableGeneration );
- if ( rereadHeader )
+ keyCount = bTreeNode.keyCount( cursor );
+
+ if ( GenSafePointerPair.isSuccess( newGen ) )
{
- rereadCurrentNodeHeader();
+ newGenGen = bTreeNode.pointerGen( cursor, newGen );
}
+
if ( rediscoverKeyPosition )
{
rediscoverKeyPosition();
@@ -173,6 +294,10 @@ public boolean next() throws IOException
{
// Read right sibling
rightSibling = bTreeNode.rightSibling( cursor, stableGeneration, unstableGeneration );
+ if ( GenSafePointerPair.isSuccess( rightSibling ) )
+ {
+ rightSiblingGen = bTreeNode.pointerGen( cursor, rightSibling );
+ }
}
else
{
@@ -181,23 +306,36 @@ public boolean next() throws IOException
bTreeNode.valueAt( cursor, mutableValue, pos );
}
}
- while ( rediscoverKeyPosition = rereadHeader = cursor.shouldRetry() );
+ while ( rediscoverKeyPosition = cursor.shouldRetry() );
checkOutOfBounds( cursor );
+ if ( !verifyNodeGenInvariants() )
+ {
+ generationCatchup();
+ lastFollowedPointerGen = rootCatchup.goTo( cursor );
+ if ( !first )
+ {
+ layout.copyKey( prevKey, fromInclusive );
+ }
+ traverseDownToFirstLeaf();
+ continue;
+ }
+
// Go to newGen if read successfully and
if ( pointerCheckingWithGenerationCatchup( newGen, true ) )
{
// Reading newGen pointer resulted in a bad read, but generation had changed (a checkpoint has
// occurred since we started this cursor) so the generation fields in this cursor are now updated
// with the latest, so let's try that read again.
- rereadHeader = rediscoverKeyPosition = true;
+ rediscoverKeyPosition = true;
continue;
}
else if ( TreeNode.isNode( newGen ) )
{
// We ended up on a node which has a newGen set, let's go to it and read from that one instead.
- bTreeNode.goTo( cursor, "new gen", newGen, stableGeneration, unstableGeneration );
- rereadHeader = rediscoverKeyPosition = true;
+ bTreeNode.goTo( cursor, "new gen", newGen );
+ rediscoverKeyPosition = true;
+ lastFollowedPointerGen = newGenGen;
continue;
}
@@ -210,27 +348,27 @@ else if ( TreeNode.isNode( newGen ) )
// Reading rightSibling pointer resulted in a bad read, but generation had changed
// (a checkpoint has occurred since we started this cursor) so the generation fields in this
// cursor are now updated with the latest, so let's try that read again.
- rereadHeader = rediscoverKeyPosition = true;
+ rediscoverKeyPosition = true;
continue;
}
else if ( TreeNode.isNode( rightSibling ) )
{
// TODO: Check if rightSibling is within expected range before calling next.
// TODO: Possibly by getting highest expected from IdProvider
- bTreeNode.goTo( cursor, "right sibling", rightSibling, stableGeneration, unstableGeneration );
+ bTreeNode.goTo( cursor, "right sibling", rightSibling );
+ lastFollowedPointerGen = rightSiblingGen;
if ( first )
{
// Have not yet found first hit among leaves.
// First hit can be several leaves to the right.
// Continue to use binary search in right leaf
- rereadHeader = rediscoverKeyPosition = true;
+ rediscoverKeyPosition = true;
}
else
{
// It is likely that first key in right sibling is a next hit.
// Continue using scan
pos = -1;
- rereadHeader = true;
}
continue; // in the outer loop, with the position reset to the beginning of the right sibling
}
@@ -240,14 +378,13 @@ else if ( layout.compare( mutableKey, toExclusive ) < 0 )
if ( layout.compare( mutableKey, fromInclusive ) < 0 )
{
// too far to the left possibly because page reuse
- rereadHeader = rediscoverKeyPosition = true;
+ rediscoverKeyPosition = true;
continue;
}
else if ( !first && layout.compare( prevKey, mutableKey ) >= 0 )
{
// We've come across a bad read in the middle of a split
// This is outlined in InternalTreeLogic, skip this value (it's fine)
- rereadHeader = true;
continue;
}
@@ -268,24 +405,37 @@ else if ( !first && layout.compare( prevKey, mutableKey ) >= 0 )
}
}
- private void rereadCurrentNodeHeader()
+ private boolean verifyNodeGenInvariants()
{
- keyCount = bTreeNode.keyCount( cursor );
+ if ( lastFollowedPointerGen != 0 )
+ {
+ if ( currentNodeGen > lastFollowedPointerGen )
+ {
+ // We've just followed a pointer to a new node, we have arrived there and made
+ // the first read on it. It looks like the node we arrived at have a higher generation
+ // than the pointer generation, this means that this node node have been reused between
+ // following the pointer and reading the node after getting there.
+ return false;
+ }
+ lastFollowedPointerGen = 0;
+ expectedCurrentNodeGen = currentNodeGen;
+ }
+ else if ( currentNodeGen != expectedCurrentNodeGen )
+ {
+ // We've read more than once from this node and between reads the node generation has changed.
+ // This means the node has been reused.
+ return false;
+ }
+ return true;
}
private void rediscoverKeyPosition()
{
// Keys could have been moved to the left so we need to make sure we are not missing any keys by
// moving position back until we find previously returned key
- if ( !first )
- {
- int searchResult = KeySearch.search( cursor, bTreeNode, prevKey, mutableKey, keyCount );
- pos = KeySearch.positionOf( searchResult );
- }
- else
- {
- pos = 0;
- }
+ KEY keyToRediscover = first ? fromInclusive : prevKey;
+ int searchResult = KeySearch.search( cursor, bTreeNode, keyToRediscover, mutableKey, keyCount );
+ pos = KeySearch.positionOf( searchResult );
}
private boolean pointerCheckingWithGenerationCatchup( long pointer, boolean allowNoNode )
@@ -295,13 +445,8 @@ private boolean pointerCheckingWithGenerationCatchup( long pointer, boolean allo
// An unexpected sibling read, this could have been caused by a concurrent checkpoint
// where generation has been incremented. Re-read generation and, if changed since this
// seek started then update generation locally
- long newGeneration = generationSupplier.getAsLong();
- long newStableGeneration = Generation.stableGeneration( newGeneration );
- long newUnstableGeneration = Generation.unstableGeneration( newGeneration );
- if ( newStableGeneration != stableGeneration || newUnstableGeneration != unstableGeneration )
+ if ( generationCatchup() )
{
- stableGeneration = newStableGeneration;
- unstableGeneration = newUnstableGeneration;
return true;
}
PointerChecking.checkPointer( pointer, allowNoNode );
@@ -309,6 +454,20 @@ private boolean pointerCheckingWithGenerationCatchup( long pointer, boolean allo
return false;
}
+ private boolean generationCatchup()
+ {
+ long newGeneration = generationSupplier.getAsLong();
+ long newStableGeneration = Generation.stableGeneration( newGeneration );
+ long newUnstableGeneration = Generation.unstableGeneration( newGeneration );
+ if ( newStableGeneration != stableGeneration || newUnstableGeneration != unstableGeneration )
+ {
+ stableGeneration = newStableGeneration;
+ unstableGeneration = newUnstableGeneration;
+ return true;
+ }
+ return false;
+ }
+
@Override
public KEY key()
{
diff --git a/community/index/src/main/java/org/neo4j/index/gbptree/TreeNode.java b/community/index/src/main/java/org/neo4j/index/gbptree/TreeNode.java
index d36b14f9ebb4..3a1b003b91b7 100644
--- a/community/index/src/main/java/org/neo4j/index/gbptree/TreeNode.java
+++ b/community/index/src/main/java/org/neo4j/index/gbptree/TreeNode.java
@@ -402,21 +402,10 @@ int readChildrenWithInsertRecordInPosition( PageCursor cursor, Consumer stableGeneration && gen < unstableGeneration ) || gen > unstableGeneration )
- {
- throw new TreeInconsistencyException( "Reached a node with generation=" + gen +
- ", stableGeneration=" + stableGeneration + ", unstableGeneration=" + unstableGeneration );
- }
}
/**
diff --git a/community/index/src/main/java/org/neo4j/index/gbptree/TreePrinter.java b/community/index/src/main/java/org/neo4j/index/gbptree/TreePrinter.java
index 2b60e7a049ee..28e9bb215a31 100644
--- a/community/index/src/main/java/org/neo4j/index/gbptree/TreePrinter.java
+++ b/community/index/src/main/java/org/neo4j/index/gbptree/TreePrinter.java
@@ -24,6 +24,8 @@
import org.neo4j.io.pagecache.PageCursor;
+import static org.neo4j.index.gbptree.GenSafePointerPair.pointer;
+
/**
* Utility class for printing a {@link GBPTree}, either whole or sub-tree.
*/
@@ -43,8 +45,8 @@ class TreePrinter
* @throws IOException on page cache access error.
*/
static void printTree( PageCursor cursor, TreeNode treeNode,
- Layout layout, long stableGeneration, long unstableGeneration, PrintStream out )
- throws IOException
+ Layout layout, long stableGeneration, long unstableGeneration, PrintStream out,
+ boolean printValues ) throws IOException
{
int level = 0;
long firstId = cursor.getCurrentPageId();
@@ -53,37 +55,38 @@ static void printTree( PageCursor cursor, TreeNode treeNo
{
out.println( "Level " + level++ );
leftmostOnLevel = cursor.getCurrentPageId();
- printKeysOfSiblings( cursor, treeNode, layout, stableGeneration, unstableGeneration, out );
+ printKeysOfSiblings( cursor, treeNode, layout, stableGeneration, unstableGeneration, out, printValues );
out.println();
cursor.next( leftmostOnLevel );
- cursor.next( treeNode.childAt( cursor, 0, stableGeneration, unstableGeneration ) );
+ cursor.next( pointer( treeNode.childAt( cursor, 0, stableGeneration, unstableGeneration ) ) );
}
out.println( "Level " + level );
- printKeysOfSiblings( cursor, treeNode, layout, stableGeneration, unstableGeneration, out );
+ printKeysOfSiblings( cursor, treeNode, layout, stableGeneration, unstableGeneration, out, printValues );
out.println();
cursor.next( firstId );
}
private static void printKeysOfSiblings( PageCursor cursor,
TreeNode bTreeNode, Layout layout, long stableGeneration, long unstableGeneration,
- PrintStream out ) throws IOException
+ PrintStream out, boolean printValues ) throws IOException
{
while ( true )
{
- printKeys( cursor, bTreeNode, layout, stableGeneration, unstableGeneration, out );
+ printKeys( cursor, bTreeNode, layout, stableGeneration, unstableGeneration, out, printValues );
long rightSibling = bTreeNode.rightSibling( cursor, stableGeneration, unstableGeneration );
if ( !TreeNode.isNode( rightSibling ) )
{
break;
}
- cursor.next( rightSibling );
+ cursor.next( pointer( rightSibling ) );
}
}
private static void printKeys( PageCursor cursor, TreeNode bTreeNode,
- Layout layout, long stableGeneration, long unstableGeneration, PrintStream out )
+ Layout layout, long stableGeneration, long unstableGeneration, PrintStream out,
+ boolean printValues )
{
boolean isLeaf = bTreeNode.isLeaf( cursor );
int keyCount = bTreeNode.keyCount( cursor );
@@ -100,13 +103,16 @@ private static void printKeys( PageCursor cursor, TreeNode void printKeys( PageCursor cursor, TreeNode.
+ */
+package org.neo4j.index.gbptree;
+
+import org.apache.commons.lang3.mutable.MutableLong;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.Map;
+import java.util.Random;
+import java.util.TreeMap;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.neo4j.cursor.RawCursor;
+import org.neo4j.index.Hit;
+import org.neo4j.index.Index;
+import org.neo4j.index.IndexWriter;
+import org.neo4j.io.fs.DefaultFileSystemAbstraction;
+import org.neo4j.io.pagecache.IOLimiter;
+import org.neo4j.io.pagecache.PageCache;
+import org.neo4j.io.pagecache.PageSwapperFactory;
+import org.neo4j.io.pagecache.impl.SingleFilePageSwapperFactory;
+import org.neo4j.io.pagecache.impl.muninn.MuninnPageCache;
+import org.neo4j.test.rule.RandomRule;
+
+import static java.lang.Integer.max;
+import static java.util.concurrent.TimeUnit.MILLISECONDS;
+import static java.util.concurrent.TimeUnit.SECONDS;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.neo4j.index.IndexWriter.Options.DEFAULTS;
+import static org.neo4j.index.gbptree.GBPTree.NO_MONITOR;
+import static org.neo4j.io.pagecache.tracing.PageCacheTracer.NULL;
+
+public class GBPTreeIT
+{
+ @Rule
+ public final TemporaryFolder folder = new TemporaryFolder( new File( "target" ) );
+ @Rule
+ public final RandomRule random = new RandomRule();
+ private final Layout layout = new SimpleLongLayout();
+ private GBPTree index;
+
+ private GBPTree createIndex( int pageSize )
+ throws IOException
+ {
+ return createIndex( pageSize, NO_MONITOR );
+ }
+ private GBPTree createIndex( int pageSize, GBPTree.Monitor monitor )
+ throws IOException
+ {
+ PageCache pageCache = new MuninnPageCache( swapperFactory(), 10_000, pageSize, NULL );
+ File indexFile = new File( folder.getRoot(), "index" );
+ return index = new GBPTree<>( pageCache, indexFile, layout, 0/*use whatever page cache says*/, monitor );
+ }
+
+ private static PageSwapperFactory swapperFactory()
+ {
+ PageSwapperFactory swapperFactory = new SingleFilePageSwapperFactory();
+ swapperFactory.setFileSystemAbstraction( new DefaultFileSystemAbstraction() );
+ return swapperFactory;
+ }
+
+ @Test
+ public void shouldStayCorrectAfterRandomModifications() throws Exception
+ {
+ // GIVEN
+ Index index = createIndex( 1024 );
+ Comparator keyComparator = layout;
+ Map data = new TreeMap<>( keyComparator );
+ int count = 1000;
+ for ( int i = 0; i < count; i++ )
+ {
+ data.put( randomKey( random.random() ), randomKey( random.random() ) );
+ }
+
+ // WHEN
+ try ( IndexWriter writer = index.writer( DEFAULTS ) )
+ {
+ for ( Map.Entry entry : data.entrySet() )
+ {
+ writer.put( entry.getKey(), entry.getValue() );
+ }
+ }
+
+ for ( int round = 0; round < 10; round++ )
+ {
+ // THEN
+ for ( int i = 0; i < count; i++ )
+ {
+ MutableLong first = randomKey( random.random() );
+ MutableLong second = randomKey( random.random() );
+ MutableLong from, to;
+ if ( first.longValue() < second.longValue() )
+ {
+ from = first;
+ to = second;
+ }
+ else
+ {
+ from = second;
+ to = first;
+ }
+ Map expectedHits = expectedHits( data, from, to, keyComparator );
+ try ( RawCursor,IOException> result = index.seek( from, to ) )
+ {
+ while ( result.next() )
+ {
+ MutableLong key = result.get().key();
+ if ( expectedHits.remove( key ) == null )
+ {
+ fail( "Unexpected hit " + key + " when searching for " + from + " - " + to );
+ }
+
+ assertTrue( keyComparator.compare( key, from ) >= 0 );
+ assertTrue( keyComparator.compare( key, to ) < 0 );
+ }
+ if ( !expectedHits.isEmpty() )
+ {
+ fail( "There were results which were expected to be returned, but weren't:" + expectedHits +
+ " when searching range " + from + " - " + to );
+ }
+ }
+ }
+
+ randomlyModifyIndex( index, data, random.random() );
+ }
+ }
+
+ @Test
+ public void shouldReadCorrectlyWhenConcurrentlyInsertingInOrder() throws Throwable
+ {
+ // GIVEN
+ int minCheckpointInterval = 10;
+ int maxCheckpointInterval = 20;
+ index = createIndex( 256 );
+ int readers = max( 1, Runtime.getRuntime().availableProcessors() - 1 );
+ CountDownLatch readerReadySignal = new CountDownLatch( readers );
+ CountDownLatch startSignal = new CountDownLatch( 1 );
+ AtomicBoolean endSignal = new AtomicBoolean();
+ AtomicInteger highestId = new AtomicInteger( -1 );
+ AtomicReference readerError = new AtomicReference<>();
+ AtomicInteger numberOfReads = new AtomicInteger();
+ Runnable reader = () -> {
+ int numberOfLocalReads = 0;
+ try
+ {
+ readerReadySignal.countDown();
+ startSignal.await( 10, SECONDS );
+
+ while ( !endSignal.get() )
+ {
+ long upToId = highestId.get();
+ if ( upToId < 10 )
+ {
+ continue;
+ }
+
+ // Read one go, we should see up to highId
+ long start = Long.max( 0, upToId - 1000 );
+ long lastSeen = start - 1;
+ long startTime;
+ long startTimeLeaf;
+ long endTime;
+ startTime = System.currentTimeMillis();
+ try ( RawCursor,IOException> cursor =
+ // "to" is exclusive so do +1 on that
+ index.seek( new MutableLong( start ), new MutableLong( upToId + 1 ) ) )
+ {
+ startTimeLeaf = System.currentTimeMillis();
+ while ( cursor.next() )
+ {
+ MutableLong hit = cursor.get().key();
+ long value = cursor.get().value().longValue();
+ if ( hit.longValue() != value )
+ {
+ fail( String.format( "Read mismatching key value pair, key=%d, value=%d%n",
+ hit.longValue(), value ) );
+ }
+
+ if ( hit.longValue() != lastSeen + 1 )
+ {
+ fail( "Expected to see " + (lastSeen + 1) + " as next hit, but was " + hit +
+ " where start was " + start );
+
+ }
+ assertEquals( lastSeen + 1, hit.longValue() );
+ lastSeen = hit.longValue();
+ }
+ endTime = System.currentTimeMillis();
+ }
+ // It's possible that the writer has gone further since we started,
+ // but we should at least have seen upToId
+ if ( lastSeen < upToId )
+ {
+ fail( "Seeked " + start + " - " + upToId + " (inclusive), but only saw " + lastSeen +
+ ". Read took " + (endTime - startTime) + "ms," +
+ " of which " + (endTime - startTimeLeaf) + "ms among leaves. " +
+ "MaxCheckpointInterval=" + maxCheckpointInterval );
+ }
+
+ // Keep a local counter and update the global one now and then, we don't want
+ // out little statistic here to affect concurrency
+ if ( ++numberOfLocalReads == 30 )
+ {
+ numberOfReads.addAndGet( numberOfLocalReads );
+ numberOfLocalReads = 0;
+ }
+ }
+ }
+ catch ( Throwable e )
+ {
+ readerError.set( e );
+ }
+ finally
+ {
+ numberOfReads.addAndGet( numberOfLocalReads );
+ }
+ };
+
+ // WHEN starting the readers
+ Thread[] readerThreads = new Thread[readers];
+ for ( int i = 0; i < readers; i++ )
+ {
+ readerThreads[i] = new Thread( reader );
+ readerThreads[i].start();
+ }
+
+ // and starting the checkpointer
+ Thread checkpointer = checkpointerThread( minCheckpointInterval, maxCheckpointInterval, endSignal );
+ checkpointer.start();
+
+ // and then starting the writer
+ try
+ {
+ assertTrue( readerReadySignal.await( 10, SECONDS ) );
+ startSignal.countDown();
+ Random random1 = ThreadLocalRandom.current();
+ int inserted = 0;
+ while ( (inserted < 100_000 || numberOfReads.get() < 100) && readerError.get() == null )
+ {
+ try ( IndexWriter writer = index.writer( DEFAULTS ) )
+ {
+ int groupCount = random1.nextInt( 1000 ) + 1;
+ for ( int i = 0; i < groupCount; i++, inserted++ )
+ {
+ MutableLong thing = new MutableLong( inserted );
+ writer.put( thing, thing );
+ highestId.set( inserted );
+ }
+ }
+ // Sleep a little in between update groups (transactions, sort of)
+ MILLISECONDS.sleep( random1.nextInt( 10 ) + 3 );
+ }
+ }
+ finally
+ {
+ // THEN no reader should have failed and by this time there have been a certain
+ // number of successful reads. A successful read means that all results were ordered,
+ // no holes and we saw all values that was inserted at the point of making the seek call.
+ endSignal.set( true );
+ for ( Thread readerThread : readerThreads )
+ {
+ readerThread.join( SECONDS.toMillis( 10 ) );
+ }
+ if ( readerError.get() != null )
+ {
+ throw readerError.get();
+ }
+ checkpointer.join();
+ }
+ }
+
+ @Test
+ public void shouldReadCorrectlyWhenConcurrentlyInsertingOutOfOrder() throws Throwable
+ {
+ // Checkpoint config
+ int minCheckpointInterval = 10;
+ int maxCheckpointInterval = 20;
+
+ // Write group config
+ int nbrOfGroups = 10;
+ int wantedRangeWidth = 1_000;
+ int rangeWidth = wantedRangeWidth - wantedRangeWidth % nbrOfGroups;
+ int wantedNbrOfReads = 10_000;
+
+ // Readers config
+ int readers = max( 1, Runtime.getRuntime().availableProcessors() - 1 );
+
+ // Thread communication
+ AtomicInteger currentWriteIteration = new AtomicInteger( 0 );
+ AtomicLong lastWrittenKey = new AtomicLong( -1 );
+ CountDownLatch readerReadySignal = new CountDownLatch( readers );
+ CountDownLatch startSignal = new CountDownLatch( 1 );
+ AtomicBoolean endSignal = new AtomicBoolean();
+ AtomicBoolean failHalt = new AtomicBoolean(); // Readers signal to writer that there is a failure
+ AtomicReference readerError = new AtomicReference<>();
+ AtomicInteger numberOfReads = new AtomicInteger();
+
+ // given
+ index = createIndex( 256 );
+
+ Runnable reader = () -> {
+ int numberOfLocalReads = 0;
+ try
+ {
+ readerReadySignal.countDown();
+ while ( currentWriteIteration.get() < 1 )
+ {
+ startSignal.await( 5, SECONDS );
+ }
+
+ while ( !endSignal.get() && !failHalt.get() )
+ {
+ // Read one go, we should see up to highId
+
+ // Kept for SeekCursor life
+ int iterationExpectedToSee = currentWriteIteration.get() - 1;
+ int rangeModulus = iterationExpectedToSee % nbrOfGroups;
+ long start = minRange( nbrOfGroups, rangeWidth, iterationExpectedToSee );
+ long end = maxRange( nbrOfGroups, rangeWidth, iterationExpectedToSee );
+
+ // Updated during SeekCursor life
+ long lastSeenKey = -1;
+ long nextToSeeBase = start;
+ long nextToSeeDelta = 0;
+ long nextToSee = nextToSeeBase + nextToSeeDelta;
+ long lastWrittenBeforeStart;
+ long lastWrittenWhenFinished;
+ long lastWrittenBeforeTraversingTree;
+
+ lastWrittenBeforeTraversingTree = lastWrittenKey.get();
+ try ( RawCursor,IOException> cursor =
+ index.seek( new MutableLong( start ), new MutableLong( end ) ) )
+ {
+ lastWrittenWhenFinished = lastWrittenKey.get();
+ while ( cursor.next() )
+ {
+ lastWrittenBeforeStart = lastWrittenWhenFinished;
+ lastWrittenWhenFinished = lastWrittenKey.get();
+
+ lastSeenKey = cursor.get().key().longValue();
+ long lastSeenValue = cursor.get().value().longValue();
+ if ( lastSeenKey != lastSeenValue )
+ {
+ failHalt.set( true );
+ fail( String.format( "Read mismatching key value pair, key=%d, value=%d%n",
+ lastSeenKey, lastSeenValue ) );
+ }
+
+ nextToSee = nextToSeeBase + nextToSeeDelta;
+ if ( nextToSee == lastSeenKey )
+ {
+ if ( nextToSeeDelta < rangeModulus )
+ {
+ nextToSeeDelta++;
+ }
+ else
+ {
+ nextToSeeDelta = 0;
+ nextToSeeBase += nbrOfGroups;
+ }
+ }
+ else if ( lastSeenKey > nextToSee )
+ {
+ failHalt.set( true );
+ fail( String.format( "Expected to see %d+%d=%d but went straight to %d, " +
+ "lastWrittenBeforeTraversingTree=%d, " +
+ "lastWrittenBeforeNext=%d, " +
+ "lastWrittenAfterNext=%d%n",
+ nextToSeeBase, nextToSeeDelta, nextToSee, lastSeenKey,
+ lastWrittenBeforeTraversingTree,
+ lastWrittenBeforeStart,
+ lastWrittenWhenFinished ) );
+ }
+ }
+ long difference = Math.abs( end - nextToSee );
+ boolean condition = difference <= nbrOfGroups;
+ if ( !condition )
+ {
+ failHalt.set( true );
+ fail( String.format( "Expected distance between end and nextToSee to be less " +
+ "than %d but was %d. lastSeenKey=%d, nextToSee=%d, end=%d%n",
+ nbrOfGroups, difference, lastSeenKey, nextToSee, end ) );
+ }
+ }
+
+ // Keep a local counter and update the global one now and then, we don't want
+ // out little statistic here to affect concurrency
+ if ( ++numberOfLocalReads == 30 )
+ {
+ numberOfReads.addAndGet( numberOfLocalReads );
+ numberOfLocalReads = 0;
+ }
+ }
+ }
+ catch ( Throwable e )
+ {
+ readerError.set( e );
+ }
+ finally
+ {
+ numberOfReads.addAndGet( numberOfLocalReads );
+ }
+ };
+
+ // WHEN starting the readers
+ Thread[] readerThreads = new Thread[readers];
+ for ( int i = 0; i < readers; i++ )
+ {
+ readerThreads[i] = new Thread( reader );
+ readerThreads[i].start();
+ }
+
+ // and starting the checkpointer
+ Thread checkpointer = checkpointerThread( minCheckpointInterval, maxCheckpointInterval, endSignal );
+ checkpointer.start();
+
+ // and then starting the writer
+ try
+ {
+ assertTrue( readerReadySignal.await( 10, SECONDS ) );
+ startSignal.countDown();
+ int iteration = currentWriteIteration.get();
+ while ( !failHalt.get() && numberOfReads.get() < wantedNbrOfReads && readerError.get() == null )
+ {
+ try ( IndexWriter writer = index.writer( DEFAULTS ) )
+ {
+ for ( long i = minRange( nbrOfGroups, rangeWidth, iteration ) + iteration % nbrOfGroups;
+ i < maxRange( nbrOfGroups, rangeWidth, iteration ); i += nbrOfGroups )
+ {
+ MutableLong thing = new MutableLong( i );
+ writer.put( thing, thing );
+ lastWrittenKey.set( i );
+ if ( failHalt.get() )
+ {
+ break;
+ }
+ }
+ }
+ iteration = currentWriteIteration.incrementAndGet();
+ // Sleep a little in between update groups (transactions, sort of)
+ MILLISECONDS.sleep( random.nextInt( 10 ) + 3 );
+ }
+ }
+ finally
+ {
+ // THEN no reader should have failed and by this time there have been a certain
+ // number of successful reads. A successful read means that all results were ordered,
+ // no holes and we saw all values that was inserted at the point of making the seek call.
+ endSignal.set( true );
+ for ( Thread readerThread : readerThreads )
+ {
+ readerThread.join( SECONDS.toMillis( 10 ) );
+ }
+ checkpointer.join();
+ if ( failHalt.get() )
+ {
+ index.printTree( false);
+ }
+ if ( readerError.get() != null )
+ {
+ throw readerError.get();
+ }
+ }
+ }
+
+ private Thread checkpointerThread( int minCheckpointInterval, int maxCheckpointInterval, AtomicBoolean endSignal )
+ {
+ return new Thread( () ->
+ {
+ while ( !endSignal.get() )
+ {
+ try
+ {
+ index.checkpoint( IOLimiter.unlimited() );
+ // Sleep a little in between update groups (transactions, sort of)
+ MILLISECONDS.sleep( random.nextInt( minCheckpointInterval,maxCheckpointInterval ) );
+ }
+ catch ( Exception e )
+ {
+ throw new RuntimeException( e );
+ }
+ }
+ });
+ }
+
+ private int maxRange( int nbrOfGroups, int rangeWidth, int iteration )
+ {
+ return (iteration / nbrOfGroups + 1) * rangeWidth;
+ }
+
+ private int minRange( int nbrOfGroups, int rangeWidth, int iteration )
+ {
+ return iteration / nbrOfGroups * rangeWidth;
+ }
+
+ private static void randomlyModifyIndex( Index index,
+ Map data, Random random ) throws IOException
+ {
+ int changeCount = random.nextInt( 10 ) + 10;
+ try ( IndexWriter writer = index.writer( DEFAULTS ) )
+ {
+ for ( int i = 0; i < changeCount; i++ )
+ {
+ if ( random.nextBoolean() && data.size() > 0 )
+ { // remove
+ MutableLong key = randomKey( data, random );
+ MutableLong value = data.remove( key );
+ MutableLong removedValue = writer.remove( key );
+ assertEquals( "For " + key, value, removedValue );
+ }
+ else
+ { // put
+ MutableLong key = randomKey( random );
+ MutableLong value = randomKey( random );
+ writer.put( key, value );
+ data.put( key, value );
+ }
+ }
+ }
+ }
+
+ private static Map expectedHits( Map data,
+ MutableLong from, MutableLong to, Comparator comparator )
+ {
+ Map hits = new TreeMap<>( comparator );
+ for ( Map.Entry candidate : data.entrySet() )
+ {
+ if ( comparator.compare( candidate.getKey(), from ) >= 0 &&
+ comparator.compare( candidate.getKey(), to ) < 0 )
+ {
+ hits.put( candidate.getKey(), candidate.getValue() );
+ }
+ }
+ return hits;
+ }
+
+ private static MutableLong randomKey( Map data, Random random )
+ {
+ MutableLong[] keys = data.keySet().toArray( new MutableLong[data.size()] );
+ return keys[random.nextInt( keys.length )];
+ }
+
+ private static MutableLong randomKey( Random random )
+ {
+ return new MutableLong( random.nextInt( 1_000 ) );
+ }
+}
diff --git a/community/index/src/test/java/org/neo4j/index/gbptree/GBPTreeTest.java b/community/index/src/test/java/org/neo4j/index/gbptree/GBPTreeTest.java
index 045b1de862f5..38bcca680d0e 100644
--- a/community/index/src/test/java/org/neo4j/index/gbptree/GBPTreeTest.java
+++ b/community/index/src/test/java/org/neo4j/index/gbptree/GBPTreeTest.java
@@ -29,16 +29,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Comparator;
import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.TreeMap;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.ThreadLocalRandom;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicReference;
import org.neo4j.collection.primitive.Primitive;
import org.neo4j.collection.primitive.PrimitiveLongCollections;
@@ -57,9 +48,6 @@
import org.neo4j.test.Barrier;
import org.neo4j.test.rule.RandomRule;
-import static java.lang.Integer.max;
-import static java.util.concurrent.TimeUnit.MILLISECONDS;
-import static java.util.concurrent.TimeUnit.SECONDS;
import static org.hamcrest.CoreMatchers.containsString;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@@ -510,71 +498,6 @@ public void shouldSeeSimpleInsertions() throws Exception
/* Randomized tests */
- @Test
- public void shouldStayCorrectAfterRandomModifications() throws Exception
- {
- // GIVEN
- Index index = createIndex( 1024 );
- Comparator keyComparator = layout;
- Map data = new TreeMap<>( keyComparator );
- int count = 1000;
- for ( int i = 0; i < count; i++ )
- {
- data.put( randomKey( random.random() ), randomKey( random.random() ) );
- }
-
- // WHEN
- try ( IndexWriter writer = index.writer( DEFAULTS ) )
- {
- for ( Map.Entry entry : data.entrySet() )
- {
- writer.put( entry.getKey(), entry.getValue() );
- }
- }
-
- for ( int round = 0; round < 10; round++ )
- {
- // THEN
- for ( int i = 0; i < count; i++ )
- {
- MutableLong first = randomKey( random.random() );
- MutableLong second = randomKey( random.random() );
- MutableLong from, to;
- if ( first.longValue() < second.longValue() )
- {
- from = first;
- to = second;
- }
- else
- {
- from = second;
- to = first;
- }
- Map expectedHits = expectedHits( data, from, to, keyComparator );
- try ( RawCursor,IOException> result = index.seek( from, to ) )
- {
- while ( result.next() )
- {
- MutableLong key = result.get().key();
- if ( expectedHits.remove( key ) == null )
- {
- fail( "Unexpected hit " + key + " when searching for " + from + " - " + to );
- }
-
- assertTrue( keyComparator.compare( key, from ) >= 0 );
- assertTrue( keyComparator.compare( key, to ) < 0 );
- }
- if ( !expectedHits.isEmpty() )
- {
- fail( "There were results which were expected to be returned, but weren't:" + expectedHits +
- " when searching range " + from + " - " + to );
- }
- }
- }
-
- randomlyModifyIndex( index, data, random.random() );
- }
- }
@Test
public void shouldSplitCorrectly() throws Exception
@@ -624,214 +547,6 @@ public void shouldSplitCorrectly() throws Exception
}
}
- @Test
- public void shouldReadCorrectlyWhenConcurrentlyInsertingInOrder() throws Throwable
- {
- // GIVEN
- int maxCheckpointInterval = random.intBetween( 50, 400 );
- index = createIndex( 256 );
- int readers = max( 1, Runtime.getRuntime().availableProcessors() - 1 );
- CountDownLatch readerReadySignal = new CountDownLatch( readers );
- CountDownLatch startSignal = new CountDownLatch( 1 );
- AtomicBoolean endSignal = new AtomicBoolean();
- AtomicInteger highestId = new AtomicInteger( -1 );
- AtomicReference readerError = new AtomicReference<>();
- AtomicInteger numberOfReads = new AtomicInteger();
- Runnable reader = () -> {
- int numberOfLocalReads = 0;
- try
- {
- readerReadySignal.countDown();
- startSignal.await( 10, SECONDS );
-
- while ( !endSignal.get() )
- {
- long upToId = highestId.get();
- if ( upToId < 10 )
- {
- continue;
- }
-
- // Read one go, we should see up to highId
- long start = Long.max( 0, upToId - 1000 );
- long lastSeen = start - 1;
- long startTime;
- long startTimeLeaf;
- long endTime;
- startTime = System.currentTimeMillis();
- try ( RawCursor,IOException> cursor =
- // "to" is exclusive so do +1 on that
- index.seek( new MutableLong( start ), new MutableLong( upToId + 1 ) ) )
- {
- startTimeLeaf = System.currentTimeMillis();
- while ( cursor.next() )
- {
- MutableLong hit = cursor.get().key();
- if ( hit.longValue() != lastSeen + 1 )
- {
- fail( "Expected to see " + (lastSeen + 1) + " as next hit, but was " + hit +
- " where start was " + start );
-
- }
- assertEquals( lastSeen + 1, hit.longValue() );
- lastSeen = hit.longValue();
- }
- endTime = System.currentTimeMillis();
- }
- // It's possible that the writer has gone further since we started,
- // but we should at least have seen upToId
- if ( lastSeen < upToId )
- {
- fail( "Seeked " + start + " - " + upToId + " (inclusive), but only saw " + lastSeen +
- ". Read took " + (endTime - startTime) + "ms," +
- " of which " + (endTime - startTimeLeaf) + "ms among leaves. " +
- "MaxCheckpointInterval=" + maxCheckpointInterval );
- }
-
- // Keep a local counter and update the global one now and then, we don't want
- // out little statistic here to affect concurrency
- if ( ++numberOfLocalReads == 30 )
- {
- numberOfReads.addAndGet( numberOfLocalReads );
- numberOfLocalReads = 0;
- }
- }
- }
- catch ( Throwable e )
- {
- readerError.set( e );
- }
- finally
- {
- numberOfReads.addAndGet( numberOfLocalReads );
- }
- };
-
- // WHEN starting the readers
- Thread[] readerThreads = new Thread[readers];
- for ( int i = 0; i < readers; i++ )
- {
- readerThreads[i] = new Thread( reader );
- readerThreads[i].start();
- }
-
- // and starting the checkpointer
- Thread checkpointer = new Thread( () ->
- {
- while ( !endSignal.get() )
- {
- try
- {
- index.checkpoint( IOLimiter.unlimited() );
- // Sleep a little in between update groups (transactions, sort of)
- MILLISECONDS.sleep( random.nextInt( maxCheckpointInterval ) );
- }
- catch ( Exception e )
- {
- throw new RuntimeException( e );
- }
- }
- });
- checkpointer.start();
-
- // and then starting the writer
- try
- {
- assertTrue( readerReadySignal.await( 10, SECONDS ) );
- startSignal.countDown();
- Random random = ThreadLocalRandom.current();
- int inserted = 0;
- while ( (inserted < 100_000 || numberOfReads.get() < 100) && readerError.get() == null )
- {
- try ( IndexWriter writer = index.writer( DEFAULTS ) )
- {
- int groupCount = random.nextInt( 1000 ) + 1;
- for ( int i = 0; i < groupCount; i++, inserted++ )
- {
- MutableLong thing = new MutableLong( inserted );
- writer.put( thing, thing );
- highestId.set( inserted );
- }
- }
- // Sleep a little in between update groups (transactions, sort of)
- MILLISECONDS.sleep( random.nextInt( 10 ) + 3 );
- }
- }
- finally
- {
- // THEN no reader should have failed and by this time there have been a certain
- // number of successful reads. A successful read means that all results were ordered,
- // no holes and we saw all values that was inserted at the point of making the seek call.
- endSignal.set( true );
- for ( Thread readerThread : readerThreads )
- {
- readerThread.join( SECONDS.toMillis( 10 ) );
- }
- if ( readerError.get() != null )
- {
- throw readerError.get();
- }
- checkpointer.join();
- }
- }
-
- // todo shouldReadCorrectlyWhenConcurrentlyInsertingOutOfOrder
- // todo shouldReadCorrectlyWhenReadSpansSingleCheckpoint
- // todo shouldReadCorrectlyWhenReadSpansMultipleCheckpoints
- // todo readKillerTestNextCheckpointNextCheckpointNext...
-
- private static void randomlyModifyIndex( Index index,
- Map data, Random random ) throws IOException
- {
- int changeCount = random.nextInt( 10 ) + 10;
- try ( IndexWriter writer = index.writer( DEFAULTS ) )
- {
- for ( int i = 0; i < changeCount; i++ )
- {
- if ( random.nextBoolean() && data.size() > 0 )
- { // remove
- MutableLong key = randomKey( data, random );
- MutableLong value = data.remove( key );
- MutableLong removedValue = writer.remove( key );
- assertEquals( "For " + key, value, removedValue );
- }
- else
- { // put
- MutableLong key = randomKey( random );
- MutableLong value = randomKey( random );
- writer.put( key, value );
- data.put( key, value );
- }
- }
- }
- }
-
- private static MutableLong randomKey( Map data, Random random )
- {
- MutableLong[] keys = data.keySet().toArray( new MutableLong[data.size()] );
- return keys[random.nextInt( keys.length )];
- }
-
- private static Map expectedHits( Map data,
- MutableLong from, MutableLong to, Comparator comparator )
- {
- Map hits = new TreeMap<>( comparator );
- for ( Map.Entry candidate : data.entrySet() )
- {
- if ( comparator.compare( candidate.getKey(), from ) >= 0 &&
- comparator.compare( candidate.getKey(), to ) < 0 )
- {
- hits.put( candidate.getKey(), candidate.getValue() );
- }
- }
- return hits;
- }
-
- private static MutableLong randomKey( Random random )
- {
- return new MutableLong( random.nextInt( 1_000 ) );
- }
-
private static class CheckpointControlledMonitor implements Monitor
{
private final Barrier.Control barrier = new Barrier.Control();
diff --git a/community/index/src/test/java/org/neo4j/index/gbptree/InternalTreeLogicTest.java b/community/index/src/test/java/org/neo4j/index/gbptree/InternalTreeLogicTest.java
index 53bcb5038682..4ac0ae703e7a 100644
--- a/community/index/src/test/java/org/neo4j/index/gbptree/InternalTreeLogicTest.java
+++ b/community/index/src/test/java/org/neo4j/index/gbptree/InternalTreeLogicTest.java
@@ -35,7 +35,6 @@
import org.neo4j.index.ValueMergers;
import org.neo4j.io.pagecache.PageCursor;
import org.neo4j.test.rule.RandomRule;
-
import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@@ -551,6 +550,7 @@ public void modifierMustProduceConsistentTreeWithRandomInserts() throws Exceptio
// given
node.initializeLeaf( cursor, stableGen, unstableGen );
int numberOfEntries = 100_000;
+ long rootGen = unstableGen;
for ( int i = 0; i < numberOfEntries; i++ )
{
// when
@@ -558,6 +558,11 @@ public void modifierMustProduceConsistentTreeWithRandomInserts() throws Exceptio
if ( structurePropagation.hasSplit )
{
newRootFromSplit( structurePropagation );
+ rootGen = unstableGen;
+ }
+ else if ( structurePropagation.hasNewGen )
+ {
+ rootGen = unstableGen;
}
if ( i == numberOfEntries / 2 )
@@ -569,7 +574,7 @@ public void modifierMustProduceConsistentTreeWithRandomInserts() throws Exceptio
// then
ConsistencyChecker consistencyChecker =
new ConsistencyChecker<>( node, layout, stableGen, unstableGen );
- consistencyChecker.check( cursor );
+ consistencyChecker.check( cursor, rootGen );
}
/* TEST VALUE MERGER */
@@ -751,7 +756,7 @@ public void shouldCreateNewVersionWhenInsertInStableRootAsLeaf() throws Exceptio
assertNotEquals( oldGenId, newGenId );
assertEquals( 1, node.keyCount( cursor ) );
- node.goTo( cursor, "old gen", oldGenId, stableGen, unstableGen );
+ node.goTo( cursor, "old gen", oldGenId );
assertEquals( newGenId, newGen( cursor, stableGen, unstableGen ) );
assertEquals( 0, node.keyCount( cursor ) );
}
@@ -779,7 +784,7 @@ public void shouldCreateNewVersionWhenRemoveInStableRootAsLeaf() throws Exceptio
assertNotEquals( oldGenId, newGenId );
assertEquals( 0, node.keyCount( cursor ) );
- node.goTo( cursor, "old gen", oldGenId, stableGen, unstableGen );
+ node.goTo( cursor, "old gen", oldGenId );
assertEquals( newGenId, newGen( cursor, stableGen, unstableGen ) );
assertEquals( 1, node.keyCount( cursor ) );
}
@@ -1080,7 +1085,7 @@ private void assertSiblings( long left, long middle, long right ) throws IOExcep
// KEEP even if unused
private void printTree() throws IOException
{
- TreePrinter.printTree( cursor, node, layout, stableGen, unstableGen, System.out );
+ TreePrinter.printTree( cursor, node, layout, stableGen, unstableGen, System.out, true );
}
private MutableLong key( long key )
diff --git a/community/index/src/test/java/org/neo4j/index/gbptree/SeekCursorTest.java b/community/index/src/test/java/org/neo4j/index/gbptree/SeekCursorTest.java
index 81db8bb67ed8..43b473de5cc4 100644
--- a/community/index/src/test/java/org/neo4j/index/gbptree/SeekCursorTest.java
+++ b/community/index/src/test/java/org/neo4j/index/gbptree/SeekCursorTest.java
@@ -19,6 +19,7 @@
*/
package org.neo4j.index.gbptree;
+import org.apache.commons.lang3.mutable.MutableBoolean;
import org.apache.commons.lang3.mutable.MutableLong;
import org.junit.Before;
import org.junit.Test;
@@ -53,6 +54,9 @@ public long getAsLong()
return Generation.generation( stableGen, unstableGen );
}
};
+ private static final GBPTree.RootCatchup failingRootCatchup = c -> {
+ throw new AssertionError( "Should not happen" );
+ };
private final SimpleIdProvider id = new SimpleIdProvider();
private final Layout layout = new SimpleLongLayout();
@@ -72,6 +76,7 @@ public long getAsLong()
private final MutableLong from = layout.newKey();
private final MutableLong to = layout.newKey();
+
private static long stableGen = GenSafePointer.MIN_GENERATION;
private static long unstableGen = stableGen + 1;
@@ -605,7 +610,7 @@ public void mustRereadHeadersOnRetry() throws Exception
// WHEN
try ( SeekCursor cursor = new SeekCursor<>( this.cursor, insertKey, insertValue,
- node, from, to, layout, stableGen, unstableGen, () -> 0L ) )
+ node, from, to, layout, stableGen, unstableGen, () -> 0L, failingRootCatchup, unstableGen ) )
{
// reading a couple of keys
assertTrue( cursor.next() );
@@ -990,6 +995,119 @@ public void shouldFailChildPointerIfReadFailureNotCausedByCheckpoint() throws Ex
}
}
+ @Test
+ public void shouldCatchupRootWhenRootNodeHasTooNewGeneration() throws Exception
+ {
+ // given
+ long gen = node.gen( cursor );
+ MutableBoolean triggered = new MutableBoolean( false );
+ GBPTree.RootCatchup rootCatchup = c ->
+ {
+ triggered.setTrue();
+ return gen;
+ };
+
+ // when
+ try ( SeekCursor seek = new SeekCursor<>( cursor, readKey, readValue, node, from, to, layout, stableGen, unstableGen,
+ generationSupplier, rootCatchup, gen - 1 ) )
+ {
+ // do nothing
+ }
+
+ // then
+ assertTrue( triggered.getValue() );
+ }
+
+ @Test
+ public void shouldCatchupRootWhenNodeHasTooNewGenerationWhileTraversingDownTree() throws Exception
+ {
+ // given
+ long gen = node.gen( cursor );
+ MutableBoolean triggered = new MutableBoolean( false );
+ GBPTree.RootCatchup rootCatchup = c ->
+ {
+ triggered.setTrue();
+ return gen;
+ };
+
+ // a newer leaf
+ long leftChild = cursor.getCurrentPageId();
+ node.initializeLeaf( cursor, stableGen + 1, unstableGen + 1 ); // A newer leaf
+ cursor.next();
+
+ // a root
+ node.initializeInternal( cursor, stableGen, unstableGen );
+ long keyInRoot = 10L;
+ insertKey.setValue( keyInRoot );
+ node.insertKeyAt( cursor, insertKey, 0, 0, tmp );
+ node.setKeyCount( cursor, 1 );
+ // with old pointer to child (simulating reuse of internal node)
+ node.setChildAt( cursor, leftChild, 0, stableGen, unstableGen );
+
+ // when
+ from.setValue( 1L );
+ to.setValue( 2L );
+ try ( SeekCursor seek = new SeekCursor<>( cursor, readKey, readValue, node, from, to,
+ layout, stableGen, unstableGen, generationSupplier, rootCatchup, unstableGen ) )
+ {
+ // do nothing
+ }
+
+ // then
+ assertTrue( triggered.getValue() );
+ }
+
+ @Test
+ public void shouldCatchupRootWhenNodeHasTooNewGenerationWhileTraversingLeaves() throws Exception
+ {
+ // given
+ MutableBoolean triggered = new MutableBoolean( false );
+
+ // a newer right leaf
+ long rightChild = cursor.getCurrentPageId();
+ node.initializeLeaf( cursor, stableGen, unstableGen );
+ cursor.next();
+
+ GBPTree.RootCatchup rootCatchup = c ->
+ {
+ // Use right child as new start over root to terminate test
+ cursor.next( rightChild );
+ triggered.setTrue();
+ return node.gen( cursor );
+ };
+
+ // a left leaf
+ long leftChild = cursor.getCurrentPageId();
+ node.initializeLeaf( cursor, stableGen - 1, unstableGen - 1 );
+ // with an old pointer to right sibling
+ node.setRightSibling( cursor, rightChild, stableGen - 1, unstableGen - 1 );
+ cursor.next();
+
+ // a root
+ node.initializeInternal( cursor, stableGen - 1, unstableGen - 1 );
+ long keyInRoot = 10L;
+ insertKey.setValue( keyInRoot );
+ node.insertKeyAt( cursor, insertKey, 0, 0, tmp );
+ node.setKeyCount( cursor, 1 );
+ // with old pointer to child (simulating reuse of internal node)
+ node.setChildAt( cursor, leftChild, 0, stableGen, unstableGen );
+
+ // when
+ from.setValue( 1L );
+ to.setValue( 20L );
+ try ( SeekCursor seek = new SeekCursor<>( cursor, readKey, readValue, node, from, to,
+ layout, stableGen - 1, unstableGen - 1, generationSupplier, rootCatchup, unstableGen ) )
+ {
+ while ( seek.next() )
+ {
+ seek.get();
+ }
+ }
+
+ // then
+ assertTrue( triggered.getValue() );
+ }
+
private void checkpoint()
{
stableGen = unstableGen;
@@ -1046,7 +1164,7 @@ private SeekCursor seekCursor( long fromInclusive, long
from.setValue( fromInclusive );
to.setValue( toExclusive );
return new SeekCursor<>( pageCursor, readKey, readValue, node, from,
- to, layout, stableGen, unstableGen, generationSupplier );
+ to, layout, stableGen, unstableGen, generationSupplier, failingRootCatchup, unstableGen );
}
/**
diff --git a/community/index/src/test/java/org/neo4j/index/gbptree/TreeNodeTest.java b/community/index/src/test/java/org/neo4j/index/gbptree/TreeNodeTest.java
index 7ea094c2e23d..13b527eb1236 100644
--- a/community/index/src/test/java/org/neo4j/index/gbptree/TreeNodeTest.java
+++ b/community/index/src/test/java/org/neo4j/index/gbptree/TreeNodeTest.java
@@ -428,82 +428,6 @@ public void shouldReadAndInsertChildren() throws Exception
assertEquals( thirdChild, childAt( cursor, 2, STABLE_GENERATION, UNSTABLE_GENERATION ) );
}
- @Test
- public void shouldThrowWhenGoToCrashGenLeaf() throws Exception
- {
- // GIVEN
- node.initializeLeaf( cursor, STABLE_GENERATION, CRASH_GENERATION );
-
- try
- {
- // WHEN
- node.goTo( cursor, "page id", cursor.getCurrentPageId(), STABLE_GENERATION, UNSTABLE_GENERATION );
- fail( "Expected throw" );
- }
- catch ( TreeInconsistencyException e )
- {
- // THEN
- // Good
- }
- }
-
- @Test
- public void shouldThrowWhenGoToCrashGenInternal() throws Exception
- {
- // GIVEN
- node.initializeInternal( cursor, STABLE_GENERATION, CRASH_GENERATION );
-
- try
- {
- // WHEN
- node.goTo( cursor, "page id", cursor.getCurrentPageId(), STABLE_GENERATION, UNSTABLE_GENERATION );
- fail( "Expected throw" );
- }
- catch ( TreeInconsistencyException e )
- {
- // THEN
- // Good
- }
- }
-
- @Test
- public void shouldThrowWhenGoToHighGenLeaf() throws Exception
- {
- // GIVEN
- node.initializeLeaf( cursor, STABLE_GENERATION, HIGH_GENERATION );
-
- try
- {
- // WHEN
- node.goTo( cursor, "page id", cursor.getCurrentPageId(), STABLE_GENERATION, UNSTABLE_GENERATION );
- fail( "Expected throw" );
- }
- catch ( TreeInconsistencyException e )
- {
- // THEN
- // Good
- }
- }
-
- @Test
- public void shouldThrowWhenGoToHighGenInternal() throws Exception
- {
- // GIVEN
- node.initializeInternal( cursor, STABLE_GENERATION, HIGH_GENERATION );
-
- try
- {
- // WHEN
- node.goTo( cursor, "page id", cursor.getCurrentPageId(), STABLE_GENERATION, UNSTABLE_GENERATION );
- fail( "Expected throw" );
- }
- catch ( TreeInconsistencyException e )
- {
- // THEN
- // Good
- }
- }
-
@Test
public void shouldInsertAndRemoveRandomKeysAndValues() throws Exception
{
diff --git a/community/index/src/test/java/org/neo4j/index/gbptree/TreeStatePairTest.java b/community/index/src/test/java/org/neo4j/index/gbptree/TreeStatePairTest.java
index 428a5ed070b9..df8164a184fc 100644
--- a/community/index/src/test/java/org/neo4j/index/gbptree/TreeStatePairTest.java
+++ b/community/index/src/test/java/org/neo4j/index/gbptree/TreeStatePairTest.java
@@ -126,7 +126,7 @@ void write( PageCursor cursor )
@Override
void write( PageCursor cursor ) throws IOException
{
- TreeState.write( cursor, 1, 2, 3, 4, 5, 6, 7, 8 );
+ TreeState.write( cursor, 1, 2, 3, 4, 5, 6, 7, 8, 9 );
cursor.rewind();
// flip some of the bits as to break the checksum
long someOfTheBits = cursor.getLong( cursor.getOffset() );
@@ -138,7 +138,7 @@ void write( PageCursor cursor ) throws IOException
@Override
void write( PageCursor cursor ) throws IOException
{
- TreeState.write( cursor, 5, 6, 7, 8, 9, 10, 11, 12 );
+ TreeState.write( cursor, 5, 6, 7, 8, 9, 10, 11, 12, 13 );
}
},
OLD_VALID
@@ -146,7 +146,7 @@ void write( PageCursor cursor ) throws IOException
@Override
void write( PageCursor cursor ) throws IOException
{
- TreeState.write( cursor, 2, 3, 4, 5, 6, 7, 8, 9 );
+ TreeState.write( cursor, 2, 3, 4, 5, 6, 7, 8, 9, 10 );
}
};
diff --git a/community/index/src/test/java/org/neo4j/index/gbptree/TreeStateTest.java b/community/index/src/test/java/org/neo4j/index/gbptree/TreeStateTest.java
index 15d225e395e9..ba1e61877ba2 100644
--- a/community/index/src/test/java/org/neo4j/index/gbptree/TreeStateTest.java
+++ b/community/index/src/test/java/org/neo4j/index/gbptree/TreeStateTest.java
@@ -59,7 +59,7 @@ public void shouldReadValidPage() throws Exception
{
// GIVEN valid state
long pageId = cursor.getCurrentPageId();
- TreeState expected = new TreeState( pageId, 1, 2, 3, 4, 5, 6, 7, 8, true );
+ TreeState expected = new TreeState( pageId, 1, 2, 3, 4, 5, 6, 7, 8, 9, true );
write( cursor, expected );
cursor.rewind();
@@ -75,7 +75,7 @@ public void readBrokenStateShouldFail() throws Exception
{
// GIVEN broken state
long pageId = cursor.getCurrentPageId();
- TreeState expected = new TreeState( pageId, 1, 2, 3, 4, 5, 6, 7, 8, true );
+ TreeState expected = new TreeState( pageId, 1, 2, 3, 4, 5, 6, 7, 8, 9, true );
write( cursor, expected );
cursor.rewind();
assertTrue( TreeState.read( cursor ).isValid() );
@@ -99,7 +99,7 @@ public void shouldNotWriteInvalidStableGeneration() throws Exception
try
{
long pageId = cursor.getCurrentPageId();
- write( cursor, new TreeState( pageId, generation, 2, 3, 4, 5, 6, 7, 8, true ) );
+ write( cursor, new TreeState( pageId, generation, 2, 3, 4, 5, 6, 7, 8, 9, true ) );
fail( "Should have failed" );
}
catch ( IllegalArgumentException e )
@@ -118,7 +118,7 @@ public void shouldNotWriteInvalidUnstableGeneration() throws Exception
try
{
long pageId = cursor.getCurrentPageId();
- write( cursor, new TreeState( pageId, 1, generation, 3, 4, 5, 6, 7, 8, true ) );
+ write( cursor, new TreeState( pageId, 1, generation, 3, 4, 5, 6, 7, 8, 9, true ) );
fail( "Should have failed" );
}
catch ( IllegalArgumentException e )
@@ -141,6 +141,7 @@ private void write( PageCursor cursor, TreeState origin )
origin.stableGeneration(),
origin.unstableGeneration(),
origin.rootId(),
+ origin.rootGen(),
origin.lastId(),
origin.freeListWritePageId(),
origin.freeListReadPageId(),