Skip to content

Commit

Permalink
SeekCursor concurrency fix
Browse files Browse the repository at this point in the history
- Guard for nodes being reused while pinned by SeekCursor
A SeekCursor can read unexpected data caused by node page being
reused in two cases:
1. The page currently pinned is reused -> node will have a changed generation.
2. The page is reused after reading a pointer to it but before "landing" on it
-> node will have a generation larger than that of the followed pointer.

We now guard for this by verifying that generation
of node is not larger than the generation of the pointer that
was followed to reach the node and by verifying that the generation
of the node does not change during the SeekCursor's visit.
IF such a generaton abnormailty is observerd, the seek is restarted
from the root with an updated range starting from the previously returned
key.
Note that it IS valid for node generation to be lower than the followed
pointer generation because the generation of the pointer is updated
when we copy if from an older node, e.g. rightSibling of right node in a split.

ConsistenctChecker is also updated with this generation verification.

- verifyGen is no longer done in TreeNode.goTo for two reasons:
1. Reading generation field here is not valid because it is not always
guarded by a shouldRetry or a write lock.
2. The assumption that pointer generation always match generation
of the node that it points to is false. The correct assumption
is that the node generation can not be larger than the pointer
generation.

- verifyType is no longer done in TreeNode.goTo. See reason nr1 above.

- SeekCursor now reread keyCount on every call to next
Previously weird behaviour was observed where the keyCount was
not updated propery. The reason why is unclear but re-reading
it every time also reduse complexity. Win.

- Fixes issue with rediscovering key in SeekCursor
where there would be a need to rediscover key due to read key being too far
to the left and no result had been returned yet.
Previously this would just set pos to 0. This would be a problem being
too far to the left because this would result in an endless loop where
the cursor would never leave the node and never leave the next() either.

Other changes:
- Add very aggressive read concurrency test
- Extract long running tests from GBPTreeTest to GBPTreeIT
- Add possibility to not print values in TreePrinter
- Fix TreePrinter to extract pointer value using GSPP
  • Loading branch information
tinwelint committed Dec 16, 2016
1 parent a2b2864 commit 64ee456
Show file tree
Hide file tree
Showing 16 changed files with 1,130 additions and 486 deletions.
Expand Up @@ -138,6 +138,11 @@ public long nextLong( long n )
return Math.abs( nextLong() ) % n;
}

public long nextLong( long origin, long bound )
{
return nextLong( (bound - origin) + 1L ) + origin;
}

// ============================
// Methods from Randoms
// ============================
Expand Down
Expand Up @@ -28,6 +28,8 @@

import static java.lang.String.format;

import static org.neo4j.index.gbptree.GenSafePointerPair.pointer;

/**
* <ul>
* Checks:
Expand Down Expand Up @@ -57,11 +59,11 @@ class ConsistencyChecker<KEY>
this.unstableGeneration = unstableGeneration;
}

public boolean check( PageCursor cursor ) throws IOException
public boolean check( PageCursor cursor, long expectedGen ) throws IOException
{
assertOnTreeNode( cursor );
KeyRange<KEY> openRange = new KeyRange<>( comparator, null, null, layout, null );
boolean result = checkSubtree( cursor, openRange, 0 );
boolean result = checkSubtree( cursor, openRange, expectedGen, 0 );

// Assert that rightmost node on each level has empty right sibling.
rightmostPerLevel.forEach( RightmostInChain::assertLast );
Expand All @@ -77,7 +79,8 @@ private void assertOnTreeNode( PageCursor cursor )
}
}

private boolean checkSubtree( PageCursor cursor, KeyRange<KEY> range, int level ) throws IOException
private boolean checkSubtree( PageCursor cursor, KeyRange<KEY> range, long expectedGen, int level )
throws IOException
{
// check header pointers
assertNoCrashOrBrokenPointerInGSPP(
Expand All @@ -89,9 +92,13 @@ private boolean checkSubtree( PageCursor cursor, KeyRange<KEY> range, int level

long pageId = assertSiblings( cursor, level );

checkNewGenPointerGen( cursor );

assertPointerGenMatchesGen( cursor, expectedGen );

if ( node.isInternal( cursor ) )
{
assertKeyOrderAndSubtrees( cursor, pageId, range, level );
assertKeyOrderAndSubtrees( cursor, pageId, range, expectedGen, level );
}
else if ( node.isLeaf( cursor ) )
{
Expand All @@ -105,6 +112,34 @@ else if ( node.isLeaf( cursor ) )
return true;
}

private void assertPointerGenMatchesGen( PageCursor cursor, long expectedGen )
{
long nodeGen = node.gen( cursor );
assert nodeGen <= expectedGen : "Expected node:" + cursor.getCurrentPageId() + " gen:" + nodeGen +
" to be ≤ pointer gen:" + expectedGen;
}

private void checkNewGenPointerGen( PageCursor cursor ) throws IOException
{
long newGen = node.newGen( cursor, stableGeneration, unstableGeneration );
if ( TreeNode.isNode( newGen ) )
{
System.err.println( "WARNING: we ended up on an old generation " + cursor.getCurrentPageId() +
" which had newGen:" + pointer( newGen ) );
long newGenGen = node.pointerGen( cursor, newGen );
long origin = cursor.getCurrentPageId();
node.goTo( cursor, "newGen", newGen );
try
{
assertPointerGenMatchesGen( cursor, newGenGen );
}
finally
{
node.goTo( cursor, "back", origin );
}
}
}

// Assumption: We traverse the tree from left to right on every level
private long assertSiblings( PageCursor cursor, int level )
{
Expand All @@ -119,7 +154,8 @@ private long assertSiblings( PageCursor cursor, int level )
return rightmost.assertNext( cursor );
}

private void assertKeyOrderAndSubtrees( PageCursor cursor, long pageId, KeyRange<KEY> range, int level )
private void assertKeyOrderAndSubtrees( PageCursor cursor, long pageId, KeyRange<KEY> range, long expectedGen,
int level )
throws IOException
{
int keyCount = node.keyCount( cursor );
Expand All @@ -138,7 +174,8 @@ private void assertKeyOrderAndSubtrees( PageCursor cursor, long pageId, KeyRange
}

long child = childAt( cursor, pos );
node.goTo( cursor, "child at pos " + pos, child, stableGeneration, unstableGeneration );
long childGen = node.pointerGen( cursor, child );
node.goTo( cursor, "child at pos " + pos, child );
if ( pos == 0 )
{
childRange = range.restrictRight( readKey );
Expand All @@ -147,19 +184,20 @@ private void assertKeyOrderAndSubtrees( PageCursor cursor, long pageId, KeyRange
{
childRange = range.restrictLeft( prev ).restrictRight( readKey );
}
checkSubtree( cursor, childRange, level + 1 );
node.goTo( cursor, "parent", pageId, stableGeneration, unstableGeneration );
checkSubtree( cursor, childRange, childGen, level + 1 );
node.goTo( cursor, "parent", pageId );

layout.copyKey( readKey, prev );
pos++;
}

// Check last child
long child = childAt( cursor, pos );
node.goTo( cursor, "child at pos " + pos, child, stableGeneration, unstableGeneration );
long childGen = node.pointerGen( cursor, child );
node.goTo( cursor, "child at pos " + pos, child );
childRange = range.restrictLeft( prev );
checkSubtree( cursor, childRange, level + 1 );
node.goTo( cursor, "parent", pageId, stableGeneration, unstableGeneration );
checkSubtree( cursor, childRange, childGen, level + 1 );
node.goTo( cursor, "parent", pageId );
}

private long childAt( PageCursor cursor, int pos )
Expand Down
86 changes: 72 additions & 14 deletions community/index/src/main/java/org/neo4j/index/gbptree/GBPTree.java
Expand Up @@ -42,7 +42,6 @@
import org.neo4j.io.pagecache.PagedFile;

import static java.lang.String.format;

import static org.neo4j.index.gbptree.Generation.generation;
import static org.neo4j.index.gbptree.Generation.stableGeneration;
import static org.neo4j.index.gbptree.Generation.unstableGeneration;
Expand Down Expand Up @@ -134,6 +133,11 @@ default void checkpointCompleted()
}
}

interface RootCatchup
{
long goTo( PageCursor cursor ) throws IOException;
}

/**
* No-op {@link Monitor}.
*/
Expand Down Expand Up @@ -213,10 +217,23 @@ default void checkpointCompleted()
*/
private volatile long generation;

/**
* Guard for being able to read rootId and rootGen atomically. Read by looping.
*
* Odd -> being updated
* Even -> stable
*/
private volatile int rootUpdateGuard;

/**
* Current page id which contains the root of the tree.
*/
private volatile long rootId = IdSpace.MIN_TREE_NODE_ID;
private long rootId = IdSpace.MIN_TREE_NODE_ID;

/**
* Generation of current {@link #rootId}.
*/
private long rootGen;

/**
* Supplier of generation to readers. This supplier will actually very rarely be used, because normally
Expand All @@ -226,6 +243,8 @@ default void checkpointCompleted()
*/
private final LongSupplier generationSupplier = () -> generation;

private final RootCatchup rootCatchup = this::goToRootAndGetGeneration;

/**
* Called on certain events.
*/
Expand Down Expand Up @@ -255,6 +274,7 @@ public GBPTree( PageCache pageCache, File indexFile, Layout<KEY,VALUE> layout, i
this.indexFile = indexFile;
this.monitor = monitor;
this.generation = Generation.generation( GenSafePointer.MIN_GENERATION, GenSafePointer.MIN_GENERATION + 1 );
this.rootGen = Generation.unstableGeneration( generation );
this.layout = layout;
this.pagedFile = openOrCreate( pageCache, indexFile, tentativePageSize, layout );
this.bTreeNode = new TreeNode<>( pageSize, layout );
Expand Down Expand Up @@ -343,7 +363,7 @@ private void loadState( PagedFile pagedFile ) throws IOException
Pair<TreeState,TreeState> states = readStatePages( pagedFile );
TreeState state = TreeStatePair.selectNewestValidState( states );
generation = Generation.generation( state.stableGeneration(), state.unstableGeneration() );
rootId = state.rootId();
setRoot( state.rootId(), state.rootGen() );

long lastId = state.lastId();
long freeListWritePageId = state.freeListWritePageId();
Expand All @@ -361,7 +381,7 @@ private void writeState( PagedFile pagedFile ) throws IOException
try ( PageCursor cursor = pagedFile.io( pageToOverwrite, PagedFile.PF_SHARED_WRITE_LOCK ) )
{
PageCursorUtil.goTo( cursor, "state page", pageToOverwrite );
TreeState.write( cursor, stableGeneration( generation ), unstableGeneration( generation ), rootId,
TreeState.write( cursor, stableGeneration( generation ), unstableGeneration( generation ), rootId, rootGen,
freeList.lastId(), freeList.writePageId(), freeList.readPageId(),
freeList.writePos(), freeList.readPos() );
checkOutOfBounds( cursor );
Expand Down Expand Up @@ -459,17 +479,36 @@ private PagedFile mapWithCorrectPageSize( PageCache pageCache, File indexFile, P
@Override
public RawCursor<Hit<KEY,VALUE>,IOException> seek( KEY fromInclusive, KEY toExclusive ) throws IOException
{
PageCursor cursor = pagedFile.io( rootId, PagedFile.PF_SHARED_READ_LOCK );
KEY key = layout.newKey();
VALUE value = layout.newValue();
long generation = this.generation;
long stableGeneration = stableGeneration( generation );
long unstableGeneration = unstableGeneration( generation );
goToRoot( cursor, stableGeneration, unstableGeneration );

PageCursor cursor = pagedFile.io( 0L /*ignored*/, PagedFile.PF_SHARED_READ_LOCK );
long rootGen = goToRootAndGetGeneration( cursor );

// Returns cursor which is now initiated with left-most leaf node for the specified range
return new SeekCursor<>( cursor, key, value, bTreeNode, fromInclusive, toExclusive, layout,
stableGeneration, unstableGeneration, generationSupplier );
stableGeneration, unstableGeneration, generationSupplier, rootCatchup, rootGen );
}

private long goToRootAndGetGeneration( PageCursor cursor ) throws IOException
{
int rootUpdateGuard;
long rootId;
long rootGen;
// Atomically read rootId and rootGen with this little loop
do
{
rootUpdateGuard = this.rootUpdateGuard;
rootId = this.rootId;
rootGen = this.rootGen;
}
while ( rootUpdateGuard != this.rootUpdateGuard || rootUpdateGuard % 2 == 1 );
long generation = this.generation;
goToRoot( cursor, rootId, stableGeneration( generation ), unstableGeneration( generation ) );
return rootGen;
}

@Override
Expand Down Expand Up @@ -560,7 +599,13 @@ private void releaseWriter()

private void goToRoot( PageCursor cursor, long stableGeneration, long unstableGeneration ) throws IOException
{
bTreeNode.goTo( cursor, "root", rootId, stableGeneration, unstableGeneration );
goToRoot( cursor, rootId, stableGeneration, unstableGeneration );
}

private void goToRoot( PageCursor cursor, long rootId, long stableGeneration, long unstableGeneration )
throws IOException
{
bTreeNode.goTo( cursor, "root", rootId );
}

/**
Expand All @@ -579,14 +624,19 @@ public void prepareForRecovery() throws IOException
pagedFile.flushAndForce();
}

// Utility method
void printTree() throws IOException
{
printTree( true );
}

// Utility method
void printTree( boolean printValues ) throws IOException
{
try ( PageCursor cursor = pagedFile.io( rootId, PagedFile.PF_SHARED_READ_LOCK ) )
{
cursor.next();
TreePrinter.printTree( cursor, bTreeNode, layout,
stableGeneration( generation ), unstableGeneration( generation ), System.out );
stableGeneration( generation ), unstableGeneration( generation ), System.out, printValues );
}
}

Expand All @@ -598,7 +648,7 @@ boolean consistencyCheck() throws IOException
cursor.next();
return new ConsistencyChecker<>( bTreeNode, layout,
stableGeneration( generation ), unstableGeneration( generation ) )
.check( cursor );
.check( cursor, rootGen );
}
}

Expand Down Expand Up @@ -663,11 +713,11 @@ public void merge( KEY key, VALUE value, ValueMerger<VALUE> valueMerger ) throws
bTreeNode.setKeyCount( cursor, 1 );
bTreeNode.setChildAt( cursor, structurePropagation.left, 0, stableGeneration, unstableGeneration );
bTreeNode.setChildAt( cursor, structurePropagation.right, 1, stableGeneration, unstableGeneration );
rootId = newRootId;
setRoot( newRootId, unstableGeneration );
}
else if ( structurePropagation.hasNewGen )
{
rootId = structurePropagation.left;
setRoot( structurePropagation.left, unstableGeneration );
}
structurePropagation.clear();

Expand All @@ -683,7 +733,7 @@ public VALUE remove( KEY key ) throws IOException
stableGeneration, unstableGeneration );
if ( structurePropagation.hasNewGen )
{
rootId = structurePropagation.left;
setRoot( structurePropagation.left, unstableGeneration );
}
structurePropagation.clear();

Expand All @@ -704,4 +754,12 @@ public void close() throws IOException
releaseWriter();
}
}

void setRoot( long newRootId, long generation )
{
rootUpdateGuard++; // is now odd
rootId = newRootId;
rootGen = generation;
rootUpdateGuard++; // is now even
}
}

0 comments on commit 64ee456

Please sign in to comment.