Skip to content

Commit

Permalink
Make the fast page cache tests pass
Browse files Browse the repository at this point in the history
- OptiLock.writeLock has been changed to tryWriteLock and no longer blocks.
  This means OptiLock is now entirely non-blocking and allocation free.
- A bunch of data race bugs have been fixed, particularly around page faulting,
  pinning, eviction, and background flushing.
- Pages are now always exclusively locked when they are in the freelist. They
  are locked by eviction and unlocked by page faulting. This solves a race
  where a page pin, an eviction and a page fault happen concurrently and
  interleave in a particular way.
- RandomPageCacheTestHarness now also simulates entity locks, and ensures that
  records are locked when written to.
- Flushing a paged file will now spin on exclusively locked pages in its
  translation table. This way, the flush won't skip pages that are being looked
at by the eviction thread or the background flush thread. This is important
  because the flushes might fail, in which case we don't want to miss dirty
  pages since this can lead to lost data. Especially if we are flushing the
  file because we want to unmap it.

The slow page cache tests are still failing, and we still haven't allowed
multiple open page cursors per thread.
  • Loading branch information
chrisvest committed Jan 20, 2016
1 parent fe4bb8f commit 02a5a30
Show file tree
Hide file tree
Showing 13 changed files with 310 additions and 330 deletions.
Expand Up @@ -36,7 +36,7 @@ public interface PagedFile extends AutoCloseable
* *
* This cannot be combined with PF_EXCLUSIVE_LOCK. * This cannot be combined with PF_EXCLUSIVE_LOCK.
*/ */
int PF_SHARED_LOCK = 1; int PF_SHARED_LOCK = 1; // TODO rename PF_SHARED_READ_LOCK
/** /**
* Pin the pages with an exclusive lock. * Pin the pages with an exclusive lock.
* *
Expand All @@ -45,7 +45,7 @@ public interface PagedFile extends AutoCloseable
* *
* This cannot be combined with PF_SHARED_LOCK. * This cannot be combined with PF_SHARED_LOCK.
*/ */
int PF_EXCLUSIVE_LOCK = 1 << 1; int PF_EXCLUSIVE_LOCK = 1 << 1; // TODO rename to PF_SHARED_WRITE_LOCK
/** /**
* Disallow pinning and navigating to pages outside the range of the * Disallow pinning and navigating to pages outside the range of the
* underlying file. * underlying file.
Expand Down Expand Up @@ -160,6 +160,9 @@ public interface PagedFile extends AutoCloseable
* *
* If this is the last handle to the file, it will be flushed and closed. * If this is the last handle to the file, it will be flushed and closed.
* *
* Note that this operation assumes that there are no write page cursors open on the paged file. If there are, then
* their writes may be lost, as they might miss the last flush that can happen on their data.
*
* @see AutoCloseable#close() * @see AutoCloseable#close()
* @throws IOException instead of the Exception superclass as defined in AutoCloseable, if . * @throws IOException instead of the Exception superclass as defined in AutoCloseable, if .
*/ */
Expand Down
Expand Up @@ -58,7 +58,7 @@ final class MuninnPage extends OptiLock implements Page
public Object nextFree; public Object nextFree;


private PageSwapper swapper; private PageSwapper swapper;
private long filePageId = PageCursor.UNBOUND_PAGE_ID; private volatile long filePageId = PageCursor.UNBOUND_PAGE_ID;


public MuninnPage( int cachePageSize, MemoryManager memoryManager ) public MuninnPage( int cachePageSize, MemoryManager memoryManager )
{ {
Expand Down Expand Up @@ -106,7 +106,7 @@ public void markAsClean()
public void incrementUsage() public void incrementUsage()
{ {
// This is intentionally left benignly racy for performance. // This is intentionally left benignly racy for performance.
byte usage = UnsafeUtil.getByteVolatile( this, usageStampOffset ); byte usage = getUsageCounter();
if ( usage < 4 ) // avoid cache sloshing by not doing a write if counter is already maxed out if ( usage < 4 ) // avoid cache sloshing by not doing a write if counter is already maxed out
{ {
usage <<= 1; usage <<= 1;
Expand All @@ -120,14 +120,19 @@ public void incrementUsage()
public boolean decrementUsage() public boolean decrementUsage()
{ {
// This is intentionally left benignly racy for performance. // This is intentionally left benignly racy for performance.
byte usage = UnsafeUtil.getByteVolatile( this, usageStampOffset ); byte usage = getUsageCounter();
usage >>>= 1; usage >>>= 1;
UnsafeUtil.putByteVolatile( this, usageStampOffset, usage ); UnsafeUtil.putByteVolatile( this, usageStampOffset, usage );
return usage == 0; return usage == 0;
} }


private byte getUsageCounter()
{
return UnsafeUtil.getByteVolatile( this, usageStampOffset );
}

/** /**
* NOTE: This method must be called while holding a pessimistic lock on the page. * NOTE: This method must be called while holding an exclusive lock on the page.
*/ */
public void flush( FlushEventOpportunity flushOpportunity ) throws IOException public void flush( FlushEventOpportunity flushOpportunity ) throws IOException
{ {
Expand Down Expand Up @@ -159,7 +164,7 @@ private void doFlush(
} }


/** /**
* NOTE: This method MUST be called while holding the page write lock. * NOTE: This method MUST be called while holding the exclusive page lock.
*/ */
public void fault( public void fault(
PageSwapper swapper, PageSwapper swapper,
Expand Down Expand Up @@ -242,8 +247,8 @@ public long getFilePageId()
@Override @Override
public String toString() public String toString()
{ {
return format( "MuninnPage@%x[%s -> %x, filePageId = %s%s, swapper = %s]%s", return format( "MuninnPage@%x[%s -> %x, filePageId = %s%s, swapper = %s, usage counter = %s, %s]",
hashCode(), getCachePageId(), pointer, filePageId, (isDirty() ? ", dirty" : ""), hashCode(), getCachePageId(), pointer, filePageId, (isDirty() ? ", dirty" : ""),
swapper, super.toString() ); swapper, getUsageCounter(), super.toString() );
} }
} }
Expand Up @@ -246,6 +246,7 @@ public MuninnPageCache(
while ( pageIndex --> 0 ) while ( pageIndex --> 0 )
{ {
MuninnPage page = new MuninnPage( cachePageSize, memoryManager ); MuninnPage page = new MuninnPage( cachePageSize, memoryManager );
page.tryExclusiveLock(); // All pages in the free-list are exclusively locked, and unlocked by page fault.
pages[pageIndex] = page; pages[pageIndex] = page;


if ( pageList == null ) if ( pageList == null )
Expand Down Expand Up @@ -584,7 +585,7 @@ int getPageCacheId()
return pageCacheId; return pageCacheId;
} }


MuninnPage grabFreePage( PageFaultEvent faultEvent ) throws IOException MuninnPage grabFreeAndExclusivelyLockedPage( PageFaultEvent faultEvent ) throws IOException
{ {
// Review the comment on the freelist field before making changes to // Review the comment on the freelist field before making changes to
// this part of the code. // this part of the code.
Expand Down Expand Up @@ -672,7 +673,10 @@ private MuninnPage cooperativelyEvict( PageFaultEvent faultEvent ) throws IOExce
} }
finally finally
{ {
page.unlockExclusive(); if ( !evicted )
{
page.unlockExclusive();
}
} }
} }
} }
Expand Down Expand Up @@ -774,7 +778,8 @@ else if ( freelistHead.getClass() == FreePage.class )


int evictPages( int pageCountToEvict, int clockArm, EvictionRunEvent evictionRunEvent ) int evictPages( int pageCountToEvict, int clockArm, EvictionRunEvent evictionRunEvent )
{ {
while ( pageCountToEvict > 0 && !closed ) { while ( pageCountToEvict > 0 && !closed )
{
if ( clockArm == pages.length ) if ( clockArm == pages.length )
{ {
clockArm = 0; clockArm = 0;
Expand Down Expand Up @@ -808,10 +813,6 @@ int evictPages( int pageCountToEvict, int clockArm, EvictionRunEvent evictionRun
{ {
pageEvicted = page.isLoaded() && evictPage( page, evictionEvent ); pageEvicted = page.isLoaded() && evictPage( page, evictionEvent );
} }
finally
{
page.unlockExclusive();
}


if ( pageEvicted ) if ( pageEvicted )
{ {
Expand All @@ -826,8 +827,14 @@ int evictPages( int pageCountToEvict, int clockArm, EvictionRunEvent evictionRun
freePage.setNext( (FreePage) current ); freePage.setNext( (FreePage) current );
nextListHead = freePage; nextListHead = freePage;
} }
while ( !compareAndSetFreelistHead( while ( !compareAndSetFreelistHead( current, nextListHead ) );
current, nextListHead ) ); }
else
{
// Pages we put into the free-list remain exclusively locked until a page fault unlocks them
// If we somehow failed to evict the page, then we need to make sure that we release the
// exclusive lock.
page.unlockExclusive();
} }
} }
} }
Expand Down
Expand Up @@ -139,12 +139,12 @@ public final File getCurrentFile()
/** /**
* Pin the desired file page to this cursor, page faulting it into memory if it isn't there already. * Pin the desired file page to this cursor, page faulting it into memory if it isn't there already.
* @param filePageId The file page id we want to pin this cursor to. * @param filePageId The file page id we want to pin this cursor to.
* @param exclusive 'true' if we will be taking an exclusive lock on the page as part of the pin. * @param writeLock 'true' if we will be taking a write lock on the page as part of the pin.
* @throws IOException if anything goes wrong with the pin, most likely during a page fault. * @throws IOException if anything goes wrong with the pin, most likely during a page fault.
*/ */
protected void pin( long filePageId, boolean exclusive ) throws IOException protected void pin( long filePageId, boolean writeLock ) throws IOException
{ {
pinEvent = tracer.beginPin( exclusive, filePageId, swapper ); pinEvent = tracer.beginPin( writeLock, filePageId, swapper );
int chunkId = MuninnPagedFile.computeChunkId( filePageId ); int chunkId = MuninnPagedFile.computeChunkId( filePageId );
// The chunkOffset is the addressing offset into the chunk array object for the relevant array slot. Using // The chunkOffset is the addressing offset into the chunk array object for the relevant array slot. Using
// this, we can access the array slot with Unsafe. // this, we can access the array slot with Unsafe.
Expand Down Expand Up @@ -173,10 +173,16 @@ protected void pin( long filePageId, boolean exclusive ) throws IOException
// been evicted, and possibly even page faulted into something else. In this case, we discard the // been evicted, and possibly even page faulted into something else. In this case, we discard the
// item and try again, as the eviction thread would have set the chunk array slot to null. // item and try again, as the eviction thread would have set the chunk array slot to null.
MuninnPage page = (MuninnPage) item; MuninnPage page = (MuninnPage) item;
lockPage( page ); if ( tryLockPage( page ) ) // TODO try simplifying this
if ( !page.isBoundTo( swapper, filePageId ) ) {
if ( !page.isBoundTo( swapper, filePageId ) )
{
unlockPage( page );
item = null;
}
}
else
{ {
unlockPage( page );
item = null; item = null;
} }
} }
Expand All @@ -201,8 +207,7 @@ private Object[][] expandTranslationTableCapacity( int chunkId )
return pagedFile.expandCapacity( chunkId ); return pagedFile.expandCapacity( chunkId );
} }


private Object initiatePageFault( long filePageId, long chunkOffset, Object[] chunk ) private Object initiatePageFault( long filePageId, long chunkOffset, Object[] chunk ) throws IOException
throws IOException
{ {
BinaryLatch latch = new BinaryLatch(); BinaryLatch latch = new BinaryLatch();
Object item = null; Object item = null;
Expand Down Expand Up @@ -237,27 +242,18 @@ private MuninnPage pageFault(
try try
{ {
// The grabFreePage method might throw. // The grabFreePage method might throw.
page = pagedFile.grabFreePage( faultEvent ); page = pagedFile.grabFreeAndExclusivelyLockedPage( faultEvent );


// We got a free page, and we know that we have race-free access to it. Well, it's not entirely race // We got a free page, and we know that we have race-free access to it. Well, it's not entirely race
// free, because other paged files might have it in their translation tables (or rather, their reads of // free, because other paged files might have it in their translation tables (or rather, their reads of
// their translation tables might race with eviction) and try to pin it. // their translation tables might race with eviction) and try to pin it.
// However, they will all fail because when they try to pin, the page will either be 1) free, 2) bound to // However, they will all fail because when they try to pin, because the page will be exclusively locked
// our file, or 3) the page is write locked. // and possibly bound to our page.
if ( !page.tryExclusiveLock() )
{
throw new AssertionError( "Unable to take exclusive lock on free page" );
}
} }
catch ( Throwable throwable ) catch ( Throwable throwable )
{ {
// Make sure to unstuck the page fault latch. // Make sure to unstuck the page fault latch.
UnsafeUtil.putObjectVolatile( chunk, chunkOffset, null ); abortPageFault( throwable, chunk, chunkOffset, latch, faultEvent );
latch.release();
faultEvent.done( throwable );
pinEvent.done();
// We don't need to worry about the 'stamp' here, because the writeLock call is uninterruptible, so it
// can't really fail.
throw throwable; throw throwable;
} }
try try
Expand All @@ -275,19 +271,30 @@ private MuninnPage pageFault(
// Make sure to unlock the page, so the eviction thread can pick up our trash. // Make sure to unlock the page, so the eviction thread can pick up our trash.
page.unlockExclusive(); page.unlockExclusive();
// Make sure to unstuck the page fault latch. // Make sure to unstuck the page fault latch.
UnsafeUtil.putObjectVolatile( chunk, chunkOffset, null ); abortPageFault( throwable, chunk, chunkOffset, latch, faultEvent );
latch.release();
faultEvent.done( throwable );
pinEvent.done();
throw throwable; throw throwable;
} }
convertPageFaultLock( page ); // Put the page in the translation table before we undo the exclusive lock, as we could otherwise race with
// eviction, and the onEvict callback expects to find a MuninnPage object in the table.
UnsafeUtil.putObjectVolatile( chunk, chunkOffset, page ); UnsafeUtil.putObjectVolatile( chunk, chunkOffset, page );
// Once we page has been published to the translation table, we can convert our exclusive lock to whatever we
// need for the page cursor.
convertPageFaultLock( page );
latch.release(); latch.release();
faultEvent.done(); faultEvent.done();
return page; return page;
} }


private void abortPageFault( Throwable throwable, Object[] chunk, long chunkOffset,
BinaryLatch latch,
PageFaultEvent faultEvent ) throws IOException
{
UnsafeUtil.putObjectVolatile( chunk, chunkOffset, null );
latch.release();
faultEvent.done( throwable );
pinEvent.done();
}

protected long assertPagedFileStillMappedAndGetIdOfLastPage() protected long assertPagedFileStillMappedAndGetIdOfLastPage()
{ {
return pagedFile.getLastPageId(); return pagedFile.getLastPageId();
Expand All @@ -299,7 +306,7 @@ protected long assertPagedFileStillMappedAndGetIdOfLastPage()


protected abstract void pinCursorToPage( MuninnPage page, long filePageId, PageSwapper swapper ); protected abstract void pinCursorToPage( MuninnPage page, long filePageId, PageSwapper swapper );


protected abstract void lockPage( MuninnPage page ); protected abstract boolean tryLockPage( MuninnPage page );


protected abstract void unlockPage( MuninnPage page ); protected abstract void unlockPage( MuninnPage page );


Expand Down
Expand Up @@ -203,26 +203,38 @@ void flushAndForceInternal( FlushEventOpportunity flushOpportunity ) throws IOEx
// TODO The clean pages in question must still be loaded, though. Otherwise we'll end up writing // TODO The clean pages in question must still be loaded, though. Otherwise we'll end up writing
// TODO garbage to the file. // TODO garbage to the file.
int pagesGrabbed = 0; int pagesGrabbed = 0;
for ( Object element : chunk ) chunkLoop:for ( int i = 0; i < chunk.length; i++ )
{ {
filePageId++; filePageId++;
if ( element instanceof MuninnPage )
long offset = computeChunkOffset( filePageId );
// We might race with eviction, but we also mustn't miss a dirty page, so we loop until we succeed
// in getting a lock on all available pages.
for (;;)
{ {
MuninnPage page = (MuninnPage) element; Object element = UnsafeUtil.getObjectVolatile( chunk, offset );
page.writeLock(); if ( element instanceof MuninnPage )
if ( page.isBoundTo( swapper, filePageId ) && page.isDirty() )
{
// The page is still bound to the expected file and file page id after we locked it,
// so we didn't race with eviction and faulting, and the page is dirty.
// So we add it to our IO vector.
pages[pagesGrabbed] = page;
pagesGrabbed++;
continue;
}
else
{ {
page.unlockWrite(); MuninnPage page = (MuninnPage) element;
if ( !page.tryWriteLock() )
{
continue;
}
if ( page.isBoundTo( swapper, filePageId ) && page.isDirty() )
{
// The page is still bound to the expected file and file page id after we locked it,
// so we didn't race with eviction and faulting, and the page is dirty.
// So we add it to our IO vector.
pages[pagesGrabbed] = page;
pagesGrabbed++;
continue chunkLoop;
}
else
{
page.unlockWrite();
}
} }
break;
} }
if ( pagesGrabbed > 0 ) if ( pagesGrabbed > 0 )
{ {
Expand Down Expand Up @@ -252,6 +264,15 @@ private int vectoredFlush( MuninnPage[] pages, int pagesGrabbed, FlushEventOppor
// Write the pages vector // Write the pages vector
MuninnPage firstPage = pages[0]; MuninnPage firstPage = pages[0];
long startFilePageId = firstPage.getFilePageId(); long startFilePageId = firstPage.getFilePageId();

// Mark the flushed pages as clean before our flush, so concurrent page writes can mark it as dirty and
// we'll be able to write those changes out on the next flush.
for ( int j = 0; j < pagesGrabbed; j++ )
{
// If the flush fails, we'll undo this
pages[j].markAsClean();
}

flush = flushOpportunity.beginFlush( startFilePageId, firstPage.getCachePageId(), swapper ); flush = flushOpportunity.beginFlush( startFilePageId, firstPage.getCachePageId(), swapper );
long bytesWritten = swapper.write( startFilePageId, pages, 0, pagesGrabbed ); long bytesWritten = swapper.write( startFilePageId, pages, 0, pagesGrabbed );


Expand All @@ -260,17 +281,16 @@ private int vectoredFlush( MuninnPage[] pages, int pagesGrabbed, FlushEventOppor
flush.addPagesFlushed( pagesGrabbed ); flush.addPagesFlushed( pagesGrabbed );
flush.done(); flush.done();


// Mark the flushed pages as clean
for ( int j = 0; j < pagesGrabbed; j++ )
{
pages[j].markAsClean();
}

// There are now 0 'grabbed' pages // There are now 0 'grabbed' pages
return 0; return 0;
} }
catch ( IOException ioe ) catch ( IOException ioe )
{ {
// Undo marking the pages as clean
for ( int j = 0; j < pagesGrabbed; j++ )
{
pages[j].markAsDirty();
}
if ( flush != null ) if ( flush != null )
{ {
flush.done( ioe ); flush.done( ioe );
Expand Down Expand Up @@ -399,9 +419,9 @@ int getRefCount()
* none are immediately available. * none are immediately available.
* @param faultEvent The trace event for the current page fault. * @param faultEvent The trace event for the current page fault.
*/ */
MuninnPage grabFreePage( PageFaultEvent faultEvent ) throws IOException MuninnPage grabFreeAndExclusivelyLockedPage( PageFaultEvent faultEvent ) throws IOException
{ {
return pageCache.grabFreePage( faultEvent ); return pageCache.grabFreeAndExclusivelyLockedPage( faultEvent );
} }


/** /**
Expand Down
Expand Up @@ -56,9 +56,10 @@ public boolean next() throws IOException
} }


@Override @Override
protected void lockPage( MuninnPage page ) protected boolean tryLockPage( MuninnPage page )
{ {
lockStamp = page.tryOptimisticReadLock(); lockStamp = page.tryOptimisticReadLock();
return true;
} }


@Override @Override
Expand Down

0 comments on commit 02a5a30

Please sign in to comment.