Skip to content

Commit

Permalink
More consice logging about log rotation, pruning and checkpointing
Browse files Browse the repository at this point in the history
Previously the logging regarding log rotation pruning and checkpoint was very
verbose w/o actually including much information, e.g check pointing was logged like this:

```
2017-10-03 17:56:19.589+0000 INFO [o.n.k.i.t.l.c.CheckPointerImpl] Check Pointing triggered by scheduler for time threshold [30731131]:  Starting check pointing...
2017-10-03 17:56:19.589+0000 INFO [o.n.k.i.t.l.c.CheckPointerImpl] Check Pointing triggered by scheduler for time threshold [30731131]:  Starting store flush...
2017-10-03 17:56:19.906+0000 INFO [o.n.k.i.s.c.CountsTracker] About to rotate counts store at transaction 30731131 to [.../graph.db/neostore.counts.db.a], from [.../graph.db/neostore.counts.db.b].
2017-10-03 17:56:19.936+0000 INFO [o.n.k.i.s.c.CountsTracker] Successfully rotated counts store at transaction 30731131 to [.../graph.db/neostore.counts.db.a], from [.../graph.db/neostore.counts.db.b].
2017-10-03 17:56:20.088+0000 INFO [o.n.k.i.t.l.c.CheckPointerImpl] Check Pointing triggered by scheduler for time threshold [30731131]:  Store flush completed
2017-10-03 17:56:20.089+0000 INFO [o.n.k.i.t.l.c.CheckPointerImpl] Check Pointing triggered by scheduler for time threshold [30731131]:  Starting appending check point entry into the tx log...
2017-10-03 17:56:20.091+0000 INFO [o.n.k.i.t.l.c.CheckPointerImpl] Check Pointing triggered by scheduler for time threshold [30731131]:  Appending check point entry into the tx log completed
2017-10-03 17:56:20.091+0000 INFO [o.n.k.i.t.l.c.CheckPointerImpl] Check Pointing triggered by scheduler for time threshold [30731131]:  Check pointing completed
```

Log pruning like this:

```
2017-10-03 17:56:20.091+0000 INFO [o.n.k.i.t.l.p.LogPruningImpl] Log Rotation [12546]:  Starting log pruning.
2017-10-03 17:56:20.091+0000 INFO [o.n.k.i.t.l.p.LogPruningImpl] Log Rotation [12546]:  Log pruning complete.
```

Log rotation like this:

```
2017-10-03 19:56:16.131+0000 INFO [o.n.k.NeoStoreDataSource] Rotating log version:45
2017-10-03 19:56:16.132+0000 INFO [o.n.k.NeoStoreDataSource] Opened logical log [.../graph.db/neostore.transaction.db.46] version=46, lastTxId=30731176 (clean)
2017-10-03 19:56:16.133+0000 INFO [o.n.k.NeoStoreDataSource] Finished rotating log version:45
```

These events happen quite frequently so a more consice format will make it easier to read and take up less space in the log.
The logging has been changed to look like, for check pointing:

```
2017-10-11 10:01:00.050+0000 INFO [o.n.k.i.t.l.c.CheckPointerImpl] Checkpoint triggered by scheduler for tx count threshold @ txId: 3193169 checkpoint started...
2017-10-11 10:01:00.092+0000 INFO [o.n.k.i.s.c.CountsTracker] Rotated counts store at transaction 3199085 to [.../graph.db/neostore.counts.db.b], from [.../graph.db/neostore.counts.db.a].
2017-10-11 10:01:00.317+0000 INFO [o.n.k.i.t.l.c.CheckPointerImpl] Checkpoint triggered by scheduler for tx count threshold @ txId: 3193169 checkpoint completed in 245ms
```

Log pruning:
```
2017-10-11 10:01:00.359+0000 INFO [o.n.k.i.t.l.p.LogPruningImpl] Pruned log versions 0-3, last checkpoint was made in version 7
```

Log rotation:
```
2017-10-11 10:01:01.271+0000 INFO [o.n.k.NeoStoreDataSource] Rotated to transaction log [.../graph.db/neostore.transaction.db.8] version=8, last transaction in previous log=3585572
```

which is less text and more information than the previous format had.
  • Loading branch information
tinwelint committed Oct 19, 2017
1 parent 597fe98 commit 124eb3e
Show file tree
Hide file tree
Showing 11 changed files with 61 additions and 34 deletions.
Expand Up @@ -106,14 +106,12 @@ public void failedToOpenStoreFile( File path, Exception error )
@Override @Override
public void beforeRotation( File source, File target, Headers headers ) public void beforeRotation( File source, File target, Headers headers )
{ {
log.info( format( "About to rotate counts store at transaction %d to [%s], from [%s].",
headers.get( FileVersion.FILE_VERSION ).txId, target, source ) );
} }


@Override @Override
public void rotationSucceeded( File source, File target, Headers headers ) public void rotationSucceeded( File source, File target, Headers headers )
{ {
log.info( format( "Successfully rotated counts store at transaction %d to [%s], from [%s].", log.info( format( "Rotated counts store at transaction %d to [%s], from [%s].",
headers.get( FileVersion.FILE_VERSION ).txId, target, source ) ); headers.get( FileVersion.FILE_VERSION ).txId, target, source ) );
} }


Expand Down
Expand Up @@ -81,13 +81,11 @@ public void failToRecoverTransactionsAfterPosition( Throwable t, LogPosition rec
@Override @Override
public void startedRotating( long currentVersion ) public void startedRotating( long currentVersion )
{ {
log.info( format( "Rotating log version:%d", currentVersion ) );
} }


@Override @Override
public void finishedRotating( long currentVersion ) public void finishedRotating( long currentVersion )
{ {
log.info( format( "Finished rotating log version:%d", currentVersion ) );
} }


@Override @Override
Expand All @@ -101,10 +99,10 @@ public void transactionRecovered( long txId )
} }


@Override @Override
public void opened( File logFile, long logVersion, long lastTransactionId, boolean clean ) public void created( File logFile, long logVersion, long lastTransactionId )
{ {
log.info( format( "Opened logical log [%s] version=%d, lastTxId=%d (%s)", log.info( format( "Rotated to transaction log [%s] version=%d, last transaction in previous log=%d",
logFile, logVersion, lastTransactionId, clean ? "clean" : "recovered" ) ); logFile, logVersion, lastTransactionId ) );
} }


@Override @Override
Expand Down
Expand Up @@ -44,12 +44,12 @@ public class PhysicalLogFile implements LogFile, Lifecycle
{ {
public interface Monitor public interface Monitor
{ {
void opened( File logFile, long logVersion, long lastTransactionId, boolean clean ); void created( File logFile, long logVersion, long lastTransactionId );


class Adapter implements Monitor class Adapter implements Monitor
{ {
@Override @Override
public void opened( File logFile, long logVersion, long lastTransactionId, boolean clean ) public void created( File logFile, long logVersion, long lastTransactionId )
{ {
} }
} }
Expand Down Expand Up @@ -244,7 +244,7 @@ private PhysicalLogVersionedStoreChannel createLogChannelForVersion( long forVer
writeLogHeader( headerBuffer, forVersion, lastTxId ); writeLogHeader( headerBuffer, forVersion, lastTxId );
logHeaderCache.putHeader( forVersion, lastTxId ); logHeaderCache.putHeader( forVersion, lastTxId );
storeChannel.writeAll( headerBuffer ); storeChannel.writeAll( headerBuffer );
monitor.opened( toOpen, forVersion, lastTxId, true ); monitor.created( toOpen, forVersion, lastTxId );
} }
byte formatVersion = header == null ? CURRENT_LOG_VERSION : header.logFormatVersion; byte formatVersion = header == null ? CURRENT_LOG_VERSION : header.logFormatVersion;
return new PhysicalLogVersionedStoreChannel( storeChannel, forVersion, formatVersion ); return new PhysicalLogVersionedStoreChannel( storeChannel, forVersion, formatVersion );
Expand Down
Expand Up @@ -34,6 +34,10 @@
import org.neo4j.logging.LogProvider; import org.neo4j.logging.LogProvider;
import org.neo4j.storageengine.api.StorageEngine; import org.neo4j.storageengine.api.StorageEngine;


import static java.lang.System.currentTimeMillis;

import static org.neo4j.helpers.Format.duration;

public class CheckPointerImpl extends LifecycleAdapter implements CheckPointer public class CheckPointerImpl extends LifecycleAdapter implements CheckPointer
{ {
private final TransactionAppender appender; private final TransactionAppender appender;
Expand Down Expand Up @@ -145,7 +149,6 @@ private long doCheckPoint( TriggerInfo triggerInfo, LogCheckPointEvent logCheckP
long lastClosedTransactionId = lastClosedTransaction[0]; long lastClosedTransactionId = lastClosedTransaction[0];
LogPosition logPosition = new LogPosition( lastClosedTransaction[1], lastClosedTransaction[2] ); LogPosition logPosition = new LogPosition( lastClosedTransaction[1], lastClosedTransaction[2] );
String prefix = triggerInfo.describe( lastClosedTransactionId ); String prefix = triggerInfo.describe( lastClosedTransactionId );
msgLog.info( prefix + " Starting check pointing..." );
/* /*
* Check kernel health before going into waiting for transactions to be closed, to avoid * Check kernel health before going into waiting for transactions to be closed, to avoid
* getting into a scenario where we would await a condition that would potentially never * getting into a scenario where we would await a condition that would potentially never
Expand All @@ -156,20 +159,18 @@ private long doCheckPoint( TriggerInfo triggerInfo, LogCheckPointEvent logCheckP
* First we flush the store. If we fail now or during the flush, on recovery we'll find the * First we flush the store. If we fail now or during the flush, on recovery we'll find the
* earlier check point and replay from there all the log entries. Everything will be ok. * earlier check point and replay from there all the log entries. Everything will be ok.
*/ */
msgLog.info( prefix + " Starting store flush..." ); msgLog.info( prefix + " checkpoint started..." );
long startTime = currentTimeMillis();
storageEngine.flushAndForce( ioLimiter ); storageEngine.flushAndForce( ioLimiter );
msgLog.info( prefix + " Store flush completed" );
/* /*
* Check kernel health before going to write the next check point. In case of a panic this check point * Check kernel health before going to write the next check point. In case of a panic this check point
* will be aborted, which is the safest alternative so that the next recovery will have a chance to * will be aborted, which is the safest alternative so that the next recovery will have a chance to
* repair the damages. * repair the damages.
*/ */
databaseHealth.assertHealthy( IOException.class ); databaseHealth.assertHealthy( IOException.class );
msgLog.info( prefix + " Starting appending check point entry into the tx log..." );
appender.checkPoint( logPosition, logCheckPointEvent ); appender.checkPoint( logPosition, logCheckPointEvent );
threshold.checkPointHappened( lastClosedTransactionId ); threshold.checkPointHappened( lastClosedTransactionId );
msgLog.info( prefix + " Appending check point entry into the tx log completed" ); msgLog.info( prefix + " checkpoint completed in " + duration( currentTimeMillis() - startTime ) );
msgLog.info( prefix + " Check pointing completed" );
/* /*
* Prune up to the version pointed from the latest check point, * Prune up to the version pointed from the latest check point,
* since it might be an earlier version than the current log version. * since it might be an earlier version than the current log version.
Expand All @@ -183,7 +184,7 @@ private long doCheckPoint( TriggerInfo triggerInfo, LogCheckPointEvent logCheckP
// Why only log failure here? It's because check point can potentially be made from various // Why only log failure here? It's because check point can potentially be made from various
// points of execution e.g. background thread triggering check point if needed and during // points of execution e.g. background thread triggering check point if needed and during
// shutdown where it's better to have more control over failure handling. // shutdown where it's better to have more control over failure handling.
msgLog.error( "Error performing check point", t ); msgLog.error( "Checkpoint failed", t );
throw t; throw t;
} }
} }
Expand Down
Expand Up @@ -40,7 +40,7 @@ public SimpleTriggerInfo( String triggerName )
public String describe( long transactionId ) public String describe( long transactionId )
{ {
String info = description == null ? triggerName : triggerName + " for " + description; String info = description == null ? triggerName : triggerName + " for " + description;
return "Check Pointing triggered by " + info + " [" + transactionId + "]: "; return "Checkpoint triggered by " + info + " @ txId: " + transactionId;
} }


@Override @Override
Expand Down
Expand Up @@ -21,5 +21,12 @@


public interface LogPruneStrategy public interface LogPruneStrategy
{ {
void prune( long upToLogVersion ); interface Monitor
{
void logsPruned( long upToVersion, long fromVersion, long toVersion );

void noLogsPruned( long upToVersion );
}

void prune( long upToVersion, Monitor monitor );
} }
Expand Up @@ -34,7 +34,7 @@ public class LogPruneStrategyFactory
public static final LogPruneStrategy NO_PRUNING = new LogPruneStrategy() public static final LogPruneStrategy NO_PRUNING = new LogPruneStrategy()
{ {
@Override @Override
public void prune( long upToLogVersion ) public void prune( long upToLogVersion, Monitor monitor )
{ {
// do nothing // do nothing
} }
Expand Down
Expand Up @@ -22,6 +22,7 @@
import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock; import java.util.concurrent.locks.ReentrantLock;


import org.neo4j.kernel.impl.transaction.log.pruning.LogPruneStrategy.Monitor;
import org.neo4j.logging.Log; import org.neo4j.logging.Log;
import org.neo4j.logging.LogProvider; import org.neo4j.logging.LogProvider;


Expand All @@ -33,11 +34,27 @@ public class LogPruningImpl implements LogPruning
private final Lock pruneLock = new ReentrantLock(); private final Lock pruneLock = new ReentrantLock();
private final LogPruneStrategy pruneStrategy; private final LogPruneStrategy pruneStrategy;
private final Log msgLog; private final Log msgLog;
private final Monitor monitor;


public LogPruningImpl( LogPruneStrategy pruneStrategy, LogProvider logProvider ) public LogPruningImpl( LogPruneStrategy pruneStrategy, LogProvider logProvider )
{ {
this.pruneStrategy = pruneStrategy; this.pruneStrategy = pruneStrategy;
this.msgLog = logProvider.getLog( getClass() ); this.msgLog = logProvider.getLog( getClass() );
this.monitor = new LogPruneStrategy.Monitor()
{
@Override
public void logsPruned( long upToVersion, long fromVersion, long toVersion )
{
msgLog.info( "Pruned log versions " + fromVersion + "-" + toVersion +
", last checkpoint was made in version " + upToVersion );
}

@Override
public void noLogsPruned( long upToVersion )
{
msgLog.info( "No log version pruned, last checkpoint was made in version " + upToVersion );
}
};
} }


@Override @Override
Expand All @@ -47,16 +64,13 @@ public void pruneLogs( long upToVersion )
// and it's OK to skip pruning if another one is doing so right now. // and it's OK to skip pruning if another one is doing so right now.
if ( pruneLock.tryLock() ) if ( pruneLock.tryLock() )
{ {
String prefix = "Log Rotation [" + upToVersion + "]: ";
msgLog.info( prefix + " Starting log pruning." );
try try
{ {
pruneStrategy.prune( upToVersion ); pruneStrategy.prune( upToVersion, monitor );
} }
finally finally
{ {
pruneLock.unlock(); pruneLock.unlock();
msgLog.info( prefix + " Log pruning complete." );
} }
} }
} }
Expand Down
Expand Up @@ -45,15 +45,15 @@ public ThresholdBasedPruneStrategy( FileSystemAbstraction fileSystem, LogFileInf
} }


@Override @Override
public void prune( long upToLogVersion ) public void prune( long upToVersion, Monitor monitor )
{ {
if ( upToLogVersion == INITIAL_LOG_VERSION ) if ( upToVersion == INITIAL_LOG_VERSION )
{ {
return; return;
} }


threshold.init(); threshold.init();
long upper = upToLogVersion - 1; long upper = upToVersion - 1;
boolean exceeded = false; boolean exceeded = false;
while ( upper >= 0 ) while ( upper >= 0 )
{ {
Expand All @@ -75,6 +75,7 @@ public void prune( long upToLogVersion )


if ( !exceeded ) if ( !exceeded )
{ {
monitor.noLogsPruned( upToVersion );
return; return;
} }


Expand All @@ -97,7 +98,7 @@ public void prune( long upToLogVersion )
* This if statement does nothing more complicated than checking if the next-to-last log would be prunned * This if statement does nothing more complicated than checking if the next-to-last log would be prunned
* and simply skipping it if so. * and simply skipping it if so.
*/ */
if ( upper == upToLogVersion - 1 ) if ( upper == upToVersion - 1 )
{ {
upper--; upper--;
} }
Expand All @@ -108,5 +109,6 @@ public void prune( long upToLogVersion )
{ {
fileSystem.deleteFile( files.getLogFileForVersion( version ) ); fileSystem.deleteFile( files.getLogFileForVersion( version ) );
} }
monitor.logsPruned( upToVersion, lower, upper );
} }
} }
Expand Up @@ -231,7 +231,7 @@ int numberOfRotations()
} }


@Override @Override
public void opened( File logFile, long logVersion, long lastTransactionId, boolean clean ) public void created( File logFile, long logVersion, long lastTransactionId )
{ {
} }
} }
Expand Down
Expand Up @@ -26,6 +26,7 @@
import org.neo4j.io.fs.FileSystemAbstraction; import org.neo4j.io.fs.FileSystemAbstraction;
import org.neo4j.kernel.impl.transaction.log.LogFileInformation; import org.neo4j.kernel.impl.transaction.log.LogFileInformation;
import org.neo4j.kernel.impl.transaction.log.PhysicalLogFiles; import org.neo4j.kernel.impl.transaction.log.PhysicalLogFiles;
import org.neo4j.kernel.impl.transaction.log.pruning.LogPruneStrategy.Monitor;


import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq; import static org.mockito.ArgumentMatchers.eq;
Expand All @@ -47,6 +48,7 @@ public class ThresholdBasedPruneStrategyTest
public void shouldNotDeleteAnythingIfThresholdDoesNotAllow() throws Exception public void shouldNotDeleteAnythingIfThresholdDoesNotAllow() throws Exception
{ {
// Given // Given
File fileName0 = new File( "logical.log.v0" );
File fileName1 = new File( "logical.log.v1" ); File fileName1 = new File( "logical.log.v1" );
File fileName2 = new File( "logical.log.v2" ); File fileName2 = new File( "logical.log.v2" );
File fileName3 = new File( "logical.log.v3" ); File fileName3 = new File( "logical.log.v3" );
Expand All @@ -60,26 +62,30 @@ public void shouldNotDeleteAnythingIfThresholdDoesNotAllow() throws Exception
when( files.getLogFileForVersion( 3 ) ).thenReturn( fileName3 ); when( files.getLogFileForVersion( 3 ) ).thenReturn( fileName3 );
when( files.getLogFileForVersion( 2 ) ).thenReturn( fileName2 ); when( files.getLogFileForVersion( 2 ) ).thenReturn( fileName2 );
when( files.getLogFileForVersion( 1 ) ).thenReturn( fileName1 ); when( files.getLogFileForVersion( 1 ) ).thenReturn( fileName1 );
when( files.getLogFileForVersion( 0 ) ).thenReturn( fileName0 );


when( fileSystem.fileExists( fileName6 ) ).thenReturn( true ); when( fileSystem.fileExists( fileName6 ) ).thenReturn( true );
when( fileSystem.fileExists( fileName5 ) ).thenReturn( true ); when( fileSystem.fileExists( fileName5 ) ).thenReturn( true );
when( fileSystem.fileExists( fileName4 ) ).thenReturn( true ); when( fileSystem.fileExists( fileName4 ) ).thenReturn( true );
when( fileSystem.fileExists( fileName3 ) ).thenReturn( true ); when( fileSystem.fileExists( fileName3 ) ).thenReturn( true );
when( fileSystem.fileExists( fileName2 ) ).thenReturn( true ); when( fileSystem.fileExists( fileName2 ) ).thenReturn( true );
when( fileSystem.fileExists( fileName1 ) ).thenReturn( true ); when( fileSystem.fileExists( fileName1 ) ).thenReturn( true );
when( fileSystem.fileExists( fileName0 ) ).thenReturn( true );


when( fileSystem.getFileSize( any() ) ).thenReturn( LOG_HEADER_SIZE + 1L ); when( fileSystem.getFileSize( any() ) ).thenReturn( LOG_HEADER_SIZE + 1L );


when( threshold.reached( any(), anyLong(), any() ) ).thenReturn( false ); when( threshold.reached( any(), anyLong(), any() ) ).thenReturn( false );


final ThresholdBasedPruneStrategy strategy = new ThresholdBasedPruneStrategy( fileSystem, logFileInfo, files, threshold ); final ThresholdBasedPruneStrategy strategy = new ThresholdBasedPruneStrategy( fileSystem, logFileInfo, files, threshold );
Monitor monitor = mock( Monitor.class );


// When // When
strategy.prune( 7L ); strategy.prune( 7L, monitor );


// Then // Then
verify( threshold, times( 1 ) ).init(); verify( threshold, times( 1 ) ).init();
verify( fileSystem, times( 0 ) ).deleteFile( any() ); verify( fileSystem, times( 0 ) ).deleteFile( any() );
verify( monitor ).noLogsPruned( 7L );
} }


@Test @Test
Expand Down Expand Up @@ -119,16 +125,17 @@ public void shouldDeleteJustWhatTheThresholdSays() throws Exception
when( fileSystem.getFileSize( any() ) ).thenReturn( LOG_HEADER_SIZE + 1L ); when( fileSystem.getFileSize( any() ) ).thenReturn( LOG_HEADER_SIZE + 1L );


final ThresholdBasedPruneStrategy strategy = new ThresholdBasedPruneStrategy( final ThresholdBasedPruneStrategy strategy = new ThresholdBasedPruneStrategy(
fileSystem, logFileInfo, files, threshold fileSystem, logFileInfo, files, threshold );
); Monitor monitor = mock( Monitor.class );


// When // When
strategy.prune( 7L ); strategy.prune( 7L, monitor );


// Then // Then
verify( threshold, times( 1 ) ).init(); verify( threshold, times( 1 ) ).init();
verify( fileSystem, times( 1 ) ).deleteFile( fileName1 ); verify( fileSystem, times( 1 ) ).deleteFile( fileName1 );
verify( fileSystem, times( 1 ) ).deleteFile( fileName2 ); verify( fileSystem, times( 1 ) ).deleteFile( fileName2 );
verify( fileSystem, times( 1 ) ).deleteFile( fileName3 ); verify( fileSystem, times( 1 ) ).deleteFile( fileName3 );
verify( monitor ).logsPruned( 7L, 1, 3 );
} }
} }

0 comments on commit 124eb3e

Please sign in to comment.