Skip to content

Commit

Permalink
Merge pull request #11227 from jakewins/jake/tx-term-nolog
Browse files Browse the repository at this point in the history
Don't log tx terminated stack on client disconnect
  • Loading branch information
lutovich committed Mar 19, 2018
2 parents 3753c06 + 6eb10c8 commit 37d647d
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,28 @@
package org.neo4j.bolt.v1.messaging;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.neo4j.bolt.runtime.BoltConnection;
import org.neo4j.bolt.v1.packstream.PackOutputClosedException;
import org.neo4j.bolt.v1.runtime.BoltResponseHandler;
import org.neo4j.bolt.v1.runtime.Neo4jError;
import org.neo4j.bolt.v1.runtime.spi.BoltResult;
import org.neo4j.kernel.api.exceptions.Status;
import org.neo4j.logging.Log;
import org.neo4j.values.AnyValue;
import org.neo4j.values.virtual.MapValue;
import org.neo4j.values.virtual.VirtualValues;

class MessageProcessingHandler implements BoltResponseHandler
{
// Errors that are expected when the client disconnects mid-operation
private static final Set<Status> CLIENT_MID_OP_DISCONNECT_ERRORS = new HashSet<>( Arrays.asList(
Status.Transaction.Terminated, Status.Transaction.LockClientStopped ) );
protected final Map<String,AnyValue> metadata = new HashMap<>();

protected final Log log;
Expand Down Expand Up @@ -139,8 +146,23 @@ private void publishError( BoltResponseMessageHandler<IOException> out, Neo4jErr
}
catch ( PackOutputClosedException e )
{
// we tried to write error back to the client and realized that the underlying channel is closed
// log a warning, client driver might have just been stopped and closed all socket connections
// Can't write error to the client, because the connection is closed.
// Very likely our error is related to the connection being closed.

// If the error is that the transaction was terminated, then the error is a side-effect of
// us cleaning up stuff that was running when the client disconnected. Log a warning without
// stack trace to highlight clients are disconnecting while stuff is running:
if ( CLIENT_MID_OP_DISCONNECT_ERRORS.contains( error.status() ) )
{
log.warn( "Client %s disconnected while query was running. Session has been cleaned up. " +
"This can be caused by temporary network problems, but if you see this often, " +
"ensure your applications are properly waiting for operations to complete before exiting.",
e.clientAddress() );
return;
}

// If the error isn't that the tx was terminated, log it to the console for debugging. It's likely
// there are other "ok" errors that we can whitelist into the conditional above over time.
log.warn( "Unable to send error back to the client. " + e.getMessage(), error.cause() );
}
catch ( Throwable t )
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,16 @@

public class PackOutputClosedException extends IOException
{
public PackOutputClosedException( String message )
private final String clientAddress;

public PackOutputClosedException( String message, String clientAddress )
{
super( message );
this.clientAddress = clientAddress;
}

public String clientAddress()
{
return clientAddress;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ public class ChunkedOutput implements PackOutput, BoltResponseMessageBoundaryHoo
private final int maxChunkSize;
private final AtomicBoolean closed = new AtomicBoolean( false );
private final TransportThrottleGroup throttleGroup;
private final Channel channel;

private ByteBuf buffer;
private Channel channel;
private int currentChunkHeaderOffset;

/** Are currently in the middle of writing a chunk? */
Expand Down Expand Up @@ -186,7 +186,9 @@ private void ensure( int size ) throws IOException
assert size <= maxChunkSize : size + " > " + maxChunkSize;
if ( closed.get() )
{
throw new PackOutputClosedException( "Network channel towards " + channel.remoteAddress() + " is closed. " + "Client has probably been stopped." );
throw new PackOutputClosedException(
String.format( "Network channel towards %s is closed. Client has probably been stopped.",
channel.remoteAddress() ), String.format( "%s", channel.remoteAddress() ) );
}
int toWriteSize = chunkOpen ? size : size + CHUNK_HEADER_SIZE;
synchronized ( this )
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.neo4j.bolt.runtime.BoltConnection;
import org.neo4j.bolt.v1.packstream.PackOutputClosedException;
import org.neo4j.bolt.v1.runtime.Neo4jError;
import org.neo4j.graphdb.TransactionTerminatedException;
import org.neo4j.kernel.api.exceptions.Status;
import org.neo4j.logging.AssertableLogProvider;
import org.neo4j.logging.Log;
Expand All @@ -49,14 +50,12 @@ public void shouldCallHaltOnUnexpectedFailures() throws Exception
{
// Given
BoltResponseMessageHandler<IOException> msgHandler = newResponseHandlerMock();
doThrow( new RuntimeException( "Something went horribly wrong" ) )
.when( msgHandler )
.onSuccess( any( MapValue.class ) );
doThrow( new RuntimeException( "Something went horribly wrong" ) ).when( msgHandler ).onSuccess(
any( MapValue.class ) );

BoltConnection connection = mock( BoltConnection.class );
MessageProcessingHandler handler =
new MessageProcessingHandler( msgHandler, mock( Runnable.class ),
connection, mock( Log.class ) );
new MessageProcessingHandler( msgHandler, mock( Runnable.class ), connection, mock( Log.class ) );

// When
handler.onFinish();
Expand All @@ -68,70 +67,84 @@ public void shouldCallHaltOnUnexpectedFailures() throws Exception
@Test
public void shouldLogOriginalErrorWhenOutputIsClosed() throws Exception
{
testLoggingOfOriginalErrorWhenOutputIsClosed( false );
testLoggingOfOriginalErrorWhenOutputIsClosed( Neo4jError.from( new RuntimeException( "Non-fatal error" ) ) );
}

@Test
public void shouldLogOriginalFatalErrorWhenOutputIsClosed() throws Exception
{
testLoggingOfOriginalErrorWhenOutputIsClosed( true );
testLoggingOfOriginalErrorWhenOutputIsClosed( Neo4jError.fatalFrom( new RuntimeException( "Fatal error" ) ) );
}

@Test
public void shouldLogWriteErrorAndOriginalErrorWhenUnknownFailure() throws Exception
{
testLoggingOfWriteErrorAndOriginalErrorWhenUnknownFailure( false );
testLoggingOfWriteErrorAndOriginalErrorWhenUnknownFailure(
Neo4jError.from( new RuntimeException( "Non-fatal error" ) ) );
}

@Test
public void shouldLogWriteErrorAndOriginalFatalErrorWhenUnknownFailure() throws Exception
{
testLoggingOfWriteErrorAndOriginalErrorWhenUnknownFailure( true );
testLoggingOfWriteErrorAndOriginalErrorWhenUnknownFailure(
Neo4jError.fatalFrom( new RuntimeException( "Fatal error" ) ) );
}

private static void testLoggingOfOriginalErrorWhenOutputIsClosed( boolean fatalError ) throws Exception
@Test
public void shouldLogShortWarningOnClientDisconnectMidwayThroughQuery() throws Exception
{
AssertableLogProvider logProvider = new AssertableLogProvider();
Log log = logProvider.getLog( "Test" );

PackOutputClosedException outputClosed = new PackOutputClosedException( "Output closed" );
BoltResponseMessageHandler<IOException> responseHandler = newResponseHandlerMock( fatalError, outputClosed );
// Connections dying is not exceptional per-se, so we don't need to fill the log with
// eye-catching stack traces; but it could be indicative of some issue, so log a brief
// warning in the debug log at least.

MessageProcessingHandler handler = new MessageProcessingHandler( responseHandler, mock( Runnable.class ),
mock( BoltConnection.class ), log );
// Given
PackOutputClosedException outputClosed = new PackOutputClosedException( "Output closed", "<client>" );
Neo4jError txTerminated =
Neo4jError.from( new TransactionTerminatedException( Status.Transaction.Terminated ) );

RuntimeException originalError = new RuntimeException( "Hi, I'm the original error" );
markFailed( handler, fatalError, originalError );
// When
AssertableLogProvider logProvider = emulateFailureWritingError( txTerminated, outputClosed );

logProvider.assertExactly( inLog( "Test" ).warn(
startsWith( "Unable to send error back to the client" ),
equalTo( originalError ) ) );
// Then
logProvider.assertExactly( inLog( "Test" ).warn( equalTo(
"Client %s disconnected while query was running. Session has been cleaned up. " +
"This can be caused by temporary network problems, but if you see this often, ensure your " +
"applications are properly waiting for operations to complete before exiting." ),
equalTo( "<client>" ) ) );
}

private static void testLoggingOfWriteErrorAndOriginalErrorWhenUnknownFailure( boolean fatalError ) throws Exception
private static void testLoggingOfOriginalErrorWhenOutputIsClosed( Neo4jError original ) throws Exception
{
AssertableLogProvider logProvider = new AssertableLogProvider();
Log log = logProvider.getLog( "Test" );
PackOutputClosedException outputClosed = new PackOutputClosedException( "Output closed", "<client>" );
AssertableLogProvider logProvider = emulateFailureWritingError( original, outputClosed );
logProvider.assertExactly( inLog( "Test" ).warn( startsWith( "Unable to send error back to the client" ),
equalTo( original.cause() ) ) );
}

private static void testLoggingOfWriteErrorAndOriginalErrorWhenUnknownFailure( Neo4jError original )
throws Exception
{
RuntimeException outputError = new RuntimeException( "Output failed" );
BoltResponseMessageHandler<IOException> responseHandler = newResponseHandlerMock( fatalError, outputError );

MessageProcessingHandler handler = new MessageProcessingHandler( responseHandler, mock( Runnable.class ),
mock( BoltConnection.class ), log );

RuntimeException originalError = new RuntimeException( "Hi, I'm the original error" );
markFailed( handler, fatalError, originalError );

logProvider.assertExactly( inLog( "Test" ).error(
startsWith( "Unable to send error back to the client" ),
both( equalTo( outputError ) ).and( hasSuppressed( originalError ) ) ) );
AssertableLogProvider logProvider = emulateFailureWritingError( original, outputError );
logProvider.assertExactly( inLog( "Test" ).error( startsWith( "Unable to send error back to the client" ),
both( equalTo( outputError ) ).and( hasSuppressed( original.cause() ) ) ) );
}

private static void markFailed( MessageProcessingHandler handler, boolean fatalError, Throwable error )
private static AssertableLogProvider emulateFailureWritingError( Neo4jError error, Throwable errorDuringWrite )
throws Exception
{
Neo4jError neo4jError = fatalError ? Neo4jError.fatalFrom( error ) : Neo4jError.from( error );
handler.markFailed( neo4jError );
AssertableLogProvider logProvider = new AssertableLogProvider();
BoltResponseMessageHandler<IOException> responseHandler =
newResponseHandlerMock( error.isFatal(), errorDuringWrite );

MessageProcessingHandler handler =
new MessageProcessingHandler( responseHandler, mock( Runnable.class ), mock( BoltConnection.class ),
logProvider.getLog( "Test" ) );

handler.markFailed( error );
handler.onFinish();

return logProvider;
}

private static BoltResponseMessageHandler<IOException> newResponseHandlerMock( boolean fatalError, Throwable error )
Expand Down

0 comments on commit 37d647d

Please sign in to comment.