diff --git a/community/kernel/src/main/java/org/neo4j/graphdb/factory/GraphDatabaseSettings.java b/community/kernel/src/main/java/org/neo4j/graphdb/factory/GraphDatabaseSettings.java index 7df94a4086eed..688b644b81d56 100644 --- a/community/kernel/src/main/java/org/neo4j/graphdb/factory/GraphDatabaseSettings.java +++ b/community/kernel/src/main/java/org/neo4j/graphdb/factory/GraphDatabaseSettings.java @@ -602,6 +602,12 @@ public String providerName() public static final Setting logical_log_rotation_threshold = buildSetting( "dbms.tx_log.rotation.size", BYTES, "250M" ).constraint( min( ByteUnit.mebiBytes( 1 ) ) ).build(); + @Description( "If `true`, Neo4j will abort recovery if any errors are encountered in the logical log. Setting " + + "this to `false` will allow Neo4j to restore as much as possible from the corrupted log files and ignore " + + "the rest, but, the integrity of the database might be compromised." ) + @Internal + public static final Setting fail_on_corrupted_log_files = setting("unsupported.dbms.tx_log.fail_on_corrupted_log_files", BOOLEAN, TRUE ); + @Description( "Use a quick approach for rebuilding the ID generators. This give quicker recovery time, " + "but will limit the ability to reuse the space of deleted entities." 
) @Internal diff --git a/community/kernel/src/main/java/org/neo4j/kernel/NeoStoreDataSource.java b/community/kernel/src/main/java/org/neo4j/kernel/NeoStoreDataSource.java index 4ed0bd5d61fa3..2757d56b86819 100644 --- a/community/kernel/src/main/java/org/neo4j/kernel/NeoStoreDataSource.java +++ b/community/kernel/src/main/java/org/neo4j/kernel/NeoStoreDataSource.java @@ -160,7 +160,6 @@ import org.neo4j.storageengine.api.StoreFileMetadata; import org.neo4j.storageengine.api.StoreReadLayer; import org.neo4j.time.SystemNanoClock; -import org.neo4j.util.FeatureToggles; import static org.neo4j.helpers.Exceptions.throwIfUnchecked; @@ -224,8 +223,6 @@ boolean applicable( DiagnosticsPhase phase ) } public static final String DEFAULT_DATA_SOURCE_NAME = "nioneodb"; - private final boolean failOnCorruptedLogFiles = FeatureToggles.flag( NeoStoreDataSource.class, - "failOnCorruptedLogFiles", false ); private final Monitors monitors; private final Tracers tracers; @@ -280,6 +277,8 @@ boolean applicable( DiagnosticsPhase phase ) private NeoStoreTransactionLogModule transactionLogModule; private NeoStoreKernelModule kernelModule; + private final boolean failOnCorruptedLogFiles; + public NeoStoreDataSource( File storeDir, Config config, IdGeneratorFactory idGeneratorFactory, LogService logService, JobScheduler scheduler, TokenNameLookup tokenNameLookup, DependencyResolver dependencyResolver, PropertyKeyTokenHolder propertyKeyTokens, @@ -363,6 +362,7 @@ public Iterable all() this.pageCache = pageCache; this.monitors.addMonitorListener( new LoggingLogFileMonitor( msgLog ) ); this.collectionsFactorySupplier = collectionsFactorySupplier; + this.failOnCorruptedLogFiles = config.get( GraphDatabaseSettings.fail_on_corrupted_log_files ); } @Override diff --git a/community/kernel/src/main/java/org/neo4j/kernel/recovery/LogTailScanner.java b/community/kernel/src/main/java/org/neo4j/kernel/recovery/LogTailScanner.java index 59d02a2e12f78..49b993f75be92 100644 --- 
a/community/kernel/src/main/java/org/neo4j/kernel/recovery/LogTailScanner.java +++ b/community/kernel/src/main/java/org/neo4j/kernel/recovery/LogTailScanner.java @@ -20,6 +20,7 @@ package org.neo4j.kernel.recovery; import java.io.IOException; +import java.nio.channels.ClosedByInterruptException; import org.neo4j.kernel.impl.store.UnderlyingStorageException; import org.neo4j.kernel.impl.transaction.log.LogEntryCursor; @@ -37,8 +38,8 @@ import org.neo4j.kernel.impl.transaction.log.files.LogFiles; import org.neo4j.kernel.monitoring.Monitors; -import static org.neo4j.helpers.Exceptions.throwIfUnchecked; import static org.neo4j.kernel.impl.transaction.log.LogVersionRepository.INITIAL_LOG_VERSION; +import static org.neo4j.kernel.recovery.Recovery.throwUnableToCleanRecover; /** * This class collects information about the latest entries in the transaction log. Since the only way we have to collect @@ -132,14 +133,18 @@ else if ( entry instanceof LogEntryStart ) { corruptedTransactionLogs = true; } + } + catch ( Error | ClosedByInterruptException e ) + { + // These should not be parsing errors + throw e; } catch ( Throwable t ) { monitor.corruptedLogFile( version, t ); if ( failOnCorruptedLogFiles ) { - throwIfUnchecked( t ); - throw new RuntimeException( t ); + throwUnableToCleanRecover( t ); } corruptedTransactionLogs = true; } diff --git a/community/kernel/src/main/java/org/neo4j/kernel/recovery/Recovery.java b/community/kernel/src/main/java/org/neo4j/kernel/recovery/Recovery.java index c4cabb21b378c..32f07f27573f1 100644 --- a/community/kernel/src/main/java/org/neo4j/kernel/recovery/Recovery.java +++ b/community/kernel/src/main/java/org/neo4j/kernel/recovery/Recovery.java @@ -20,7 +20,9 @@ package org.neo4j.kernel.recovery; import java.io.IOException; +import java.nio.channels.ClosedByInterruptException; +import org.neo4j.graphdb.factory.GraphDatabaseSettings; import org.neo4j.kernel.impl.core.StartupStatisticsProvider; import 
org.neo4j.kernel.impl.transaction.CommittedTransactionRepresentation; import org.neo4j.kernel.impl.transaction.log.LogPosition; @@ -30,7 +32,6 @@ import org.neo4j.kernel.impl.util.monitoring.ProgressReporter; import org.neo4j.kernel.lifecycle.LifecycleAdapter; -import static org.neo4j.helpers.Exceptions.throwIfUnchecked; import static org.neo4j.storageengine.api.TransactionApplicationMode.RECOVERY; import static org.neo4j.storageengine.api.TransactionApplicationMode.REVERSE_RECOVERY; @@ -116,12 +117,17 @@ public void init() throws IOException recoveryToPosition = transactionsToRecover.position(); } } + catch ( Error | ClosedByInterruptException e ) + { + // We do not want to truncate logs based on these exceptions. Since users can influence them with config changes, + // they are able to work around this if truncation is really needed. + throw e; + } catch ( Throwable t ) { if ( failOnCorruptedLogFiles ) { - throwIfUnchecked( t ); - throw new RuntimeException( t ); + throwUnableToCleanRecover( t ); } if ( lastTransaction != null ) { @@ -142,6 +148,15 @@ public void init() throws IOException monitor.recoveryCompleted( numberOfRecoveredTransactions ); } + static void throwUnableToCleanRecover( Throwable t ) + { + throw new RuntimeException( + "Error reading transaction logs, recovery not possible. To force the database to start anyway, you can specify '" + + GraphDatabaseSettings.fail_on_corrupted_log_files.name() + "=false'. This will try to recover as much " + + "as possible and then truncate the corrupt part of the transaction log. 
Doing this means your database " + + "integrity might be compromised, please consider restoring from a consistent backup instead.", t ); + } + private void initProgressReporter( RecoveryStartInformation recoveryStartInformation, CommittedTransactionRepresentation lastReversedTransaction ) { diff --git a/community/kernel/src/test/java/org/neo4j/kernel/RecoveryCorruptedTransactionLogIT.java b/community/kernel/src/test/java/org/neo4j/kernel/RecoveryCorruptedTransactionLogIT.java index 37ddc97a36223..817b79ffa2ed8 100644 --- a/community/kernel/src/test/java/org/neo4j/kernel/RecoveryCorruptedTransactionLogIT.java +++ b/community/kernel/src/test/java/org/neo4j/kernel/RecoveryCorruptedTransactionLogIT.java @@ -20,7 +20,6 @@ package org.neo4j.kernel; import org.hamcrest.Matchers; -import org.junit.After; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -40,6 +39,7 @@ import org.neo4j.graphdb.Node; import org.neo4j.graphdb.RelationshipType; import org.neo4j.graphdb.Transaction; +import org.neo4j.graphdb.factory.GraphDatabaseSettings; import org.neo4j.helpers.collection.MultiSet; import org.neo4j.io.fs.OpenMode; import org.neo4j.io.fs.StoreChannel; @@ -79,7 +79,6 @@ import org.neo4j.test.rule.RandomRule; import org.neo4j.test.rule.TestDirectory; import org.neo4j.test.rule.fs.DefaultFileSystemRule; -import org.neo4j.util.FeatureToggles; import static org.hamcrest.Matchers.emptyArray; import static org.hamcrest.Matchers.greaterThan; @@ -114,12 +113,6 @@ public void setUp() throws Exception logFiles = buildDefaultLogFiles(); } - @After - public void tearDown() - { - FeatureToggles.set( NeoStoreDataSource.class, "failOnCorruptedLogFiles", false ); - } - @Test public void evenTruncateNewerTransactionLogFile() throws IOException { @@ -136,7 +129,8 @@ public void evenTruncateNewerTransactionLogFile() throws IOException removeLastCheckpointRecordFromLastLogFile(); addRandomBytesToLastLogFile( this::randomBytes ); - database = (GraphDatabaseAPI) 
databaseFactory.newEmbeddedDatabase( storeDir ); + database = (GraphDatabaseAPI) databaseFactory.newEmbeddedDatabaseBuilder( storeDir ) + .setConfig( GraphDatabaseSettings.fail_on_corrupted_log_files, "false" ).newGraphDatabase(); database.shutdown(); assertEquals( numberOfClosedTransactions, recoveryMonitor.getNumberOfRecoveredTransactions() ); @@ -154,7 +148,6 @@ public void doNotTruncateNewerTransactionLogFileWhenFailOnError() throws IOExcep removeLastCheckpointRecordFromLastLogFile(); addRandomBytesToLastLogFile( this::randomPositiveBytes ); - FeatureToggles.set( NeoStoreDataSource.class, "failOnCorruptedLogFiles", true ); expectedException.expectCause( new RootCauseMatcher<>( UnsupportedLogVersionException.class ) ); database = (GraphDatabaseAPI) databaseFactory.newEmbeddedDatabase( storeDir ); @@ -176,7 +169,8 @@ public void truncateNewerTransactionLogFileWhenForced() throws IOException removeLastCheckpointRecordFromLastLogFile(); addRandomBytesToLastLogFile( this::randomBytes ); - database = (GraphDatabaseAPI) databaseFactory.newEmbeddedDatabase( storeDir ); + database = (GraphDatabaseAPI) databaseFactory.newEmbeddedDatabaseBuilder( storeDir ) + .setConfig( GraphDatabaseSettings.fail_on_corrupted_log_files, "false" ).newGraphDatabase(); database.shutdown(); logProvider.assertContainsMessageContaining( "Fail to read transaction log version 0." ); @@ -189,7 +183,8 @@ public void recoverFirstCorruptedTransactionSingleFileNoCheckpoint() throws IOEx { addCorruptedCommandsToLastLogFile(); - GraphDatabaseService recoveredDatabase = databaseFactory.newEmbeddedDatabase( storeDir ); + GraphDatabaseService recoveredDatabase = databaseFactory.newEmbeddedDatabaseBuilder( storeDir ) + .setConfig( GraphDatabaseSettings.fail_on_corrupted_log_files, "false" ).newGraphDatabase(); recoveredDatabase.shutdown(); logProvider.assertContainsMessageContaining( "Fail to read transaction log version 0." 
); @@ -210,7 +205,6 @@ public void failToRecoverFirstCorruptedTransactionSingleFileNoCheckpointIfFailOn { addCorruptedCommandsToLastLogFile(); - FeatureToggles.set( NeoStoreDataSource.class, "failOnCorruptedLogFiles", true ); expectedException.expectCause( new RootCauseMatcher<>( NegativeArraySizeException.class ) ); GraphDatabaseService recoveredDatabase = databaseFactory.newEmbeddedDatabase( storeDir ); @@ -239,7 +233,8 @@ public void recoverNotAFirstCorruptedTransactionSingleFileNoCheckpoint() throws assertThat( modifiedFileLength, greaterThan( originalFileLength ) ); - database = (GraphDatabaseAPI) databaseFactory.newEmbeddedDatabase( storeDir ); + database = (GraphDatabaseAPI) databaseFactory.newEmbeddedDatabaseBuilder( storeDir ) + .setConfig( GraphDatabaseSettings.fail_on_corrupted_log_files, "false" ).newGraphDatabase(); database.shutdown(); logProvider.assertContainsMessageContaining( "Fail to read transaction log version 0." ); @@ -280,7 +275,8 @@ public void recoverNotAFirstCorruptedTransactionMultipleFilesNoCheckpoints() thr assertThat( modifiedFileLength, greaterThan( originalFileLength ) ); - database = (GraphDatabaseAPI) databaseFactory.newEmbeddedDatabase( storeDir ); + database = (GraphDatabaseAPI) databaseFactory.newEmbeddedDatabaseBuilder( storeDir ) + .setConfig( GraphDatabaseSettings.fail_on_corrupted_log_files, "false" ).newGraphDatabase(); database.shutdown(); logProvider.assertContainsMessageContaining( "Fail to read transaction log version 3." 
); @@ -318,7 +314,8 @@ public void recoverNotAFirstCorruptedTransactionMultipleFilesMultipleCheckpoints assertThat( modifiedFileLength, greaterThan( originalFileLength ) ); - database = (GraphDatabaseAPI) databaseFactory.newEmbeddedDatabase( storeDir ); + database = (GraphDatabaseAPI) databaseFactory.newEmbeddedDatabaseBuilder( storeDir ) + .setConfig( GraphDatabaseSettings.fail_on_corrupted_log_files, "false" ).newGraphDatabase(); database.shutdown(); logProvider.assertContainsMessageContaining( "Fail to read transaction log version 3." ); @@ -349,7 +346,8 @@ public void recoverFirstCorruptedTransactionAfterCheckpointInLastLogFile() throw assertThat( modifiedFileLength, greaterThan( originalFileLength ) ); - database = (GraphDatabaseAPI) databaseFactory.newEmbeddedDatabase( storeDir ); + database = (GraphDatabaseAPI) databaseFactory.newEmbeddedDatabaseBuilder( storeDir ) + .setConfig( GraphDatabaseSettings.fail_on_corrupted_log_files, "false" ).newGraphDatabase(); database.shutdown(); logProvider.assertContainsMessageContaining( "Fail to read transaction log version 5." 
); @@ -438,7 +436,7 @@ private void removeLastCheckpointRecordFromLastLogFile() throws IOException LogPosition checkpointPosition = null; LogFile transactionLogFile = logFiles.getLogFile(); - VersionAwareLogEntryReader entryReader = new VersionAwareLogEntryReader(); + VersionAwareLogEntryReader entryReader = new VersionAwareLogEntryReader<>(); LogPosition startPosition = LogPosition.start( logFiles.getHighestLogVersion() ); try ( ReadableLogChannel reader = transactionLogFile.getReader( startPosition ) ) { @@ -527,7 +525,7 @@ private MultiSet getLogEntriesDistribution( LogFiles logFiles ) throws IO LogFile transactionLogFile = logFiles.getLogFile(); LogPosition fileStartPosition = new LogPosition( 0, LogHeader.LOG_HEADER_SIZE ); - VersionAwareLogEntryReader entryReader = new VersionAwareLogEntryReader(); + VersionAwareLogEntryReader entryReader = new VersionAwareLogEntryReader<>(); MultiSet multiset = new MultiSet<>(); try ( ReadableLogChannel fileReader = transactionLogFile.getReader( fileStartPosition ) ) diff --git a/enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/discovery/HazelcastCoreTopologyServiceTest.java b/enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/discovery/HazelcastCoreTopologyServiceTest.java index fae7239a2a41e..e85632b29dfa4 100644 --- a/enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/discovery/HazelcastCoreTopologyServiceTest.java +++ b/enterprise/causal-clustering/src/test/java/org/neo4j/causalclustering/discovery/HazelcastCoreTopologyServiceTest.java @@ -1,3 +1,25 @@ +/* + * Copyright (c) 2002-2018 "Neo4j," + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j Enterprise Edition. The included source + * code can be redistributed and/or modified under the terms of the + * GNU AFFERO GENERAL PUBLIC LICENSE Version 3 + * (http://www.fsf.org/licensing/licenses/agpl-3.0.html) with the + * Commons Clause, as found in the associated LICENSE.txt file. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * Neo4j object code can be licensed independently from the source + * under separate terms from the AGPL. Inquiries can be directed to: + * licensing@neo4j.com + * + * More information is also available at: + * https://neo4j.com/licensing/ + */ package org.neo4j.causalclustering.discovery; import org.junit.Test; @@ -17,7 +39,7 @@ public class HazelcastCoreTopologyServiceTest { - @Test( timeout = 120_000) + @Test( timeout = 120_000 ) public void shouldBeAbleToStartAndStoreWithoutSuccessfulJoin() { CentralJobScheduler jobScheduler = new CentralJobScheduler(); @@ -36,7 +58,6 @@ public void shouldBeAbleToStartAndStoreWithoutSuccessfulJoin() hostnameResolver, new TopologyServiceNoRetriesStrategy() ); service.start(); - Thread.yield(); service.stop(); }