Skip to content

Commit

Permalink
Rebuild indexes that fail to open
Browse files Browse the repository at this point in the history
If an index that should be online fails to open on startup we would not
be able to apply updates to it, and it would thus become inconsistent
with the database. Should the index then be possible to open after a
restart of the database we would end up with an inconsistent index.

The situation described above is now handled by treating the index as
permanently failed, and then taking actions to correct it. The first and
possibly most important action is to log the failure, something that was
not done before. Then we recognise that the only thing a user could do
to restore the index would be to either drop it and recreate it, or
stop the database, delete the index store files from disk, then restart
the database. By automating this second option we can avoid the need for
a restart. Since we are already in the startup phase when this happens,
we can simply treat the index as incompletely populated instead, forcing
the database to rebuild the index without any further intervention.
  • Loading branch information
thobe committed Aug 18, 2016
1 parent 3ec0e92 commit f0c2348
Show file tree
Hide file tree
Showing 19 changed files with 365 additions and 120 deletions.
Expand Up @@ -147,7 +147,7 @@ public PrintWriter get()
.build(); .build();
SchemaIndexProvider indexes = new LuceneSchemaIndexProvider( SchemaIndexProvider indexes = new LuceneSchemaIndexProvider(
fileSystem, DirectoryFactory.PERSISTENT, fileSystem, DirectoryFactory.PERSISTENT,
storeDir, tuningConfiguration, operationalMode ); storeDir, logProvider, tuningConfiguration, operationalMode );
DirectStoreAccess stores = new DirectStoreAccess( store, labelScanStore, indexes ); DirectStoreAccess stores = new DirectStoreAccess( store, labelScanStore, indexes );
FullCheck check = new FullCheck( tuningConfiguration, progressFactory ); FullCheck check = new FullCheck( tuningConfiguration, progressFactory );
summary = check.execute( stores, new DuplicatingLog( log, reportLog ) ); summary = check.execute( stores, new DuplicatingLog( log, reportLog ) );
Expand Down
Expand Up @@ -62,6 +62,7 @@
import org.neo4j.kernel.impl.transaction.log.TransactionIdStore; import org.neo4j.kernel.impl.transaction.log.TransactionIdStore;
import org.neo4j.kernel.impl.transaction.tracing.CommitEvent; import org.neo4j.kernel.impl.transaction.tracing.CommitEvent;
import org.neo4j.logging.FormattedLogProvider; import org.neo4j.logging.FormattedLogProvider;
import org.neo4j.logging.NullLogProvider;
import org.neo4j.test.PageCacheRule; import org.neo4j.test.PageCacheRule;
import org.neo4j.test.TargetDirectory; import org.neo4j.test.TargetDirectory;
import org.neo4j.test.TestGraphDatabaseFactory; import org.neo4j.test.TestGraphDatabaseFactory;
Expand Down Expand Up @@ -106,8 +107,8 @@ public DirectStoreAccess directStoreAccess()
private SchemaIndexProvider createIndexes( FileSystemAbstraction fileSystem, Config config, private SchemaIndexProvider createIndexes( FileSystemAbstraction fileSystem, Config config,
OperationalMode operationalMode ) OperationalMode operationalMode )
{ {
return new LuceneSchemaIndexProvider( fileSystem, DirectoryFactory.PERSISTENT, directory, config, return new LuceneSchemaIndexProvider( fileSystem, DirectoryFactory.PERSISTENT, directory,
operationalMode ); NullLogProvider.getInstance(), config, operationalMode );
} }


public File directory() public File directory()
Expand Down
Expand Up @@ -152,7 +152,7 @@ public PrintWriter get()
SchemaIndexProvider indexes = new LuceneSchemaIndexProvider( SchemaIndexProvider indexes = new LuceneSchemaIndexProvider(
fileSystem, fileSystem,
DirectoryFactory.PERSISTENT, DirectoryFactory.PERSISTENT,
storeDir, consistencyCheckerConfig, operationalMode ); storeDir, logProvider, consistencyCheckerConfig, operationalMode );


int numberOfThreads = defaultConsistencyCheckThreadsNumber(); int numberOfThreads = defaultConsistencyCheckThreadsNumber();
Statistics statistics; Statistics statistics;
Expand Down
Expand Up @@ -142,7 +142,8 @@ public DirectStoreAccess directStoreAccess()
private SchemaIndexProvider createIndexes( FileSystemAbstraction fileSystem, Config config, private SchemaIndexProvider createIndexes( FileSystemAbstraction fileSystem, Config config,
OperationalMode operationalMode ) OperationalMode operationalMode )
{ {
return new LuceneSchemaIndexProvider( fileSystem, DirectoryFactory.PERSISTENT, directory, config, operationalMode ); return new LuceneSchemaIndexProvider( fileSystem, DirectoryFactory.PERSISTENT, directory,
FormattedLogProvider.toOutputStream( System.out ), config, operationalMode );
} }


public File directory() public File directory()
Expand Down
Expand Up @@ -21,5 +21,5 @@


public interface IndexProxyFactory public interface IndexProxyFactory
{ {
IndexProxy create(); IndexProxy create() throws Exception;
} }
Expand Up @@ -31,10 +31,10 @@
import org.neo4j.kernel.impl.api.UpdateableSchemaState; import org.neo4j.kernel.impl.api.UpdateableSchemaState;
import org.neo4j.kernel.impl.api.index.sampling.IndexSamplingConfig; import org.neo4j.kernel.impl.api.index.sampling.IndexSamplingConfig;
import org.neo4j.kernel.impl.util.JobScheduler; import org.neo4j.kernel.impl.util.JobScheduler;
import org.neo4j.logging.Log;
import org.neo4j.logging.LogProvider; import org.neo4j.logging.LogProvider;


import static java.lang.String.format; import static java.lang.String.format;

import static org.neo4j.kernel.impl.api.index.IndexPopulationFailure.failure; import static org.neo4j.kernel.impl.api.index.IndexPopulationFailure.failure;


public class IndexProxySetup public class IndexProxySetup
Expand Down Expand Up @@ -75,7 +75,7 @@ public IndexProxy createPopulatingIndexProxy( final long ruleId,
// TODO: This is here because there is a circular dependency from PopulatingIndexProxy to FlippableIndexProxy // TODO: This is here because there is a circular dependency from PopulatingIndexProxy to FlippableIndexProxy
final String indexUserDescription = indexUserDescription( descriptor, providerDescriptor ); final String indexUserDescription = indexUserDescription( descriptor, providerDescriptor );
final IndexConfiguration config = new IndexConfiguration( constraint ); final IndexConfiguration config = new IndexConfiguration( constraint );
IndexPopulator populator = populatorFromProvider( providerDescriptor, ruleId, descriptor, config, final IndexPopulator populator = populatorFromProvider( providerDescriptor, ruleId, descriptor, config,
samplingConfig ); samplingConfig );


FailedIndexProxyFactory failureDelegateFactory = new FailedPopulatingIndexProxyFactory( FailedIndexProxyFactory failureDelegateFactory = new FailedPopulatingIndexProxyFactory(
Expand All @@ -97,25 +97,17 @@ public IndexProxy createPopulatingIndexProxy( final long ruleId,
flipper.setFlipTarget( new IndexProxyFactory() flipper.setFlipTarget( new IndexProxyFactory()
{ {
@Override @Override
public IndexProxy create() public IndexProxy create() throws Exception
{ {
try monitor.populationCompleteOn( descriptor );
{ OnlineIndexProxy onlineProxy = new OnlineIndexProxy(
monitor.populationCompleteOn( descriptor ); descriptor, config, onlineAccessorFromProvider( providerDescriptor, ruleId,
OnlineIndexProxy onlineProxy = new OnlineIndexProxy( config, samplingConfig ), storeView, providerDescriptor );
descriptor, config, onlineAccessorFromProvider( providerDescriptor, ruleId, if ( constraint )
config, samplingConfig ), storeView, providerDescriptor
);
if ( constraint )
{
return new TentativeConstraintIndexProxy( flipper, onlineProxy );
}
return onlineProxy;
}
catch ( IOException e )
{ {
return createFailedIndexProxy( ruleId, descriptor, providerDescriptor, constraint, failure( e ) ); return new TentativeConstraintIndexProxy( flipper, onlineProxy );
} }
return onlineProxy;
} }
} ); } );


Expand Down Expand Up @@ -151,7 +143,10 @@ public IndexProxy createOnlineIndexProxy( long ruleId,
} }
catch ( IOException e ) catch ( IOException e )
{ {
return createFailedIndexProxy( ruleId, descriptor, providerDescriptor, unique, failure( e ) ); logProvider.getLog( getClass() ).error( "Failed to open index: " + ruleId +
" (" + descriptor.userDescription( tokenNameLookup ) +
"), requesting re-population.", e );
return createRecoveringIndexProxy( descriptor, providerDescriptor, unique );
} }
} }


Expand Down
Expand Up @@ -234,7 +234,6 @@ public void init()
break; break;
case POPULATING: case POPULATING:
// The database was shut down during population, or a crash has occurred, or some other sad thing. // The database was shut down during population, or a crash has occurred, or some other sad thing.

indexProxy = proxySetup.createRecoveringIndexProxy( descriptor, providerDescriptor, constraint ); indexProxy = proxySetup.createRecoveringIndexProxy( descriptor, providerDescriptor, constraint );
break; break;
case FAILED: case FAILED:
Expand Down
Expand Up @@ -19,12 +19,6 @@
*/ */
package org.neo4j.kernel.impl.api.index; package org.neo4j.kernel.impl.api.index;


import org.junit.Rule;
import org.junit.Test;
import org.mockito.InOrder;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
Expand All @@ -34,6 +28,16 @@
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;


import org.hamcrest.Description;
import org.hamcrest.Matcher;
import org.hamcrest.TypeSafeMatcher;
import org.junit.Rule;
import org.junit.Test;
import org.mockito.ArgumentCaptor;
import org.mockito.InOrder;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

import org.neo4j.collection.primitive.PrimitiveLongSet; import org.neo4j.collection.primitive.PrimitiveLongSet;
import org.neo4j.graphdb.ResourceIterator; import org.neo4j.graphdb.ResourceIterator;
import org.neo4j.helpers.collection.ArrayIterator; import org.neo4j.helpers.collection.ArrayIterator;
Expand Down Expand Up @@ -71,6 +75,7 @@
import org.neo4j.register.Register.DoubleLongRegister; import org.neo4j.register.Register.DoubleLongRegister;
import org.neo4j.test.DoubleLatch; import org.neo4j.test.DoubleLatch;


import static java.util.Arrays.asList;
import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.CoreMatchers.startsWith; import static org.hamcrest.CoreMatchers.startsWith;
import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.equalTo;
Expand All @@ -94,15 +99,13 @@
import static org.mockito.Mockito.verifyNoMoreInteractions; import static org.mockito.Mockito.verifyNoMoreInteractions;
import static org.mockito.Mockito.verifyZeroInteractions; import static org.mockito.Mockito.verifyZeroInteractions;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;

import static java.util.Arrays.asList;

import static org.neo4j.collection.primitive.PrimitiveLongCollections.setOf; import static org.neo4j.collection.primitive.PrimitiveLongCollections.setOf;
import static org.neo4j.helpers.collection.IteratorUtil.asCollection; import static org.neo4j.helpers.collection.IteratorUtil.asCollection;
import static org.neo4j.helpers.collection.IteratorUtil.asResourceIterator; import static org.neo4j.helpers.collection.IteratorUtil.asResourceIterator;
import static org.neo4j.helpers.collection.IteratorUtil.asSet; import static org.neo4j.helpers.collection.IteratorUtil.asSet;
import static org.neo4j.helpers.collection.IteratorUtil.iterator; import static org.neo4j.helpers.collection.IteratorUtil.iterator;
import static org.neo4j.helpers.collection.IteratorUtil.loop; import static org.neo4j.helpers.collection.IteratorUtil.loop;
import static org.neo4j.kernel.api.index.InternalIndexState.FAILED;
import static org.neo4j.kernel.api.index.InternalIndexState.ONLINE; import static org.neo4j.kernel.api.index.InternalIndexState.ONLINE;
import static org.neo4j.kernel.api.index.InternalIndexState.POPULATING; import static org.neo4j.kernel.api.index.InternalIndexState.POPULATING;
import static org.neo4j.kernel.impl.api.index.IndexUpdateMode.BATCHED; import static org.neo4j.kernel.impl.api.index.IndexUpdateMode.BATCHED;
Expand Down Expand Up @@ -801,6 +804,108 @@ public void awaitingPopulationOfRecoveredIndex( long index, IndexDescriptor desc
assertEquals( ONLINE, indexing.getIndexProxy( indexId.get() ).getState() ); assertEquals( ONLINE, indexing.getIndexProxy( indexId.get() ).getState() );
} }


@Test
public void shouldStoreIndexFailureWhenFailingToCreateOnlineAccessorAfterPopulating() throws Exception
{
// given
long indexId = 1;
IndexingService indexing = newIndexingServiceWithMockedDependencies( populator, accessor, withData() );

IOException exception = new IOException( "Expected failure" );
when( nameLookup.labelGetName( labelId ) ).thenReturn( "TheLabel" );
when( nameLookup.propertyKeyGetName( propertyKeyId ) ).thenReturn( "propertyKey" );

when( indexProvider.getOnlineAccessor(
eq( indexId ), any( IndexConfiguration.class ), any( IndexSamplingConfig.class ) ) )
.thenThrow( exception );

life.start();
ArgumentCaptor<Boolean> closeArgs = ArgumentCaptor.forClass( Boolean.class );

// when
indexing.createIndex( indexRule( indexId, labelId, propertyKeyId, PROVIDER_DESCRIPTOR ) );
verify( populator, timeout( 1000 ).times( 2 ) ).close( closeArgs.capture() );

// then
assertEquals( FAILED, indexing.getIndexProxy( 1 ).getState() );
assertEquals( asList( true, false ), closeArgs.getAllValues() );
assertThat( storedFailure(), containsString( "java.io.IOException: Expected failure\n\tat " ) );
logProvider.assertAtLeastOnce( inLog( IndexPopulationJob.class ).error( equalTo(
"Failed to populate index: [:TheLabel(propertyKey) [provider: {key=quantum-dex, version=25.0}]]" ),
causedBy( exception ) ) );
logProvider.assertNone( inLog( IndexPopulationJob.class ).info(
"Index population completed. Index is now online: [%s]",
":TheLabel(propertyKey) [provider: {key=quantum-dex, version=25.0}]" ) );
}

@Test
public void shouldStoreIndexFailureWhenFailingToCreateOnlineAccessorAfterRecoveringPopulatingIndex() throws Exception
{
// given
long indexId = 1;
IndexingService indexing = newIndexingServiceWithMockedDependencies( populator, accessor, withData(),
indexRule( indexId, labelId, propertyKeyId, PROVIDER_DESCRIPTOR ) );

IOException exception = new IOException( "Expected failure" );
when( nameLookup.labelGetName( labelId ) ).thenReturn( "TheLabel" );
when( nameLookup.propertyKeyGetName( propertyKeyId ) ).thenReturn( "propertyKey" );

when( indexProvider.getInitialState( indexId ) ).thenReturn( POPULATING );
when( indexProvider.getOnlineAccessor(
eq( indexId ), any( IndexConfiguration.class ), any( IndexSamplingConfig.class ) ) )
.thenThrow( exception );

life.start();
ArgumentCaptor<Boolean> closeArgs = ArgumentCaptor.forClass( Boolean.class );

// when
verify( populator, timeout( 1000 ).times( 2 ) ).close( closeArgs.capture() );

// then
assertEquals( FAILED, indexing.getIndexProxy( 1 ).getState() );
assertEquals( asList( true, false ), closeArgs.getAllValues() );
assertThat( storedFailure(), containsString( "java.io.IOException: Expected failure\n\tat " ) );
logProvider.assertAtLeastOnce( inLog( IndexPopulationJob.class ).error( equalTo(
"Failed to populate index: [:TheLabel(propertyKey) [provider: {key=quantum-dex, version=25.0}]]" ),
causedBy( exception ) ) );
logProvider.assertNone( inLog( IndexPopulationJob.class ).info(
"Index population completed. Index is now online: [%s]",
":TheLabel(propertyKey) [provider: {key=quantum-dex, version=25.0}]" ) );
}

static Matcher<? extends Throwable> causedBy( final Throwable exception )
{
return new TypeSafeMatcher<Throwable>()
{
@Override
protected boolean matchesSafely( Throwable item )
{
while ( item != null )
{
if ( item == exception )
{
return true;
}
item = item.getCause();
}
return false;
}

@Override
public void describeTo( Description description )
{
description.appendText( "exception caused by " ).appendValue( exception );
}
};
}

private String storedFailure() throws IOException
{
ArgumentCaptor<String> reason = ArgumentCaptor.forClass( String.class );
verify( populator ).markAsFailed( reason.capture() );
return reason.getValue();
}

private static class ControlledIndexPopulator extends IndexPopulator.Adapter private static class ControlledIndexPopulator extends IndexPopulator.Adapter
{ {
private final DoubleLatch latch; private final DoubleLatch latch;
Expand Down
7 changes: 7 additions & 0 deletions community/lucene-index/pom.xml
Expand Up @@ -64,6 +64,13 @@ the relevant Commercial Agreement.
<type>test-jar</type> <type>test-jar</type>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>neo4j-logging</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency> <dependency>
<groupId>org.neo4j</groupId> <groupId>org.neo4j</groupId>
<artifactId>neo4j-io</artifactId> <artifactId>neo4j-io</artifactId>
Expand Down

0 comments on commit f0c2348

Please sign in to comment.