Skip to content

Commit

Permalink
Fix sampling of lucene index size
Browse files Browse the repository at this point in the history
When we iterate over all the terms in the Lucene index, Lucene will
also return terms that have been deleted and are no longer in use,
until the next index compaction.  This is not a huge problem per se
for computing index selectivity, but we were also using these skewed
numbers for the index size.  This commit fixes the index size so it is
computed correctly, by asking Lucene for the number of documents
present in the index.
  • Loading branch information
davidegrohmann committed Apr 18, 2016
1 parent 21eb863 commit 8813ab4
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 5 deletions.
Expand Up @@ -66,7 +66,8 @@ class LuceneIndexAccessorReader implements IndexReader
public long sampleIndex( DoubleLong.Out result ) throws IndexNotFoundKernelException public long sampleIndex( DoubleLong.Out result ) throws IndexNotFoundKernelException
{ {
NonUniqueIndexSampler sampler = new NonUniqueIndexSampler( bufferSizeLimit ); NonUniqueIndexSampler sampler = new NonUniqueIndexSampler( bufferSizeLimit );
try ( TermEnum terms = luceneIndexReader().terms() ) org.apache.lucene.index.IndexReader indexReader = luceneIndexReader();
try ( TermEnum terms = indexReader.terms() )
{ {
while ( terms.next() ) while ( terms.next() )
{ {
Expand All @@ -86,7 +87,12 @@ public long sampleIndex( DoubleLong.Out result ) throws IndexNotFoundKernelExcep
throw new RuntimeException( e ); throw new RuntimeException( e );
} }


return sampler.result( result ); sampler.result( result );

// Here do not return the count from the sampler, since when traversing all terms lucene will also consider
// the logically deleted ones that are going to be physically removed in the next compaction. The index size
// can be computed exactly by asking lucene for the current number of documents in the index.
return indexReader.numDocs();
} }


@Override @Override
Expand Down
Expand Up @@ -87,6 +87,7 @@ public void shouldProvideTheIndexUniqueValuesForAnIndexWithDuplicates() throws E
new Term( "string", "aaa" ), new Term( "string", "aaa" ),
new Term( "string", "ccc" ) new Term( "string", "ccc" )
); );
when(reader.numDocs() ).thenReturn( 3 );


// When // When
final DoubleLongRegister output = Registers.newDoubleLongRegister(); final DoubleLongRegister output = Registers.newDoubleLongRegister();
Expand All @@ -108,6 +109,7 @@ public void shouldSkipTheNonNodeIdKeyEntriesWhenCalculatingIndexUniqueValues() t
new Term( NODE_ID_KEY, "aaa" ), // <- this should be ignored new Term( NODE_ID_KEY, "aaa" ), // <- this should be ignored
new Term( "string", "bbb" ) new Term( "string", "bbb" )
); );
when(reader.numDocs() ).thenReturn( 1 );


// When // When


Expand Down
Expand Up @@ -37,6 +37,7 @@
import org.neo4j.kernel.impl.store.counts.CountsTracker; import org.neo4j.kernel.impl.store.counts.CountsTracker;
import org.neo4j.kernel.impl.store.counts.keys.CountsKeyFactory; import org.neo4j.kernel.impl.store.counts.keys.CountsKeyFactory;
import org.neo4j.kernel.impl.store.counts.keys.IndexSampleKey; import org.neo4j.kernel.impl.store.counts.keys.IndexSampleKey;
import org.neo4j.kernel.impl.store.counts.keys.IndexStatisticsKey;
import org.neo4j.register.Register.DoubleLongRegister; import org.neo4j.register.Register.DoubleLongRegister;
import org.neo4j.register.Registers; import org.neo4j.register.Registers;
import org.neo4j.test.TargetDirectory; import org.neo4j.test.TargetDirectory;
Expand All @@ -58,6 +59,7 @@ public class IndexSamplingIntegrationTest
public void shouldSampleNotUniqueIndex() throws Throwable public void shouldSampleNotUniqueIndex() throws Throwable
{ {
GraphDatabaseService db = null; GraphDatabaseService db = null;
long deletedNodes = 0;
try try
{ {
// Given // Given
Expand All @@ -82,6 +84,17 @@ public void shouldSampleNotUniqueIndex() throws Throwable
db.createNode( label ).setProperty( property, names[i % names.length] ); db.createNode( label ).setProperty( property, names[i % names.length] );
tx.success(); tx.success();
} }

}

try ( Transaction tx = db.beginTx() )
{
for ( int i = 0; i < (nodes / 10) ; i++ )
{
db.findNodes( label, property, names[i % names.length] ).next().delete();
deletedNodes++;
tx.success();
}
} }
} }
finally finally
Expand All @@ -96,15 +109,23 @@ public void shouldSampleNotUniqueIndex() throws Throwable
triggerIndexResamplingOnNextStartup(); triggerIndexResamplingOnNextStartup();


// Then // Then

// sampling will also consider the deleted nodes until the next lucene compaction
DoubleLongRegister register = fetchIndexSamplingValues( db ); DoubleLongRegister register = fetchIndexSamplingValues( db );
assertEquals( names.length, register.readFirst() ); assertEquals( names.length, register.readFirst() );
assertEquals( nodes, register.readSecond() ); assertEquals( nodes, register.readSecond() );

// but the deleted nodes should not be considered in the index size value
DoubleLongRegister indexSizeRegister = fetchIndexSizeValues( db );
assertEquals( 0, indexSizeRegister.readFirst() );
assertEquals( nodes - deletedNodes, indexSizeRegister.readSecond() );
} }


@Test @Test
public void shouldSampleUniqueIndex() throws Throwable public void shouldSampleUniqueIndex() throws Throwable
{ {
GraphDatabaseService db = null; GraphDatabaseService db = null;
long deletedNodes = 0;
try try
{ {
// Given // Given
Expand All @@ -123,6 +144,19 @@ public void shouldSampleUniqueIndex() throws Throwable
tx.success(); tx.success();
} }
} }

try ( Transaction tx = db.beginTx() )
{
for ( int i = 0; i < nodes; i++ )
{
if ( i % 10 == 0 )
{
deletedNodes++;
db.findNode( label, property, "" + i ).delete();
tx.success();
}
}
}
} }
finally finally
{ {
Expand All @@ -136,9 +170,13 @@ public void shouldSampleUniqueIndex() throws Throwable
triggerIndexResamplingOnNextStartup(); triggerIndexResamplingOnNextStartup();


// Then // Then
DoubleLongRegister register = fetchIndexSamplingValues( db ); DoubleLongRegister indexSampleRegister = fetchIndexSamplingValues( db );
assertEquals( nodes, register.readFirst() ); assertEquals( nodes - deletedNodes, indexSampleRegister.readFirst() );
assertEquals( nodes, register.readSecond() ); assertEquals( nodes - deletedNodes, indexSampleRegister.readSecond() );

DoubleLongRegister indexSizeRegister = fetchIndexSizeValues( db );
assertEquals( 0, indexSizeRegister.readFirst() );
assertEquals( nodes - deletedNodes, indexSizeRegister.readSecond() );
} }


private DoubleLongRegister fetchIndexSamplingValues( GraphDatabaseService db ) private DoubleLongRegister fetchIndexSamplingValues( GraphDatabaseService db )
Expand All @@ -162,6 +200,27 @@ private DoubleLongRegister fetchIndexSamplingValues( GraphDatabaseService db )
} }
} }


/**
 * Restarts the database and reads the index-size statistics entry for the single
 * index in the store directly from the counts store.
 *
 * @param db previous database instance (may be null); it is replaced by a freshly
 *           opened one, which is always shut down before returning
 * @return a register whose second value holds the index size recorded at startup
 */
private DoubleLongRegister fetchIndexSizeValues( GraphDatabaseService db )
{
    try
    {
        // Then
        db = new TestGraphDatabaseFactory().newEmbeddedDatabase( testDirectory.graphDbDir().getAbsolutePath() );
        @SuppressWarnings( "deprecation" )
        GraphDatabaseAPI graphDatabaseAPI = (GraphDatabaseAPI) db;
        NeoStore neoStore = graphDatabaseAPI.getDependencyResolver().resolveDependency( NeoStore.class );
        CountsTracker tracker = neoStore.getCounts();
        // Only one index exists in these tests, so (0, 0) addresses it directly.
        IndexStatisticsKey statisticsKey = CountsKeyFactory.indexStatisticsKey( 0, 0 ); // cheating a bit...
        return tracker.get( statisticsKey, Registers.newDoubleLongRegister() );
    }
    finally
    {
        if ( db != null )
        {
            db.shutdown();
        }
    }
}

private void triggerIndexResamplingOnNextStartup() private void triggerIndexResamplingOnNextStartup()
{ {
// Trigger index resampling on next at startup // Trigger index resampling on next at startup
Expand Down

0 comments on commit 8813ab4

Please sign in to comment.