From 1e83bb402209816ffb6fb5cb30f2a827b40c8ac7 Mon Sep 17 00:00:00 2001 From: Mikhaylo Demianenko Date: Wed, 18 May 2016 15:08:33 +0200 Subject: [PATCH] Update default settings for non unique index sampling, sampler set initial size evaluation. Decrease default non unique sampler step size limit to 8m instead of 64m. Update default set size evaluation to be less greedy, make it sample step size dependent. Performance wise new default size/initial set size combination gives about 30% increase while populating sampler. And it does not make sense to create new map for values each time. --- .../factory/GraphDatabaseSettings.java | 2 +- .../index/sampling/IndexSamplingConfig.java | 12 +++---- .../index/sampling/NonUniqueIndexSampler.java | 35 +++++++++++++------ .../NonUniqueLuceneIndexPopulator.java | 2 +- .../sampler/NonUniqueLuceneIndexSampler.java | 2 +- 5 files changed, 33 insertions(+), 20 deletions(-) diff --git a/community/kernel/src/main/java/org/neo4j/graphdb/factory/GraphDatabaseSettings.java b/community/kernel/src/main/java/org/neo4j/graphdb/factory/GraphDatabaseSettings.java index b8d75427a261..5364192ed6db 100644 --- a/community/kernel/src/main/java/org/neo4j/graphdb/factory/GraphDatabaseSettings.java +++ b/community/kernel/src/main/java/org/neo4j/graphdb/factory/GraphDatabaseSettings.java @@ -305,7 +305,7 @@ public abstract class GraphDatabaseSettings @Description("Size of buffer used by index sampling") public static final Setting index_sampling_buffer_size = - setting("dbms.index_sampling.buffer_size", BYTES, "64m", + setting("dbms.index_sampling.buffer_size", BYTES, "8m", min( /* 1m */ 1048576L ), max( (long) Integer.MAX_VALUE ) ); @Description("Percentage of index updates of total index size required before sampling of a given index is triggered") diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/api/index/sampling/IndexSamplingConfig.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/api/index/sampling/IndexSamplingConfig.java 
index d01f7c7108dd..1f918951ef1d 100644 --- a/community/kernel/src/main/java/org/neo4j/kernel/impl/api/index/sampling/IndexSamplingConfig.java +++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/api/index/sampling/IndexSamplingConfig.java @@ -24,20 +24,20 @@ public class IndexSamplingConfig { - private final int bufferSize; + private final int sampleSizeLimit; private final double updateRatio; private final boolean backgroundSampling; public IndexSamplingConfig( Config config ) { - this.bufferSize = config.get( GraphDatabaseSettings.index_sampling_buffer_size ).intValue(); + this.sampleSizeLimit = config.get( GraphDatabaseSettings.index_sampling_buffer_size ).intValue(); this.updateRatio = ((double) config.get( GraphDatabaseSettings.index_sampling_update_percentage )) / 100.0d; this.backgroundSampling = config.get( GraphDatabaseSettings.index_background_sampling_enabled ); } - public int bufferSize() + public int sampleSizeLimit() { - return bufferSize; + return sampleSizeLimit; } public double updateRatio() @@ -71,14 +71,14 @@ public boolean equals( Object o ) IndexSamplingConfig that = (IndexSamplingConfig) o; return backgroundSampling == that.backgroundSampling && - bufferSize == that.bufferSize && + sampleSizeLimit == that.sampleSizeLimit && Double.compare( that.updateRatio, updateRatio ) == 0; } @Override public int hashCode() { - int result = bufferSize; + int result = sampleSizeLimit; long temp = Double.doubleToLongBits( updateRatio ); result = 31 * result + (int) (temp ^ (temp >>> 32)); result = 31 * result + (backgroundSampling ? 
1 : 0); diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/api/index/sampling/NonUniqueIndexSampler.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/api/index/sampling/NonUniqueIndexSampler.java index dba409e73fe6..afa428406d82 100644 --- a/community/kernel/src/main/java/org/neo4j/kernel/impl/api/index/sampling/NonUniqueIndexSampler.java +++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/api/index/sampling/NonUniqueIndexSampler.java @@ -24,9 +24,7 @@ public class NonUniqueIndexSampler { - private static final int INITIAL_SIZE = 1 << 16; - - private final int bufferSizeLimit; + private final int sampleSizeLimit; private final MultiSet values; private int sampledSteps = 0; @@ -35,12 +33,12 @@ public class NonUniqueIndexSampler private long accumulatedUniqueValues = 0; private long accumulatedSampledSize = 0; - private long bufferSize = 0; + private long sampleSize = 0; - public NonUniqueIndexSampler( int bufferSizeLimit ) + public NonUniqueIndexSampler( int sampleSizeLimit ) { - this.bufferSizeLimit = bufferSizeLimit; - this.values = new MultiSet<>( INITIAL_SIZE ); + this.values = new MultiSet<>( calculateInitialSetSize( sampleSizeLimit ) ); + this.sampleSizeLimit = sampleSizeLimit; } public void include( String value ) @@ -51,14 +49,14 @@ public void include( String value ) public void include( String value, long increment ) { assert increment > 0; - if ( bufferSize >= bufferSizeLimit ) + if ( sampleSize >= sampleSizeLimit ) { nextStep(); } if ( values.increment( value, increment ) == increment ) { - bufferSize += value.length(); + sampleSize += value.length(); } } @@ -72,7 +70,7 @@ public void exclude( String value, long decrement ) assert decrement > 0; if ( values.increment( value, -decrement ) == 0 ) { - bufferSize -= value.length(); + sampleSize -= value.length(); } } @@ -98,9 +96,24 @@ private void nextStep() { accumulatedUniqueValues += values.uniqueSize(); accumulatedSampledSize += values.size(); - bufferSize = 0; + 
sampleSize = 0; sampledSteps++; values.clear(); } + + /** + * Evaluate the initial set size as 1 << (log2(sampleSizeLimit) / 2), based on the provided sample size + * limit. + * Minimum possible size is 1 << 10. + * Maximum possible size is 1 << 16. + * + * @param sampleSizeLimit specified sample size limit + * @return initial set size + */ + private int calculateInitialSetSize( int sampleSizeLimit ) + { + int basedOnSampleSize = Math.max( 10, (int) (Math.log( sampleSizeLimit ) / Math.log( 2 )) / 2 ); + return (1 << Math.min( 16, basedOnSampleSize )); + } } diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/populator/NonUniqueLuceneIndexPopulator.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/populator/NonUniqueLuceneIndexPopulator.java index 41fae773573d..383cef16a081 100644 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/populator/NonUniqueLuceneIndexPopulator.java +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/populator/NonUniqueLuceneIndexPopulator.java @@ -42,7 +42,7 @@ public class NonUniqueLuceneIndexPopulator extends LuceneIndexPopulator public NonUniqueLuceneIndexPopulator( LuceneSchemaIndex luceneIndex, IndexSamplingConfig samplingConfig ) { super( luceneIndex ); - this.sampler = new NonUniqueIndexSampler( samplingConfig.bufferSize() ); + this.sampler = new NonUniqueIndexSampler( samplingConfig.sampleSizeLimit() ); } @Override diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/sampler/NonUniqueLuceneIndexSampler.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/sampler/NonUniqueLuceneIndexSampler.java index d04cd2bd9083..d7142578b2ff 100644 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/sampler/NonUniqueLuceneIndexSampler.java +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/sampler/NonUniqueLuceneIndexSampler.java 
@@ -58,7 +58,7 @@ public NonUniqueLuceneIndexSampler( IndexSearcher indexSearcher, TaskControl tas @Override protected IndexSample performSampling() throws IndexNotFoundKernelException { - NonUniqueIndexSampler sampler = new NonUniqueIndexSampler( indexSamplingConfig.bufferSize() ); + NonUniqueIndexSampler sampler = new NonUniqueIndexSampler( indexSamplingConfig.sampleSizeLimit() ); IndexReader indexReader = indexSearcher.getIndexReader(); for ( LeafReaderContext readerContext : indexReader.leaves() ) {