Skip to content

Commit

Permalink
Update default settings for non unique index sampling, sampler set in…
Browse files Browse the repository at this point in the history
…itial size evaluation.

Decrease the default non-unique sampler step size limit from 64m to 8m.
Make the default initial set size evaluation less greedy by making it depend on the sample step size.

Performance-wise, the new combination of default sample size and initial set size gives about a 30% improvement while populating the sampler.
Also, it does not make sense to create a new map for the values each time.
  • Loading branch information
MishaDemianenko committed May 19, 2016
1 parent 82d7ef3 commit 1e83bb4
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 20 deletions.
Expand Up @@ -305,7 +305,7 @@ public abstract class GraphDatabaseSettings


@Description("Size of buffer used by index sampling") @Description("Size of buffer used by index sampling")
public static final Setting<Long> index_sampling_buffer_size = public static final Setting<Long> index_sampling_buffer_size =
setting("dbms.index_sampling.buffer_size", BYTES, "64m", setting("dbms.index_sampling.buffer_size", BYTES, "8m",
min( /* 1m */ 1048576L ), max( (long) Integer.MAX_VALUE ) ); min( /* 1m */ 1048576L ), max( (long) Integer.MAX_VALUE ) );


@Description("Percentage of index updates of total index size required before sampling of a given index is triggered") @Description("Percentage of index updates of total index size required before sampling of a given index is triggered")
Expand Down
Expand Up @@ -24,20 +24,20 @@


public class IndexSamplingConfig public class IndexSamplingConfig
{ {
private final int bufferSize; private final int sampleSizeLimit;
private final double updateRatio; private final double updateRatio;
private final boolean backgroundSampling; private final boolean backgroundSampling;


public IndexSamplingConfig( Config config ) public IndexSamplingConfig( Config config )
{ {
this.bufferSize = config.get( GraphDatabaseSettings.index_sampling_buffer_size ).intValue(); this.sampleSizeLimit = config.get( GraphDatabaseSettings.index_sampling_buffer_size ).intValue();
this.updateRatio = ((double) config.get( GraphDatabaseSettings.index_sampling_update_percentage )) / 100.0d; this.updateRatio = ((double) config.get( GraphDatabaseSettings.index_sampling_update_percentage )) / 100.0d;
this.backgroundSampling = config.get( GraphDatabaseSettings.index_background_sampling_enabled ); this.backgroundSampling = config.get( GraphDatabaseSettings.index_background_sampling_enabled );
} }


public int bufferSize() public int sampleSizeLimit()
{ {
return bufferSize; return sampleSizeLimit;
} }


public double updateRatio() public double updateRatio()
Expand Down Expand Up @@ -71,14 +71,14 @@ public boolean equals( Object o )
IndexSamplingConfig that = (IndexSamplingConfig) o; IndexSamplingConfig that = (IndexSamplingConfig) o;


return backgroundSampling == that.backgroundSampling && return backgroundSampling == that.backgroundSampling &&
bufferSize == that.bufferSize && sampleSizeLimit == that.sampleSizeLimit &&
Double.compare( that.updateRatio, updateRatio ) == 0; Double.compare( that.updateRatio, updateRatio ) == 0;
} }


@Override @Override
public int hashCode() public int hashCode()
{ {
int result = bufferSize; int result = sampleSizeLimit;
long temp = Double.doubleToLongBits( updateRatio ); long temp = Double.doubleToLongBits( updateRatio );
result = 31 * result + (int) (temp ^ (temp >>> 32)); result = 31 * result + (int) (temp ^ (temp >>> 32));
result = 31 * result + (backgroundSampling ? 1 : 0); result = 31 * result + (backgroundSampling ? 1 : 0);
Expand Down
Expand Up @@ -24,9 +24,7 @@


public class NonUniqueIndexSampler public class NonUniqueIndexSampler
{ {
private static final int INITIAL_SIZE = 1 << 16; private final int sampleSizeLimit;

private final int bufferSizeLimit;
private final MultiSet<String> values; private final MultiSet<String> values;


private int sampledSteps = 0; private int sampledSteps = 0;
Expand All @@ -35,12 +33,12 @@ public class NonUniqueIndexSampler


private long accumulatedUniqueValues = 0; private long accumulatedUniqueValues = 0;
private long accumulatedSampledSize = 0; private long accumulatedSampledSize = 0;
private long bufferSize = 0; private long sampleSize = 0;


public NonUniqueIndexSampler( int bufferSizeLimit ) public NonUniqueIndexSampler( int sampleSizeLimit )
{ {
this.bufferSizeLimit = bufferSizeLimit; this.values = new MultiSet<>( calculateInitialSetSize( sampleSizeLimit ) );
this.values = new MultiSet<>( INITIAL_SIZE ); this.sampleSizeLimit = sampleSizeLimit;
} }


public void include( String value ) public void include( String value )
Expand All @@ -51,14 +49,14 @@ public void include( String value )
public void include( String value, long increment ) public void include( String value, long increment )
{ {
assert increment > 0; assert increment > 0;
if ( bufferSize >= bufferSizeLimit ) if ( sampleSize >= sampleSizeLimit )
{ {
nextStep(); nextStep();
} }


if ( values.increment( value, increment ) == increment ) if ( values.increment( value, increment ) == increment )
{ {
bufferSize += value.length(); sampleSize += value.length();
} }
} }


Expand All @@ -72,7 +70,7 @@ public void exclude( String value, long decrement )
assert decrement > 0; assert decrement > 0;
if ( values.increment( value, -decrement ) == 0 ) if ( values.increment( value, -decrement ) == 0 )
{ {
bufferSize -= value.length(); sampleSize -= value.length();
} }
} }


Expand All @@ -98,9 +96,24 @@ private void nextStep()
{ {
accumulatedUniqueValues += values.uniqueSize(); accumulatedUniqueValues += values.uniqueSize();
accumulatedSampledSize += values.size(); accumulatedSampledSize += values.size();
bufferSize = 0; sampleSize = 0;


sampledSteps++; sampledSteps++;
values.clear(); values.clear();
} }

/**
 * Calculates the initial size of the sampled value set from the configured sample size limit.
 * <p>
 * The result is {@code 1 << e}, where {@code e = floor(log2(sampleSizeLimit)) / 2} clamped to the
 * range {@code [10, 16]}:
 * <ul>
 * <li>Minimum possible size is {@code 1 << 10}; it is also used for any non-positive limit.</li>
 * <li>The upper clamp is {@code 1 << 16}. Because the limit is an {@code int}, its base-2 logarithm
 * never exceeds 30, so the largest size actually reachable is {@code 1 << 15}; the clamp is kept
 * as a safety bound.</li>
 * </ul>
 * The logarithm is computed with integer bit arithmetic rather than {@code Math.log}, so the result
 * does not depend on floating-point rounding.
 *
 * @param sampleSizeLimit specified sample size limit
 * @return initial set size, always a power of two
 */
private static int calculateInitialSetSize( int sampleSizeLimit )
{
    // floor(log2(x)) for a positive int is the index of its highest set bit.
    // Non-positive limits have no logarithm; treat them as 0 so they fall back to the minimum.
    int log2OfLimit = sampleSizeLimit > 0 ? 31 - Integer.numberOfLeadingZeros( sampleSizeLimit ) : 0;
    int exponent = Math.min( 16, Math.max( 10, log2OfLimit / 2 ) );
    return 1 << exponent;
}
} }
Expand Up @@ -42,7 +42,7 @@ public class NonUniqueLuceneIndexPopulator extends LuceneIndexPopulator
public NonUniqueLuceneIndexPopulator( LuceneSchemaIndex luceneIndex, IndexSamplingConfig samplingConfig ) public NonUniqueLuceneIndexPopulator( LuceneSchemaIndex luceneIndex, IndexSamplingConfig samplingConfig )
{ {
super( luceneIndex ); super( luceneIndex );
this.sampler = new NonUniqueIndexSampler( samplingConfig.bufferSize() ); this.sampler = new NonUniqueIndexSampler( samplingConfig.sampleSizeLimit() );
} }


@Override @Override
Expand Down
Expand Up @@ -58,7 +58,7 @@ public NonUniqueLuceneIndexSampler( IndexSearcher indexSearcher, TaskControl tas
@Override @Override
protected IndexSample performSampling() throws IndexNotFoundKernelException protected IndexSample performSampling() throws IndexNotFoundKernelException
{ {
NonUniqueIndexSampler sampler = new NonUniqueIndexSampler( indexSamplingConfig.bufferSize() ); NonUniqueIndexSampler sampler = new NonUniqueIndexSampler( indexSamplingConfig.sampleSizeLimit() );
IndexReader indexReader = indexSearcher.getIndexReader(); IndexReader indexReader = indexSearcher.getIndexReader();
for ( LeafReaderContext readerContext : indexReader.leaves() ) for ( LeafReaderContext readerContext : indexReader.leaves() )
{ {
Expand Down

0 comments on commit 1e83bb4

Please sign in to comment.