Skip to content

Commit

Permalink
Merge histograms without losing precision (elastic#93704)
Browse files Browse the repository at this point in the history
For empty histogram aggregations we instantiate by default a DoubleHistogram
with 3 significant digits precision. The test generates a random value for
the number of significant digits that is in the range [0, 5]. As a result, if the
test runs with 4 or 5 significant value digits but the HdrHistogram sketch only
uses 3, checking errors on results fails since all computations are done with
lower than expected precision.

The issue happens at reduction time in AbstractInternalHDRPercentiles when
merging histograms coming from different shards. If a shard returns no data the
sketch is empty but uses 3 significant digits, while for non empty results the
correct number of digits is used. Now, depending on the order sketches are
merged it might happen, for instance, that we merge a sketch using 4 or 5
significant digits into one using 3 significant digits (used for the empty result).
The result, will then use whatever precision is used by the first "merged" object
created. This in some cases leads to correct result and sometimes not.

Here, when merging histograms, we always use the one with higher value of
numberOfSignificantValueDigits so to avoid reducing precision of the result.

Note that, as a result of this merging strategy, we can even use just 0 digits
precision for empty results and save on some serialization/deserialization
for empty histograms.

Resolves elastic#92822
  • Loading branch information
salvatore-campagna committed Feb 16, 2023
1 parent ab9d389 commit 7d94bb0
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 7 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/93704.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 93704
summary: Merge two histograms usign the higher number of digits among all histograms
area: Aggregations
type: bug
issues:
- 92822
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
package org.elasticsearch.search.aggregations.metrics;

import org.apache.logging.log4j.LogManager;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.CollectionUtils;
Expand Down Expand Up @@ -50,7 +49,6 @@
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.sameInstance;

@LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/92822")
public class HDRPercentilesIT extends AbstractNumericTestCase {

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@

abstract class AbstractInternalHDRPercentiles extends InternalNumericMetricsAggregation.MultiValue {

private static final DoubleHistogram EMPTY_HISTOGRAM = new DoubleHistogram(3);
private static final DoubleHistogram EMPTY_HISTOGRAM_THREE_DIGITS = new DoubleHistogram(3);
private static final DoubleHistogram EMPTY_HISTOGRAM_ZERO_DIGITS = new DoubleHistogram(0);

protected final double[] keys;
protected final DoubleHistogram state;
Expand Down Expand Up @@ -91,7 +92,9 @@ protected void doWriteTo(StreamOutput out) throws IOException {
out.writeBoolean(false);
}
} else {
DoubleHistogram state = this.state != null ? this.state : EMPTY_HISTOGRAM;
DoubleHistogram state = this.state != null ? this.state
: out.getTransportVersion().onOrAfter(TransportVersion.V_8_7_0) ? EMPTY_HISTOGRAM_ZERO_DIGITS
: EMPTY_HISTOGRAM_THREE_DIGITS;
encode(state, out);
}
out.writeBoolean(keyed);
Expand Down Expand Up @@ -157,14 +160,35 @@ public AbstractInternalHDRPercentiles reduce(List<InternalAggregation> aggregati
merged = new DoubleHistogram(percentiles.state);
merged.setAutoResize(true);
}
merged.add(percentiles.state);
merged = merge(merged, percentiles.state);
}
if (merged == null) {
merged = EMPTY_HISTOGRAM;
merged = EMPTY_HISTOGRAM_ZERO_DIGITS;
}
return createReduced(getName(), keys, merged, keyed, getMetadata());
}

/**
* Merges two {@link DoubleHistogram}s such that we always merge the one with less
* numberOfSignificantValueDigits into the one with more numberOfSignificantValueDigits.
* This prevents producing a result that has lower than expected precision.
*
* @param histogram1 The first histogram to merge
* @param histogram2 The second histogram to merge
* @return One of the input histograms such that the one with higher numberOfSignificantValueDigits is used as the one for merging
*/
private DoubleHistogram merge(final DoubleHistogram histogram1, final DoubleHistogram histogram2) {
DoubleHistogram moreDigits = histogram1;
DoubleHistogram lessDigits = histogram2;
if (histogram2.getNumberOfSignificantValueDigits() > histogram1.getNumberOfSignificantValueDigits()) {
moreDigits = histogram2;
lessDigits = histogram1;
}
moreDigits.setAutoResize(true);
moreDigits.add(lessDigits);
return moreDigits;
}

@Override
public InternalAggregation finalizeSampling(SamplingContext samplingContext) {
return this;
Expand All @@ -180,7 +204,7 @@ protected abstract AbstractInternalHDRPercentiles createReduced(

@Override
public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
DoubleHistogram state = this.state != null ? this.state : EMPTY_HISTOGRAM;
DoubleHistogram state = this.state != null ? this.state : EMPTY_HISTOGRAM_ZERO_DIGITS;
if (keyed) {
builder.startObject(CommonFields.VALUES.getPreferredName());
for (int i = 0; i < keys.length; ++i) {
Expand Down

0 comments on commit 7d94bb0

Please sign in to comment.