diff --git a/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java b/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java index 7fe6cff3b..20e918123 100644 --- a/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java +++ b/simpleclient/src/main/java/io/prometheus/client/CKMSQuantiles.java @@ -129,21 +129,27 @@ final class CKMSQuantiles { * @param quantiles The targeted quantiles, can be empty. */ CKMSQuantiles(Quantile[] quantiles) { - // hard-coded epsilon of 0.1% to determine the batch size, and default epsilon in case of empty quantiles - double pointOnePercent = 0.001; if (quantiles.length == 0) { // we need at least one for this algorithm to work - this.quantiles = new Quantile[1]; - this.quantiles[0] = new Quantile(0.5, pointOnePercent / 2); - } else { - this.quantiles = quantiles; + throw new IllegalArgumentException("quantiles cannot be empty"); } + this.quantiles = quantiles; + // section 5.1 Methods - Batch. + // This is hardcoded to 500, which corresponds to an epsilon of 0.1%. - this.insertThreshold = 500; + // Benchmarks showed that this size has a good performance on the Arrays.sort method used in insertBatch() + // Larger values grow too much in time to sort. + int threshold = 500; + for (Quantile q : quantiles) { + // Find a smaller threshold to cater for scenarios where this is required, e.g., large epsilon. + threshold = Math.min(threshold, (int) (1 / (2 * q.epsilon))); + } + + this.insertThreshold = threshold; // create a buffer with size equal to threshold - this.buffer = new double[insertThreshold]; + this.buffer = new double[threshold]; // Initialize empty items this.samples = new LinkedList(); @@ -184,6 +190,13 @@ public double get(double q) { return Double.NaN; } + // short-circuit min (p0) and max (p100) queries + if (q == 0.0) { + return samples.getFirst().value; + } else if (q == 1.0) { + return samples.getLast().value; + } + // Straightforward implementation of Output(q). // Paper Section 3.1 on true rank: let r_i = Sum_{j=1}^{i−1} g_j int currentRank = 0; @@ -204,8 +217,8 @@ public double get(double q) { } } - // edge case of wanting max value - return samples.getLast().value; + // The iterator is exhausted, return the last value. + return cur.value; } /** @@ -304,6 +317,8 @@ private void insertBatch() { it.add(newItem); count++; item = newItem; + // Note that if the new value v is inserted before vi then ri increases by 1. + currentRank += item.g; } // reset buffered items to 0. @@ -332,6 +347,9 @@ private void compress() { ListIterator it = samples.listIterator(); + // Preserve the first element (min) so that q=0.0 can be looked up + it.next(); + Item prev; Item next = it.next(); @@ -398,7 +416,14 @@ public String toString() { } /** + * Data class for Targeted quantile: T = {(φ_j , ε_j )} * + * Rather than requesting the same ε for all quantiles (the uniform case) + * or ε scaled by φ (the biased case), one might specify an arbitrary set + * of quantiles and the desired errors of ε for each in the form (φj , εj ). + * For example, input to the targeted quantiles problem might be {(0.5, 0.1), (0.2, 0.05), (0.9, 0.01)}, + * meaning that the median should be returned with 10% error, the 20th percentile with 5% error, + * and the 90th percentile with 1%. */ static class Quantile { /** @@ -419,20 +444,13 @@ static class Quantile { final double v; /** - * Targeted quantile: T = {(φ_j , ε_j )} - * Rather than requesting the same ε for all quantiles (the uniform case) - * or ε scaled by φ (the biased case), one might specify an arbitrary set - * of quantiles and the desired errors of ε for each in the form (φj , εj ). - * For example, input to the targeted quantiles problem might be {(0.5, 0.1), (0.2, 0.05), (0.9, 0.01)}, - * meaning that the median should be returned with 10% error, the 20th percentile with 5% error, - * and the 90th percentile with 1%. * * @param quantile the quantile between 0 and 1 * @param epsilon the desired error for this quantile, between 0 and 1. */ Quantile(double quantile, double epsilon) { if (quantile < 0 || quantile > 1.0) throw new IllegalArgumentException("Quantile must be between 0 and 1"); - if (epsilon < 0 || epsilon > 1.0) throw new IllegalArgumentException("Epsilon must be between 0 and 1"); + if (epsilon <= 0 || epsilon >= 1.0) throw new IllegalArgumentException("Epsilon must be between 0 and 1"); this.quantile = quantile; this.epsilon = epsilon; diff --git a/simpleclient/src/main/java/io/prometheus/client/Summary.java b/simpleclient/src/main/java/io/prometheus/client/Summary.java index 34b123ba0..886358cd9 100644 --- a/simpleclient/src/main/java/io/prometheus/client/Summary.java +++ b/simpleclient/src/main/java/io/prometheus/client/Summary.java @@ -99,10 +99,10 @@ public static class Builder extends SimpleCollector.Builder { private int ageBuckets = 5; public Builder quantile(double quantile, double error) { - if (quantile < 0.0 || quantile > 1.0) { + if (quantile <= 0.0 || quantile >= 1.0) { throw new IllegalArgumentException("Quantile " + quantile + " invalid: Expected number between 0.0 and 1.0."); } - if (error < 0.0 || error > 1.0) { + if (error <= 0.0 || error >= 1.0) { throw new IllegalArgumentException("Error " + error + " invalid: Expected number between 0.0 and 1.0."); } quantiles.add(new Quantile(quantile, error)); diff --git a/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java b/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java index 63a281007..021b64b55 100644 --- a/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java +++ b/simpleclient/src/test/java/io/prometheus/client/CKMSQuantilesTest.java @@ -6,7 +6,10 @@ import org.apache.commons.math3.random.RandomGenerator; import org.junit.Test; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Random; import static org.junit.Assert.*; @@ -25,37 +28,6 @@ public void testGetOnEmptyValues() { assertEquals(Double.NaN, ckms.get(0), 0); } - @Test - public void testGetWhenNoQuantilesAreDefined() { - CKMSQuantiles ckms = new CKMSQuantiles(new Quantile[]{}); - assertEquals(Double.NaN, ckms.get(0), 0); - } - - @Test - public void testInsertWhenNoQuantilesAreDefined() { - CKMSQuantiles ckms = new CKMSQuantiles(new Quantile[]{}); - ckms.insert(1.0); - ckms.insert(2.0); - ckms.insert(3.0); - assertEquals(1.0, ckms.get(0), 0); - assertEquals(2.0, ckms.get(0.5), 0); - assertEquals(3.0, ckms.get(1), 0); - } - - @Test - public void testCompressWhenBufferSize500Reached() { - CKMSQuantiles ckms = new CKMSQuantiles(new Quantile[]{}); - List input = makeSequence(1, 499); - - for (double v : input) { - ckms.insert(v); - } - assertEquals("No compress should be triggered", 0, ckms.samples.size()); - - ckms.insert(500); - assertEquals(500, ckms.samples.size()); - } - @Test public void testGet() { List quantiles = new ArrayList(); @@ -64,7 +36,7 @@ public void testGet() { quantiles.add(new Quantile(0.95, 0.01)); quantiles.add(new Quantile(0.99, 0.01)); - List input = makeSequence(1, 100); + List input = setupInput(100); CKMSQuantiles ckms = new CKMSQuantiles( quantiles.toArray(new Quantile[]{})); for (double v : input) { @@ -80,20 +52,41 @@ public void testGet() { @Test public void testGetWithAMillionElements() { List quantiles = new ArrayList(); - quantiles.add(new Quantile(0.0, 0.01)); + quantiles.add(new Quantile(0.0, 0.001)); quantiles.add(new Quantile(0.10, 0.01)); quantiles.add(new Quantile(0.90, 0.001)); quantiles.add(new Quantile(0.95, 0.02)); quantiles.add(new Quantile(0.99, 0.001)); + quantiles.add(new Quantile(1.0, 0.001)); final int elemCount = 1000000; - double[] shuffle = new double[elemCount]; - for (int i = 0; i < shuffle.length; i++) { - shuffle[i] = i + 1; + List shuffle = setupInput(elemCount); + + CKMSQuantiles ckms = new CKMSQuantiles( + quantiles.toArray(new Quantile[]{})); + + for (double v : shuffle) { + ckms.insert(v); } - Random rand = new Random(0); + // given the linear distribution, we set the delta equal to the εn value for this quantile + assertRank(elemCount, ckms.get(0.0), 0.0, 0.001); + assertRank(elemCount, ckms.get(0.1), 0.1, 0.01); + assertRank(elemCount, ckms.get(0.9), 0.9, 0.001); + assertRank(elemCount, ckms.get(0.95), 0.95, 0.02); + assertRank(elemCount, ckms.get(0.99), 0.99, 0.001); + assertRank(elemCount, ckms.get(1.0), 1.0, 0.001); + + assertTrue("sample size should be way below 1_000_000", ckms.samples.size() < 1000); + } + - Collections.shuffle(Arrays.asList(shuffle), rand); + @Test + public void testGetWithASingleQuantile() { + List quantiles = new ArrayList(); + quantiles.add(new Quantile(0.95, 0.02)); + + final int elemCount = 100; + List shuffle = setupInput(elemCount); CKMSQuantiles ckms = new CKMSQuantiles( quantiles.toArray(new Quantile[]{})); @@ -102,14 +95,44 @@ public void testGetWithAMillionElements() { ckms.insert(v); } // given the linear distribution, we set the delta equal to the εn value for this quantile - assertEquals(0.1 * elemCount, ckms.get(0.1), 0.01 * elemCount); - assertEquals(0.9 * elemCount, ckms.get(0.9), 0.001 * elemCount); - assertEquals(0.95 * elemCount, ckms.get(0.95), 0.02 * elemCount); - assertEquals(0.99 * elemCount, ckms.get(0.99), 0.001 * elemCount); + assertRank(elemCount, ckms.get(0.95), 0.95, 0.02); + } - assertTrue("sample size should be way below 1_000_000", ckms.samples.size() < 1000); + @Test + public void testReturnMinAndMaxWithoutPassingTheQuantile() { + List quantiles = new ArrayList(); + quantiles.add(new Quantile(0.95, 0.02)); + + final int elemCount = 1000; + List shuffle = setupInput(elemCount); + + CKMSQuantiles ckms = new CKMSQuantiles( + quantiles.toArray(new Quantile[]{})); + + for (double v : shuffle) { + ckms.insert(v); + } + // given the linear distribution, we set the delta equal to the εn value for this quantile + assertRank(elemCount, ckms.get(0), 0.0, 0.001); + assertRank(elemCount, ckms.get(1), 1.0, 0.001); } + private List setupInput(int elemCount) { + List shuffle = new ArrayList(elemCount); + for (int i = 0; i < elemCount; i++) { + shuffle.add(i + 1.0); + } + Random rand = new Random(0); + Collections.shuffle(shuffle, rand); + return shuffle; + } + + private void assertRank(int elemCount, double actual, double quantile, double epsilon) { + double lowerBound = elemCount * (quantile - epsilon); + double upperBound = elemCount * (quantile + epsilon); + assertTrue("quantile=" + quantile + ", actual=" + actual + ", lowerBound=" + lowerBound, actual >= lowerBound); + assertTrue("quantile=" + quantile + ", actual=" + actual + ", upperBound=" + upperBound, actual <= upperBound); + } @Test public void testGetGaussian() { @@ -182,15 +205,4 @@ public void checkBounds() { // subtract and divide by 2, assuming that the increase is linear in this small epsilon. return Math.abs(upperBound - lowerBound) / 2; } - - /** - * In Java 8 we could use IntStream - */ - List makeSequence(int begin, int end) { - List ret = new ArrayList(end - begin + 1); - for (int i = begin; i <= end; i++) { - ret.add((double) i); - } - return ret; - } }