Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add warning when monotonicity is forced in the input to histogram_quantile #12931

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 5 additions & 1 deletion promql/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -1168,10 +1168,14 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev

for _, mb := range enh.signatureToMetricWithBuckets {
if len(mb.buckets) > 0 {
res, forcedMonotonicity := bucketQuantile(q, mb.buckets)
enh.Out = append(enh.Out, Sample{
Metric: mb.metric,
F: bucketQuantile(q, mb.buckets),
F: res,
})
if forcedMonotonicity {
annos.Add(annotations.NewHistogramQuantileForcedMonotonicityInfo(mb.metric.Get(labels.MetricName), args[1].PositionRange()))
}
}
}

Expand Down
59 changes: 25 additions & 34 deletions promql/quantile.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,17 @@ type metricWithBuckets struct {
// If q<0, -Inf is returned.
//
// If q>1, +Inf is returned.
func bucketQuantile(q float64, buckets buckets) float64 {
//
// We also return a bool to indicate if monotonicity needed to be forced.
func bucketQuantile(q float64, buckets buckets) (float64, bool) {
if math.IsNaN(q) {
return math.NaN()
return math.NaN(), false
}
if q < 0 {
return math.Inf(-1)
return math.Inf(-1), false
}
if q > 1 {
return math.Inf(+1)
return math.Inf(+1), false
}
slices.SortFunc(buckets, func(a, b bucket) int {
// We don't expect the bucket boundary to be a NaN.
Expand All @@ -92,27 +94,27 @@ func bucketQuantile(q float64, buckets buckets) float64 {
return 0
})
if !math.IsInf(buckets[len(buckets)-1].upperBound, +1) {
return math.NaN()
return math.NaN(), false
}

buckets = coalesceBuckets(buckets)
ensureMonotonic(buckets)
forcedMonotonic := ensureMonotonic(buckets)

if len(buckets) < 2 {
return math.NaN()
return math.NaN(), false
}
observations := buckets[len(buckets)-1].count
if observations == 0 {
return math.NaN()
return math.NaN(), false
}
rank := q * observations
b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank })

if b == len(buckets)-1 {
return buckets[len(buckets)-2].upperBound
return buckets[len(buckets)-2].upperBound, forcedMonotonic
}
if b == 0 && buckets[0].upperBound <= 0 {
return buckets[0].upperBound
return buckets[0].upperBound, forcedMonotonic
}
var (
bucketStart float64
Expand All @@ -124,7 +126,7 @@ func bucketQuantile(q float64, buckets buckets) float64 {
count -= buckets[b-1].count
rank -= buckets[b-1].count
}
return bucketStart + (bucketEnd-bucketStart)*(rank/count)
return bucketStart + (bucketEnd-bucketStart)*(rank/count), forcedMonotonic
}

// histogramQuantile calculates the quantile 'q' based on the given histogram.
Expand Down Expand Up @@ -342,46 +344,35 @@ func coalesceBuckets(buckets buckets) buckets {
// The assumption that bucket counts increase monotonically with increasing
// upperBound may be violated during:
//
// * Recording rule evaluation of histogram_quantile, especially when rate()
// has been applied to the underlying bucket timeseries.
// * Evaluation of histogram_quantile computed over federated bucket
// timeseries, especially when rate() has been applied.
//
// This is because scraped data is not made available to rule evaluation or
// federation atomically, so some buckets are computed with data from the
// most recent scrapes, but the other buckets are missing data from the most
// recent scrape.
// - Circumstances where data is already inconsistent at the target's side.
// - Ingestion via the remote write receiver that Prometheus implements.
// - Optimisation of query execution where precision is sacrificed for other
// benefits, not by Prometheus but by systems built on top of it.
//
// Monotonicity is usually guaranteed because if a bucket with upper bound
// u1 has count c1, then any bucket with a higher upper bound u > u1 must
// have counted all c1 observations and perhaps more, so that c >= c1.
//
// Randomly interspersed partial sampling breaks that guarantee, and rate()
// exacerbates it. Specifically, suppose bucket le=1000 has a count of 10 from
// 4 samples but the bucket with le=2000 has a count of 7 from 3 samples. The
// monotonicity is broken. It is exacerbated by rate() because under normal
// operation, cumulative counting of buckets will cause the bucket counts to
// diverge such that small differences from missing samples are not a problem.
// rate() removes this divergence.)
// have counted all c1 observations and perhaps more, so that c >= c1.
//
// bucketQuantile depends on that monotonicity to do a binary search for the
// bucket with the φ-quantile count, so breaking the monotonicity
// guarantee causes bucketQuantile() to return undefined (nonsense) results.
//
// As a somewhat hacky solution until ingestion is atomic per scrape, we
// calculate the "envelope" of the histogram buckets, essentially removing
// any decreases in the count between successive buckets.

func ensureMonotonic(buckets buckets) {
// As a somewhat hacky solution, we calculate the "envelope" of the histogram
// buckets, essentially removing any decreases in the count between successive
// buckets. We return a bool to indicate if this monotonicity was forced or not.
func ensureMonotonic(buckets buckets) bool {
forced := false
max := buckets[0].count
for i := 1; i < len(buckets); i++ {
switch {
case buckets[i].count > max:
max = buckets[i].count
case buckets[i].count < max:
buckets[i].count = max
forced = true
}
}
return forced
}

// quantile calculates the given quantile of a vector of samples.
Expand Down
12 changes: 11 additions & 1 deletion util/annotations/annotations.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ var (
MixedFloatsHistogramsWarning = fmt.Errorf("%w: encountered a mix of histograms and floats for metric name", PromQLWarning)
MixedClassicNativeHistogramsWarning = fmt.Errorf("%w: vector contains a mix of classic and native histograms for metric name", PromQLWarning)

PossibleNonCounterInfo = fmt.Errorf("%w: metric might not be a counter, name does not end in _total/_sum/_count:", PromQLInfo)
PossibleNonCounterInfo = fmt.Errorf("%w: metric might not be a counter, name does not end in _total/_sum/_count:", PromQLInfo)
HistogramQuantileForcedMonotonicityInfo = fmt.Errorf("%w: input to histogram_quantile needed to be fixed for monotonicity (and may give inaccurate results) for metric name", PromQLInfo)
)

type annoErr struct {
Expand Down Expand Up @@ -163,3 +164,12 @@ func NewPossibleNonCounterInfo(metricName string, pos posrange.PositionRange) an
Err: fmt.Errorf("%w %q", PossibleNonCounterInfo, metricName),
}
}

// NewHistogramQuantileForcedMonotonicityInfo is used when the input (classic histograms) to
// histogram_quantile needs to be forced to be monotonic.
func NewHistogramQuantileForcedMonotonicityInfo(metricName string, pos posrange.PositionRange) annoErr {
return annoErr{
PositionRange: pos,
Err: fmt.Errorf("%w %q", HistogramQuantileForcedMonotonicityInfo, metricName),
}
}