Skip to content

Commit

Permalink
promql: Remove extrapolation from rate/increase/delta.
Browse files Browse the repository at this point in the history
This is an improvement from both a theoretical and practical
standpoint.

Theory:
For simplicity I'll take the example of increase().

Counters are fundamentally lossy; if there's a counter reset
or instance failure between one scrape and the next we'll
lose information about the period up to the reset/failure.
Thus the samples we have allow us to calculate a lower-bound
on the increase() in a time period.

Extrapolation multiplies this by an amount depending on timing which is
an upper bound based on what might have happened if everything continued
as it was.
This mix of upper and lower bounds means that we can't make any
meaningful statements about the output of increase() in relation to what
actually happened.

By removing the extrapolation, we can once again reason that the result
of increase() is a lower bound on the true increase of the counter.

Practical:
Fixes #581.
The extrapolation presumes that it's looking at a range within a
continuous stream of samples. If in fact the time series starts or
ends within this range, this leads to an over-correction.

For discrete counters and gauges, extrapolating to invalid values in
their domain can be confusing and prevent rules being written that
depend on exact values.

For those looking to graph things more accurately, irate() is a better
choice than extrapolation on rate().
For those looking to calculate how a gauge is trending, deriv() is a
better choice than delta().
  • Loading branch information
brian-brazil committed Oct 11, 2015
1 parent bbdfb10 commit 06c55d7
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 52 deletions.
78 changes: 32 additions & 46 deletions promql/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,55 +46,15 @@ func funcTime(ev *evaluator, args Expressions) model.Value {

// === delta(matrix model.ValMatrix) Vector ===
func funcDelta(ev *evaluator, args Expressions) model.Value {
// This function still takes a 2nd argument for use by rate() and increase().
isCounter := len(args) >= 2 && ev.evalInt(args[1]) > 0
resultVector := vector{}

// If we treat these metrics as counters, we need to fetch all values
// in the interval to find breaks in the timeseries' monotonicity.
// I.e. if a counter resets, we want to ignore that reset.
var matrixValue matrix
if isCounter {
matrixValue = ev.evalMatrix(args[0])
} else {
matrixValue = ev.evalMatrixBounds(args[0])
}
for _, samples := range matrixValue {
for _, samples := range ev.evalMatrixBounds(args[0]) {
// No sense in trying to compute a delta without at least two points. Drop
// this vector element.
if len(samples.Values) < 2 {
continue
}

var (
counterCorrection model.SampleValue
lastValue model.SampleValue
)
for _, sample := range samples.Values {
currentValue := sample.Value
if isCounter && currentValue < lastValue {
counterCorrection += lastValue - currentValue
}
lastValue = currentValue
}
resultValue := lastValue - samples.Values[0].Value + counterCorrection

targetInterval := args[0].(*MatrixSelector).Range
sampledInterval := samples.Values[len(samples.Values)-1].Timestamp.Sub(samples.Values[0].Timestamp)
if sampledInterval == 0 {
// Only found one sample. Cannot compute a rate from this.
continue
}
// Correct for differences in target vs. actual delta interval.
//
// Above, we didn't actually calculate the delta for the specified target
// interval, but for an interval between the first and last found samples
// under the target interval, which will usually have less time between
// them. Depending on how many samples are found under a target interval,
// the delta results are distorted and temporal aliasing occurs (ugly
// bumps). This effect is corrected for below.
intervalCorrection := model.SampleValue(targetInterval) / model.SampleValue(sampledInterval)
resultValue *= intervalCorrection
resultValue := samples.Values[len(samples.Values)-1].Value - samples.Values[0].Value

resultSample := &sample{
Metric: samples.Metric,
Expand All @@ -109,8 +69,7 @@ func funcDelta(ev *evaluator, args Expressions) model.Value {

// === rate(node model.ValMatrix) Vector ===
func funcRate(ev *evaluator, args Expressions) model.Value {
args = append(args, &NumberLiteral{1})
vector := funcDelta(ev, args).(vector)
vector := funcIncrease(ev, args).(vector)

// TODO: could be other type of model.ValMatrix in the future (right now, only
// MatrixSelector exists). Find a better way of getting the duration of a
Expand All @@ -124,8 +83,35 @@ func funcRate(ev *evaluator, args Expressions) model.Value {

// === increase(node model.ValMatrix) Vector ===
func funcIncrease(ev *evaluator, args Expressions) model.Value {
args = append(args, &NumberLiteral{1})
return funcDelta(ev, args).(vector)
resultVector := vector{}
for _, samples := range ev.evalMatrix(args[0]) {
// No sense in trying to compute an increase without at least two points. Drop
// this vector element.
if len(samples.Values) < 2 {
continue
}

var (
counterCorrection model.SampleValue
lastValue model.SampleValue
)
for _, sample := range samples.Values {
currentValue := sample.Value
if currentValue < lastValue {
counterCorrection += lastValue - currentValue
}
lastValue = currentValue
}
resultValue := lastValue - samples.Values[0].Value + counterCorrection
resultSample := &sample{
Metric: samples.Metric,
Value: resultValue,
Timestamp: ev.Timestamp,
}
resultSample.Metric.Del(model.MetricNameLabel)
resultVector = append(resultVector, resultSample)
}
return resultVector
}

// === irate(node model.ValMatrix) Vector ===
Expand Down
9 changes: 7 additions & 2 deletions promql/testdata/functions.test
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ eval instant at 50m increase(http_requests[50m])
{path="/foo"} 100
{path="/bar"} 90

# Tests for increase().
eval instant at 51m increase(http_requests[50m])
{path="/foo"} 90
{path="/bar"} 80

clear

# Tests for irate().
Expand All @@ -87,10 +92,10 @@ load 5m
http_requests{job="app-server", instance="1", group="canary"} 0+80x10

# deriv should return the same as rate in simple cases.
eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[60m])
eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[50m])
{group="canary", instance="1", job="app-server"} 0.26666666666666666

eval instant at 50m deriv(http_requests{group="canary", instance="1", job="app-server"}[60m])
eval instant at 50m deriv(http_requests{group="canary", instance="1", job="app-server"}[50m])
{group="canary", instance="1", job="app-server"} 0.26666666666666666

# deriv should return correct result.
Expand Down
8 changes: 4 additions & 4 deletions promql/testdata/legacy.test
Original file line number Diff line number Diff line change
Expand Up @@ -194,18 +194,18 @@ eval instant at 50m sum(http_requests) by (job) + min(http_requests) by (job) +
{job="api-server"} 1750


# Deltas should be adjusted for target interval vs. samples under target interval.
# Deltas should not be adjusted for target interval vs. samples under target interval.
eval instant at 50m delta(http_requests{group="canary", instance="1", job="app-server"}[18m])
{group="canary", instance="1", job="app-server"} 288
{group="canary", instance="1", job="app-server"} 240


# Rates should calculate per-second rates.
eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[60m])
{group="canary", instance="1", job="app-server"} 0.26666666666666666
{group="canary", instance="1", job="app-server"} 0.2222222222222222

# Counter resets in the middle of the range are handled correctly by rate().
eval instant at 50m rate(testcounter_reset_middle[60m])
{} 0.03
{} 0.025


# Counter resets at end of range are ignored by rate().
Expand Down

0 comments on commit 06c55d7

Please sign in to comment.