
Commit

Merge pull request #1295 from prometheus/beorn7/rate
promql: Limit extrapolation of delta/rate/increase
beorn7 committed Jan 8, 2016
2 parents adb3be4 + c77c3a8 commit c0474d5
Showing 3 changed files with 97 additions and 48 deletions.
104 changes: 60 additions & 44 deletions promql/functions.go
@@ -19,7 +19,6 @@ import (
"regexp"
"sort"
"strconv"
"time"

"github.com/prometheus/common/model"

@@ -44,28 +43,25 @@ func funcTime(ev *evaluator, args Expressions) model.Value {
}
}

- // === delta(matrix model.ValMatrix) Vector ===
- func funcDelta(ev *evaluator, args Expressions) model.Value {
- // This function still takes a 2nd argument for use by rate() and increase().
- isCounter := len(args) >= 2 && ev.evalInt(args[1]) > 0
+ // extrapolatedRate is a utility function for rate/increase/delta.
+ // It calculates the rate (allowing for counter resets if isCounter is true),
+ // extrapolates if the first/last sample is close to the boundary, and returns
+ // the result as either per-second (if isRate is true) or overall.
+ func extrapolatedRate(ev *evaluator, arg Expr, isCounter bool, isRate bool) model.Value {
+ ms := arg.(*MatrixSelector)
+
+ rangeStart := ev.Timestamp.Add(-ms.Range - ms.Offset)
+ rangeEnd := ev.Timestamp.Add(-ms.Offset)

resultVector := vector{}

- // If we treat these metrics as counters, we need to fetch all values
- // in the interval to find breaks in the timeseries' monotonicity.
- // I.e. if a counter resets, we want to ignore that reset.
- var matrixValue matrix
- if isCounter {
- matrixValue = ev.evalMatrix(args[0])
- } else {
- matrixValue = ev.evalMatrixBounds(args[0])
- }
+ matrixValue := ev.evalMatrix(ms)
for _, samples := range matrixValue {
- // No sense in trying to compute a delta without at least two points. Drop
+ // No sense in trying to compute a rate without at least two points. Drop
// this vector element.
if len(samples.Values) < 2 {
continue
}

var (
counterCorrection model.SampleValue
lastValue model.SampleValue
@@ -79,22 +75,48 @@ func funcDelta(ev *evaluator, args Expressions) model.Value {
}
resultValue := lastValue - samples.Values[0].Value + counterCorrection

- targetInterval := args[0].(*MatrixSelector).Range
- sampledInterval := samples.Values[len(samples.Values)-1].Timestamp.Sub(samples.Values[0].Timestamp)
- if sampledInterval == 0 {
- // Only found one sample. Cannot compute a rate from this.
- continue
+ // Duration between first/last samples and boundary of range.
+ durationToStart := samples.Values[0].Timestamp.Sub(rangeStart).Seconds()
+ durationToEnd := rangeEnd.Sub(samples.Values[len(samples.Values)-1].Timestamp).Seconds()
+
+ sampledInterval := samples.Values[len(samples.Values)-1].Timestamp.Sub(samples.Values[0].Timestamp).Seconds()
+ averageDurationBetweenSamples := sampledInterval / float64(len(samples.Values)-1)
+
+ if isCounter && resultValue > 0 && samples.Values[0].Value >= 0 {
+ // Counters cannot be negative. If we have any slope at
+ // all (i.e. resultValue went up), we can extrapolate
+ // the zero point of the counter. If the duration to the
+ // zero point is shorter than the durationToStart, we
+ // take the zero point as the start of the series,
+ // thereby avoiding extrapolation to negative counter
+ // values.
+ durationToZero := sampledInterval * float64(samples.Values[0].Value/resultValue)
+ if durationToZero < durationToStart {
+ durationToStart = durationToZero
+ }
+ }
+
+ // If the first/last samples are close to the boundaries of the range,
+ // extrapolate the result. This is as we expect that another sample
+ // will exist given the spacing between samples we've seen thus far,
+ // with an allowance for noise.
+ extrapolationThreshold := averageDurationBetweenSamples * 1.1
+ extrapolateToInterval := sampledInterval
+
+ if durationToStart < extrapolationThreshold {
+ extrapolateToInterval += durationToStart
+ } else {
+ extrapolateToInterval += averageDurationBetweenSamples / 2
+ }
+ if durationToEnd < extrapolationThreshold {
+ extrapolateToInterval += durationToEnd
+ } else {
+ extrapolateToInterval += averageDurationBetweenSamples / 2
+ }
+ resultValue = resultValue * model.SampleValue(extrapolateToInterval/sampledInterval)
+ if isRate {
+ resultValue = resultValue / model.SampleValue(ms.Range.Seconds())
+ }
- // Correct for differences in target vs. actual delta interval.
- //
- // Above, we didn't actually calculate the delta for the specified target
- // interval, but for an interval between the first and last found samples
- // under the target interval, which will usually have less time between
- // them. Depending on how many samples are found under a target interval,
- // the delta results are distorted and temporal aliasing occurs (ugly
- // bumps). This effect is corrected for below.
- intervalCorrection := model.SampleValue(targetInterval) / model.SampleValue(sampledInterval)
- resultValue *= intervalCorrection

resultSample := &sample{
Metric: samples.Metric,
@@ -107,25 +129,19 @@ func funcDelta(ev *evaluator, args Expressions) model.Value {
return resultVector
}

+ // === delta(matrix model.ValMatrix) Vector ===
+ func funcDelta(ev *evaluator, args Expressions) model.Value {
+ return extrapolatedRate(ev, args[0], false, false)
+ }
+
// === rate(node model.ValMatrix) Vector ===
func funcRate(ev *evaluator, args Expressions) model.Value {
- args = append(args, &NumberLiteral{1})
- vector := funcDelta(ev, args).(vector)
-
- // TODO: could be other type of model.ValMatrix in the future (right now, only
- // MatrixSelector exists). Find a better way of getting the duration of a
- // matrix, such as looking at the samples themselves.
- interval := args[0].(*MatrixSelector).Range
- for i := range vector {
- vector[i].Value /= model.SampleValue(interval / time.Second)
- }
- return vector
+ return extrapolatedRate(ev, args[0], true, true)
}

// === increase(node model.ValMatrix) Vector ===
func funcIncrease(ev *evaluator, args Expressions) model.Value {
- args = append(args, &NumberLiteral{1})
- return funcDelta(ev, args).(vector)
+ return extrapolatedRate(ev, args[0], true, false)
}

// === irate(node model.ValMatrix) Vector ===
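To make the new window arithmetic easier to follow on its own, here is a minimal standalone sketch of the extrapolation logic above, using plain float64 seconds instead of model.Time and model.SampleValue. The names point and extrapolatedDelta are illustrative only and do not exist in the codebase; the per-second division that rate() applies at the end is done inline in main. The example reproduces the {start="0m"} case from the new legacy.test block further down.

package main

import "fmt"

type point struct {
	t float64 // timestamp in seconds
	v float64 // sample value
}

// extrapolatedDelta mirrors the window adjustment above: the raw delta over the
// sampled interval is scaled up towards the requested range, but a boundary gap
// larger than ~1.1x the average sample spacing only contributes half an average
// interval, and a counter is never extrapolated below zero on the left.
func extrapolatedDelta(samples []point, rangeStart, rangeEnd float64, isCounter bool) float64 {
	if len(samples) < 2 {
		return 0 // cannot compute a delta from fewer than two points
	}
	first, last := samples[0], samples[len(samples)-1]

	// Raw increase, correcting for counter resets if requested.
	var counterCorrection, lastValue float64
	for _, s := range samples {
		if isCounter && s.v < lastValue {
			counterCorrection += lastValue
		}
		lastValue = s.v
	}
	result := last.v - first.v + counterCorrection

	durationToStart := first.t - rangeStart
	durationToEnd := rangeEnd - last.t
	sampledInterval := last.t - first.t
	averageDuration := sampledInterval / float64(len(samples)-1)

	// Cap the left-hand gap at the point where the counter would have been zero.
	if isCounter && result > 0 && first.v >= 0 {
		if durationToZero := sampledInterval * (first.v / result); durationToZero < durationToStart {
			durationToStart = durationToZero
		}
	}

	threshold := averageDuration * 1.1
	extrapolateTo := sampledInterval
	if durationToStart < threshold {
		extrapolateTo += durationToStart
	} else {
		extrapolateTo += averageDuration / 2
	}
	if durationToEnd < threshold {
		extrapolateTo += durationToEnd
	} else {
		extrapolateTo += averageDuration / 2
	}
	return result * (extrapolateTo / sampledInterval)
}

func main() {
	// Samples every 240s, the last one 120s before the range end, counter starting
	// at zero: the raw delta of 480 is extrapolated to 600 over the 1200s range.
	samples := []point{{0, 0}, {240, 240}, {480, 480}}
	delta := extrapolatedDelta(samples, -600, 600, true)
	fmt.Println(delta, "=> rate:", delta/1200) // 600 => rate: 0.5
}
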
8 changes: 6 additions & 2 deletions promql/testdata/functions.test
@@ -63,6 +63,10 @@ eval instant at 50m increase(http_requests[50m])
{path="/foo"} 100
{path="/bar"} 90

+ eval instant at 50m increase(http_requests[100m])
+ {path="/foo"} 100
+ {path="/bar"} 90

clear

# Tests for irate().
@@ -87,10 +91,10 @@ load 5m
http_requests{job="app-server", instance="1", group="canary"} 0+80x10

# deriv should return the same as rate in simple cases.
- eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[60m])
+ eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[50m])
{group="canary", instance="1", job="app-server"} 0.26666666666666666

- eval instant at 50m deriv(http_requests{group="canary", instance="1", job="app-server"}[60m])
+ eval instant at 50m deriv(http_requests{group="canary", instance="1", job="app-server"}[50m])
{group="canary", instance="1", job="app-server"} 0.26666666666666666

# deriv should return correct result.
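Two of the updated functions.test expectations are worth checking by hand. The new increase(http_requests[100m]) case returns the same 100 as the 50m case rather than roughly doubling: assuming the load block earlier in this file gives {path="/foo"} the samples 0, 10, ..., 100 at 5m spacing, the raw increase over the sampled 50m is 100, the right range boundary coincides with the last sample, and although the left boundary lies a full 50m before the first sample, the zero cutoff reduces durationToStart to 0 because the first sample value is already 0, so the window is not extended at all and the result stays 100 * (3000s / 3000s) = 100. The rate()/deriv() comparison was moved from a 60m to a 50m range because rate() now divides by the full requested range: over [50m] the result is 800 / 3000s = 0.26666..., while over [60m] the same eleven samples would give 800 / 3600s ≈ 0.222 and no longer match deriv().
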
33 changes: 31 additions & 2 deletions promql/testdata/legacy.test
@@ -15,6 +15,14 @@ load 5m
testcounter_reset_middle 0+10x4 0+10x5
testcounter_reset_end 0+10x9 0 10

+ load 4m
+ testcounter_zero_cutoff{start="0m"} 0+240x10
+ testcounter_zero_cutoff{start="1m"} 60+240x10
+ testcounter_zero_cutoff{start="2m"} 120+240x10
+ testcounter_zero_cutoff{start="3m"} 180+240x10
+ testcounter_zero_cutoff{start="4m"} 240+240x10
+ testcounter_zero_cutoff{start="5m"} 300+240x10

load 5m
label_grouping_test{a="aa", b="bb"} 0+10x10
label_grouping_test{a="a", b="abb"} 0+20x10
@@ -151,18 +159,39 @@ eval instant at 50m delta(http_requests{group="canary", instance="1", job="app-s


# Rates should calculate per-second rates.
- eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[60m])
+ eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[50m])
{group="canary", instance="1", job="app-server"} 0.26666666666666666


# Counter resets in the middle of the range are handled correctly by rate().
- eval instant at 50m rate(testcounter_reset_middle[60m])
+ eval instant at 50m rate(testcounter_reset_middle[50m])
{} 0.03


# Counter resets at end of range are ignored by rate().
eval instant at 50m rate(testcounter_reset_end[5m])
{} 0


+ # Zero cutoff for left-side extrapolation.
+ eval instant at 10m rate(testcounter_zero_cutoff[20m])
+ {start="0m"} 0.5
+ {start="1m"} 0.55
+ {start="2m"} 0.6
+ {start="3m"} 0.65
+ {start="4m"} 0.7
+ {start="5m"} 0.6
+
+ # Normal half-interval cutoff for left-side extrapolation.
+ eval instant at 50m rate(testcounter_zero_cutoff[20m])
+ {start="0m"} 0.6
+ {start="1m"} 0.6
+ {start="2m"} 0.6
+ {start="3m"} 0.6
+ {start="4m"} 0.6
+ {start="5m"} 0.6


# count_scalar for a non-empty vector should return scalar element count.
eval instant at 50m count_scalar(http_requests)
8
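As a sanity check on the new zero-cutoff expectations: at the 10m evaluation with a 20m range, the range boundaries are -10m and 10m and the samples fall at 0m, 4m and 8m, so every series increases by 480 over a sampled interval of 480s, with 600s of range to the left of the first sample and 120s to the right of the last. For start="1m" the first sample is 60, so the extrapolated zero point lies 60/480 * 480s = 60s before it; the window is extended by those 60s on the left and by the full 120s on the right, giving 480 * 660/480 = 660 over the 1200s range, i.e. 0.55/s. For start="5m" the zero point is 300s back, beyond the 264s threshold (1.1 times the 240s average spacing), so only half an average interval (120s) is added on the left: 480 * 720/480 = 720, i.e. 0.6/s. In the second block, evaluated at 50m, the counters are already far above zero at the start of the window, so the zero cutoff never applies and every series comes out at the same 0.6/s.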
