From 5fe48be506fe2b946d4db9cd7976ad0c4d4e7e1a Mon Sep 17 00:00:00 2001
From: Mike Spreitzer <mspreitz@us.ibm.com>
Date: Tue, 5 Jan 2021 23:31:06 -0500
Subject: [PATCH 1/4] Add unit test for sample-and-watermark histograms

---
 .../pkg/util/flowcontrol/metrics/BUILD        | 13 ++-
 .../metrics/sample_and_watermark_test.go      | 93 +++++++++++++++++++
 2 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/sample_and_watermark_test.go

diff --git a/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/BUILD b/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/BUILD
index cf4d2f2427b1..4cea275a17db 100644
--- a/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/BUILD
+++ b/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/BUILD
@@ -1,4 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
 
 go_library(
     name = "go_default_library",
@@ -32,3 +32,14 @@ filegroup(
     tags = ["automanaged"],
     visibility = ["//visibility:public"],
 )
+
+go_test(
+    name = "go_default_test",
+    srcs = ["sample_and_watermark_test.go"],
+    embed = [":go_default_library"],
+    deps = [
+        "//staging/src/k8s.io/apimachinery/pkg/util/clock:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics/legacyregistry:go_default_library",
+    ],
+)
diff --git a/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/sample_and_watermark_test.go b/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/sample_and_watermark_test.go
new file mode 100644
index 000000000000..cb21df3faa90
--- /dev/null
+++ b/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/sample_and_watermark_test.go
@@ -0,0 +1,93 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package metrics
+
+import (
+	"fmt"
+	"math/rand"
+	"testing"
+	"time"
+
+	"k8s.io/apimachinery/pkg/util/clock"
+	compbasemetrics "k8s.io/component-base/metrics"
+	"k8s.io/component-base/metrics/legacyregistry"
+)
+
+const samplesHistName = "sawtestsamples"
+
+func TestSampler(t *testing.T) {
+	t0 := time.Now()
+	clk := clock.NewFakePassiveClock(t0)
+	buckets := []float64{0, 1}
+	const samplingPeriod = time.Millisecond
+	gen := NewSampleAndWaterMarkHistogramsGenerator(clk, samplingPeriod,
+		&compbasemetrics.HistogramOpts{Name: samplesHistName, Buckets: buckets},
+		&compbasemetrics.HistogramOpts{Name: "marks", Buckets: buckets},
+		[]string{})
+	saw := gen.Generate(0, 1, []string{})
+	regs := gen.metrics()
+	for _, reg := range regs {
+		legacyregistry.MustRegister(reg)
+	}
+	dt := 2 * samplingPeriod
+	t1 := t0.Add(dt)
+	t.Logf("t0=%s", t0)
+	for i := 0; i < 100; i++ {
+		ddt := time.Microsecond * time.Duration(rand.Intn(3000)-500)
+		t1 = t1.Add(ddt)
+		diff := t1.Sub(t0)
+		if diff > dt {
+			dt = diff
+		}
+		clk.SetTime(t1)
+		saw.Set(1)
+		expectedCount := int64(dt / time.Millisecond)
+		actualCount, err := getHistogramCount(regs, samplesHistName)
+		if err != nil {
+			t.Fatalf("For t0=%s, t1=%s, failed to getHistogramCount: %#+v", t0, t1, err)
+		}
+		t.Logf("For i=%d, ddt=%s, t1=%s, diff=%s, dt=%s, count=%d", i, ddt, t1, diff, dt, actualCount)
+		if expectedCount != actualCount {
+			t.Errorf("For i=%d, t0=%s, ddt=%s, t1=%s, expectedCount=%d, actualCount=%d", i, t0, ddt, t1, expectedCount, actualCount)
+		}
+	}
+}
+
+func getHistogramCount(regs Registerables, metricName string) (int64, error) {
+	considered := []string{}
+	mfs, err := legacyregistry.DefaultGatherer.Gather()
+	if err != nil {
+		return 0, fmt.Errorf("failed to gather metrics: %s", err)
+	}
+	for _, mf := range mfs {
+		thisName := mf.GetName()
+		if thisName != metricName {
+			considered = append(considered, thisName)
+			continue
+		}
+		metric := mf.GetMetric()[0]
+		hist := metric.GetHistogram()
+		if hist == nil {
+			return 0, fmt.Errorf("dto.Metric has nil Histogram")
+		}
+		if hist.SampleCount == nil {
+			return 0, fmt.Errorf("dto.Histogram has nil SampleCount")
+		}
+		return int64(*hist.SampleCount), nil
+	}
+	return 0, fmt.Errorf("not found, considered=%#+v", considered)
+}

From 194c22fed54ba89d985085412795a9850bedeaf3 Mon Sep 17 00:00:00 2001
From: Mike Spreitzer <mspreitz@us.ibm.com>
Date: Wed, 6 Jan 2021 12:56:16 -0500
Subject: [PATCH 2/4] Explain warnings in new test

Add info message that explains what warnings are deliberate and expected.
---
 .../apiserver/pkg/util/flowcontrol/metrics/BUILD    |  1 +
 .../metrics/sample_and_watermark_test.go            | 13 ++++++++++---
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/BUILD b/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/BUILD
index 4cea275a17db..fa038f0dcc6a 100644
--- a/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/BUILD
+++ b/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/BUILD
@@ -41,5 +41,6 @@ go_test(
         "//staging/src/k8s.io/apimachinery/pkg/util/clock:go_default_library",
         "//staging/src/k8s.io/component-base/metrics:go_default_library",
         "//staging/src/k8s.io/component-base/metrics/legacyregistry:go_default_library",
+        "//vendor/k8s.io/klog/v2:go_default_library",
     ],
 )
diff --git a/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/sample_and_watermark_test.go b/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/sample_and_watermark_test.go
index cb21df3faa90..0b7810dcc9a8 100644
--- a/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/sample_and_watermark_test.go
+++ b/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/sample_and_watermark_test.go
@@ -25,9 +25,15 @@ import (
 	"k8s.io/apimachinery/pkg/util/clock"
 	compbasemetrics "k8s.io/component-base/metrics"
 	"k8s.io/component-base/metrics/legacyregistry"
+	"k8s.io/klog/v2"
 )
 
-const samplesHistName = "sawtestsamples"
+const (
+	samplesHistName = "sawtestsamples"
+	ddtRange        = 3000
+	ddtOffset       = 500
+	numIterations   = 100
+)
 
 func TestSampler(t *testing.T) {
 	t0 := time.Now()
@@ -45,9 +51,10 @@ func TestSampler(t *testing.T) {
 	}
 	dt := 2 * samplingPeriod
 	t1 := t0.Add(dt)
+	klog.Infof("Expect about %v warnings about time going backwards; this is fake time deliberately misbehaving.", (numIterations*ddtOffset)/ddtRange)
 	t.Logf("t0=%s", t0)
-	for i := 0; i < 100; i++ {
-		ddt := time.Microsecond * time.Duration(rand.Intn(3000)-500)
+	for i := 0; i < numIterations; i++ {
+		ddt := time.Microsecond * time.Duration(rand.Intn(ddtRange)-ddtOffset)
 		t1 = t1.Add(ddt)
 		diff := t1.Sub(t0)
 		if diff > dt {

From 42f8d4e28fc05f3532f5cf7d2294049e3314896a Mon Sep 17 00:00:00 2001
From: Mike Spreitzer <mspreitz@us.ibm.com>
Date: Mon, 18 Jan 2021 00:57:09 -0500
Subject: [PATCH 3/4] Added comment explaining the new unit test

---
 .../flowcontrol/metrics/sample_and_watermark_test.go   | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/sample_and_watermark_test.go b/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/sample_and_watermark_test.go
index 0b7810dcc9a8..2726101a7e89 100644
--- a/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/sample_and_watermark_test.go
+++ b/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/sample_and_watermark_test.go
@@ -35,6 +35,15 @@ const (
 	numIterations   = 100
 )
 
+/* TestSampler does a rough behavioral test of SampleAndWatermarkHistograms.
+   The test creates one and exercises it, checking that the count in the
+   sampling histogram is correct at each step.  A fake clock is used, and
+   the exercise consists of changing that fake clock by an amount of time
+   chosen uniformly at random from a range that goes from a little negative
+   to somewhat more than two sampling periods.  The negative changes are
+   included because negative changes have been observed in real monotonic
+   clock readings.
+*/
 func TestSampler(t *testing.T) {
 	t0 := time.Now()
 	clk := clock.NewFakePassiveClock(t0)
@@ -74,6 +83,7 @@ func TestSampler(t *testing.T) {
 	}
 }
 
+/* getHistogramCount returns the count of the named histogram */
 func getHistogramCount(regs Registerables, metricName string) (int64, error) {
 	considered := []string{}
 	mfs, err := legacyregistry.DefaultGatherer.Gather()

From 736a87a4ee516a3feca0a79a4c48806ad4916aa3 Mon Sep 17 00:00:00 2001
From: Mike Spreitzer <mspreitz@us.ibm.com>
Date: Sun, 24 Jan 2021 23:38:48 -0500
Subject: [PATCH 4/4] Tweak up TestSampler in response to review

---
 .../metrics/sample_and_watermark_test.go      | 45 ++++++++++++-------
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/sample_and_watermark_test.go b/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/sample_and_watermark_test.go
index 2726101a7e89..ef0795d46cba 100644
--- a/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/sample_and_watermark_test.go
+++ b/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics/sample_and_watermark_test.go
@@ -29,26 +29,31 @@ import (
 )
 
 const (
-	samplesHistName = "sawtestsamples"
-	ddtRange        = 3000
-	ddtOffset       = 500
-	numIterations   = 100
+	samplesHistName       = "sawtestsamples"
+	samplingPeriod        = time.Millisecond
+	ddtRangeCentiPeriods  = 300
+	ddtOffsetCentiPeriods = 50
+	numIterations         = 100
 )
 
-/* TestSampler does a rough behavioral test of SampleAndWatermarkHistograms.
-   The test creates one and exercises it, checking that the count in the
-   sampling histogram is correct at each step.  A fake clock is used, and
-   the exercise consists of changing that fake clock by an amount of time
-   chosen uniformly at random from a range that goes from a little negative
-   to somewhat more than two sampling periods.  The negative changes are
-   included because negative changes have been observed in real monotonic
-   clock readings.
+/* TestSampler does a rough behavioral test of the sampling in a
+   SampleAndWatermarkHistograms.  The test creates one and exercises
+   it, checking that the count in the sampling histogram is correct at
+   each step.  The sampling histogram is expected to get one
+   observation at the end of each sampling period.  A fake clock is
+   used, and the exercise consists of repeatedly changing that fake
+   clock by an amount of time chosen uniformly at random from a range
+   that goes from a little negative to somewhat more than two sampling
+   periods.  The negative changes are included because small negative
+   changes have been observed in real monotonic clock readings (see
+   issue #96459) and we want to test that they are properly tolerated.
+   The designed toleration is to pretend that the clock did not
+   change, until it resumes net forward progress.
 */
 func TestSampler(t *testing.T) {
 	t0 := time.Now()
 	clk := clock.NewFakePassiveClock(t0)
 	buckets := []float64{0, 1}
-	const samplingPeriod = time.Millisecond
 	gen := NewSampleAndWaterMarkHistogramsGenerator(clk, samplingPeriod,
 		&compbasemetrics.HistogramOpts{Name: samplesHistName, Buckets: buckets},
 		&compbasemetrics.HistogramOpts{Name: "marks", Buckets: buckets},
@@ -58,12 +63,18 @@ func TestSampler(t *testing.T) {
 	for _, reg := range regs {
 		legacyregistry.MustRegister(reg)
 	}
-	dt := 2 * samplingPeriod
+	// `dt` is the admitted cumulative difference in fake time
+	// since the start of the test.  "admitted" means this is
+	// never allowed to decrease, which matches the designed
+	// toleration for negative monotonic clock changes.
+	var dt time.Duration
+	// `t1` is the current fake time
 	t1 := t0.Add(dt)
-	klog.Infof("Expect about %v warnings about time going backwards; this is fake time deliberately misbehaving.", (numIterations*ddtOffset)/ddtRange)
+	klog.Infof("Expect about %v warnings about time going backwards; this is fake time deliberately misbehaving.", (numIterations*ddtOffsetCentiPeriods)/ddtRangeCentiPeriods)
 	t.Logf("t0=%s", t0)
 	for i := 0; i < numIterations; i++ {
-		ddt := time.Microsecond * time.Duration(rand.Intn(ddtRange)-ddtOffset)
+		// `ddt` is the next step to take in fake time
+		ddt := time.Duration(rand.Intn(ddtRangeCentiPeriods)-ddtOffsetCentiPeriods) * samplingPeriod / 100
 		t1 = t1.Add(ddt)
 		diff := t1.Sub(t0)
 		if diff > dt {
@@ -71,7 +82,7 @@ func TestSampler(t *testing.T) {
 		}
 		clk.SetTime(t1)
 		saw.Set(1)
-		expectedCount := int64(dt / time.Millisecond)
+		expectedCount := int64(dt / samplingPeriod)
 		actualCount, err := getHistogramCount(regs, samplesHistName)
 		if err != nil {
 			t.Fatalf("For t0=%s, t1=%s, failed to getHistogramCount: %#+v", t0, t1, err)