Merge pull request #64838 from krzysied/scheduling_latency_metric_fix
Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Adding summary metric for scheduling latency

**What this PR does / why we need it**:
Re-introduces the histogram metrics for backward compatibility.
Changes the SchedulingLatency metric to follow Prometheus best practices (a sketch of the resulting pattern follows the release note).
ref #64316

**Release note**:

```release-note
NONE
```
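
For context, the pattern this PR adopts is to feed the legacy microsecond histogram and the new seconds-based summary from the same timer. Below is a minimal, self-contained sketch of that pattern using client_golang; the package and variable names are illustrative, not the exact identifiers from the diff.

```go
// Package latencysketch illustrates the dual-recording pattern from this PR:
// a legacy microsecond histogram is kept for backward compatibility while a
// labelled summary reports the same latency in seconds, the Prometheus base unit.
package latencysketch

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

var (
	// Legacy histogram, still exposed in microseconds.
	bindingLatency = prometheus.NewHistogram(prometheus.HistogramOpts{
		Subsystem: "scheduler",
		Name:      "binding_latency_microseconds",
		Help:      "Binding latency",
		Buckets:   prometheus.ExponentialBuckets(1000, 2, 15),
	})

	// New summary, labelled by operation and reported in seconds.
	schedulingLatency = prometheus.NewSummaryVec(prometheus.SummaryOpts{
		Subsystem: "scheduler",
		Name:      "scheduling_latency_seconds",
		Help:      "Scheduling latency in seconds split by sub-parts of the scheduling operation",
		MaxAge:    5 * time.Hour,
	}, []string{"operation"})
)

func init() {
	prometheus.MustRegister(bindingLatency, schedulingLatency)
}

// observeBinding records one binding operation from a single start timestamp
// into both metric families, as the scheduler does after a successful bind.
func observeBinding(start time.Time) {
	bindingLatency.Observe(float64(time.Since(start).Nanoseconds() / int64(time.Microsecond)))
	schedulingLatency.WithLabelValues("binding").Observe(time.Since(start).Seconds())
}
```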
Kubernetes Submit Queue committed Jun 15, 2018
2 parents 3abba25 + e32910a commit a6e61e7
Showing 3 changed files with 51 additions and 21 deletions.
42 changes: 37 additions & 5 deletions pkg/scheduler/metrics/metrics.go
@@ -27,16 +27,15 @@ const (
// SchedulerSubsystem - subsystem name used by scheduler
SchedulerSubsystem = "scheduler"
// SchedulingLatencyName - scheduler latency metric name
SchedulingLatencyName = "scheduling_latencies_summary"
SchedulingLatencyName = "scheduling_latency_seconds"

// OperationLabel - operation label name
OperationLabel = "operation"
// Binding - binding operation label value
Binding = "binding"
// SchedulingAlgorithm - scheduling algorithm operation label value
SchedulingAlgorithm = "scheduling_algorithm"
// SelectingNode - selecting node operation label value
SelectingNode = "selecting_node"
// E2eScheduling - e2e scheduling operation label value
E2eScheduling = "e2e_scheduling"
)

// All the histogram based metrics have 1ms as size for the smallest bucket.
@@ -45,13 +44,29 @@ var (
prometheus.SummaryOpts{
Subsystem: SchedulerSubsystem,
Name: SchedulingLatencyName,
Help: "Scheduling latency in microseconds split by sub-parts of the scheduling operation",
Help: "Scheduling latency in seconds split by sub-parts of the scheduling operation",
// Make the sliding window of 5h.
// TODO: The value for this should be based on some SLI definition (long term).
MaxAge: 5 * time.Hour,
},
[]string{OperationLabel},
)
E2eSchedulingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "e2e_scheduling_latency_microseconds",
Help: "E2e scheduling latency (scheduling algorithm + binding)",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
SchedulingAlgorithmLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_latency_microseconds",
Help: "Scheduling algorithm latency",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
SchedulingAlgorithmPredicateEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
@@ -76,6 +91,14 @@ var (
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
BindingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "binding_latency_microseconds",
Help: "Binding latency",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
PreemptionVictims = prometheus.NewGauge(
prometheus.GaugeOpts{
Subsystem: SchedulerSubsystem,
@@ -90,6 +113,9 @@ var (
})
metricsList = []prometheus.Collector{
SchedulingLatency,
E2eSchedulingLatency,
SchedulingAlgorithmLatency,
BindingLatency,
SchedulingAlgorithmPredicateEvaluationDuration,
SchedulingAlgorithmPriorityEvaluationDuration,
SchedulingAlgorithmPremptionEvaluationDuration,
@@ -102,6 +128,7 @@ var registerMetrics sync.Once

// Register all metrics.
func Register() {
// Register the metrics.
registerMetrics.Do(func() {
for _, metric := range metricsList {
prometheus.MustRegister(metric)
@@ -118,3 +145,8 @@ func Reset() {
func SinceInMicroseconds(start time.Time) float64 {
return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds())
}

// SinceInSeconds gets the time since the specified start in seconds.
func SinceInSeconds(start time.Time) float64 {
return time.Since(start).Seconds()
}
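
As an aside, the re-introduced histograms keep the original bucket layout. A quick sketch, assuming the standard client_golang package, of what `ExponentialBuckets(1000, 2, 15)` expands to in microseconds:

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// ExponentialBuckets(start, factor, count) returns `count` upper bounds,
	// each `factor` times the previous one. Starting at 1000µs (1ms) and
	// doubling across 15 buckets covers roughly 1ms up to ~16.4s.
	fmt.Println(prometheus.ExponentialBuckets(1000, 2, 15))
	// approx: [1000 2000 4000 8000 16000 32000 64000 128000 256000 512000
	//          1.024e+06 2.048e+06 4.096e+06 8.192e+06 1.6384e+07]
}
```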
8 changes: 5 additions & 3 deletions pkg/scheduler/scheduler.go
@@ -429,7 +429,8 @@ func (sched *Scheduler) bind(assumed *v1.Pod, b *v1.Binding) error {
return err
}

metrics.SchedulingLatency.WithLabelValues(metrics.Binding).Observe(metrics.SinceInMicroseconds(bindingStart))
metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
metrics.SchedulingLatency.WithLabelValues(metrics.Binding).Observe(metrics.SinceInSeconds(bindingStart))
sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v/%v to %v", assumed.Namespace, assumed.Name, b.Target.Name)
return nil
}
@@ -461,7 +462,8 @@ func (sched *Scheduler) scheduleOne() {
}
return
}
metrics.SchedulingLatency.WithLabelValues(metrics.SchedulingAlgorithm).Observe(metrics.SinceInMicroseconds(start))
metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start))
metrics.SchedulingLatency.WithLabelValues(metrics.SelectingNode).Observe(metrics.SinceInSeconds(start))
// Tell the cache to assume that a pod now is running on a given node, even though it hasn't been bound yet.
// This allows us to keep scheduling without waiting on binding to occur.
assumedPod := pod.DeepCopy()
@@ -496,7 +498,7 @@ func (sched *Scheduler) scheduleOne() {
Name: suggestedHost,
},
})
metrics.SchedulingLatency.WithLabelValues(metrics.E2eScheduling).Observe(metrics.SinceInMicroseconds(start))
metrics.E2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start))
if err != nil {
glog.Errorf("Internal error binding pod: (%v)", err)
}
22 changes: 9 additions & 13 deletions test/e2e/framework/metrics_util.go
@@ -210,13 +210,12 @@ func (l *PodStartupLatency) PrintJSON() string {
}

type SchedulingMetrics struct {
SchedulingLatency LatencyMetric `json:"schedulingLatency"`
BindingLatency LatencyMetric `json:"bindingLatency"`
E2ELatency LatencyMetric `json:"e2eLatency"`
ThroughputAverage float64 `json:"throughputAverage"`
ThroughputPerc50 float64 `json:"throughputPerc50"`
ThroughputPerc90 float64 `json:"throughputPerc90"`
ThroughputPerc99 float64 `json:"throughputPerc99"`
SelectingNodeLatency LatencyMetric `json:"selectingNodeLatency"`
BindingLatency LatencyMetric `json:"bindingLatency"`
ThroughputAverage float64 `json:"throughputAverage"`
ThroughputPerc50 float64 `json:"throughputPerc50"`
ThroughputPerc90 float64 `json:"throughputPerc90"`
ThroughputPerc99 float64 `json:"throughputPerc99"`
}

func (l *SchedulingMetrics) SummaryKind() string {
@@ -512,23 +511,20 @@ func getSchedulingLatency(c clientset.Interface) (*SchedulingMetrics, error) {

var metric *LatencyMetric = nil
switch sample.Metric[schedulermetric.OperationLabel] {
case schedulermetric.SchedulingAlgorithm:
metric = &result.SchedulingLatency
case schedulermetric.SelectingNode:
metric = &result.SelectingNodeLatency
case schedulermetric.Binding:
metric = &result.BindingLatency
case schedulermetric.E2eScheduling:
metric = &result.E2ELatency
}
if metric == nil {
continue
}

latency := sample.Value
quantile, err := strconv.ParseFloat(string(sample.Metric[model.QuantileLabel]), 64)
if err != nil {
return nil, err
}
setQuantile(metric, quantile, time.Duration(int64(latency)))
setQuantile(metric, quantile, time.Duration(int64(float64(sample.Value)*float64(time.Second))))
}
return &result, nil
}
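
For reference, the summary that getSchedulingLatency now parses is exposed under the new name with per-operation labels, and its quantile values are plain seconds; that is why the helper converts a sample with `float64(sample.Value) * float64(time.Second)` before building a `time.Duration`. An illustrative scrape (the numbers are invented for the example):

```
# HELP scheduler_scheduling_latency_seconds Scheduling latency in seconds split by sub-parts of the scheduling operation
# TYPE scheduler_scheduling_latency_seconds summary
scheduler_scheduling_latency_seconds{operation="selecting_node",quantile="0.5"} 0.00042
scheduler_scheduling_latency_seconds{operation="selecting_node",quantile="0.9"} 0.0011
scheduler_scheduling_latency_seconds{operation="selecting_node",quantile="0.99"} 0.0038
scheduler_scheduling_latency_seconds{operation="binding",quantile="0.5"} 0.0021
scheduler_scheduling_latency_seconds{operation="binding",quantile="0.99"} 0.0125
```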
