From 7250083a1ea77f0c27d9eb0f388e36695d1c09bf Mon Sep 17 00:00:00 2001 From: Minhan Xia Date: Thu, 4 Feb 2021 15:54:10 -0800 Subject: [PATCH] add metrics to nodeSyncLoop in service controller --- .../cloud-provider/controllers/service/BUILD | 3 + .../controllers/service/controller.go | 16 ++++- .../controllers/service/metrics.go | 58 +++++++++++++++++++ 3 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 staging/src/k8s.io/cloud-provider/controllers/service/metrics.go diff --git a/staging/src/k8s.io/cloud-provider/controllers/service/BUILD b/staging/src/k8s.io/cloud-provider/controllers/service/BUILD index ac337f5e8a32..00918e32bbc5 100644 --- a/staging/src/k8s.io/cloud-provider/controllers/service/BUILD +++ b/staging/src/k8s.io/cloud-provider/controllers/service/BUILD @@ -5,6 +5,7 @@ go_library( srcs = [ "controller.go", "doc.go", + "metrics.go", ], importmap = "k8s.io/kubernetes/vendor/k8s.io/cloud-provider/controllers/service", importpath = "k8s.io/cloud-provider/controllers/service", @@ -27,6 +28,8 @@ go_library( "//staging/src/k8s.io/cloud-provider:go_default_library", "//staging/src/k8s.io/cloud-provider/service/helpers:go_default_library", "//staging/src/k8s.io/component-base/featuregate:go_default_library", + "//staging/src/k8s.io/component-base/metrics:go_default_library", + "//staging/src/k8s.io/component-base/metrics/legacyregistry:go_default_library", "//staging/src/k8s.io/component-base/metrics/prometheus/ratelimiter:go_default_library", "//vendor/k8s.io/klog/v2:go_default_library", ], diff --git a/staging/src/k8s.io/cloud-provider/controllers/service/controller.go b/staging/src/k8s.io/cloud-provider/controllers/service/controller.go index 5bd4a4211b31..f49a5c23b900 100644 --- a/staging/src/k8s.io/cloud-provider/controllers/service/controller.go +++ b/staging/src/k8s.io/cloud-provider/controllers/service/controller.go @@ -112,11 +112,12 @@ func New( recorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: 
"service-controller"}) if kubeClient != nil && kubeClient.CoreV1().RESTClient().GetRateLimiter() != nil { - if err := ratelimiter.RegisterMetricAndTrackRateLimiterUsage("service_controller", kubeClient.CoreV1().RESTClient().GetRateLimiter()); err != nil { + if err := ratelimiter.RegisterMetricAndTrackRateLimiterUsage(subSystemName, kubeClient.CoreV1().RESTClient().GetRateLimiter()); err != nil { return nil, err } } + registerMetrics() s := &Controller{ cloud: cloud, knownHosts: []*v1.Node{}, @@ -663,6 +664,13 @@ func nodeReadyConditionStatus(node *v1.Node) v1.ConditionStatus { func (s *Controller) nodeSyncLoop() { s.knownHostsLock.Lock() defer s.knownHostsLock.Unlock() + startTime := time.Now() + defer func() { + latency := time.Now().Sub(startTime).Seconds() + klog.V(4).Infof("It took %v seconds to finish nodeSyncLoop", latency) + nodeSyncLatency.Observe(latency) + }() + newHosts, err := listWithPredicate(s.nodeLister, s.getNodeConditionPredicate()) if err != nil { runtime.HandleError(fmt.Errorf("Failed to retrieve current set of nodes from node lister: %v", err)) @@ -713,6 +721,12 @@ func (s *Controller) lockedUpdateLoadBalancerHosts(service *v1.Service, hosts [] if !wantsLoadBalancer(service) { return nil } + startTime := time.Now() + defer func() { + latency := time.Now().Sub(startTime).Seconds() + klog.V(4).Infof("It took %v seconds to update load balancer hosts for service %s/%s", latency, service.Namespace, service.Name) + updateLoadBalancerHostLatency.Observe(latency) + }() // This operation doesn't normally take very long (and happens pretty often), so we only record the final event err := s.balancer.UpdateLoadBalancer(context.TODO(), s.clusterName, service, hosts) diff --git a/staging/src/k8s.io/cloud-provider/controllers/service/metrics.go b/staging/src/k8s.io/cloud-provider/controllers/service/metrics.go new file mode 100644 index 000000000000..e8410e3eb814 --- /dev/null +++ b/staging/src/k8s.io/cloud-provider/controllers/service/metrics.go @@ -0,0 
+1,58 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package service + +import ( + "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" + "sync" +) + +const ( + // subSystemName is the subsystem name used for the Prometheus metrics defined in this file. + subSystemName = "service_controller" +) + +var register sync.Once + +// registerMetrics registers the service-controller histograms with the legacy registry; the sync.Once guard makes repeated calls no-ops. +func registerMetrics() { + register.Do(func() { + legacyregistry.MustRegister(nodeSyncLatency) + legacyregistry.MustRegister(updateLoadBalancerHostLatency) + }) +} + +var ( + nodeSyncLatency = metrics.NewHistogram(&metrics.HistogramOpts{ + Name: "nodesync_latency_seconds", + Subsystem: subSystemName, + Help: "A metric measuring the latency for nodesync which updates loadbalancer hosts on cluster node updates.", + // 15 exponential buckets, doubling from 1s up to 16384s. + Buckets: metrics.ExponentialBuckets(1, 2, 15), + StabilityLevel: metrics.ALPHA, + }) + + updateLoadBalancerHostLatency = metrics.NewHistogram(&metrics.HistogramOpts{ + Name: "update_loadbalancer_host_latency_seconds", + Subsystem: subSystemName, + Help: "A metric measuring the latency for updating each load balancer hosts.", + // 15 exponential buckets, doubling from 1s up to 16384s. + Buckets: metrics.ExponentialBuckets(1, 2, 15), + StabilityLevel: metrics.ALPHA, + }) +)