Skip to content

Commit

Permalink
monitor: add more kube-ovn-cni metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
oilbeater committed Sep 20, 2020
1 parent 697e95f commit 38adc18
Show file tree
Hide file tree
Showing 3 changed files with 192 additions and 4 deletions.
2 changes: 2 additions & 0 deletions pkg/daemon/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,15 @@ func (csh cniServerHandler) handleAdd(req *restful.Request, resp *restful.Respon
if pod.Annotations[fmt.Sprintf(util.AllocatedAnnotationTemplate, podRequest.Provider)] != "true" {
klog.Infof("wait address for pod %s/%s ", podRequest.PodNamespace, podRequest.PodName)
// wait controller assign an address
cniWaitAddressResult.WithLabelValues(nodeName).Inc()
time.Sleep(1 * time.Second)
continue
}

if err := util.ValidatePodNetwork(pod.Annotations); err != nil {
klog.Errorf("validate pod %s/%s failed, %v", podRequest.PodNamespace, podRequest.PodName, err)
// wait controller assign an address
cniWaitAddressResult.WithLabelValues(nodeName).Inc()
time.Sleep(1 * time.Second)
continue
}
Expand Down
193 changes: 189 additions & 4 deletions pkg/daemon/metrics.go
Original file line number Diff line number Diff line change
@@ -1,21 +1,206 @@
package daemon

import "github.com/prometheus/client_golang/prometheus"
import (
"net/url"
"time"

"github.com/prometheus/client_golang/prometheus"
reflectormetrics "k8s.io/client-go/tools/cache"
clientmetrics "k8s.io/client-go/tools/metrics"
)

// Package-level Prometheus collectors used by the daemon. The CNI collectors
// are registered in init(); the client and reflector collectors are registered
// by registerClientMetrics and registerReflectorMetrics respectively.
var (
// nodeName is the value passed for the "node_name" label by callers such as
// handleAdd and waiteNetworkReady. It is empty here; presumably it is assigned
// during daemon start-up — TODO confirm.
nodeName = ""
// cniOperationHistogram records CNI operation latency, labelled by node name,
// method and status code.
// NOTE(review): the two Name/Help/Buckets groups below appear to be the removed
// and added sides of the rendered diff, not one valid literal.
// NOTE(review): prometheus.ExponentialBuckets(0.001, 2, 10) yields upper bounds
// from 0.001 up to 0.512, so observations above roughly half a second only
// land in the implicit +Inf bucket — confirm that resolution is intended.
cniOperationHistogram = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "cni_op_latency_second",
Help: "the latency second for cni operations",
Buckets: []float64{.1, .25, .5, 1, 2, 4, 8, 16, 32, 64, 128, 256},
Name: "cni_op_latency_seconds",
Help: "the latency seconds for cni operations",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 10),
}, []string{
"node_name",
"method",
"status_code",
})

// cniWaitAddressResult is incremented by handleAdd once per one-second sleep
// while it waits for the pod's address annotations to be present and valid.
cniWaitAddressResult = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "cni_wait_address_seconds_total",
Help: "Latency that cni wait controller to assign an address",
},
[]string{"node_name"},
)

// cniConnectivityResult is increased by waiteNetworkReady with the number of
// packets the pinger sent.
// NOTE(review): the metric name says "seconds" but the value added is a packet
// count — confirm the ping interval makes these equivalent.
cniConnectivityResult = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "cni_wait_connectivity_seconds_total",
Help: "Latency that cni wait address ready in overlay network",
},
[]string{"node_name"},
)

// client metrics
// requestLatency is fed through latencyAdapter with the verb and the URL
// string as label values and the latency in seconds as the observation.
requestLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "rest_client_request_latency_seconds",
Help: "Request latency in seconds. Broken down by verb and URL.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 10),
},
[]string{"verb", "url"},
)

// requestResult is fed through resultAdapter, one increment per
// (code, method, host) combination reported by client-go.
requestResult = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "rest_client_requests_total",
Help: "Number of HTTP requests, partitioned by status code, method, and host.",
},
[]string{"code", "method", "host"},
)

// reflector metrics

// TODO(directxman12): update these to be histograms once the metrics overhaul KEP
// PRs start landing.

// reflectorSubsystem is the Subsystem shared by every reflector collector below.
reflectorSubsystem = "reflector"

// The collectors below are each labelled by reflector "name" and are handed to
// client-go through reflectorMetricsProvider.
listsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
Subsystem: reflectorSubsystem,
Name: "lists_total",
Help: "Total number of API lists done by the reflectors",
}, []string{"name"})

listsDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{
Subsystem: reflectorSubsystem,
Name: "list_duration_seconds",
Help: "How long an API list takes to return and decode for the reflectors",
}, []string{"name"})

itemsPerList = prometheus.NewSummaryVec(prometheus.SummaryOpts{
Subsystem: reflectorSubsystem,
Name: "items_per_list",
Help: "How many items an API list returns to the reflectors",
}, []string{"name"})

watchesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
Subsystem: reflectorSubsystem,
Name: "watches_total",
Help: "Total number of API watches done by the reflectors",
}, []string{"name"})

shortWatchesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
Subsystem: reflectorSubsystem,
Name: "short_watches_total",
Help: "Total number of short API watches done by the reflectors",
}, []string{"name"})

watchDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{
Subsystem: reflectorSubsystem,
Name: "watch_duration_seconds",
Help: "How long an API watch takes to return and decode for the reflectors",
}, []string{"name"})

itemsPerWatch = prometheus.NewSummaryVec(prometheus.SummaryOpts{
Subsystem: reflectorSubsystem,
Name: "items_per_watch",
Help: "How many items an API watch returns to the reflectors",
}, []string{"name"})

lastResourceVersion = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Subsystem: reflectorSubsystem,
Name: "last_resource_version",
Help: "Last resource version seen for the reflectors",
}, []string{"name"})
)

// init registers the daemon's collectors when the package is loaded: first the
// reflector collectors (and their client-go provider), then the three CNI
// collectors. Any registration failure panics via MustRegister.
func init() {
	registerReflectorMetrics()

	// NOTE(review): registerClientMetrics is not invoked from here; confirm
	// whether it is called elsewhere or was meant to be called alongside
	// registerReflectorMetrics.
	prometheus.MustRegister(
		cniOperationHistogram,
		cniWaitAddressResult,
		cniConnectivityResult,
	)
}

// registerClientMetrics publishes the REST client latency and result
// collectors and plugs them into client-go's metrics hooks.
func registerClientMetrics() {
	// Expose both collectors through the Prometheus registry first.
	prometheus.MustRegister(requestLatency, requestResult)

	// Then give client-go the adapters that forward its observations to them.
	latency := &latencyAdapter{metric: requestLatency}
	result := &resultAdapter{metric: requestResult}
	clientmetrics.Register(latency, result)
}

// registerReflectorMetrics sets up the reflector (reconcile) loop metrics: it
// registers every reflector collector with Prometheus and then installs
// reflectorMetricsProvider as client-go's reflector metrics provider.
func registerReflectorMetrics() {
	prometheus.MustRegister(
		listsTotal,
		listsDuration,
		itemsPerList,
		watchesTotal,
		shortWatchesTotal,
		watchDuration,
		itemsPerWatch,
		lastResourceVersion,
	)

	reflectormetrics.SetReflectorMetricsProvider(reflectorMetricsProvider{})
}

// This section contains adapters, implementations, and other sundry organic, artisanally
// hand-crafted syntax trees required to convince client-go that it actually wants to let
// someone use its metrics.

// Client metrics adapters (method #1 for client-go metrics),
// copied (more-or-less directly) from k8s.io/kubernetes setup code
// (which isn't anywhere in an easily-importable place).

// latencyAdapter forwards request latencies to a Prometheus histogram vector.
type latencyAdapter struct {
	metric *prometheus.HistogramVec
}

// Observe records latency, converted to seconds, on the series labelled with
// verb and the string form of u.
func (a *latencyAdapter) Observe(verb string, u url.URL, latency time.Duration) {
	seconds := latency.Seconds()
	a.metric.WithLabelValues(verb, u.String()).Observe(seconds)
}

// resultAdapter forwards request results to a Prometheus counter vector.
type resultAdapter struct {
	metric *prometheus.CounterVec
}

// Increment adds one to the series labelled with code, method and host.
func (a *resultAdapter) Increment(code, method, host string) {
	series := a.metric.WithLabelValues(code, method, host)
	series.Inc()
}

// Reflector metrics provider (method #2 for client-go metrics),
// copied (more-or-less directly) from k8s.io/kubernetes setup code
// (which isn't anywhere in an easily-importable place).

// reflectorMetricsProvider hands client-go the per-reflector series of the
// package-level reflector collectors. It carries no state of its own.
type reflectorMetricsProvider struct{}

// Compile-time check that the provider satisfies client-go's interface.
var _ reflectormetrics.MetricsProvider = reflectorMetricsProvider{}

// NewListsMetric returns the listsTotal series for the named reflector.
func (reflectorMetricsProvider) NewListsMetric(name string) reflectormetrics.CounterMetric {
	return listsTotal.WithLabelValues(name)
}

// NewListDurationMetric returns the listsDuration series for the named reflector.
func (reflectorMetricsProvider) NewListDurationMetric(name string) reflectormetrics.SummaryMetric {
	return listsDuration.WithLabelValues(name)
}

// NewItemsInListMetric returns the itemsPerList series for the named reflector.
func (reflectorMetricsProvider) NewItemsInListMetric(name string) reflectormetrics.SummaryMetric {
	return itemsPerList.WithLabelValues(name)
}

// NewWatchesMetric returns the watchesTotal series for the named reflector.
func (reflectorMetricsProvider) NewWatchesMetric(name string) reflectormetrics.CounterMetric {
	return watchesTotal.WithLabelValues(name)
}

// NewShortWatchesMetric returns the shortWatchesTotal series for the named reflector.
func (reflectorMetricsProvider) NewShortWatchesMetric(name string) reflectormetrics.CounterMetric {
	return shortWatchesTotal.WithLabelValues(name)
}

// NewWatchDurationMetric returns the watchDuration series for the named reflector.
func (reflectorMetricsProvider) NewWatchDurationMetric(name string) reflectormetrics.SummaryMetric {
	return watchDuration.WithLabelValues(name)
}

// NewItemsInWatchMetric returns the itemsPerWatch series for the named reflector.
func (reflectorMetricsProvider) NewItemsInWatchMetric(name string) reflectormetrics.SummaryMetric {
	return itemsPerWatch.WithLabelValues(name)
}

// NewLastResourceVersionMetric returns the lastResourceVersion series for the named reflector.
func (reflectorMetricsProvider) NewLastResourceVersionMetric(name string) reflectormetrics.GaugeMetric {
	return lastResourceVersion.WithLabelValues(name)
}
1 change: 1 addition & 0 deletions pkg/daemon/ovs.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ func waiteNetworkReady(gateway string) error {
}
pinger.Run()

cniConnectivityResult.WithLabelValues(nodeName).Add(float64(pinger.PacketsSent))
if !success {
return fmt.Errorf("network not ready after 600 ping")
}
Expand Down

0 comments on commit 38adc18

Please sign in to comment.