Skip to content

Commit

Permalink
Include namespace label in Prometheus metrics
Browse files Browse the repository at this point in the history
Avoid aliasing between namespaces and allow reporting on metrics for
individual namespaces by including the namespace as a label.
  • Loading branch information
zaneb committed Dec 16, 2019
1 parent f3ab6ff commit b5095d1
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 12 deletions.
23 changes: 17 additions & 6 deletions pkg/controller/baremetalhost/baremetalhost_controller.go
Expand Up @@ -42,22 +42,26 @@ import (

const (
// hostErrorRetryDelay is a fixed 10-second delay.
// NOTE(review): presumably the requeue delay applied after a host
// error; its use is not visible in this hunk, so confirm.
hostErrorRetryDelay = time.Second * 10

// Prometheus label names shared by the host metrics in this package.
// Keeping them as constants ensures the label lists passed when a
// metric vector is created match the keys used when it is updated.

// labelHostNamespace is the label carrying the host's namespace.
labelHostNamespace = "namespace"
// labelHostName is the label carrying the host's name.
labelHostName = "host"
// labelPowerOnOff is the label on the power-change counter; it is set
// to "on" or "off" according to the host's Spec.Online value.
labelPowerOnOff = "on_off"
)

// runInTestMode is bound to the "test-mode" command-line flag, whose help
// text is "disable ironic communication".
var runInTestMode bool

// runInDemoMode is a package-level mode switch.
// NOTE(review): its flag binding is not visible in this hunk; confirm in init().
var runInDemoMode bool
var reconcileCounters = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "metal3_reconcile_total",
Help: "The number of times hosts have been reconciled",
}, []string{"host"})
}, []string{labelHostNamespace, labelHostName})
var reconcileErrorCounter = prometheus.NewCounter(prometheus.CounterOpts{
Name: "metal3_reconcile_error_total",
Help: "The number of times the operator has failed to reconcile a host",
})
var powerChangeAttempts = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "metal3_operation_power_change_total",
Help: "Number of times a host has been powered on or off",
}, []string{"host", "on_off"})
}, []string{labelHostNamespace, labelHostName, labelPowerOnOff})

func init() {
flag.BoolVar(&runInTestMode, "test-mode", false, "disable ironic communication")
Expand Down Expand Up @@ -162,8 +166,10 @@ func (info *reconcileInfo) publishEvent(reason, message string) {
// is true, otherwise upon completion it will remove the work from the
// queue.
func (r *ReconcileBareMetalHost) Reconcile(request reconcile.Request) (result reconcile.Result, err error) {

reconcileCounters.WithLabelValues(request.Name).Inc()
reconcileCounters.With(prometheus.Labels{
labelHostNamespace: request.Namespace,
labelHostName: request.Name,
}).Inc()
defer func() {
if err != nil {
reconcileErrorCounter.Inc()
Expand Down Expand Up @@ -626,11 +632,16 @@ func (r *ReconcileBareMetalHost) manageHostPower(prov provisioner.Provisioner, i

if provResult.Dirty {
info.postSaveCallbacks = append(info.postSaveCallbacks, func() {
metricLabels := prometheus.Labels{
labelHostNamespace: info.host.Namespace,
labelHostName: info.host.Name,
}
if info.host.Spec.Online {
powerChangeAttempts.WithLabelValues(info.host.Name, "on").Inc()
metricLabels[labelPowerOnOff] = "on"
} else {
powerChangeAttempts.WithLabelValues(info.host.Name, "off").Inc()
metricLabels[labelPowerOnOff] = "off"
}
powerChangeAttempts.With(metricLabels).Inc()
})
info.host.ClearError()
return actionContinue{provResult.RequeueAfter}
Expand Down
14 changes: 8 additions & 6 deletions pkg/controller/baremetalhost/host_state_machine.go
Expand Up @@ -17,22 +17,22 @@ var stateTime = map[metal3v1alpha1.ProvisioningState]*prometheus.HistogramVec{
metal3v1alpha1.StateRegistering: prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: "metal3_operation_register_duration_seconds",
Help: "Length of time per registration per host",
}, []string{"host"}),
}, []string{labelHostNamespace, labelHostName}),
metal3v1alpha1.StateInspecting: prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: "metal3_operation_inspect_duration_seconds",
Help: "Length of time per hardware inspection per host",
Buckets: slowOperationBuckets,
}, []string{"host"}),
}, []string{labelHostNamespace, labelHostName}),
metal3v1alpha1.StateProvisioning: prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: "metal3_operation_provision_duration_seconds",
Help: "Length of time per hardware provision operation per host",
Buckets: slowOperationBuckets,
}, []string{"host"}),
}, []string{labelHostNamespace, labelHostName}),
metal3v1alpha1.StateDeprovisioning: prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: "metal3_operation_deprovision_duration_seconds",
Help: "Length of time per hardware deprovision operation per host",
Buckets: slowOperationBuckets,
}, []string{"host"}),
}, []string{labelHostNamespace, labelHostName}),
}

func init() {
Expand Down Expand Up @@ -98,8 +98,10 @@ func recordStateEnd(info *reconcileInfo, host *metal3v1alpha1.BareMetalHost, sta
if !prevMetric.Start.IsZero() {
prevMetric.End = time
info.postSaveCallbacks = append(info.postSaveCallbacks, func() {
stateTime[state].WithLabelValues(host.Name).Observe(
prevMetric.Duration().Seconds())
stateTime[state].With(prometheus.Labels{
labelHostNamespace: host.Namespace,
labelHostName: host.Name,
}).Observe(prevMetric.Duration().Seconds())
})
}
}
Expand Down

0 comments on commit b5095d1

Please sign in to comment.