Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Log prometheus metric registration error and fix CRD metric names #71767

Merged
merged 3 commits into from
Jan 31, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func NewAutoRegistrationController(crdinformer crdinformers.CustomResourceDefini
crdSynced: crdinformer.Informer().HasSynced,
apiServiceRegistration: apiServiceRegistration,
syncedInitialSet: make(chan struct{}),
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "crd-autoregister"),
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "crd_autoregistration_controller"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to understand: dash was never allowed and this metric was never registered?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nvmd. It's the queue name.

}
c.syncHandler = c.handleVersionUpdate

Expand Down
1 change: 1 addition & 0 deletions pkg/util/workqueue/prometheus/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ go_library(
deps = [
"//staging/src/k8s.io/client-go/util/workqueue:go_default_library",
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],
)

Expand Down
29 changes: 22 additions & 7 deletions pkg/util/workqueue/prometheus/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package prometheus

import (
"k8s.io/client-go/util/workqueue"
"k8s.io/klog"

"github.com/prometheus/client_golang/prometheus"
)
Expand All @@ -37,7 +38,9 @@ func (prometheusMetricsProvider) NewDepthMetric(name string) workqueue.GaugeMetr
Name: "depth",
Help: "Current depth of workqueue: " + name,
})
prometheus.Register(depth)
if err := prometheus.Register(depth); err != nil {
klog.Errorf("failed to register depth metric %v: %v", name, err)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't this me a more fatal error actually? It's a programming error. Those names are static strings, i.e. they don't come from the user.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there are more failures caused by duplication that I didn't dig into (see the gist). We can have a followup issue to clean those up

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's not a complete list. kcm/ccm may also have such failure

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

created #73551

}
return depth
}

Expand All @@ -47,7 +50,9 @@ func (prometheusMetricsProvider) NewAddsMetric(name string) workqueue.CounterMet
Name: "adds",
Help: "Total number of adds handled by workqueue: " + name,
})
prometheus.Register(adds)
if err := prometheus.Register(adds); err != nil {
klog.Errorf("failed to register adds metric %v: %v", name, err)
}
return adds
}

Expand All @@ -57,7 +62,9 @@ func (prometheusMetricsProvider) NewLatencyMetric(name string) workqueue.Summary
Name: "queue_latency",
Help: "How long an item stays in workqueue" + name + " before being requested.",
})
prometheus.Register(latency)
if err := prometheus.Register(latency); err != nil {
klog.Errorf("failed to register latency metric %v: %v", name, err)
}
return latency
}

Expand All @@ -67,7 +74,9 @@ func (prometheusMetricsProvider) NewWorkDurationMetric(name string) workqueue.Su
Name: "work_duration",
Help: "How long processing an item from workqueue" + name + " takes.",
})
prometheus.Register(workDuration)
if err := prometheus.Register(workDuration); err != nil {
klog.Errorf("failed to register work_duration metric %v: %v", name, err)
}
return workDuration
}

Expand All @@ -80,7 +89,9 @@ func (prometheusMetricsProvider) NewUnfinishedWorkSecondsMetric(name string) wor
"values indicate stuck threads. One can deduce the number of stuck " +
"threads by observing the rate at which this increases.",
})
prometheus.Register(unfinished)
if err := prometheus.Register(unfinished); err != nil {
klog.Errorf("failed to register unfinished_work_seconds metric %v: %v", name, err)
}
return unfinished
}

Expand All @@ -91,7 +102,9 @@ func (prometheusMetricsProvider) NewLongestRunningProcessorMicrosecondsMetric(na
Help: "How many microseconds has the longest running " +
"processor for " + name + " been running.",
})
prometheus.Register(unfinished)
if err := prometheus.Register(unfinished); err != nil {
klog.Errorf("failed to register longest_running_processor_microseconds metric %v: %v", name, err)
}
return unfinished
}

Expand All @@ -101,6 +114,8 @@ func (prometheusMetricsProvider) NewRetriesMetric(name string) workqueue.Counter
Name: "retries",
Help: "Total number of retries handled by workqueue: " + name,
})
prometheus.Register(retries)
if err := prometheus.Register(retries); err != nil {
klog.Errorf("failed to register retries metric %v: %v", name, err)
}
return retries
}
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func NewCRDFinalizer(
crdLister: crdInformer.Lister(),
crdSynced: crdInformer.Informer().HasSynced,
crClientGetter: crClientGetter,
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "CustomResourceDefinition-CRDFinalizer"),
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "crd_finalizer"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

crd_finalizer_controller?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

crd_finalizer_controller sounds like a controller for crd finalizer to me

also we tend to omit "controller" from the name for other in-tree controllers: https://github.com/kubernetes/kubernetes/search?q=NewNamedRateLimitingQueue&unscoped_q=NewNamedRateLimitingQueue

}

crdInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func NewNamingConditionController(
crdClient: crdClient,
crdLister: crdInformer.Lister(),
crdSynced: crdInformer.Informer().HasSynced,
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "CustomResourceDefinition-NamingConditionController"),
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "crd_naming_condition_controller"),
}

informerIndexer := crdInformer.Informer().GetIndexer()
Expand Down