Skip to content

Commit

Permalink
metrics: add nfd_node_update_requests_total counter
Browse files Browse the repository at this point in the history
Add a counter for the total number of node update/sync requests. In
practice, this counts the number of gRPC requests received if the gRPC
API is in use. If the NodeFeature API is enabled, this counts the
requests initiated by the NFD API controller, i.e. updates triggered by
changes in NodeFeature or NodeFeatureRule objects plus updates initiated
by the controller resync period.
  • Loading branch information
marquiz committed Aug 2, 2023
1 parent 72d9df9 commit 75c2d5d
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/deployment/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ The exposed metrics are
| ------------------------------------------------- | --------- | ---------------------------------------
| `nfd_master_build_info` | Gauge | Version from which nfd-master was built
| `nfd_worker_build_info` | Gauge | Version from which nfd-worker was built
| `nfd_node_update_requests_total` | Counter | Number of node update requests processed by the master
| `nfd_node_updates_total` | Counter | Number of nodes updated
| `nfd_node_update_failures_total` | Counter | Number of node update failures
| `nfd_node_labels_rejected_total` | Counter | Number of node labels rejected by nfd-master
Expand Down
5 changes: 5 additions & 0 deletions pkg/nfd-master/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
// When adding metric names, see https://prometheus.io/docs/practices/naming/#metric-names
const (
buildInfoQuery = "nfd_master_build_info"
nodeUpdateRequestsQuery = "nfd_node_update_requests_total"
nodeUpdatesQuery = "nfd_node_updates_total"
nodeUpdateFailuresQuery = "nfd_node_update_failures_total"
nodeLabelsRejectedQuery = "nfd_node_labels_rejected_total"
Expand All @@ -48,6 +49,10 @@ var (
"version": version.Get(),
},
})
nodeUpdateRequests = prometheus.NewCounter(prometheus.CounterOpts{
Name: nodeUpdateRequestsQuery,
Help: "Number of node update requests processed by the master.",
})
nodeUpdates = prometheus.NewCounter(prometheus.CounterOpts{
Name: nodeUpdatesQuery,
Help: "Number of nodes updated by the master.",
Expand Down
1 change: 1 addition & 0 deletions pkg/nfd-master/nfd-master.go
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,7 @@ func isNamespaceDenied(labelNs string, wildcardDeniedNs map[string]struct{}, nor

// SetLabels implements LabelerServer
func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.SetLabelsReply, error) {
nodeUpdateRequests.Inc()
err := authorizeClient(c, m.args.VerifyNodeName, r.NodeName)
if err != nil {
klog.ErrorS(err, "gRPC client authorization failed", "nodeName", r.NodeName)
Expand Down
1 change: 1 addition & 0 deletions pkg/nfd-master/node-updater-pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ func (u *nodeUpdaterPool) processNodeLabelRequest(queue workqueue.RateLimitingIn

defer queue.Done(nodeName)

nodeUpdateRequests.Inc()

Check warning on line 49 in pkg/nfd-master/node-updater-pool.go

View check run for this annotation

Codecov / codecov/patch

pkg/nfd-master/node-updater-pool.go#L49

Added line #L49 was not covered by tests
if err := u.nfdMaster.nfdAPIUpdateOneNode(nodeName.(string)); err != nil {
if queue.NumRequeues(nodeName) < 5 {
klog.InfoS("retrying labeling request for node", "nodeName", nodeName)
Expand Down

0 comments on commit 75c2d5d

Please sign in to comment.