Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug 1916379: Change metric for errors to gauge #24

Merged
Merged 1 commit on Jan 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -13,3 +13,4 @@

# Dependency directories (remove the comment below to include it)
# vendor/
/vsphere-problem-detector
16 changes: 8 additions & 8 deletions pkg/operator/metrics.go
Expand Up @@ -23,10 +23,10 @@ var (
[]string{checkNameLabel},
)

clusterCheckErrrorMetric = metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "vsphere_cluster_check_errors_total",
Help: "Number of failed vSphere cluster-level checks performed by vsphere-problem-detector.",
clusterCheckErrrorMetric = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Name: "vsphere_cluster_check_errors",
Help: "Indicates failing vSphere cluster-level checks performed by vsphere-problem-detector. Value of 1 means - a particular check is failing.",
StabilityLevel: metrics.ALPHA,
},
[]string{checkNameLabel},
Expand All @@ -41,10 +41,10 @@ var (
[]string{checkNameLabel, nodeNameLabel},
)

nodeCheckErrrorMetric = metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "vsphere_node_check_errors_total",
Help: "Number of failed vSphere node-level checks performed by vsphere-problem-detector.",
nodeCheckErrrorMetric = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Name: "vsphere_node_check_errors",
Help: "Indicates failing vSphere node-level checks performed by vsphere-problem-detector. Value of 1 means - a particular check is failing on a node.",
StabilityLevel: metrics.ALPHA,
},
[]string{checkNameLabel, nodeNameLabel},
Expand Down
6 changes: 4 additions & 2 deletions pkg/operator/operator.go
Expand Up @@ -203,9 +203,10 @@ func (c *vSphereProblemDetectorController) runSingleClusterCheck(checkContext *c
err := checkFunc(checkContext)
if err != nil {
res.Error = err
clusterCheckErrrorMetric.WithLabelValues(name).Inc()
clusterCheckErrrorMetric.WithLabelValues(name).Set(1)
klog.V(2).Infof("%s failed: %s", name, err)
} else {
clusterCheckErrrorMetric.WithLabelValues(name).Set(0)
klog.V(2).Infof("%s passed", name)
}
clusterCheckTotalMetric.WithLabelValues(name).Inc()
Expand Down Expand Up @@ -265,9 +266,10 @@ func (c *vSphereProblemDetectorController) runSingleNodeSingleCheck(checkContext
err := check.CheckNode(checkContext, node, vm)
if err != nil {
res.Error = err
nodeCheckErrrorMetric.WithLabelValues(name, node.Name).Inc()
nodeCheckErrrorMetric.WithLabelValues(name, node.Name).Set(1)
klog.V(2).Infof("%s:%s failed: %s", name, node.Name, err)
} else {
nodeCheckErrrorMetric.WithLabelValues(name, node.Name).Set(0)
klog.V(2).Infof("%s:%s passed", name, node.Name)
}
nodeCheckTotalMetric.WithLabelValues(name, node.Name).Inc()
Expand Down