Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor monitoring rules #1757

Merged
merged 5 commits into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,7 @@ docker-push-registry:
$(OCI_BIN) push $(IMAGE_REGISTRY)/$(REGISTRY_IMAGE):$(IMAGE_TAG)

prom-rules-verify:
hack/prom-rule-ci/verify-rules.sh \
data/monitoring/prom-rule.yaml \
hack/prom-rule-ci/prom-rules-tests.yaml
go run ./tools/prom-rule-ci $(OCI_BIN) ./tools/prom-rule-ci/tmp_prom_rules.yaml ./tools/prom-rule-ci/prom-rules-tests.yaml

cluster-up:
./cluster/up.sh
Expand Down
67 changes: 0 additions & 67 deletions data/monitoring/prom-rule.yaml

This file was deleted.

6 changes: 3 additions & 3 deletions docs/metrics.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# Cluster Network Addons Operator Metrics

### kubevirt_cnao_cr_kubemacpool_aggregated
Total count of KubeMacPool manager pods deployed by CNAO CR. Type: Gauge.

### kubevirt_cnao_cr_kubemacpool_deployed
KubeMacPool is deployed by CNAO CR. Type: Gauge.

### kubevirt_cnao_cr_ready
CNAO CR Ready. Type: Gauge.

### kubevirt_cnao_cr_kubemacpool_aggregated
Total count of KubeMacPool manager pods deployed by CNAO CR. Type: Gauge.

### kubevirt_cnao_kubemacpool_duplicate_macs
Total count of duplicate KubeMacPool MAC addresses. Type: Gauge.

Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@ require (
github.com/openshift/origin v4.1.0+incompatible
github.com/operator-framework/operator-sdk v1.12.0
github.com/pkg/errors v0.9.1
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.64.1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did you run make vendor afterwards, and add the necessary files?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, no additional changes

github.com/prometheus/client_golang v1.16.0
github.com/prometheus/common v0.44.0
github.com/spf13/pflag v1.0.5
github.com/thanhpk/randstr v1.0.4
golang.org/x/oauth2 v0.10.0
golang.org/x/tools v0.13.0
gopkg.in/yaml.v2 v2.4.0
gopkg.in/yaml.v3 v3.0.1
k8s.io/api v0.28.1
k8s.io/apiextensions-apiserver v0.28.0
k8s.io/apimachinery v0.28.1
Expand Down Expand Up @@ -182,7 +182,6 @@ require (
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
github.com/pjbgf/sha1cd v0.3.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.64.1 // indirect
github.com/prometheus/client_model v0.4.0 // indirect
github.com/prometheus/procfs v0.11.1 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
Expand Down Expand Up @@ -241,6 +240,7 @@ require (
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/ini.v1 v1.62.0 // indirect
gopkg.in/warnings.v0 v0.1.2 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
helm.sh/helm/v3 v3.11.1 // indirect
k8s.io/apiserver v0.28.1 // indirect
k8s.io/cli-runtime v0.26.3 // indirect
Expand Down
69 changes: 0 additions & 69 deletions hack/prom-rule-ci/verify-rules.sh

This file was deleted.

4 changes: 2 additions & 2 deletions pkg/components/components.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import (

cnao "github.com/kubevirt/cluster-network-addons-operator/pkg/apis/networkaddonsoperator/shared"
cnaov1 "github.com/kubevirt/cluster-network-addons-operator/pkg/apis/networkaddonsoperator/v1"
"github.com/kubevirt/cluster-network-addons-operator/pkg/monitoring"
"github.com/kubevirt/cluster-network-addons-operator/pkg/monitoring/rules/alerts"
"github.com/kubevirt/cluster-network-addons-operator/pkg/names"
"github.com/kubevirt/cluster-network-addons-operator/pkg/util/k8s"
)
Expand Down Expand Up @@ -289,7 +289,7 @@ func GetDeployment(version string, operatorVersion string, namespace string, rep
},
{
Name: "RUNBOOK_URL_TEMPLATE",
Value: monitoring.GetRunbookURLTemplate(),
Value: alerts.GetRunbookURLTemplate(),
},
},
SecurityContext: &corev1.SecurityContext{
Expand Down
39 changes: 24 additions & 15 deletions pkg/monitoring/monitoring.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import (
"os"
"path/filepath"

"github.com/kubevirt/cluster-network-addons-operator/pkg/monitoring/rules"
"k8s.io/apimachinery/pkg/runtime"

"github.com/pkg/errors"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"

Expand All @@ -13,31 +16,46 @@ import (
// Defaults and environment variable names used by the monitoring package.
const (
// defaultMonitoringNamespace is the fallback monitoring namespace.
// NOTE(review): presumably used when MONITORING_NAMESPACE is unset — the
// consuming function body is not fully visible here; confirm.
defaultMonitoringNamespace = "monitoring"
// defaultServiceAccountName is the value getServiceAccount falls back to.
defaultServiceAccountName = "prometheus-k8s"
// defaultRunbookURLTemplate is returned by GetRunbookURLTemplate when the
// runbookURLTemplateEnv variable does not exist in the environment.
defaultRunbookURLTemplate = "https://kubevirt.io/monitoring/runbooks/"
// runbookURLTemplateEnv is the name of the environment variable that
// GetRunbookURLTemplate looks up.
runbookURLTemplateEnv = "RUNBOOK_URL_TEMPLATE"
)

// RenderMonitoring renders the manifests found under <manifestDir>/monitoring
// and appends a PrometheusRule, built by the rules package, converted to an
// unstructured object.
//
// It returns (nil, nil) when monitoringAvailable is false. Any failure while
// rendering, setting up the rules, building the PrometheusRule or converting
// it is returned wrapped with a short description of the failing step.
//
// NOTE(review): this listing comes from a diff view without +/- markers, so a
// few statements appear in both their old and new form (for example the two
// assignments to data.Data["Namespace"]) — confirm against the merged file.
func RenderMonitoring(manifestDir string, monitoringAvailable bool) ([]*unstructured.Unstructured, error) {
// Nothing to render when monitoring is not available.
if !monitoringAvailable {
return nil, nil
}

// Read once and reuse for both the template data and the rules below.
operandNamespace := os.Getenv("OPERAND_NAMESPACE")
monitoringNamespace := getMonitoringNamespace()

// render the manifests on disk
data := render.MakeRenderData()
data.Data["Namespace"] = os.Getenv("OPERAND_NAMESPACE")
data.Data["MonitoringNamespace"] = getNamespace()
data.Data["Namespace"] = operandNamespace
data.Data["MonitoringNamespace"] = monitoringNamespace
data.Data["MonitoringServiceAccount"] = getServiceAccount()
data.Data["RunbookURLTemplate"] = GetRunbookURLTemplate()

objs, err := render.RenderDir(filepath.Join(manifestDir, "monitoring"), &data)
if err != nil {
return nil, errors.Wrap(err, "failed to render monitoring manifests")
}

// The rules are set up before the PrometheusRule is built from them; both
// calls receive the operand namespace.
if err := rules.SetupRules(operandNamespace); err != nil {
return nil, errors.Wrap(err, "failed to setup monitoring rules")
}

promRule, err := rules.BuildPrometheusRule(operandNamespace)
if err != nil {
return nil, errors.Wrap(err, "failed to build PrometheusRule")
}

// Convert the typed PrometheusRule so it can be returned alongside the
// rendered unstructured objects.
unstructuredPromRule, err := runtime.DefaultUnstructuredConverter.ToUnstructured(promRule)
if err != nil {
return nil, errors.Wrap(err, "failed to convert PrometheusRule to unstructured")
}
objs = append(objs, &unstructured.Unstructured{Object: unstructuredPromRule})

return objs, nil
}

func getNamespace() string {
func getMonitoringNamespace() string {
monitoringNamespaceFromEnv := os.Getenv("MONITORING_NAMESPACE")

if monitoringNamespaceFromEnv != "" {
Expand All @@ -54,12 +72,3 @@ func getServiceAccount() string {
}
return defaultServiceAccountName
}

// GetRunbookURLTemplate returns the runbook URL template taken from the
// environment variable named by runbookURLTemplateEnv. When that variable is
// not present at all, defaultRunbookURLTemplate is returned instead. A
// variable that is present but empty is returned as the empty string.
func GetRunbookURLTemplate() string {
	if fromEnv, found := os.LookupEnv(runbookURLTemplateEnv); found {
		return fromEnv
	}
	return defaultRunbookURLTemplate
}
33 changes: 33 additions & 0 deletions pkg/monitoring/rules/alerts/kubemacpool.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package alerts

import (
promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"k8s.io/apimachinery/pkg/util/intstr"
)

// kubemacpoolAlerts lists the alert rules that concern KubeMacPool. Each rule
// carries a severity label and an operator-health-impact label with the same
// value.
var kubemacpoolAlerts = []promv1.Rule{
{
// Warns when the duplicate-MAC gauge is non-zero, with a 5m "for" duration.
Alert: "KubeMacPoolDuplicateMacsFound",
Expr: intstr.FromString("kubevirt_cnao_kubemacpool_duplicate_macs != 0"),
For: "5m",
Annotations: map[string]string{
"summary": "Duplicate macs found.",
},
Labels: map[string]string{
severityAlertLabelKey: "warning",
operatorHealthImpactLabelKey: "warning",
},
},
{
// Critical when the aggregated KubeMacPool gauge equals 1 while the
// manager-up gauge equals 0, with a 5m "for" duration.
Alert: "KubemacpoolDown",
Expr: intstr.FromString("kubevirt_cnao_cr_kubemacpool_aggregated == 1 and kubevirt_cnao_kubemacpool_manager_up == 0"),
For: "5m",
Annotations: map[string]string{
"summary": "KubeMacpool is deployed by CNAO CR but KubeMacpool pod is down.",
},
Labels: map[string]string{
severityAlertLabelKey: "critical",
operatorHealthImpactLabelKey: "critical",
},
},
}
37 changes: 37 additions & 0 deletions pkg/monitoring/rules/alerts/operator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package alerts

import (
"fmt"

promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"k8s.io/apimachinery/pkg/util/intstr"
)

// operatorAlerts returns the alert rules for the CNAO operator itself, in a
// fixed order: CnaoDown first, then NetworkAddonsConfigNotReady.
//
// namespace is interpolated into the label matcher of the readiness query so
// that only kubevirt_cnao_cr_ready series from that namespace are considered.
func operatorAlerts(namespace string) []promv1.Rule {
	// Each rule gets its own label map so the returned rules share no state.
	warningLabels := func() map[string]string {
		return map[string]string{
			severityAlertLabelKey:        "warning",
			operatorHealthImpactLabelKey: "warning",
		}
	}

	// "or vector(0)" keeps the sum defined when no matching series exists, so
	// the comparison with 0 still produces a result in that case.
	crNotReadyQuery := fmt.Sprintf("sum(kubevirt_cnao_cr_ready{namespace='%s'} or vector(0)) == 0", namespace)

	cnaoDown := promv1.Rule{
		Alert:       "CnaoDown",
		Expr:        intstr.FromString("kubevirt_cnao_operator_up == 0"),
		For:         "5m",
		Annotations: map[string]string{"summary": "CNAO pod is down."},
		Labels:      warningLabels(),
	}

	configNotReady := promv1.Rule{
		Alert:       "NetworkAddonsConfigNotReady",
		Expr:        intstr.FromString(crNotReadyQuery),
		For:         "5m",
		Annotations: map[string]string{"summary": "CNAO CR NetworkAddonsConfig is not ready."},
		Labels:      warningLabels(),
	}

	return []promv1.Rule{cnaoDown, configNotReady}
}
Loading
Loading