Skip to content

Commit

Permalink
Add alert to notify of nmstate removal
Browse files Browse the repository at this point in the history
kubernetes-nmstate will be removed in the next CNAO version.
This change adds an alert that notifies users who have
kubernetes-nmstate deployed via CNAO and points them to a runbook
explaining that the standalone kubernetes-nmstate operator
should be installed instead.

Signed-off-by: Radim Hrazdil <rhrazdil@redhat.com>
  • Loading branch information
Radim Hrazdil committed Mar 24, 2022
1 parent 295f867 commit 9ec9af3
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 2 deletions.
14 changes: 14 additions & 0 deletions data/monitoring/prom-rule.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,20 @@ spec:
rules:
- expr: sum(up{namespace='{{ .Namespace }}', pod=~'cluster-network-addons-operator-.*'} or vector(0))
record: kubevirt_cnao_num_up_operators
- expr: sum(kube_deployment_labels{deployment=~'kubernetes-nmstate-operator'} or vector(0))
record: kubevirt_nmstate_operator_deployments
- expr: sum(kube_daemonset_labels{daemonset=~'nmstate-handler'} or vector(0))
record: kubevirt_nmstate_handler_daemonsets
- alert: CnaoNmstateMigration
annotations:
summary: Nmstate will be removed from CNAO.
runbook_url: http://kubevirt.io/monitoring/runbooks/CnaoNmstateMigration
expr: sum(kubevirt_nmstate_handler_daemonsets or vector(0)) > 0 and sum(kubevirt_nmstate_operator_deployments or vector(0)) == 0
for: 5m
labels:
severity: warning
kubernetes_operator_part_of: kubevirt
kubernetes_operator_component: cluster-network-addons-operator
- alert: CnaoDown
annotations:
summary: CNAO pod is down.
Expand Down
31 changes: 31 additions & 0 deletions hack/prom-rule-ci/prom-rules-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,37 @@ tests:
alertname: CnaoDown
exp_alerts:

# CnaoNmstateMigration positive tests
- interval: 1m
input_series:
- series: "kubevirt_nmstate_operator_deployments"
values: "0 0 0 0 0 0"
- series: "kubevirt_nmstate_handler_daemonsets"
values: "1 1 1 1 1 1"
alert_rule_test:
- eval_time: 5m
alertname: CnaoNmstateMigration
exp_alerts:
- exp_annotations:
summary: "Nmstate will be removed from CNAO."
runbook_url: "http://kubevirt.io/monitoring/runbooks/CnaoNmstateMigration"
exp_labels:
severity: "warning"
kubernetes_operator_part_of: "kubevirt"
kubernetes_operator_component: "cluster-network-addons-operator"
# CnaoNmstateMigration negative tests
- interval: 1m
input_series:
- series: "kubevirt_nmstate_operator_deployments"
values: "0 0 0 1 1 1"
- series: "kubevirt_nmstate_handler_daemonsets"
values: "1 1 1 1 1 1"

alert_rule_test:
- eval_time: 5m
alertname: CnaoNmstateMigration
exp_alerts:

# NetworkAddonsConfigNotReady positive tests
- interval: 1m
input_series:
Expand Down
12 changes: 11 additions & 1 deletion pkg/monitoring/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,20 @@ var (
Name: "kubevirt_cnao_cr_kubemacpool_deployed",
Help: "Kubemacpool is deployed by Cnao CR",
})
nmstateOperatorDeployments = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "kubevirt_nmstate_operator_deployments",
Help: "Number of nmstate operator deployments",
})
nmstateHandlerDaemonsets = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "kubevirt_nmstate_handler_daemonsets",
Help: "Number of nmstate handler daemonsets",
})
)

func init() {
metrics.Registry.MustRegister(readyGauge, kmpDeployedGauge)
metrics.Registry.MustRegister(readyGauge, kmpDeployedGauge, nmstateOperatorDeployments, nmstateHandlerDaemonsets)
}

func setGaugeParam(setTrueFlag bool, gaugeParam *prometheus.Gauge) {
Expand Down
32 changes: 31 additions & 1 deletion test/e2e/workflow/deployment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -408,12 +408,34 @@ var _ = Describe("NetworkAddonsConfig", func() {
var (
configSpec cnao.NetworkAddonsConfigSpec
)
// checkMetricValues polls the monitoring endpoint until every metric named in
// expectedMetricValueMap is present with its expected value, failing the spec
// if that does not happen within the timeout.
//
// Problems are reported by returning an error from the polled function rather
// than by calling Expect inside it, so a failed scrape or a metric that is not
// exported yet is retried on the next poll instead of aborting the spec on the
// first attempt.
checkMetricValues := func(expectedMetricValueMap map[string]string) {
	EventuallyWithOffset(1, func() error {
		By("scraping the monitoring endpoint")
		scrapedData, err := GetScrapedDataFromMonitoringEndpoint()
		if err != nil {
			return fmt.Errorf("scraping the monitoring endpoint: %w", err)
		}

		By("comparing the scraped Data to the expected metrics' values")
		for metricName, expectedValue := range expectedMetricValueMap {
			metricEntry := FindMetric(scrapedData, metricName)
			if metricEntry == "" {
				return fmt.Errorf("metric %s does not appear in endpoint scrape", metricName)
			}

			// The scraped entry is compared as a whole "<name> <value>" line.
			if metricEntry != fmt.Sprintf("%s %s", metricName, expectedValue) {
				return fmt.Errorf("metric %s does not have the expected value %s, got %q", metricName, expectedValue, metricEntry)
			}
		}
		return nil
	}, 3*time.Minute, time.Minute).Should(Succeed(), "Should scrape the correct metrics")
}
BeforeEach(func() {
configSpec = cnao.NetworkAddonsConfigSpec{
NMState: &cnao.NMState{},
}
})
Context("with nmstate-operator installed", func() {
var expectedMetricValues = map[string]string{
"kubevirt_nmstate_operator_deployments": "1",
"kubevirt_nmstate_handler_daemonsets": "1",
}
JustBeforeEach(func() {
// Install nmstate-operator here
installNMStateOperator()
Expand All @@ -431,8 +453,10 @@ var _ = Describe("NetworkAddonsConfig", func() {
Eventually(func() error {
nmstateHandlerDaemonSet := &v1.DaemonSet{}
return framework.Global.Client.Get(context.TODO(), types.NamespacedName{Name: NMStateComponent.DaemonSets[0], Namespace: "nmstate"}, nmstateHandlerDaemonSet)
}, 5*time.Minute, time.Second).Should(BeNil(), fmt.Sprintf("Timed out waiting for nmstate-operator daemonset"))
}, 5*time.Minute, time.Second).Should(BeNil(), "Timed out waiting for nmstate-operator daemonset")
checkMetricValues(expectedMetricValues)
})

})
Context("when it is not already deployed", func() {
BeforeEach(func() {
Expand All @@ -449,17 +473,23 @@ var _ = Describe("NetworkAddonsConfig", func() {
nmstateHandlerDaemonSet := &v1.DaemonSet{}
return framework.Global.Client.Get(context.TODO(), types.NamespacedName{Name: NMStateComponent.DaemonSets[0], Namespace: "nmstate"}, nmstateHandlerDaemonSet)
}, 5*time.Minute, time.Second).Should(BeNil(), "Timed out waiting for nmstate-operator daemonset")
checkMetricValues(expectedMetricValues)
})
})
})
Context("without nmstate-operator pre-installed", func() {
var expectedMetricValues = map[string]string{
"kubevirt_nmstate_operator_deployments": "0",
"kubevirt_nmstate_handler_daemonsets": "1",
}
BeforeEach(func() {
By("Deploying Nmstate")
config := cnao.NetworkAddonsConfigSpec{NMState: &cnao.NMState{}}
CreateConfig(gvk, config)
})
It("should deploy nmstate via CNAO", func() {
CheckConfigCondition(gvk, ConditionAvailable, ConditionTrue, 15*time.Minute, CheckDoNotRepeat)
checkMetricValues(expectedMetricValues)
})
Context("when nmstate-operator is then installed", func() {
BeforeEach(func() {
Expand Down

0 comments on commit 9ec9af3

Please sign in to comment.