From 0dce2f07133d644b6441f4bd5c1e5a0d4522278a Mon Sep 17 00:00:00 2001 From: Alexandre Lamarre Date: Thu, 14 Sep 2023 11:40:58 -0400 Subject: [PATCH] rework alerting integration test setup --- test/plugins/alerting/alerting_suite_test.go | 26 +- test/plugins/alerting/alerting_test.go | 1454 +++++++++--------- test/plugins/alerting/routing_test.go | 405 ++--- test/plugins/alerting/slo_test.go | 1 + 4 files changed, 927 insertions(+), 959 deletions(-) diff --git a/test/plugins/alerting/alerting_suite_test.go b/test/plugins/alerting/alerting_suite_test.go index 4c29cdc187..acab5e22be 100644 --- a/test/plugins/alerting/alerting_suite_test.go +++ b/test/plugins/alerting/alerting_suite_test.go @@ -8,9 +8,7 @@ import ( . "github.com/onsi/gomega" "github.com/prometheus/common/model" "github.com/rancher/opni/pkg/alerting/drivers/routing" - "github.com/rancher/opni/pkg/test" _ "github.com/rancher/opni/pkg/test/setup" - "github.com/rancher/opni/pkg/test/testruntime" _ "github.com/rancher/opni/plugins/alerting/test" _ "github.com/rancher/opni/plugins/metrics/test" "github.com/samber/lo" @@ -40,16 +38,16 @@ func TestAlerting(t *testing.T) { RunSpecs(t, "Alerting Suite") } -var env *test.Environment -var tmpConfigDir string +// var env *test.Environment +// var tmpConfigDir string -var _ = BeforeSuite(func() { - testruntime.IfIntegration(func() { - env = &test.Environment{} - Expect(env).NotTo(BeNil()) - Expect(env.Start()).To(Succeed()) - DeferCleanup(env.Stop) - tmpConfigDir = env.GenerateNewTempDirectory("alertmanager-config") - Expect(tmpConfigDir).NotTo(Equal("")) - }) -}) +// var _ = BeforeSuite(func() { +// testruntime.IfIntegration(func() { +// env = &test.Environment{} +// Expect(env).NotTo(BeNil()) +// Expect(env.Start()).To(Succeed()) +// DeferCleanup(env.Stop) +// tmpConfigDir = env.GenerateNewTempDirectory("alertmanager-config") +// Expect(tmpConfigDir).NotTo(Equal("")) +// }) +// }) diff --git a/test/plugins/alerting/alerting_test.go b/test/plugins/alerting/alerting_test.go index eef4aee75c..58f85541e8 100644 --- a/test/plugins/alerting/alerting_test.go +++ b/test/plugins/alerting/alerting_test.go @@ -22,7 +22,6 @@ import ( managementv1 "github.com/rancher/opni/pkg/apis/management/v1" "github.com/rancher/opni/pkg/test" "github.com/rancher/opni/pkg/test/alerting" - "github.com/rancher/opni/pkg/test/testruntime" "github.com/rancher/opni/plugins/alerting/apis/alertops" "github.com/samber/lo" "golang.org/x/exp/slices" @@ -39,51 +38,6 @@ const ( numNotificationServers = 2 ) -func init() { - testruntime.IfIntegration(func() { - BuildAlertingClusterIntegrationTests([]*alertops.ClusterConfiguration{ - // { - // NumReplicas: 1, - // ResourceLimits: &alertops.ResourceLimitSpec{ - // Cpu: "100m", - // Memory: "100Mi", - // Storage: "5Gi", - // }, - // ClusterSettleTimeout: "1m", - // ClusterGossipInterval: "1m", - // ClusterPushPullInterval: "1m", - // }, - { - NumReplicas: 3, - ResourceLimits: &alertops.ResourceLimitSpec{ - Cpu: "100m", - Memory: "100Mi", - Storage: "5Gi", - }, - ClusterSettleTimeout: "1m", - ClusterGossipInterval: "1m", - ClusterPushPullInterval: "1m", - }, - }, - func() alertops.AlertingAdminClient { - return alertops.NewAlertingAdminClient(env.ManagementClientConn()) - }, - func() alertingv1.AlertConditionsClient { - return env.NewAlertConditionsClient() - }, - func() alertingv1.AlertEndpointsClient { - return env.NewAlertEndpointsClient() - }, - func() alertingv1.AlertNotificationsClient { - return env.NewAlertNotificationsClient() - }, - func() managementv1.ManagementClient { - return env.NewManagementClient() - }, - ) - }) -} - type agentWithContext struct { id string context.Context @@ -91,829 +45,843 @@ type agentWithContext struct { port int } -func BuildAlertingClusterIntegrationTests( - clusterConfigurations []*alertops.ClusterConfiguration, - alertingAdminConstructor func() alertops.AlertingAdminClient, - alertingConditionsConstructor func() alertingv1.AlertConditionsClient, - alertingEndpointsConstructor func() alertingv1.AlertEndpointsClient, - alertingNotificationsConstructor func() alertingv1.AlertNotificationsClient, - mgmtClientConstructor func() managementv1.ManagementClient, -) bool { - return Describe("Alerting Cluster Integration tests", Ordered, Label("integration"), func() { - var alertClusterClient alertops.AlertingAdminClient - var alertEndpointsClient alertingv1.AlertEndpointsClient - var alertConditionsClient alertingv1.AlertConditionsClient - var alertNotificationsClient alertingv1.AlertNotificationsClient - var mgmtClient managementv1.ManagementClient - var numAgents int - - // contains agent id and other useful metadata and functions - var agents []*agentWithContext - // physical servers that receive opni alerting notifications - var servers []*alerting.MockIntegrationWebhookServer - // physical servers that receive all opni alerting notifications - var notificationServers []*alerting.MockIntegrationWebhookServer - // conditionsIds => endopintIds - expectedRouting := map[string]*alertingv1.ConditionReferenceList{} - // maps condition ids where agents are disconnect to their webhook ids - involvedDisconnects := map[string][]string{} - - var httpProxyClient *http.Client - When("Installing the Alerting Cluster", func() { - BeforeAll(func() { - alertClusterClient = alertingAdminConstructor() - alertEndpointsClient = alertingEndpointsConstructor() - alertConditionsClient = alertingConditionsConstructor() - alertNotificationsClient = alertingNotificationsConstructor() - mgmtClient = mgmtClientConstructor() - numAgents = 5 - tlsConfig := env.GatewayClientTLSConfig() - httpProxyClient = &http.Client{ - Transport: &http.Transport{ - DialTLS: func(network, addr string) (net.Conn, error) { - conn, err := tls.Dial(network, addr, tlsConfig) - return conn, err - }, +var _ = Describe("Alerting Cluster Integration tests", Ordered, Label("integration"), func() { + var env *test.Environment + + var clusterConfigurations = []*alertops.ClusterConfiguration{ + { + NumReplicas: 3, + ResourceLimits: &alertops.ResourceLimitSpec{ + Cpu: "100m", + Memory: "100Mi", + Storage: "5Gi", + }, + ClusterSettleTimeout: "1m", + ClusterGossipInterval: "1m", + ClusterPushPullInterval: "1m", + }, + } + + var alertClusterClient alertops.AlertingAdminClient + var alertEndpointsClient alertingv1.AlertEndpointsClient + var alertConditionsClient alertingv1.AlertConditionsClient + var alertNotificationsClient alertingv1.AlertNotificationsClient + var mgmtClient managementv1.ManagementClient + var numAgents int + + // contains agent id and other useful metadata and functions + var agents []*agentWithContext + // physical servers that receive opni alerting notifications + var servers []*alerting.MockIntegrationWebhookServer + // physical servers that receive all opni alerting notifications + var notificationServers []*alerting.MockIntegrationWebhookServer + // conditionsIds => endopintIds + expectedRouting := map[string]*alertingv1.ConditionReferenceList{} + // maps condition ids where agents are disconnect to their webhook ids + involvedDisconnects := map[string][]string{} + + var httpProxyClient *http.Client + When("Installing the Alerting Cluster", func() { + BeforeAll(func() { + env = &test.Environment{} + Expect(env).NotTo(BeNil()) + Expect(env.Start()).To(Succeed()) + DeferCleanup(env.Stop) + tmpConfigDir := env.GenerateNewTempDirectory("alertmanager-config") + Expect(tmpConfigDir).NotTo(Equal("")) + + alertClusterClient = alertops.NewAlertingAdminClient(env.ManagementClientConn()) + alertEndpointsClient = env.NewAlertEndpointsClient() + alertConditionsClient = env.NewAlertConditionsClient() + alertNotificationsClient = env.NewAlertNotificationsClient() + mgmtClient = env.NewManagementClient() + numAgents = 5 + tlsConfig := env.GatewayClientTLSConfig() + httpProxyClient = &http.Client{ + Transport: &http.Transport{ + DialTLS: func(network, addr string) (net.Conn, error) { + conn, err := tls.Dial(network, addr, tlsConfig) + return conn, err }, - } - }) + }, + } + }) - AfterEach(func() { - status, err := alertClusterClient.GetClusterStatus(env.Context(), &emptypb.Empty{}) + AfterEach(func() { + status, err := alertClusterClient.GetClusterStatus(env.Context(), &emptypb.Empty{}) + Expect(err).To(BeNil()) + installed := status.State != alertops.InstallState_NotInstalled && status.State != alertops.InstallState_Uninstalling + if installed { + info, err := alertClusterClient.Info(env.Context(), &emptypb.Empty{}) Expect(err).To(BeNil()) - installed := status.State != alertops.InstallState_NotInstalled && status.State != alertops.InstallState_Uninstalling - if installed { - info, err := alertClusterClient.Info(env.Context(), &emptypb.Empty{}) - Expect(err).To(BeNil()) - Expect(info).NotTo(BeNil()) - Expect(info.CurSyncId).NotTo(BeEmpty()) - ts := timestamppb.Now() - for _, comp := range info.Components { - Expect(comp.LastHandshake.AsTime()).To(BeTemporally("<", ts.AsTime())) - if comp.ConnectInfo.GetState() != alertops.SyncState_Synced { - if comp.ConnectInfo.SyncId != info.CurSyncId { - Expect(comp.LastHandshake.AsTime()).To(BeTemporally(">", ts.AsTime().Add(15*(-time.Second))), "sync is stuttering") - } + Expect(info).NotTo(BeNil()) + Expect(info.CurSyncId).NotTo(BeEmpty()) + ts := timestamppb.Now() + for _, comp := range info.Components { + Expect(comp.LastHandshake.AsTime()).To(BeTemporally("<", ts.AsTime())) + if comp.ConnectInfo.GetState() != alertops.SyncState_Synced { + if comp.ConnectInfo.SyncId != info.CurSyncId { + Expect(comp.LastHandshake.AsTime()).To(BeTemporally(">", ts.AsTime().Add(15*(-time.Second))), "sync is stuttering") } } } - }) - - for _, clusterConf := range clusterConfigurations { - It("should install the alerting cluster", func() { - _, err := alertClusterClient.InstallCluster(env.Context(), &emptypb.Empty{}) - Expect(err).To(BeNil()) + } + }) - Eventually(func() error { - status, err := alertClusterClient.GetClusterStatus(env.Context(), &emptypb.Empty{}) - if err != nil { - return err - } - if status.State != alertops.InstallState_Installed { - return fmt.Errorf("alerting cluster install state is %s", status.State.String()) - } - return nil - }, time.Second*5, time.Millisecond*200).Should(Succeed()) - }) + for _, clusterConf := range clusterConfigurations { + It("should install the alerting cluster", func() { + _, err := alertClusterClient.InstallCluster(env.Context(), &emptypb.Empty{}) + Expect(err).To(BeNil()) - It("should apply the configuration", func() { - _, err := alertClusterClient.ConfigureCluster(env.Context(), clusterConf) + Eventually(func() error { + status, err := alertClusterClient.GetClusterStatus(env.Context(), &emptypb.Empty{}) if err != nil { - if s, ok := status.FromError(err); ok { // conflict is ok if using default config - Expect(s.Code()).To(Equal(codes.FailedPrecondition)) - } + return err } - Expect(err).To(BeNil()) - Eventually(func() error { - status, err := alertClusterClient.GetClusterStatus(env.Context(), &emptypb.Empty{}) - if err != nil { - return err - } - if status.State != alertops.InstallState_Installed { - return fmt.Errorf("alerting cluster install state is %s", status.State.String()) - } - return nil - }, time.Second*5, time.Millisecond*200).Should(Succeed()) + if status.State != alertops.InstallState_Installed { + return fmt.Errorf("alerting cluster install state is %s", status.State.String()) + } + return nil + }, time.Second*5, time.Millisecond*200).Should(Succeed()) + }) - Eventually(func() error { - getConf, err := alertClusterClient.GetClusterConfiguration(env.Context(), &emptypb.Empty{}) - if !proto.Equal(getConf, clusterConf) { - return fmt.Errorf("cluster config not equal : not applied") - } + It("should apply the configuration", func() { + _, err := alertClusterClient.ConfigureCluster(env.Context(), clusterConf) + if err != nil { + if s, ok := status.FromError(err); ok { // conflict is ok if using default config + Expect(s.Code()).To(Equal(codes.FailedPrecondition)) + } + } + Expect(err).To(BeNil()) + Eventually(func() error { + status, err := alertClusterClient.GetClusterStatus(env.Context(), &emptypb.Empty{}) + if err != nil { return err - }, time.Second*5, time.Millisecond*200).Should(Succeed()) - }) + } + if status.State != alertops.InstallState_Installed { + return fmt.Errorf("alerting cluster install state is %s", status.State.String()) + } + return nil + }, time.Second*5, time.Millisecond*200).Should(Succeed()) + + Eventually(func() error { + getConf, err := alertClusterClient.GetClusterConfiguration(env.Context(), &emptypb.Empty{}) + if !proto.Equal(getConf, clusterConf) { + return fmt.Errorf("cluster config not equal : not applied") + } + return err + }, time.Second*5, time.Millisecond*200).Should(Succeed()) + }) - Specify("the alerting plugin components should be running and healthy", func() { - alertingReadinessProbe := fmt.Sprintf("https://%s/plugin_alerting/ready", env.GatewayConfig().Spec.HTTPListenAddress) - alertingHealthProbe := fmt.Sprintf("https://%s/plugin_alerting/healthy", env.GatewayConfig().Spec.HTTPListenAddress) + Specify("the alerting plugin components should be running and healthy", func() { + alertingReadinessProbe := fmt.Sprintf("https://%s/plugin_alerting/ready", env.GatewayConfig().Spec.HTTPListenAddress) + alertingHealthProbe := fmt.Sprintf("https://%s/plugin_alerting/healthy", env.GatewayConfig().Spec.HTTPListenAddress) - Eventually(func() error { - reqReady, err := http.NewRequestWithContext(env.Context(), http.MethodGet, alertingReadinessProbe, nil) - if err != nil { - return err - } - reqHealthy, err := http.NewRequestWithContext(env.Context(), http.MethodGet, alertingHealthProbe, nil) - if err != nil { - return err - } - respReady, err := httpProxyClient.Do(reqReady) - if err != nil { - return err - } - defer respReady.Body.Close() - if respReady.StatusCode != http.StatusOK { - return fmt.Errorf("alerting plugin not yet ready %d, %s", respReady.StatusCode, respReady.Status) - } - respHealthy, err := httpProxyClient.Do(reqHealthy) + Eventually(func() error { + reqReady, err := http.NewRequestWithContext(env.Context(), http.MethodGet, alertingReadinessProbe, nil) + if err != nil { + return err + } + reqHealthy, err := http.NewRequestWithContext(env.Context(), http.MethodGet, alertingHealthProbe, nil) + if err != nil { + return err + } + respReady, err := httpProxyClient.Do(reqReady) + if err != nil { + return err + } + defer respReady.Body.Close() + if respReady.StatusCode != http.StatusOK { + return fmt.Errorf("alerting plugin not yet ready %d, %s", respReady.StatusCode, respReady.Status) + } + respHealthy, err := httpProxyClient.Do(reqHealthy) + if err != nil { + return err + } + defer respHealthy.Body.Close() + if respHealthy.StatusCode != http.StatusOK { if err != nil { - return err - } - defer respHealthy.Body.Close() - if respHealthy.StatusCode != http.StatusOK { - if err != nil { - return fmt.Errorf("alerting plugin unhealthy %d, %s", respHealthy.StatusCode, respHealthy.Status) - } + return fmt.Errorf("alerting plugin unhealthy %d, %s", respHealthy.StatusCode, respHealthy.Status) } - return nil - }, time.Second*30, time.Second).Should(Succeed()) - }) - - It("should be able to create some endpoints", func() { - servers = alerting.CreateWebhookServer(env, numServers) - - for _, server := range servers { - ref, err := alertEndpointsClient.CreateAlertEndpoint(env.Context(), server.Endpoint()) - Expect(err).To(Succeed()) - server.EndpointId = ref.Id } + return nil + }, time.Second*30, time.Second).Should(Succeed()) + }) - By("verifying they are externally persisted") + It("should be able to create some endpoints", func() { + servers = alerting.CreateWebhookServer(env, numServers) - endpList, err := alertEndpointsClient.ListAlertEndpoints(env.Context(), &alertingv1.ListAlertEndpointsRequest{}) + for _, server := range servers { + ref, err := alertEndpointsClient.CreateAlertEndpoint(env.Context(), server.Endpoint()) Expect(err).To(Succeed()) - Expect(endpList.Items).To(HaveLen(numServers)) + server.EndpointId = ref.Id + } - By("verifying they are reachable") + By("verifying they are externally persisted") - for _, endp := range endpList.GetItems() { - Expect(endp.GetId()).NotTo(BeNil()) - _, err := alertNotificationsClient.TestAlertEndpoint(env.Context(), endp.GetId()) - Expect(err).To(Succeed()) - } - maxSuccesses := 0 - Eventually(func() error { - success := 0 - errs := []error{} - for _, server := range servers { - if len(server.GetBuffer()) == 0 { - if success > maxSuccesses { - maxSuccesses = success - } - errs = append(errs, fmt.Errorf("server %v did not receive any alerts", server.Endpoint())) - } else { - success++ - } - } - if len(errs) > 0 { - return errors.Join(errs...) - } - return nil - }, time.Second*15, time.Millisecond*100).Should(Succeed(), fmt.Sprintf("only %d/%d servers received alerts", maxSuccesses, numServers)) + endpList, err := alertEndpointsClient.ListAlertEndpoints(env.Context(), &alertingv1.ListAlertEndpointsRequest{}) + Expect(err).To(Succeed()) + Expect(endpList.Items).To(HaveLen(numServers)) + + By("verifying they are reachable") + for _, endp := range endpList.GetItems() { + Expect(endp.GetId()).NotTo(BeNil()) + _, err := alertNotificationsClient.TestAlertEndpoint(env.Context(), endp.GetId()) + Expect(err).To(Succeed()) + } + maxSuccesses := 0 + Eventually(func() error { + success := 0 + errs := []error{} for _, server := range servers { - server.ClearBuffer() + if len(server.GetBuffer()) == 0 { + if success > maxSuccesses { + maxSuccesses = success + } + errs = append(errs, fmt.Errorf("server %v did not receive any alerts", server.Endpoint())) + } else { + success++ + } } - }) + if len(errs) > 0 { + return errors.Join(errs...) + } + return nil + }, time.Second*15, time.Millisecond*100).Should(Succeed(), fmt.Sprintf("only %d/%d servers received alerts", maxSuccesses, numServers)) - It("should create some default conditions when bootstrapping agents", func() { - By("expecting to have no initial conditions") - Eventually(func() error { - condList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{}) - if err != nil { - return err - } - if len(condList.Items) != 0 { - return fmt.Errorf("expected 0 conditions, got %d", len(condList.Items)) - } - return nil - }, time.Second*30, time.Millisecond*200).Should(Succeed()) + for _, server := range servers { + server.ClearBuffer() + } + }) - By(fmt.Sprintf("bootstrapping %d agents", numAgents)) - certsInfo, err := mgmtClient.CertsInfo(context.Background(), &emptypb.Empty{}) - Expect(err).NotTo(HaveOccurred()) - fingerprint := certsInfo.Chain[len(certsInfo.Chain)-1].Fingerprint - Expect(fingerprint).NotTo(BeEmpty()) + It("should create some default conditions when bootstrapping agents", func() { + By("expecting to have no initial conditions") + Eventually(func() error { + condList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{}) + if err != nil { + return err + } + if len(condList.Items) != 0 { + return fmt.Errorf("expected 0 conditions, got %d", len(condList.Items)) + } + return nil + }, time.Second*30, time.Millisecond*200).Should(Succeed()) - token, err := mgmtClient.CreateBootstrapToken(context.Background(), &managementv1.CreateBootstrapTokenRequest{ - Ttl: durationpb.New(1 * time.Hour), + By(fmt.Sprintf("bootstrapping %d agents", numAgents)) + certsInfo, err := mgmtClient.CertsInfo(context.Background(), &emptypb.Empty{}) + Expect(err).NotTo(HaveOccurred()) + fingerprint := certsInfo.Chain[len(certsInfo.Chain)-1].Fingerprint + Expect(fingerprint).NotTo(BeEmpty()) + + token, err := mgmtClient.CreateBootstrapToken(context.Background(), &managementv1.CreateBootstrapTokenRequest{ + Ttl: durationpb.New(1 * time.Hour), + }) + Expect(err).NotTo(HaveOccurred()) + agentIdFunc := func(i int) string { + return fmt.Sprintf("agent-%d-%s", i, uuid.New().String()) + } + agents = []*agentWithContext{} + for i := 0; i < numAgents; i++ { + ctxCa, ca := context.WithCancel(env.Context()) + id := agentIdFunc(i) + port, errC := env.StartAgent(id, token, []string{fingerprint}, test.WithContext(ctxCa)) + Eventually(errC).Should(Receive(BeNil())) + agents = append(agents, &agentWithContext{ + port: port, + CancelFunc: ca, + Context: ctxCa, + id: id, }) - Expect(err).NotTo(HaveOccurred()) - agentIdFunc := func(i int) string { - return fmt.Sprintf("agent-%d-%s", i, uuid.New().String()) - } - agents = []*agentWithContext{} - for i := 0; i < numAgents; i++ { - ctxCa, ca := context.WithCancel(env.Context()) - id := agentIdFunc(i) - port, errC := env.StartAgent(id, token, []string{fingerprint}, test.WithContext(ctxCa)) - Eventually(errC).Should(Receive(BeNil())) - agents = append(agents, &agentWithContext{ - port: port, - CancelFunc: ca, - Context: ctxCa, - id: id, - }) + } + By("verifying that there are default conditions") + Eventually(func() error { + condList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{}) + if err != nil { + return err + } + if len(condList.Items) != numAgents*2 { + return fmt.Errorf("expected %d conditions, got %d", numAgents*2, len(condList.Items)) } - By("verifying that there are default conditions") + return nil + }, time.Second*5, time.Millisecond*200).Should(Succeed()) + + By("verifying that only the default group exists") + groupList, err := alertConditionsClient.ListAlertConditionGroups(env.Context(), &emptypb.Empty{}) + Expect(err).To(Succeed()) + Expect(groupList.Items).To(HaveLen(1)) + Expect(groupList.Items[0].Id).To(Equal("")) + }) + + It("shoud list conditions by given filters", func() { + for _, agent := range agents { Eventually(func() error { - condList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{}) + filteredByCluster, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{ + Clusters: []string{agent.id}, + }) if err != nil { return err } - if len(condList.Items) != numAgents*2 { - return fmt.Errorf("expected %d conditions, got %d", numAgents*2, len(condList.Items)) + if len(filteredByCluster.Items) != 2 { + return fmt.Errorf("expected 2 conditions, got %d", len(filteredByCluster.Items)) } return nil - }, time.Second*5, time.Millisecond*200).Should(Succeed()) + }, time.Second).Should(Succeed()) - By("verifying that only the default group exists") - groupList, err := alertConditionsClient.ListAlertConditionGroups(env.Context(), &emptypb.Empty{}) - Expect(err).To(Succeed()) - Expect(groupList.Items).To(HaveLen(1)) - Expect(groupList.Items[0].Id).To(Equal("")) - }) - - It("shoud list conditions by given filters", func() { - for _, agent := range agents { - Eventually(func() error { - filteredByCluster, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{ - Clusters: []string{agent.id}, - }) - if err != nil { - return err - } - if len(filteredByCluster.Items) != 2 { - return fmt.Errorf("expected 2 conditions, got %d", len(filteredByCluster.Items)) - } - return nil - }, time.Second).Should(Succeed()) - - } + } - By("verifying all the agents conditions are critical") + By("verifying all the agents conditions are critical") - filterList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{ - Severities: []alertingv1.OpniSeverity{ - alertingv1.OpniSeverity_Warning, - alertingv1.OpniSeverity_Error, - alertingv1.OpniSeverity_Info, - }, - }) - Expect(err).To(Succeed()) - Expect(filterList.Items).To(HaveLen(0)) + filterList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{ + Severities: []alertingv1.OpniSeverity{ + alertingv1.OpniSeverity_Warning, + alertingv1.OpniSeverity_Error, + alertingv1.OpniSeverity_Info, + }, + }) + Expect(err).To(Succeed()) + Expect(filterList.Items).To(HaveLen(0)) - By("verifying we have an equal number of disconnect and capability unhealthy") + By("verifying we have an equal number of disconnect and capability unhealthy") - disconnectList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{ + disconnectList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{ + AlertTypes: []alertingv1.AlertType{ + alertingv1.AlertType_System, + }, + }) + Expect(err).To(Succeed()) + disconnectStatusList, err := alertConditionsClient.ListAlertConditionsWithStatus(env.Context(), &alertingv1.ListStatusRequest{ + ItemFilter: &alertingv1.ListAlertConditionRequest{ AlertTypes: []alertingv1.AlertType{ alertingv1.AlertType_System, }, - }) - Expect(err).To(Succeed()) - disconnectStatusList, err := alertConditionsClient.ListAlertConditionsWithStatus(env.Context(), &alertingv1.ListStatusRequest{ - ItemFilter: &alertingv1.ListAlertConditionRequest{ - AlertTypes: []alertingv1.AlertType{ - alertingv1.AlertType_System, - }, - }, - }) - Expect(err).To(Succeed()) + }, + }) + Expect(err).To(Succeed()) + + capabilityList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{ + AlertTypes: []alertingv1.AlertType{ + alertingv1.AlertType_DownstreamCapability, + }, + }) + Expect(err).To(Succeed()) - capabilityList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{ + capabilityStatusList, err := alertConditionsClient.ListAlertConditionsWithStatus(env.Context(), &alertingv1.ListStatusRequest{ + ItemFilter: &alertingv1.ListAlertConditionRequest{ AlertTypes: []alertingv1.AlertType{ alertingv1.AlertType_DownstreamCapability, }, - }) - Expect(err).To(Succeed()) - - capabilityStatusList, err := alertConditionsClient.ListAlertConditionsWithStatus(env.Context(), &alertingv1.ListStatusRequest{ - ItemFilter: &alertingv1.ListAlertConditionRequest{ - AlertTypes: []alertingv1.AlertType{ - alertingv1.AlertType_DownstreamCapability, - }, - }, - }) - - Expect(err).To(Succeed()) - Expect(capabilityList.Items).To(HaveLen(len(disconnectList.Items))) - Expect(capabilityStatusList.GetAlertConditions()).To(HaveLen(len(disconnectStatusList.GetAlertConditions()))) + }, }) - It("should be able to attach endpoints to conditions", func() { - By("attaching a sample of random endpoints to default agent conditions") - condList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{}) + Expect(err).To(Succeed()) + Expect(capabilityList.Items).To(HaveLen(len(disconnectList.Items))) + Expect(capabilityStatusList.GetAlertConditions()).To(HaveLen(len(disconnectStatusList.GetAlertConditions()))) + }) + + It("should be able to attach endpoints to conditions", func() { + By("attaching a sample of random endpoints to default agent conditions") + condList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{}) + Expect(err).To(Succeed()) + endpList, err := alertEndpointsClient.ListAlertEndpoints(env.Context(), &alertingv1.ListAlertEndpointsRequest{}) + for _, cond := range condList.Items { Expect(err).To(Succeed()) - endpList, err := alertEndpointsClient.ListAlertEndpoints(env.Context(), &alertingv1.ListAlertEndpointsRequest{}) - for _, cond := range condList.Items { - Expect(err).To(Succeed()) - endps := lo.Map( - lo.Samples(endpList.Items, 1+rand.Intn(len(endpList.Items)-1)), - func(a *alertingv1.AlertEndpointWithId, _ int) *alertingv1.AttachedEndpoint { - return &alertingv1.AttachedEndpoint{ - EndpointId: a.GetId().Id, - } - }) + endps := lo.Map( + lo.Samples(endpList.Items, 1+rand.Intn(len(endpList.Items)-1)), + func(a *alertingv1.AlertEndpointWithId, _ int) *alertingv1.AttachedEndpoint { + return &alertingv1.AttachedEndpoint{ + EndpointId: a.GetId().Id, + } + }) - if cond.GetAlertCondition().GetAlertType().GetSystem() != nil { - for _, endp := range endps { - if _, ok := expectedRouting[endp.EndpointId]; !ok { - expectedRouting[endp.EndpointId] = &alertingv1.ConditionReferenceList{ - Items: []*alertingv1.ConditionReference{}, - } + if cond.GetAlertCondition().GetAlertType().GetSystem() != nil { + for _, endp := range endps { + if _, ok := expectedRouting[endp.EndpointId]; !ok { + expectedRouting[endp.EndpointId] = &alertingv1.ConditionReferenceList{ + Items: []*alertingv1.ConditionReference{}, } - expectedRouting[endp.EndpointId].Items = append(expectedRouting[endp.EndpointId].Items, cond.GetId()) } - cond.AlertCondition.AttachedEndpoints = &alertingv1.AttachedEndpoints{ - Items: endps, - InitialDelay: durationpb.New(time.Second * 1), - ThrottlingDuration: durationpb.New(time.Second * 1), - Details: &alertingv1.EndpointImplementation{ - Title: "disconnected agent", - Body: "agent %s is disconnected", - }, - } - _, err = alertConditionsClient.UpdateAlertCondition(env.Context(), &alertingv1.UpdateAlertConditionRequest{ - Id: cond.GetId(), - UpdateAlert: cond.AlertCondition, - }) - Expect(err).To(Succeed()) + expectedRouting[endp.EndpointId].Items = append(expectedRouting[endp.EndpointId].Items, cond.GetId()) + } + cond.AlertCondition.AttachedEndpoints = &alertingv1.AttachedEndpoints{ + Items: endps, + InitialDelay: durationpb.New(time.Second * 1), + ThrottlingDuration: durationpb.New(time.Second * 1), + Details: &alertingv1.EndpointImplementation{ + Title: "disconnected agent", + Body: "agent %s is disconnected", + }, } - } - for _, refs := range expectedRouting { - slices.SortFunc(refs.Items, func(a, b *alertingv1.ConditionReference) bool { - if a.GroupId != b.GroupId { - return a.GroupId < b.GroupId - } - return a.Id < b.Id + _, err = alertConditionsClient.UpdateAlertCondition(env.Context(), &alertingv1.UpdateAlertConditionRequest{ + Id: cond.GetId(), + UpdateAlert: cond.AlertCondition, }) - } - - By("creating some default webhook servers as endpoints") - notificationServers = alerting.CreateWebhookServer(env, numNotificationServers) - for _, server := range notificationServers { - ref, err := alertEndpointsClient.CreateAlertEndpoint(env.Context(), server.Endpoint()) Expect(err).To(Succeed()) - server.EndpointId = ref.Id } - endpList, err = alertEndpointsClient.ListAlertEndpoints(env.Context(), &alertingv1.ListAlertEndpointsRequest{}) - Expect(err).To(Succeed()) - Expect(endpList.Items).To(HaveLen(numNotificationServers + numServers)) + } + for _, refs := range expectedRouting { + slices.SortFunc(refs.Items, func(a, b *alertingv1.ConditionReference) bool { + if a.GroupId != b.GroupId { + return a.GroupId < b.GroupId + } + return a.Id < b.Id + }) + } - By("setting the default servers as default endpoints") - for _, server := range notificationServers { - _, err = alertEndpointsClient.ToggleNotifications(env.Context(), &alertingv1.ToggleRequest{ - Id: &corev1.Reference{Id: server.EndpointId}, - }) - Expect(err).To(Succeed()) - } + By("creating some default webhook servers as endpoints") + notificationServers = alerting.CreateWebhookServer(env, numNotificationServers) + for _, server := range notificationServers { + ref, err := alertEndpointsClient.CreateAlertEndpoint(env.Context(), server.Endpoint()) + Expect(err).To(Succeed()) + server.EndpointId = ref.Id + } + endpList, err = alertEndpointsClient.ListAlertEndpoints(env.Context(), &alertingv1.ListAlertEndpointsRequest{}) + Expect(err).To(Succeed()) + Expect(endpList.Items).To(HaveLen(numNotificationServers + numServers)) + + By("setting the default servers as default endpoints") + for _, server := range notificationServers { + _, err = alertEndpointsClient.ToggleNotifications(env.Context(), &alertingv1.ToggleRequest{ + Id: &corev1.Reference{Id: server.EndpointId}, + }) + Expect(err).To(Succeed()) + } - By("expecting the conditions to eventually move to the 'OK' state") - Eventually(func() error { - for _, cond := range condList.Items { - status, err := alertConditionsClient.AlertConditionStatus(env.Context(), cond.Id) - if err != nil { - return err - } - if status.State != alertingv1.AlertConditionState_Ok { - return fmt.Errorf("condition %s \"%s\" is not OK, instead in state %s, %s", cond.AlertCondition.Id, cond.AlertCondition.Name, status.State.String(), status.Reason) - } - } - return nil - }, time.Second*30, time.Second).Should(Succeed()) - By("verifying the routing relationships are correctly loaded") - Eventually(func() int { - relationships, err := alertNotificationsClient.ListRoutingRelationships(env.Context(), &emptypb.Empty{}) + By("expecting the conditions to eventually move to the 'OK' state") + Eventually(func() error { + for _, cond := range condList.Items { + status, err := alertConditionsClient.AlertConditionStatus(env.Context(), cond.Id) if err != nil { - return -1 + return err + } + if status.State != alertingv1.AlertConditionState_Ok { + return fmt.Errorf("condition %s \"%s\" is not OK, instead in state %s, %s", cond.AlertCondition.Id, cond.AlertCondition.Name, status.State.String(), status.Reason) } - return len(relationships.RoutingRelationships) - }).Should(Equal(len(expectedRouting))) + } + return nil + }, time.Second*30, time.Second).Should(Succeed()) + By("verifying the routing relationships are correctly loaded") + Eventually(func() int { relationships, err := alertNotificationsClient.ListRoutingRelationships(env.Context(), &emptypb.Empty{}) - Expect(err).To(Succeed()) + if err != nil { + return -1 + } + return len(relationships.RoutingRelationships) + }).Should(Equal(len(expectedRouting))) + relationships, err := alertNotificationsClient.ListRoutingRelationships(env.Context(), &emptypb.Empty{}) + Expect(err).To(Succeed()) - for endpId, rel := range relationships.RoutingRelationships { - slices.SortFunc(rel.Items, func(a, b *alertingv1.ConditionReference) bool { - if a.GroupId != b.GroupId { - return a.GroupId < b.GroupId - } - return a.Id < b.Id - }) - if _, ok := expectedRouting[endpId]; !ok { - Fail(fmt.Sprintf("Expected a routing relation to exist for endpoint id %s", endpId)) - } - Expect(len(rel.Items)).To(Equal(len(expectedRouting[endpId].Items))) - for i := range rel.Items { - Expect(rel.Items[i].Id).To(Equal(expectedRouting[endpId].Items[i].Id)) - Expect(rel.Items[i].GroupId).To(Equal(expectedRouting[endpId].Items[i].GroupId)) + for endpId, rel := range relationships.RoutingRelationships { + slices.SortFunc(rel.Items, func(a, b *alertingv1.ConditionReference) bool { + if a.GroupId != b.GroupId { + return a.GroupId < b.GroupId } + return a.Id < b.Id + }) + if _, ok := expectedRouting[endpId]; !ok { + Fail(fmt.Sprintf("Expected a routing relation to exist for endpoint id %s", endpId)) } - }) + Expect(len(rel.Items)).To(Equal(len(expectedRouting[endpId].Items))) + for i := range rel.Items { + Expect(rel.Items[i].Id).To(Equal(expectedRouting[endpId].Items[i].Id)) + Expect(rel.Items[i].GroupId).To(Equal(expectedRouting[endpId].Items[i].GroupId)) + } + } + }) - Specify("agent disconnect alarms should fire when agents are disconnected ", func() { - // Disconnect a random 3 agents, and verify the servers have the messages - By("disconnecting a random 3 agents") - disconnectedIds := []string{} - toDisconnect := lo.Samples(agents, 3) - for _, disc := range toDisconnect { - disc.CancelFunc() - disconnectedIds = append(disconnectedIds, disc.id) + Specify("agent disconnect alarms should fire when agents are disconnected ", func() { + // Disconnect a random 3 agents, and verify the servers have the messages + By("disconnecting a random 3 agents") + disconnectedIds := []string{} + toDisconnect := lo.Samples(agents, 3) + for _, disc := range toDisconnect { + disc.CancelFunc() + disconnectedIds = append(disconnectedIds, disc.id) + } + + condList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{}) + Expect(err).To(Succeed()) + notInvolvedDisconnects := map[string]struct{}{} + for _, cond := range condList.Items { + if cond.GetAlertCondition().GetAlertType().GetSystem() != nil { + if slices.Contains(disconnectedIds, cond.GetAlertCondition().GetAlertType().GetSystem().ClusterId.Id) { + endps := lo.Map(cond.AlertCondition.AttachedEndpoints.Items, + func(a *alertingv1.AttachedEndpoint, _ int) string { + return a.EndpointId + }) + involvedDisconnects[cond.GetAlertCondition().Id] = endps + } else { + notInvolvedDisconnects[cond.GetAlertCondition().Id] = struct{}{} + } } + } + webhooks := lo.Uniq(lo.Flatten(lo.Values(involvedDisconnects))) + Expect(len(webhooks)).To(BeNumerically(">", 0)) - condList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{}) - Expect(err).To(Succeed()) - notInvolvedDisconnects := map[string]struct{}{} - for _, cond := range condList.Items { - if cond.GetAlertCondition().GetAlertType().GetSystem() != nil { - if slices.Contains(disconnectedIds, cond.GetAlertCondition().GetAlertType().GetSystem().ClusterId.Id) { - endps := lo.Map(cond.AlertCondition.AttachedEndpoints.Items, - func(a *alertingv1.AttachedEndpoint, _ int) string { - return a.EndpointId - }) - involvedDisconnects[cond.GetAlertCondition().Id] = endps - } else { - notInvolvedDisconnects[cond.GetAlertCondition().Id] = struct{}{} + By("verifying the agents are actually disconnected") + Eventually(func() error { + clusters, err := mgmtClient.ListClusters(env.Context(), &managementv1.ListClustersRequest{}) + if err != nil { + return err + } + for _, cl := range clusters.Items { + if slices.Contains(disconnectedIds, cl.GetId()) { + healthStatus, err := mgmtClient.GetClusterHealthStatus(env.Context(), cl.Reference()) + if err != nil { + return err + } + if !healthStatus.Status.Connected == false { + return fmt.Errorf("expected disconnected health status for cluster %s: %s", cl.GetId(), healthStatus.Status.String()) } } } - webhooks := lo.Uniq(lo.Flatten(lo.Values(involvedDisconnects))) - Expect(len(webhooks)).To(BeNumerically(">", 0)) - - By("verifying the agents are actually disconnected") - Eventually(func() error { - clusters, err := mgmtClient.ListClusters(env.Context(), &managementv1.ListClustersRequest{}) + return nil + }, 5*time.Second, 200*time.Millisecond).Should(Succeed()) + + By("verifying the physical servers have received the disconnect messages") + Eventually(func() error { + servers := servers + conditionIds := lo.Keys(involvedDisconnects) + for _, id := range conditionIds { + status, err := alertConditionsClient.AlertConditionStatus(env.Context(), &alertingv1.ConditionReference{Id: id}) if err != nil { return err } - for _, cl := range clusters.Items { - if slices.Contains(disconnectedIds, cl.GetId()) { - healthStatus, err := mgmtClient.GetClusterHealthStatus(env.Context(), cl.Reference()) - if err != nil { - return err - } - if !healthStatus.Status.Connected == false { - return fmt.Errorf("expected disconnected health status for cluster %s: %s", cl.GetId(), healthStatus.Status.String()) - } - } + if status.GetState() != alertingv1.AlertConditionState_Firing { + return fmt.Errorf("expected alerting condition %s to be firing, got %s", id, status.GetState().String()) } - return nil - }, 5*time.Second, 200*time.Millisecond).Should(Succeed()) + } - By("verifying the physical servers have received the disconnect messages") - Eventually(func() error { - servers := servers - conditionIds := lo.Keys(involvedDisconnects) - for _, id := range conditionIds { - status, err := alertConditionsClient.AlertConditionStatus(env.Context(), &alertingv1.ConditionReference{Id: id}) - if err != nil { - return err - } - if status.GetState() != alertingv1.AlertConditionState_Firing { - return fmt.Errorf("expected alerting condition %s to be firing, got %s", id, status.GetState().String()) - } + for id := range notInvolvedDisconnects { + status, err := alertConditionsClient.AlertConditionStatus(env.Context(), &alertingv1.ConditionReference{Id: id}) + if err != nil { + return err } - - for id := range notInvolvedDisconnects { - status, err := alertConditionsClient.AlertConditionStatus(env.Context(), &alertingv1.ConditionReference{Id: id}) - if err != nil { - return err - } - if status.GetState() != alertingv1.AlertConditionState_Ok { - return fmt.Errorf("expected unaffected alerting condition %s to be ok, got %s", id, status.GetState().String()) - } + if status.GetState() != alertingv1.AlertConditionState_Ok { + return fmt.Errorf("expected unaffected alerting condition %s to be ok, got %s", id, status.GetState().String()) } + } - for _, server := range servers { - if slices.Contains(webhooks, server.EndpointId) { - // hard to map these excatly without recreating the internal routing logic from the routers - // since we have dedicated routing integration tests, we can just check that the buffer is not empty - if len(server.GetBuffer()) == 0 { - return fmt.Errorf("expected webhook server %s to have messages, got %d", server.EndpointId, len(server.GetBuffer())) - } + for _, server := range servers { + if slices.Contains(webhooks, server.EndpointId) { + // hard to map these excatly without recreating the internal routing logic from the routers + // since we have dedicated routing integration tests, we can just check that the buffer is not empty + if len(server.GetBuffer()) == 0 { + return fmt.Errorf("expected webhook server %s to have messages, got %d", server.EndpointId, len(server.GetBuffer())) } } - return nil - }, time.Second*60, time.Millisecond*500).Should(Succeed()) + } + return nil + }, time.Second*60, time.Millisecond*500).Should(Succeed()) - By("verifying the notification servers have not received any alarm disconnect messages") - Eventually(func() error { - for _, server := range notificationServers { - if len(server.GetBuffer()) != 0 { - return fmt.Errorf("expected webhook server %s to not have any notifications, got %d", server.EndpointId, len(server.GetBuffer())) - } + By("verifying the notification servers have not received any alarm disconnect messages") + Eventually(func() error { + for _, server := range notificationServers { + if len(server.GetBuffer()) != 0 { + return fmt.Errorf("expected webhook server %s to not have any notifications, got %d", server.EndpointId, len(server.GetBuffer())) } - return nil - }, time.Second*5, time.Second*1).Should(Succeed()) - }) + } + return nil + }, time.Second*5, time.Second*1).Should(Succeed()) + }) - It("should be able to batch list status and filter by status", func() { - condList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{}) - Expect(err).To(Succeed()) + It("should be able to batch list status and filter by status", func() { + condList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{}) + Expect(err).To(Succeed()) - statusCondList, err := alertConditionsClient.ListAlertConditionsWithStatus(env.Context(), &alertingv1.ListStatusRequest{}) - Expect(err).To(Succeed()) - Expect(statusCondList.AlertConditions).To(HaveLen(len(condList.Items))) - for condId, cond := range statusCondList.AlertConditions { - if slices.Contains(lo.Keys(involvedDisconnects), condId) { - Expect(cond.Status.State).To(Equal(alertingv1.AlertConditionState_Firing)) - } else { - Expect(cond.Status.State).To(Equal(alertingv1.AlertConditionState_Ok)) - } - } - firingOnlyStatusList, err := alertConditionsClient.ListAlertConditionsWithStatus(env.Context(), &alertingv1.ListStatusRequest{ - States: []alertingv1.AlertConditionState{ - alertingv1.AlertConditionState_Firing, - }, - }) - Expect(err).To(Succeed()) - Expect(firingOnlyStatusList.AlertConditions).To(HaveLen(len(involvedDisconnects))) - for _, cond := range firingOnlyStatusList.AlertConditions { + statusCondList, err := alertConditionsClient.ListAlertConditionsWithStatus(env.Context(), &alertingv1.ListStatusRequest{}) + Expect(err).To(Succeed()) + Expect(statusCondList.AlertConditions).To(HaveLen(len(condList.Items))) + for condId, cond := range statusCondList.AlertConditions { + if slices.Contains(lo.Keys(involvedDisconnects), condId) { Expect(cond.Status.State).To(Equal(alertingv1.AlertConditionState_Firing)) + } else { + Expect(cond.Status.State).To(Equal(alertingv1.AlertConditionState_Ok)) } + } + firingOnlyStatusList, err := alertConditionsClient.ListAlertConditionsWithStatus(env.Context(), &alertingv1.ListStatusRequest{ + States: []alertingv1.AlertConditionState{ + alertingv1.AlertConditionState_Firing, + }, }) + Expect(err).To(Succeed()) + Expect(firingOnlyStatusList.AlertConditions).To(HaveLen(len(involvedDisconnects))) + for _, cond := range firingOnlyStatusList.AlertConditions { + Expect(cond.Status.State).To(Equal(alertingv1.AlertConditionState_Firing)) + } + }) - It("should be able to push notifications to our notification endpoints", func() { - Expect(len(notificationServers)).To(BeNumerically(">", 0)) - By("forwarding the message to AlertManager") - _, err := alertNotificationsClient.PushNotification(env.Context(), &alertingv1.Notification{ - Title: "hello", - Body: "world", - // set to critical in order to expedite the notification during testing - Properties: map[string]string{ - message.NotificationPropertySeverity: alertingv1.OpniSeverity_Critical.String(), - }, - }) - Expect(err).To(Succeed()) - - By("verifying the endpoints have received the notification messages") - Eventually(func() error { - for _, server := range notificationServers { - if len(server.GetBuffer()) == 0 { - return fmt.Errorf("expected webhook server %s to have messages, got %d", server.EndpointId, len(server.GetBuffer())) - } - } - return nil - }, time.Second*60, time.Second).Should(Succeed()) + It("should be able to push notifications to our notification endpoints", func() { + Expect(len(notificationServers)).To(BeNumerically(">", 0)) + By("forwarding the message to AlertManager") + _, err := alertNotificationsClient.PushNotification(env.Context(), &alertingv1.Notification{ + Title: "hello", + Body: "world", + // set to critical in order to expedite the notification during testing + Properties: map[string]string{ + message.NotificationPropertySeverity: alertingv1.OpniSeverity_Critical.String(), + }, }) + Expect(err).To(Succeed()) - It("should be able to list opni messages", func() { - Eventually(func() error { - list, err := alertNotificationsClient.ListNotifications(env.Context(), &alertingv1.ListNotificationRequest{}) - if err != nil { - return err - } - if len(list.Items) == 0 { - return fmt.Errorf("expected to find at least one notification, got 0") + By("verifying the endpoints have received the notification messages") + Eventually(func() error { + for _, server := range notificationServers { + if len(server.GetBuffer()) == 0 { + return fmt.Errorf("expected webhook server %s to have messages, got %d", server.EndpointId, len(server.GetBuffer())) } - return nil - }, time.Second*60, time.Second).Should(Succeed()) + } + return nil + }, time.Second*60, time.Second).Should(Succeed()) + }) - By("verifying we enforce limits") - list, err := alertNotificationsClient.ListNotifications(env.Context(), &alertingv1.ListNotificationRequest{ - Limit: lo.ToPtr(int32(1)), - }) - Expect(err).To(Succeed()) - Expect(len(list.Items)).To(Equal(1)) + It("should be able to list opni messages", func() { + Eventually(func() error { + list, err := alertNotificationsClient.ListNotifications(env.Context(), &alertingv1.ListNotificationRequest{}) + if err != nil { + return err + } + if len(list.Items) == 0 { + return fmt.Errorf("expected to find at least one notification, got 0") + } + return nil + }, time.Second*60, time.Second).Should(Succeed()) + + By("verifying we enforce limits") + list, err := alertNotificationsClient.ListNotifications(env.Context(), &alertingv1.ListNotificationRequest{ + Limit: lo.ToPtr(int32(1)), }) + Expect(err).To(Succeed()) + Expect(len(list.Items)).To(Equal(1)) + }) - It("should return warnings when trying to edit/delete alert endpoints that are involved in conditions", func() { - webhooks := lo.Uniq(lo.Flatten(lo.Values(involvedDisconnects))) - Expect(len(webhooks)).To(BeNumerically(">", 0)) + It("should return warnings when trying to edit/delete alert endpoints that are involved in conditions", func() { + webhooks := lo.Uniq(lo.Flatten(lo.Values(involvedDisconnects))) + Expect(len(webhooks)).To(BeNumerically(">", 0)) - for _, webhook := range webhooks { - involvedConditions, err := alertEndpointsClient.UpdateAlertEndpoint(env.Context(), &alertingv1.UpdateAlertEndpointRequest{ - Id: &corev1.Reference{ - Id: webhook, - }, - UpdateAlert: &alertingv1.AlertEndpoint{ - Name: "update", - Description: "update", - Endpoint: &alertingv1.AlertEndpoint_Webhook{ - Webhook: &alertingv1.WebhookEndpoint{ - Url: "http://example.com", - }, + for _, webhook := range webhooks { + involvedConditions, err := alertEndpointsClient.UpdateAlertEndpoint(env.Context(), &alertingv1.UpdateAlertEndpointRequest{ + Id: &corev1.Reference{ + Id: webhook, + }, + UpdateAlert: &alertingv1.AlertEndpoint{ + Name: "update", + Description: "update", + Endpoint: &alertingv1.AlertEndpoint_Webhook{ + Webhook: &alertingv1.WebhookEndpoint{ + Url: "http://example.com", }, - Id: "id", }, - ForceUpdate: false, - }) - Expect(err).NotTo(HaveOccurred()) + Id: "id", + }, + ForceUpdate: false, + }) + Expect(err).NotTo(HaveOccurred()) - Expect(involvedConditions.Items).NotTo(HaveLen(0)) - involvedConditions, err = alertEndpointsClient.DeleteAlertEndpoint(env.Context(), &alertingv1.DeleteAlertEndpointRequest{ - Id: &corev1.Reference{ - Id: webhook, - }, - ForceDelete: false, - }) - Expect(err).NotTo(HaveOccurred()) - Expect(involvedConditions.Items).NotTo(HaveLen(0)) - } - }) + Expect(involvedConditions.Items).NotTo(HaveLen(0)) + involvedConditions, err = alertEndpointsClient.DeleteAlertEndpoint(env.Context(), &alertingv1.DeleteAlertEndpointRequest{ + Id: &corev1.Reference{ + Id: webhook, + }, + ForceDelete: false, + }) + Expect(err).NotTo(HaveOccurred()) + Expect(involvedConditions.Items).NotTo(HaveLen(0)) + } + }) - It("should have a functional timeline", func() { - condList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{}) - Expect(err).To(Succeed()) + It("should have a functional timeline", func() { + condList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{}) + Expect(err).To(Succeed()) - // By("verifying the timeline shows only the firing conditions ") - Eventually(func() error { - timeline, err := alertConditionsClient.Timeline(env.Context(), &alertingv1.TimelineRequest{ - LookbackWindow: durationpb.New(time.Minute * 5), - }) - if err != nil { - return err - } + // By("verifying the timeline shows only the firing conditions ") + Eventually(func() error { + timeline, err := alertConditionsClient.Timeline(env.Context(), &alertingv1.TimelineRequest{ + LookbackWindow: durationpb.New(time.Minute * 5), + }) + if err != nil { + return err + } - By("verifying the timeline matches the conditions") - if len(timeline.Items) != len(condList.Items) { - return fmt.Errorf("expected timeline to have %d items, got %d", len(condList.Items), len(timeline.Items)) - } + By("verifying the timeline matches the conditions") + if len(timeline.Items) != len(condList.Items) { + return fmt.Errorf("expected timeline to have %d items, got %d", len(condList.Items), len(timeline.Items)) + } - for id, item := range timeline.GetItems() { - if slices.Contains(lo.Keys(involvedDisconnects), id) { - if len(item.Windows) == 0 { - return fmt.Errorf("firing condition should show up on timeline, but does not") - } - if len(item.Windows) != 1 { - return fmt.Errorf("condition evaluation is flaky, should only have one window, but has %d", len(item.Windows)) - } - messages, err := alertNotificationsClient.ListAlarmMessages(env.Context(), &alertingv1.ListAlarmMessageRequest{ - ConditionId: &alertingv1.ConditionReference{ - Id: id, - }, - Fingerprints: item.Windows[0].Fingerprints, - Start: item.Windows[0].Start, - End: timestamppb.Now(), - }) - if err != nil { - return err - } - if !(len(messages.Items) > 0) { - return fmt.Errorf("expected firing condition to have cached messages") - } - Expect(len(messages.Items)).To(BeNumerically(">", 0)) + for id, item := range timeline.GetItems() { + if slices.Contains(lo.Keys(involvedDisconnects), id) { + if len(item.Windows) == 0 { + return fmt.Errorf("firing condition should show up on timeline, but does not") + } + if len(item.Windows) != 1 { + return fmt.Errorf("condition evaluation is flaky, should only have one window, but has %d", len(item.Windows)) + } + messages, err := alertNotificationsClient.ListAlarmMessages(env.Context(), &alertingv1.ListAlarmMessageRequest{ + ConditionId: &alertingv1.ConditionReference{ + Id: id, + }, + Fingerprints: item.Windows[0].Fingerprints, + Start: item.Windows[0].Start, + End: timestamppb.Now(), + }) + if err != nil { + return err + } + if !(len(messages.Items) > 0) { + return fmt.Errorf("expected firing condition to have cached messages") + } + Expect(len(messages.Items)).To(BeNumerically(">", 0)) - } else { - if len(item.Windows) != 0 { - return fmt.Errorf("conditions that have not fired should not show up on timeline, but do") - } + } else { + if len(item.Windows) != 0 { + return fmt.Errorf("conditions that have not fired should not show up on timeline, but do") } } - return nil - }, time.Second*15, time.Second).Should(Succeed()) - }) + } + return nil + }, time.Second*15, time.Second).Should(Succeed()) + }) - Specify("the alertmanager proxy served by the Gateway HTTP port should be able to list the alarms", func() { - alertingProxyGET := fmt.Sprintf("https://%s/plugin_alerting/alertmanager/api/v2/alerts/groups", env.GatewayConfig().Spec.HTTPListenAddress) - req, err := http.NewRequestWithContext(env.Context(), http.MethodGet, alertingProxyGET, nil) - Expect(err).To(Succeed()) + Specify("the alertmanager proxy served by the Gateway HTTP port should be able to list the alarms", func() { + alertingProxyGET := fmt.Sprintf("https://%s/plugin_alerting/alertmanager/api/v2/alerts/groups", env.GatewayConfig().Spec.HTTPListenAddress) + req, err := http.NewRequestWithContext(env.Context(), http.MethodGet, alertingProxyGET, nil) + Expect(err).To(Succeed()) - Eventually(func() error { - resp, err := httpProxyClient.Do(req) - if err != nil { - return err - } - defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - return fmt.Errorf("expected proxy to return status OK, instead got : %d, %s", resp.StatusCode, resp.Status) - } - res := alertmanagerv2.AlertGroups{} - if err := json.NewDecoder(resp.Body).Decode(&res); err != nil { - return err - } - if len(res) == 0 { - return fmt.Errorf("expected to get non-empty alertgroup from alertmanager") - } - return nil - }).Should(Succeed()) - }) + Eventually(func() error { + resp, err := httpProxyClient.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("expected proxy to return status OK, instead got : %d, %s", resp.StatusCode, resp.Status) + } + res := alertmanagerv2.AlertGroups{} + if err := json.NewDecoder(resp.Body).Decode(&res); err != nil { + return err + } + if len(res) == 0 { + return fmt.Errorf("expected to get non-empty alertgroup from alertmanager") + } + return nil + }).Should(Succeed()) + }) - It("should sync friendly cluster names to conditions", func() { - friendlyName := "friendly-name" - agentId := agents[0].id - cl, err := mgmtClient.GetCluster(env.Context(), &corev1.Reference{Id: agentId}) - Expect(err).To(Succeed()) + It("should sync friendly cluster names to conditions", func() { + friendlyName := "friendly-name" + agentId := agents[0].id + cl, err := mgmtClient.GetCluster(env.Context(), &corev1.Reference{Id: agentId}) + Expect(err).To(Succeed()) + + _, err = mgmtClient.EditCluster(env.Context(), &managementv1.EditClusterRequest{ + Cluster: &corev1.Reference{Id: agentId}, + Labels: lo.Assign(cl.GetLabels(), map[string]string{ + corev1.NameLabel: friendlyName, + }), + }) + Expect(err).To(Succeed()) - _, err = mgmtClient.EditCluster(env.Context(), &managementv1.EditClusterRequest{ - Cluster: &corev1.Reference{Id: agentId}, - Labels: lo.Assign(cl.GetLabels(), map[string]string{ - corev1.NameLabel: friendlyName, - }), + Eventually(func() error { + conds, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{ + Clusters: []string{agentId}, }) - Expect(err).To(Succeed()) + if err != nil { + return err + } - Eventually(func() error { - conds, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{ - Clusters: []string{agentId}, - }) - if err != nil { - return err + for _, cond := range conds.Items { + name, ok := cond.GetAlertCondition().Annotations[message.NotificationContentClusterName] + if !ok { + return fmt.Errorf("expected alert condition to contain annotations with cluster name") } - - for _, cond := range conds.Items { - name, ok := cond.GetAlertCondition().Annotations[message.NotificationContentClusterName] - if !ok { - return fmt.Errorf("expected alert condition to contain annotations with cluster name") - } - if name != friendlyName { - return fmt.Errorf("expected alert condition to contain annotations with cluster name %s, got %s", friendlyName, name) - } + if name != friendlyName { + return fmt.Errorf("expected alert condition to contain annotations with cluster name %s, got %s", friendlyName, name) } - return nil - }, time.Second*5, time.Millisecond*300).Should(Succeed()) + } + return nil + }, time.Second*5, time.Millisecond*300).Should(Succeed()) - }) + }) - It("should force update/delete alert endpoints involved in conditions", func() { - By("verifying we can edit Alert Endpoints in use by Alert Conditions") - endpList, err := alertEndpointsClient.ListAlertEndpoints(env.Context(), &alertingv1.ListAlertEndpointsRequest{}) - Expect(err).NotTo(HaveOccurred()) - Expect(len(endpList.Items)).To(BeNumerically(">", 0)) - Expect(endpList.Items).To(HaveLen(numServers + numNotificationServers)) - for _, endp := range endpList.Items { - _, err := alertEndpointsClient.UpdateAlertEndpoint(env.Context(), &alertingv1.UpdateAlertEndpointRequest{ - Id: &corev1.Reference{ - Id: endp.Id.Id, - }, - UpdateAlert: &alertingv1.AlertEndpoint{ - Name: "update", - Description: "update", - Endpoint: &alertingv1.AlertEndpoint_Webhook{ - Webhook: &alertingv1.WebhookEndpoint{ - Url: "http://example.com", - }, + It("should force update/delete alert endpoints involved in conditions", func() { + By("verifying we can edit Alert Endpoints in use by Alert Conditions") + endpList, err := alertEndpointsClient.ListAlertEndpoints(env.Context(), &alertingv1.ListAlertEndpointsRequest{}) + Expect(err).NotTo(HaveOccurred()) + Expect(len(endpList.Items)).To(BeNumerically(">", 0)) + Expect(endpList.Items).To(HaveLen(numServers + numNotificationServers)) + for _, endp := range endpList.Items { + _, err := alertEndpointsClient.UpdateAlertEndpoint(env.Context(), &alertingv1.UpdateAlertEndpointRequest{ + Id: &corev1.Reference{ + Id: endp.Id.Id, + }, + UpdateAlert: &alertingv1.AlertEndpoint{ + Name: "update", + Description: "update", + Endpoint: &alertingv1.AlertEndpoint_Webhook{ + Webhook: &alertingv1.WebhookEndpoint{ + Url: "http://example.com", }, - Id: endp.Id.Id, }, - ForceUpdate: true, - }) - Expect(err).NotTo(HaveOccurred()) - } - endpList, err = alertEndpointsClient.ListAlertEndpoints(env.Context(), &alertingv1.ListAlertEndpointsRequest{}) - Expect(err).NotTo(HaveOccurred()) - Expect(len(endpList.Items)).To(BeNumerically(">", 0)) - Expect(endpList.Items).To(HaveLen(numServers + numNotificationServers)) - updatedList := lo.Filter(endpList.Items, func(item *alertingv1.AlertEndpointWithId, _ int) bool { - if item.Endpoint.GetWebhook() != nil { - return item.Endpoint.GetName() == "update" && item.Endpoint.GetDescription() == "update" - } - return false + Id: endp.Id.Id, + }, + ForceUpdate: true, }) - Expect(updatedList).To(HaveLen(len(endpList.Items))) - - By("verifying we can delete Alert Endpoint in use by Alert Conditions") - for _, endp := range endpList.Items { - _, err := alertEndpointsClient.DeleteAlertEndpoint(env.Context(), &alertingv1.DeleteAlertEndpointRequest{ - Id: &corev1.Reference{ - Id: endp.Id.Id, - }, - ForceDelete: true, - }) - Expect(err).NotTo(HaveOccurred()) - } - endpList, err = alertEndpointsClient.ListAlertEndpoints(env.Context(), &alertingv1.ListAlertEndpointsRequest{}) Expect(err).NotTo(HaveOccurred()) - Expect(endpList.Items).To(HaveLen(0)) - - condList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{}) - Expect(err).NotTo(HaveOccurred()) - Expect(condList.Items).NotTo(HaveLen(0)) - hasEndpoints := lo.Filter(condList.Items, func(item *alertingv1.AlertConditionWithId, _ int) bool { - if item.AlertCondition.AttachedEndpoints != nil { - return len(item.AlertCondition.AttachedEndpoints.Items) != 0 - } - return false - }) - Expect(hasEndpoints).To(HaveLen(0)) + } + endpList, err = alertEndpointsClient.ListAlertEndpoints(env.Context(), &alertingv1.ListAlertEndpointsRequest{}) + Expect(err).NotTo(HaveOccurred()) + Expect(len(endpList.Items)).To(BeNumerically(">", 0)) + Expect(endpList.Items).To(HaveLen(numServers + numNotificationServers)) + updatedList := lo.Filter(endpList.Items, func(item *alertingv1.AlertEndpointWithId, _ int) bool { + if item.Endpoint.GetWebhook() != nil { + return item.Endpoint.GetName() == "update" && item.Endpoint.GetDescription() == "update" + } + return false }) + Expect(updatedList).To(HaveLen(len(endpList.Items))) - It("should delete the downstream agents", func() { - client := env.NewManagementClient() - agents, err := client.ListClusters(env.Context(), &managementv1.ListClustersRequest{}) + By("verifying we can delete Alert Endpoint in use by Alert Conditions") + for _, endp := range endpList.Items { + _, err := alertEndpointsClient.DeleteAlertEndpoint(env.Context(), &alertingv1.DeleteAlertEndpointRequest{ + Id: &corev1.Reference{ + Id: endp.Id.Id, + }, + ForceDelete: true, + }) Expect(err).NotTo(HaveOccurred()) - for _, agent := range agents.Items { - _, err := client.DeleteCluster(env.Context(), agent.Reference()) - Expect(err).NotTo(HaveOccurred()) + } + endpList, err = alertEndpointsClient.ListAlertEndpoints(env.Context(), &alertingv1.ListAlertEndpointsRequest{}) + Expect(err).NotTo(HaveOccurred()) + Expect(endpList.Items).To(HaveLen(0)) + + condList, err := alertConditionsClient.ListAlertConditions(env.Context(), &alertingv1.ListAlertConditionRequest{}) + Expect(err).NotTo(HaveOccurred()) + Expect(condList.Items).NotTo(HaveLen(0)) + hasEndpoints := lo.Filter(condList.Items, func(item *alertingv1.AlertConditionWithId, _ int) bool { + if item.AlertCondition.AttachedEndpoints != nil { + return len(item.AlertCondition.AttachedEndpoints.Items) != 0 } + return false }) + Expect(hasEndpoints).To(HaveLen(0)) + }) - It("should uninstall the alerting cluster", func() { - _, err := alertClusterClient.UninstallCluster(env.Context(), &alertops.UninstallRequest{ - DeleteData: true, - }) - Expect(err).To(BeNil()) + It("should delete the downstream agents", func() { + client := env.NewManagementClient() + agents, err := client.ListClusters(env.Context(), &managementv1.ListClustersRequest{}) + Expect(err).NotTo(HaveOccurred()) + for _, agent := range agents.Items { + _, err := client.DeleteCluster(env.Context(), agent.Reference()) + Expect(err).NotTo(HaveOccurred()) + } + }) + + It("should uninstall the alerting cluster", func() { + _, err := alertClusterClient.UninstallCluster(env.Context(), &alertops.UninstallRequest{ + DeleteData: true, }) - } - }) + Expect(err).To(BeNil()) + }) + } }) -} +}) diff --git a/test/plugins/alerting/routing_test.go b/test/plugins/alerting/routing_test.go index 3768c79842..ad083ea688 100644 --- a/test/plugins/alerting/routing_test.go +++ b/test/plugins/alerting/routing_test.go @@ -18,9 +18,9 @@ import ( "github.com/rancher/opni/pkg/alerting/drivers/routing" "github.com/rancher/opni/pkg/alerting/shared" alertingv1 "github.com/rancher/opni/pkg/apis/alerting/v1" + "github.com/rancher/opni/pkg/test" "github.com/rancher/opni/pkg/test/alerting" "github.com/rancher/opni/pkg/test/freeport" - "github.com/rancher/opni/pkg/test/testruntime" "github.com/rancher/opni/pkg/util" "github.com/samber/lo" "golang.org/x/exp/slices" @@ -29,225 +29,226 @@ import ( var defaultHook *alerting.MockIntegrationWebhookServer -func init() { - testruntime.IfIntegration(func() { - BuildRoutingLogicTest( - func() routing.OpniRouting { - defaultHooks := alerting.NewWebhookMemoryServer(env, "webhook") - defaultHook = defaultHooks - cfg := config.WebhookConfig{ - NotifierConfig: config.NotifierConfig{ - VSendResolved: false, - }, - URL: &amCfg.URL{ - URL: util.Must(url.Parse(defaultHook.GetWebhook())), - }, - } - return routing.NewOpniRouterV1(cfg) - }, - ) +var _ = Describe("Alerting routing logic translation to physical dispatching", Ordered, Label("integration"), func() { + var alertingClient client.AlertingClient + var alertingClient2 client.AlertingClient + var alertingClient3 client.AlertingClient + var router routing.OpniRouting + var env *test.Environment + var tmpConfigDir string + + BeforeAll(func() { + env = &test.Environment{} + Expect(env).NotTo(BeNil()) + Expect(env.Start()).To(Succeed()) + DeferCleanup(env.Stop) + tmpConfigDir = env.GenerateNewTempDirectory("alertmanager-config") + Expect(tmpConfigDir).NotTo(Equal("")) + + routerConstructor := func() routing.OpniRouting { + defaultHooks := alerting.NewWebhookMemoryServer(env, "webhook") + defaultHook = defaultHooks + cfg := config.WebhookConfig{ + NotifierConfig: config.NotifierConfig{ + VSendResolved: false, + }, + URL: &amCfg.URL{ + URL: util.Must(url.Parse(defaultHook.GetWebhook())), + }, + } + return routing.NewOpniRouterV1(cfg) + } + + router = routerConstructor() + Expect(router).NotTo(BeNil()) }) -} -func BuildRoutingLogicTest( - routerConstructor func() routing.OpniRouting, -) bool { - return Describe("Alerting routing logic translation to physical dispatching", Ordered, Label("integration"), func() { - var alertingClient client.AlertingClient - var alertingClient2 client.AlertingClient - var alertingClient3 client.AlertingClient - When("setting namespace specs on the routing tree", func() { - step := "initial" - var router routing.OpniRouting - BeforeAll(func() { - Expect(env).NotTo(BeNil()) - router = routerConstructor() - Expect(router).NotTo(BeNil()) - }) - AfterEach(func() { - By(fmt.Sprintf("%s step: expecting that the router can build the config", step)) - currentCfg, err := router.BuildConfig() - Expect(err).To(Succeed()) - By(fmt.Sprintf("%s step: expecting that the formed alertmanager config is correct", step)) - fp := freeport.GetFreePort() + When("setting namespace specs on the routing tree", func() { + step := "initial" - alerting.ExpectAlertManagerConfigToBeValid(env.Context(), env, tmpConfigDir, step+".yaml", currentCfg, fp) - }) + AfterEach(func() { + By(fmt.Sprintf("%s step: expecting that the router can build the config", step)) + currentCfg, err := router.BuildConfig() + Expect(err).To(Succeed()) + By(fmt.Sprintf("%s step: expecting that the formed alertmanager config is correct", step)) + fp := freeport.GetFreePort() - It("should be able to dynamically update alert routing", func() { - step = "dynamic-alert-routing" - tmpConfigDir := env.GenerateNewTempDirectory("webhook") - err := os.MkdirAll(tmpConfigDir, 0755) - Expect(err).To(Succeed()) - By("Creating some test webhook servers") + alerting.ExpectAlertManagerConfigToBeValid(env.Context(), env, tmpConfigDir, step+".yaml", currentCfg, fp) + }) - servers := alerting.CreateWebhookServer(env, 3) - server1, server2, server3 := servers[0], servers[1], servers[2] + It("should be able to dynamically update alert routing", func() { + step = "dynamic-alert-routing" + tmpConfigDir := env.GenerateNewTempDirectory("webhook") + err := os.MkdirAll(tmpConfigDir, 0755) + Expect(err).To(Succeed()) + By("Creating some test webhook servers") - condId1, condId2, condId3 := uuid.New().String(), uuid.New().String(), uuid.New().String() - ns := "test" - By("routing to a subset of the test webhook servers") - details1 := &alertingv1.EndpointImplementation{ - Title: "test1", - Body: "test1", - } - details2 := &alertingv1.EndpointImplementation{ - Title: "test2", - Body: "test2", - } - details3 := &alertingv1.EndpointImplementation{ - Title: "test3", - Body: "test3", - } - suiteSpec := &testSpecSuite{ - name: "dynamic-alert-routing", - defaultServer: defaultHook, - specs: []*testSpec{ - { - namespace: ns, - id: condId1, - servers: []*alerting.MockIntegrationWebhookServer{server1}, - details: details1, - }, - { - namespace: ns, - id: condId2, - servers: []*alerting.MockIntegrationWebhookServer{server1, server2}, - details: details2, - }, - { - namespace: ns, - id: condId3, - servers: []*alerting.MockIntegrationWebhookServer{server1, server2, server3}, - details: details3, - }, - }, - } - By("setting the router to the namespace specs") - for _, spec := range suiteSpec.specs { - endpoints := lo.Map( - spec.servers, - func(server *alerting.MockIntegrationWebhookServer, _ int) *alertingv1.FullAttachedEndpoint { - return &alertingv1.FullAttachedEndpoint{ - AlertEndpoint: server.Endpoint(), - EndpointId: server.Endpoint().Id, - Details: spec.details, - } - }) - err = router.SetNamespaceSpec( - spec.namespace, - spec.id, - &alertingv1.FullAttachedEndpoints{ - Items: endpoints, - Details: spec.details, - InitialDelay: durationpb.New(time.Second * 1), - ThrottlingDuration: durationpb.New(time.Second * 1), - }, - ) - Expect(err).To(Succeed()) - } + servers := alerting.CreateWebhookServer(env, 3) + server1, server2, server3 := servers[0], servers[1], servers[2] - By("running alertmanager with this config") - amPort, ca := alerting.RunAlertManager(env, router, tmpConfigDir, step+".yaml") - alertingClient = client.NewClient( - nil, - fmt.Sprintf("http://localhost:%d", amPort), - fmt.Sprintf("http://localhost:%d", 0), - ) - defer ca() - By("sending alerts to each condition in the router") - for _, spec := range suiteSpec.specs { - err := alertingClient.AlertClient().PostAlarm(context.TODO(), client.AlertObject{ - Id: spec.id, - Labels: map[string]string{ - ns: spec.id, - }, - Annotations: map[string]string{}, + condId1, condId2, condId3 := uuid.New().String(), uuid.New().String(), uuid.New().String() + ns := "test" + By("routing to a subset of the test webhook servers") + details1 := &alertingv1.EndpointImplementation{ + Title: "test1", + Body: "test1", + } + details2 := &alertingv1.EndpointImplementation{ + Title: "test2", + Body: "test2", + } + details3 := &alertingv1.EndpointImplementation{ + Title: "test3", + Body: "test3", + } + suiteSpec := &testSpecSuite{ + name: "dynamic-alert-routing", + defaultServer: defaultHook, + specs: []*testSpec{ + { + namespace: ns, + id: condId1, + servers: []*alerting.MockIntegrationWebhookServer{server1}, + details: details1, + }, + { + namespace: ns, + id: condId2, + servers: []*alerting.MockIntegrationWebhookServer{server1, server2}, + details: details2, + }, + { + namespace: ns, + id: condId3, + servers: []*alerting.MockIntegrationWebhookServer{server1, server2, server3}, + details: details3, + }, + }, + } + By("setting the router to the namespace specs") + for _, spec := range suiteSpec.specs { + endpoints := lo.Map( + spec.servers, + func(server *alerting.MockIntegrationWebhookServer, _ int) *alertingv1.FullAttachedEndpoint { + return &alertingv1.FullAttachedEndpoint{ + AlertEndpoint: server.Endpoint(), + EndpointId: server.Endpoint().Id, + Details: spec.details, + } }) - Expect(err).To(Succeed()) - } - Eventually(func() error { - return suiteSpec.ExpectAlertsToBeRouted(amPort) - }, time.Second*30, time.Second*1).Should(Succeed()) - ca() - server1.ClearBuffer() - server2.ClearBuffer() - server3.ClearBuffer() - defaultHook.ClearBuffer() + err = router.SetNamespaceSpec( + spec.namespace, + spec.id, + &alertingv1.FullAttachedEndpoints{ + Items: endpoints, + Details: spec.details, + InitialDelay: durationpb.New(time.Second * 1), + ThrottlingDuration: durationpb.New(time.Second * 1), + }, + ) + Expect(err).To(Succeed()) + } - By("deleting a random server endpoint") - // ok - err = router.DeleteEndpoint(suiteSpec.specs[0].servers[0].Endpoint().Id) + By("running alertmanager with this config") + amPort, ca := alerting.RunAlertManager(env, router, tmpConfigDir, step+".yaml") + alertingClient = client.NewClient( + nil, + fmt.Sprintf("http://localhost:%d", amPort), + fmt.Sprintf("http://localhost:%d", 0), + ) + defer ca() + By("sending alerts to each condition in the router") + for _, spec := range suiteSpec.specs { + err := alertingClient.AlertClient().PostAlarm(context.TODO(), client.AlertObject{ + Id: spec.id, + Labels: map[string]string{ + ns: spec.id, + }, + Annotations: map[string]string{}, + }) Expect(err).To(Succeed()) - for _, spec := range suiteSpec.specs { - spec.servers = spec.servers[1:] - } + } + Eventually(func() error { + return suiteSpec.ExpectAlertsToBeRouted(amPort) + }, time.Second*30, time.Second*1).Should(Succeed()) + ca() + server1.ClearBuffer() + server2.ClearBuffer() + server3.ClearBuffer() + defaultHook.ClearBuffer() - amPort2, ca2 := alerting.RunAlertManager(env, router, tmpConfigDir, step+".yaml") - alertingClient2 = client.NewClient( - nil, - fmt.Sprintf("http://localhost:%d", amPort2), - fmt.Sprintf("http://localhost:%d", 0), - ) - defer ca2() - By("sending alerts to each condition in the router") - for _, spec := range suiteSpec.specs { - err := alertingClient2.AlertClient().PostAlarm(context.TODO(), client.AlertObject{ - Id: spec.id, - Labels: map[string]string{ - ns: spec.id, - }, - Annotations: map[string]string{}, - }) - Expect(err).To(Succeed()) - } - Eventually(func() error { - return suiteSpec.ExpectAlertsToBeRouted(amPort2) - }, time.Second*30, time.Second*1).Should(Succeed()) - ca2() + By("deleting a random server endpoint") + // ok + err = router.DeleteEndpoint(suiteSpec.specs[0].servers[0].Endpoint().Id) + Expect(err).To(Succeed()) + for _, spec := range suiteSpec.specs { + spec.servers = spec.servers[1:] + } - By("updating an endpoint to another endpoint") + amPort2, ca2 := alerting.RunAlertManager(env, router, tmpConfigDir, step+".yaml") + alertingClient2 = client.NewClient( + nil, + fmt.Sprintf("http://localhost:%d", amPort2), + fmt.Sprintf("http://localhost:%d", 0), + ) + defer ca2() + By("sending alerts to each condition in the router") + for _, spec := range suiteSpec.specs { + err := alertingClient2.AlertClient().PostAlarm(context.TODO(), client.AlertObject{ + Id: spec.id, + Labels: map[string]string{ + ns: spec.id, + }, + Annotations: map[string]string{}, + }) + Expect(err).To(Succeed()) + } + Eventually(func() error { + return suiteSpec.ExpectAlertsToBeRouted(amPort2) + }, time.Second*30, time.Second*1).Should(Succeed()) + ca2() - server1.ClearBuffer() - server2.ClearBuffer() - server3.ClearBuffer() - defaultHook.ClearBuffer() + By("updating an endpoint to another endpoint") - err = router.UpdateEndpoint(server2.Endpoint().Id, server1.Endpoint()) - Expect(err).To(Succeed()) - for _, spec := range suiteSpec.specs { - if len(spec.servers) != 0 { - spec.servers[0] = server1 - } - } + server1.ClearBuffer() + server2.ClearBuffer() + server3.ClearBuffer() + defaultHook.ClearBuffer() - By("send an an alert to each specs") - amPort3, ca3 := alerting.RunAlertManager(env, router, tmpConfigDir, step+".yaml") - defer ca3() - alertingClient3 = client.NewClient( - nil, - fmt.Sprintf("http://localhost:%d", amPort3), - fmt.Sprintf("http://localhost:%d", 0), - ) - By("sending alerts to each condition in the router") - for _, spec := range suiteSpec.specs { - err := alertingClient3.AlertClient().PostAlarm(context.TODO(), client.AlertObject{ - Id: spec.id, - Labels: map[string]string{ - ns: spec.id, - }, - Annotations: map[string]string{}, - }) - Expect(err).To(Succeed()) + err = router.UpdateEndpoint(server2.Endpoint().Id, server1.Endpoint()) + Expect(err).To(Succeed()) + for _, spec := range suiteSpec.specs { + if len(spec.servers) != 0 { + spec.servers[0] = server1 } - Eventually(func() error { - return suiteSpec.ExpectAlertsToBeRouted(amPort3) - }, time.Second*30, time.Second*1).Should(Succeed()) - ca3() - }) + } + + By("send an an alert to each specs") + amPort3, ca3 := alerting.RunAlertManager(env, router, tmpConfigDir, step+".yaml") + defer ca3() + alertingClient3 = client.NewClient( + nil, + fmt.Sprintf("http://localhost:%d", amPort3), + fmt.Sprintf("http://localhost:%d", 0), + ) + By("sending alerts to each condition in the router") + for _, spec := range suiteSpec.specs { + err := alertingClient3.AlertClient().PostAlarm(context.TODO(), client.AlertObject{ + Id: spec.id, + Labels: map[string]string{ + ns: spec.id, + }, + Annotations: map[string]string{}, + }) + Expect(err).To(Succeed()) + } + Eventually(func() error { + return suiteSpec.ExpectAlertsToBeRouted(amPort3) + }, time.Second*30, time.Second*1).Should(Succeed()) + ca3() }) }) -} +}) type testSpecSuite struct { name string diff --git a/test/plugins/alerting/slo_test.go b/test/plugins/alerting/slo_test.go index 756ffee1ee..6c61f78ec0 100644 --- a/test/plugins/alerting/slo_test.go +++ b/test/plugins/alerting/slo_test.go @@ -33,6 +33,7 @@ const ( var _ = Describe("SLO Alerting", Ordered, Label("integration"), func() { var sloClient slov1.SLOClient + var env *test.Environment BeforeAll(func() { env = &test.Environment{} Expect(env).NotTo(BeNil())