From 19a6ce34bf37ee41dc526d5d4a4da8119d696705 Mon Sep 17 00:00:00 2001 From: Mohamed Mahmoud Date: Fri, 28 Oct 2022 17:23:43 -0400 Subject: [PATCH 1/4] OCPBUGS-2934: unlink events unix socket during shutdown to be to reuse unix socket we have to unlink them when the events container is shuting down Signed-off-by: Mohamed Mahmoud --- cmd/syslog/syslog.go | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/cmd/syslog/syslog.go b/cmd/syslog/syslog.go index df90dbe7..aa1f903d 100644 --- a/cmd/syslog/syslog.go +++ b/cmd/syslog/syslog.go @@ -3,6 +3,9 @@ package main import ( "fmt" "log" + "os" + "os/signal" + "syscall" "gopkg.in/mcuadros/go-syslog.v2" ) @@ -21,13 +24,27 @@ func main() { server.SetHandler(handler) if err := server.ListenUnixgram(listenAddress); err != nil { - log.Fatal(err) + log.Fatalf("failed to listen to syslog unix socket %s, err %s", listenAddress, err) } if err := server.Boot(); err != nil { - log.Fatal(err) + log.Fatalf("failed to boot syslog server err %s", err) } + // Unix sockets must be unlink()ed before being reused again. + // Handle common process-killing signals so we can gracefully shut down: + sigc := make(chan os.Signal, 1) + signal.Notify(sigc, os.Interrupt, os.Kill, syscall.SIGTERM) + go func(c chan os.Signal) { + // Wait for a SIGINT or SIGKILL: + sig := <-c + log.Printf("Caught signal %s: shutting down.", sig) + // Stop listening (and unlink the socket if unix type): + _ = server.Kill() + _ = os.Remove(listenAddress) + os.Exit(0) + }(sigc) + go func(channel syslog.LogPartsChannel) { for logParts := range channel { fmt.Printf("%s %s %s\n", logParts["timestamp"], logParts["hostname"], logParts["content"]) From 24c6846dc6ba3dc29021ed0fbac0f88f50f5483a Mon Sep 17 00:00:00 2001 From: Martin Kennelly Date: Mon, 24 Oct 2022 10:58:02 +0100 Subject: [PATCH 2/4] E2E tests: add basic disruption tests Signed-off-by: Martin Kennelly --- test/e2e/daemonset/daemonset.go | 28 +- test/e2e/deployment/deployment.go | 64 ++++ test/e2e/functional/tests/e2e.go | 352 +++++++++++++----- .../ingress-node-firewall.go | 10 + test/e2e/pods/pods.go | 21 +- 5 files changed, 374 insertions(+), 101 deletions(-) create mode 100644 test/e2e/deployment/deployment.go diff --git a/test/e2e/daemonset/daemonset.go b/test/e2e/daemonset/daemonset.go index 26caade3..a8074eb9 100644 --- a/test/e2e/daemonset/daemonset.go +++ b/test/e2e/daemonset/daemonset.go @@ -15,11 +15,33 @@ import ( "k8s.io/apimachinery/pkg/util/wait" ) -func WaitForDaemonSetReady(client *testclient.ClientSet, ds *appsv1.DaemonSet, namespace, name string, retryInterval, timeout time.Duration) error { +func GetDaemonSet(client *testclient.ClientSet, namespace, name string, timeout time.Duration) (*appsv1.DaemonSet, error) { + var daemonSet appsv1.DaemonSet + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + err := client.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, &daemonSet) + return &daemonSet, err +} + +func GetDaemonSetWithRetry(client *testclient.ClientSet, namespace, name string, retryInterval, timeout time.Duration) (*appsv1.DaemonSet, error) { + var daemonSet *appsv1.DaemonSet + err := wait.PollImmediate(retryInterval, timeout, func() (done bool, err error) { + if daemonSet, err = GetDaemonSet(client, namespace, name, timeout); err != nil { + if errors.IsNotFound(err) { + return false, nil + } + return false, err + } + return true, err + }) + return daemonSet, err +} + +func WaitForDaemonSetReady(client *testclient.ClientSet, ds *appsv1.DaemonSet, retryInterval, timeout time.Duration) error { err := wait.PollImmediate(retryInterval, timeout, func() (done bool, err error) { ctx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() - err = client.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, ds) + err = client.Get(ctx, types.NamespacedName{Name: ds.Name, Namespace: ds.Namespace}, ds) if err != nil { if errors.IsNotFound(err) { return false, nil @@ -33,7 +55,7 @@ func WaitForDaemonSetReady(client *testclient.ClientSet, ds *appsv1.DaemonSet, n } }) if err != nil { - return fmt.Errorf("failed to wait for daemonset %s in namespace %s to be ready: %v", ds.Name, namespace, err) + return fmt.Errorf("failed to wait for daemonset %s in namespace %s to be ready: %v", ds.Name, ds.Namespace, err) } return nil diff --git a/test/e2e/deployment/deployment.go b/test/e2e/deployment/deployment.go new file mode 100644 index 00000000..fda7bb7f --- /dev/null +++ b/test/e2e/deployment/deployment.go @@ -0,0 +1,64 @@ +package deployment + +import ( + "context" + "fmt" + "time" + + testclient "github.com/openshift/ingress-node-firewall/test/e2e/client" + + appsv1 "k8s.io/api/apps/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" +) + +func GetDeployment(client *testclient.ClientSet, namespace, name string, timeout time.Duration) (*appsv1.Deployment, error) { + var deployment appsv1.Deployment + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + err := client.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, &deployment) + return &deployment, err +} + +func GetDeploymentWithRetry(client *testclient.ClientSet, namespace, name string, retryInterval, timeout time.Duration) (*appsv1.Deployment, error) { + var deployment *appsv1.Deployment + err := wait.PollImmediate(retryInterval, timeout, func() (done bool, err error) { + if deployment, err = GetDeployment(client, namespace, name, timeout); err != nil { + if errors.IsNotFound(err) { + return false, nil + } + return false, err + } + return true, nil + }) + + return deployment, err +} + +func WaitForDeploymentSetReady(client *testclient.ClientSet, deployment *appsv1.Deployment, retryInterval, + timeout time.Duration) error { + + err := wait.PollImmediate(retryInterval, timeout, func() (done bool, err error) { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + err = client.Get(ctx, types.NamespacedName{Name: deployment.Name, Namespace: deployment.Namespace}, deployment) + if err != nil { + if errors.IsNotFound(err) { + return false, nil + } + return false, err + } + if *deployment.Spec.Replicas == deployment.Status.ReadyReplicas { + return true, nil + } else { + return false, nil + } + }) + if err != nil { + return fmt.Errorf("failed to wait for deployment %s in namespace %s to be ready: %v", deployment.Name, + deployment.Namespace, err) + } + + return nil +} diff --git a/test/e2e/functional/tests/e2e.go b/test/e2e/functional/tests/e2e.go index 1c309b75..a8500ff5 100644 --- a/test/e2e/functional/tests/e2e.go +++ b/test/e2e/functional/tests/e2e.go @@ -15,6 +15,7 @@ import ( inftestconsts "github.com/openshift/ingress-node-firewall/test/consts" testclient "github.com/openshift/ingress-node-firewall/test/e2e/client" "github.com/openshift/ingress-node-firewall/test/e2e/daemonset" + "github.com/openshift/ingress-node-firewall/test/e2e/deployment" "github.com/openshift/ingress-node-firewall/test/e2e/events" "github.com/openshift/ingress-node-firewall/test/e2e/exec" "github.com/openshift/ingress-node-firewall/test/e2e/icmp" @@ -36,14 +37,18 @@ import ( ) var ( - OperatorNameSpace = inftestconsts.DefaultOperatorNameSpace - retryInterval = time.Millisecond * 10 - timeout = time.Second * 40 - testInterface = "eth0" - sctpEnabled = false - isSingleStack = false - v4Enabled = false - v6Enabled = false + OperatorNameSpace = inftestconsts.DefaultOperatorNameSpace + retryInterval = time.Millisecond * 10 + timeout = time.Second * 40 + testInterface = "eth0" + sctpEnabled = false + isSingleStack = false + v4Enabled = false + v6Enabled = false + testArtifactsLabelKey = "e2e-inf-test" + testArtifactsLabelValue = "" + testArtifactsLabelMap = map[string]string{testArtifactsLabelKey: testArtifactsLabelValue} + testArtifactsLabelString = fmt.Sprintf("%s=%s", testArtifactsLabelKey, testArtifactsLabelValue) ) // testIngressNodeFirewall represents one IngressNodeFirewall object and is used to generate the sourceCIDRs and protocol @@ -110,18 +115,16 @@ func init() { v4Enabled = node.IPV4NetworkExists(testclient.Client, timeout) v6Enabled = node.IPV6NetworkExists(testclient.Client, timeout) + + if !v4Enabled && !v6Enabled { + panic("Unable to detect if cluster is IPV4 or IPV6") + } } var _ = Describe("Ingress Node Firewall", func() { - var ( - testArtifactsLabelKey = "e2e-inf-test" - testArtifactsLabelValue = "" - testArtifactsLabelMap = map[string]string{testArtifactsLabelKey: testArtifactsLabelValue} - testArtifactsLabelString = fmt.Sprintf("%s=%s", testArtifactsLabelKey, testArtifactsLabelValue) - ) - // Because we don't fully use BeforeAll / AfterAll to setup/teardown test infrastructure and if an error or interrupt occurs, // we ensure a clean cluster using AfterSuite. This normally is a no-op and is only valid when user sends interrupt or test failure. + AfterSuite(func() { Expect(pods.EnsureDeletedWithLabel(testclient.Client, OperatorNameSpace, testArtifactsLabelString, timeout)).Should(Succeed()) Expect(infwutils.DeleteIngressNodeFirewallsWithLabels(testclient.Client, OperatorNameSpace, @@ -837,8 +840,9 @@ var _ = Describe("Ingress Node Firewall", func() { Expect(err).ShouldNot(HaveOccurred()) //wait for daemonset to be rolled out infDaemonSet := &appsv1.DaemonSet{} - err = daemonset.WaitForDaemonSetReady(testclient.Client, infDaemonSet, OperatorNameSpace, - "ingress-node-firewall-daemon", retryInterval, timeout) + infDaemonSet.SetName("ingress-node-firewall-daemon") + infDaemonSet.SetNamespace(OperatorNameSpace) + err = daemonset.WaitForDaemonSetReady(testclient.Client, infDaemonSet, retryInterval, timeout) Expect(err).ShouldNot(HaveOccurred()) }) @@ -1044,39 +1048,176 @@ var _ = Describe("Ingress Node Firewall", func() { } }) + Context("Disruption", func() { + var ( + testName = "e2e-disruption" + clientOnePodName = "e2e-disruption-client-one" + serverOnePodName = "e2e-disruption-server-one" + serverLabelKey = "e2e-disruption-server" + serverLabelValue = "" + serverPodLabel = map[string]string{serverLabelKey: serverLabelValue, testArtifactsLabelKey: testArtifactsLabelValue} + clientLabelKey = "e2e-disruption-client" + clientLabelValue = "" + clientPodLabel = map[string]string{clientLabelKey: clientLabelValue, testArtifactsLabelKey: testArtifactsLabelValue} + ) + + It("controller manager functions after deletion", func() { + infConfigs, err := infwutils.GetIngressNodeFirewallConfigs(testclient.Client, timeout) + Expect(err).ShouldNot(HaveOccurred()) + By("Ensure IngressNodeFirewallConfigs doesn't exist") + Expect(len(infConfigs)).Should(BeNumerically("==", 0)) + By("Delete controller manager pods") + controllerManagerDeployment, err := deployment.GetDeploymentWithRetry(testclient.Client, OperatorNameSpace, + inftestconsts.IngressNodeFirewallOperatorDeploymentName, retryInterval, timeout) + Expect(err).ShouldNot(HaveOccurred()) + Expect(deployment.WaitForDeploymentSetReady(testclient.Client, controllerManagerDeployment, retryInterval, + timeout)).ShouldNot(HaveOccurred()) + Expect(pods.EnsureDeletedWithLabel(testclient.Client, OperatorNameSpace, + fmt.Sprintf("control-plane=%s", inftestconsts.IngressNodeFirewallOperatorDeploymentLabel), + timeout)).ShouldNot(HaveOccurred()) + Expect(deployment.WaitForDeploymentSetReady(testclient.Client, controllerManagerDeployment, retryInterval, + timeout)).ShouldNot(HaveOccurred()) + By("Ensure controller manager reacts to new IngressNodeFirewallConfig object") + config := &ingressnodefwv1alpha1.IngressNodeFirewallConfig{} + err = infwutils.LoadIngressNodeFirewallConfigFromFile(config, inftestconsts.IngressNodeFirewallConfigCRFile) + Expect(err).ToNot(HaveOccurred()) + config.SetNamespace(OperatorNameSpace) + Expect(infwutils.EnsureIngressNodeFirewallConfigExists(testclient.Client, config, timeout)).ShouldNot(HaveOccurred()) + defer infwutils.DeleteIngressNodeFirewallConfig(testclient.Client, config, retryInterval, timeout) + Eventually(func() bool { + if _, err = daemonset.GetDaemonSet(testclient.Client, OperatorNameSpace, + inftestconsts.IngressNodeFirewallDaemonsetName, timeout); err != nil { + return false + } + return true + }, timeout, retryInterval).Should(BeTrue()) + By("Ensure no controller manager restarts occurred") + restartCount, err := pods.GetPodWithLabelRestartCount(testclient.Client, OperatorNameSpace, + fmt.Sprintf("control-plane=%s", inftestconsts.IngressNodeFirewallOperatorDeploymentLabel), timeout) + Expect(err).ShouldNot(HaveOccurred()) + Expect(restartCount).Should(BeNumerically("==", 0)) + }) + + It("Existing IngresNodeFirewall policy persists after daemon deletion", func() { + config := &ingressnodefwv1alpha1.IngressNodeFirewallConfig{} + err := infwutils.LoadIngressNodeFirewallConfigFromFile(config, inftestconsts.IngressNodeFirewallConfigCRFile) + Expect(err).ToNot(HaveOccurred()) + config.SetNamespace(OperatorNameSpace) + Expect(infwutils.EnsureIngressNodeFirewallConfigExists(testclient.Client, config, timeout)).ShouldNot(HaveOccurred()) + defer infwutils.DeleteIngressNodeFirewallConfig(testclient.Client, config, retryInterval, timeout) + daemonSetDeployment, err := daemonset.GetDaemonSetWithRetry(testclient.Client, OperatorNameSpace, + inftestconsts.IngressNodeFirewallDaemonsetName, retryInterval, timeout) + Expect(err).ShouldNot(HaveOccurred()) + Expect(daemonset.WaitForDaemonSetReady(testclient.Client, daemonSetDeployment, retryInterval, timeout)).ShouldNot(HaveOccurred()) + clientPod, serverPod, cleanupPodsFn, err := getClientServerTestPods(testclient.Client, + OperatorNameSpace, clientOnePodName, clientPodLabel, serverOnePodName, serverPodLabel) + Expect(err).ShouldNot(HaveOccurred()) + defer cleanupPodsFn() + inf, err := getICMPEchoBlockINF(clientPod, testName, v4Enabled, v6Enabled, isSingleStack) + Expect(err).ShouldNot(HaveOccurred()) + Expect(infwutils.CreateIngressNodeFirewall(testclient.Client, inf, timeout)).ShouldNot(HaveOccurred()) + By("Confirm connectivity is affected by IngressNodeFirewall policy") + if v4Enabled { + Eventually(func() bool { + return icmp.IsConnectivityOK(testclient.Client, ingressnodefwv1alpha1.ProtocolTypeICMP, clientPod, pods.GetIPV4(serverPod.Status.PodIPs)) + }, timeout, retryInterval).Should(BeFalse()) + } + + if !isSingleStack && v6Enabled { + Eventually(func() bool { + return icmp.IsConnectivityOK(testclient.Client, ingressnodefwv1alpha1.ProtocolTypeICMP6, clientPod, pods.GetIPV6(serverPod.Status.PodIPs)) + }, timeout, retryInterval).Should(BeFalse()) + + } + By("Delete node daemon on node where policy will be applied") + daemonSetPod, err := daemonset.GetDaemonSetOnNode(testclient.Client, OperatorNameSpace, serverPod.Spec.NodeName) + Expect(err).ShouldNot(HaveOccurred()) + Expect(pods.EnsureDeleted(testclient.Client, daemonSetPod, timeout)).ShouldNot(HaveOccurred()) + Expect(daemonset.WaitForDaemonSetReady(testclient.Client, daemonSetDeployment, retryInterval, timeout)).ShouldNot(HaveOccurred()) + By("Confirm IngressNodeFirewall policy is unaffected after daemon restart") + if v4Enabled { + Eventually(func() bool { + return icmp.IsConnectivityOK(testclient.Client, ingressnodefwv1alpha1.ProtocolTypeICMP, clientPod, pods.GetIPV4(serverPod.Status.PodIPs)) + }, timeout, retryInterval).Should(BeFalse()) + } + + if !isSingleStack && v6Enabled { + Eventually(func() bool { + return icmp.IsConnectivityOK(testclient.Client, ingressnodefwv1alpha1.ProtocolTypeICMP6, clientPod, pods.GetIPV6(serverPod.Status.PodIPs)) + }, timeout, retryInterval).Should(BeFalse()) + + } + Expect(infwutils.DeleteIngressNodeFirewall(testclient.Client, inf, timeout)).ShouldNot(HaveOccurred()) + }) + + It("IngressNodeFirewall policy is configurable after daemon deletion", func() { + config := &ingressnodefwv1alpha1.IngressNodeFirewallConfig{} + err := infwutils.LoadIngressNodeFirewallConfigFromFile(config, inftestconsts.IngressNodeFirewallConfigCRFile) + Expect(err).ToNot(HaveOccurred()) + config.SetNamespace(OperatorNameSpace) + Expect(infwutils.EnsureIngressNodeFirewallConfigExists(testclient.Client, config, timeout)).ShouldNot(HaveOccurred()) + defer infwutils.DeleteIngressNodeFirewallConfig(testclient.Client, config, retryInterval, timeout) + daemonSetDeployment, err := daemonset.GetDaemonSetWithRetry(testclient.Client, OperatorNameSpace, + inftestconsts.IngressNodeFirewallDaemonsetName, retryInterval, timeout) + Expect(err).ShouldNot(HaveOccurred()) + Expect(daemonset.WaitForDaemonSetReady(testclient.Client, daemonSetDeployment, retryInterval, timeout)).ShouldNot(HaveOccurred()) + clientPod, serverPod, cleanupPodsFn, err := getClientServerTestPods(testclient.Client, + OperatorNameSpace, clientOnePodName, clientPodLabel, serverOnePodName, serverPodLabel) + Expect(err).ShouldNot(HaveOccurred()) + defer cleanupPodsFn() + inf, err := getICMPEchoBlockINF(clientPod, testName, v4Enabled, v6Enabled, isSingleStack) + Expect(err).ShouldNot(HaveOccurred()) + Expect(infwutils.CreateIngressNodeFirewall(testclient.Client, inf, timeout)).ShouldNot(HaveOccurred()) + By("Confirm connectivity is affected by IngressNodeFirewall policy") + if v4Enabled { + Eventually(func() bool { + return icmp.IsConnectivityOK(testclient.Client, ingressnodefwv1alpha1.ProtocolTypeICMP, clientPod, pods.GetIPV4(serverPod.Status.PodIPs)) + }, timeout, retryInterval).Should(BeFalse()) + } + + if !isSingleStack && v6Enabled { + Eventually(func() bool { + return icmp.IsConnectivityOK(testclient.Client, ingressnodefwv1alpha1.ProtocolTypeICMP6, clientPod, pods.GetIPV6(serverPod.Status.PodIPs)) + }, timeout, retryInterval).Should(BeFalse()) + + } + By("Delete node daemon on node where policy will be applied") + daemonSetPod, err := daemonset.GetDaemonSetOnNode(testclient.Client, OperatorNameSpace, serverPod.Spec.NodeName) + Expect(err).ShouldNot(HaveOccurred()) + Expect(pods.EnsureDeleted(testclient.Client, daemonSetPod, timeout)).ShouldNot(HaveOccurred()) + Expect(daemonset.WaitForDaemonSetReady(testclient.Client, daemonSetDeployment, retryInterval, timeout)).ShouldNot(HaveOccurred()) + By("Delete IngressNodeFirewall removes policy") + Expect(infwutils.DeleteIngressNodeFirewall(testclient.Client, inf, timeout)).ShouldNot(HaveOccurred()) + if v4Enabled { + Eventually(func() bool { + return icmp.IsConnectivityOK(testclient.Client, ingressnodefwv1alpha1.ProtocolTypeICMP, clientPod, pods.GetIPV4(serverPod.Status.PodIPs)) + }, timeout, retryInterval).Should(BeTrue()) + } + + if !isSingleStack && v6Enabled { + Eventually(func() bool { + return icmp.IsConnectivityOK(testclient.Client, ingressnodefwv1alpha1.ProtocolTypeICMP6, clientPod, pods.GetIPV6(serverPod.Status.PodIPs)) + }, timeout, retryInterval).Should(BeTrue()) + + } + }) + }) + Context("Statistics", func() { var config *ingressnodefwv1alpha1.IngressNodeFirewallConfig - var configCRExisted bool BeforeEach(func() { var err error config = &ingressnodefwv1alpha1.IngressNodeFirewallConfig{} + config.SetLabels(testArtifactsLabelMap) err = infwutils.LoadIngressNodeFirewallConfigFromFile(config, inftestconsts.IngressNodeFirewallConfigCRFile) Expect(err).ToNot(HaveOccurred()) config.SetNamespace(OperatorNameSpace) - configCRExisted = true - err = testclient.Client.Get(context.Background(), goclient.ObjectKey{Namespace: config.Namespace, Name: config.Name}, config) - if errors.IsNotFound(err) { - configCRExisted = false - Expect(testclient.Client.Create(context.Background(), config)).Should(Succeed()) - } else { - Expect(err).ToNot(HaveOccurred()) - } + Expect(infwutils.EnsureIngressNodeFirewallConfigExists(testclient.Client, config, timeout)).ShouldNot(HaveOccurred()) }) AfterEach(func() { - if !configCRExisted { - daemonset, err := testclient.Client.DaemonSets(config.Namespace).Get(context.Background(), inftestconsts.IngressNodeFirewallDaemonsetName, metav1.GetOptions{}) - if err != nil { - if !errors.IsNotFound(err) { - Expect(err).ToNot(HaveOccurred()) - } - } else { - Expect(daemonset.OwnerReferences).ToNot(BeNil()) - Expect(daemonset.OwnerReferences[0].Kind).To(Equal("IngressNodeFirewallConfig")) - } - infwutils.DeleteIngressNodeFirewallConfig(testclient.Client, config, retryInterval, timeout) - } + infwutils.DeleteIngressNodeFirewallConfig(testclient.Client, config, retryInterval, timeout) }) It("should expose at least one endpoint via a daemon metrics service", func() { @@ -1108,7 +1249,6 @@ var _ = Describe("Ingress Node Firewall", func() { It("should expose daemon metrics", func() { var ( - inf = &ingressnodefwv1alpha1.IngressNodeFirewall{} clientOnePodName = "e2e-inf-client-one" serverOnePodName = "e2e-inf-server-one" serverLabelKey = "e2e-inf-server" @@ -1118,69 +1258,28 @@ var _ = Describe("Ingress Node Firewall", func() { clientLabelValue = "" clientPodLabel = map[string]string{clientLabelKey: clientLabelValue, testArtifactsLabelKey: testArtifactsLabelValue} ) - clientPod, clientCleanupFn, err := transport.GetAndEnsureRunningClient(testclient.Client, clientOnePodName, OperatorNameSpace, clientPodLabel, clientPodLabel, - serverPodLabel, retryInterval, timeout) + clientPod, serverPod, cleanupPodsFn, err := getClientServerTestPods(testclient.Client, + OperatorNameSpace, clientOnePodName, clientPodLabel, serverOnePodName, serverPodLabel) Expect(err).ShouldNot(HaveOccurred()) - defer clientCleanupFn() - sourceCIDRs := make([]string, 0) - if v4Enabled { - _, v4CIDR, err := net.ParseCIDR(fmt.Sprintf("%s/%s", pods.GetIPV4(clientPod.Status.PodIPs), "32")) - Expect(err).ShouldNot(HaveOccurred()) - sourceCIDRs = append(sourceCIDRs, v4CIDR.String()) - } - if !isSingleStack && v6Enabled { - // no-op if v6 tests are disabled - _, v6CIDR, err := net.ParseCIDR(fmt.Sprintf("%s/%s", pods.GetIPV6(clientPod.Status.PodIPs), "128")) - Expect(err).ShouldNot(HaveOccurred()) - sourceCIDRs = append(sourceCIDRs, v6CIDR.String()) - } - serverPod, serverCleanupFn, err := transport.GetAndEnsureRunningTransportServer(testclient.Client, serverOnePodName, OperatorNameSpace, - serverPodLabel, serverPodLabel, clientPodLabel, retryInterval, timeout) + defer cleanupPodsFn() + inf, err := getICMPEchoBlockINF(clientPod, "e2e-inf-daemon-metrics", v4Enabled, v6Enabled, isSingleStack) Expect(err).ShouldNot(HaveOccurred()) - defer serverCleanupFn() - inf.SetName("e2e-inf-daemon-metrics") - inf.SetLabels(testArtifactsLabelMap) - infwutils.DefineWithWorkerNodeSelector(inf) - infwutils.DefineWithInterface(inf, testInterface) - protoRules := make([]ingressnodefwv1alpha1.IngressNodeFirewallProtocolRule, 0) + Expect(infwutils.CreateIngressNodeFirewall(testclient.Client, inf, timeout)).ShouldNot(HaveOccurred()) + defer func() { + Expect(infwutils.DeleteIngressNodeFirewall(testclient.Client, inf, timeout)).ShouldNot(HaveOccurred()) + }() if v4Enabled { - protoRules = append(protoRules, infwutils.GetICMPBlockRule(ingressnodefwv1alpha1.ProtocolTypeICMP, 1, 8, 0)) - } - if !isSingleStack && v6Enabled { - protoRules = append(protoRules, infwutils.GetICMPBlockRule(ingressnodefwv1alpha1.ProtocolTypeICMP6, 2, 128, 0)) + Eventually(func() bool { + return icmp.IsConnectivityOK(testclient.Client, ingressnodefwv1alpha1.ProtocolTypeICMP, clientPod, pods.GetIPV4(serverPod.Status.PodIPs)) + }, timeout, retryInterval).Should(BeFalse()) } - inf.Spec.Ingress = append(inf.Spec.Ingress, ingressnodefwv1alpha1.IngressNodeFirewallRules{ - SourceCIDRs: sourceCIDRs, - FirewallProtocolRules: protoRules, - }) - - Eventually(func() error { - err := testclient.Client.Create(context.Background(), inf) - return err - }, timeout, retryInterval).Should(Succeed()) - - defer Eventually(func() bool { - err := testclient.Client.Delete(context.Background(), inf) - return errors.IsNotFound(err) - }, timeout, retryInterval).Should(BeTrue(), "Failed destination delete IngressNodeFirewall custom resource") - Eventually(func() bool { - if v4Enabled { - _, _, err = icmp.PingFromPod(testclient.Client, ingressnodefwv1alpha1.ProtocolTypeICMP, clientPod, pods.GetIPV4(serverPod.Status.PodIPs)) - if err == nil { - return false - } - } - - if !isSingleStack && v6Enabled { - _, _, err = icmp.PingFromPod(testclient.Client, ingressnodefwv1alpha1.ProtocolTypeICMP6, clientPod, pods.GetIPV6(serverPod.Status.PodIPs)) - if err == nil { - return false - } - } - return true - }).WithTimeout(timeout).Should(BeTrue()) + if !isSingleStack && v6Enabled { + Eventually(func() bool { + return icmp.IsConnectivityOK(testclient.Client, ingressnodefwv1alpha1.ProtocolTypeICMP6, clientPod, pods.GetIPV6(serverPod.Status.PodIPs)) + }, timeout, retryInterval).Should(BeFalse()) + } daemonsetPod, err := daemonset.GetDaemonSetOnNode(testclient.Client, OperatorNameSpace, serverPod.Spec.NodeName) Expect(err).ShouldNot(HaveOccurred()) var stdOut, stdError string @@ -1404,3 +1503,64 @@ func isConnectivitySeen(client *testclient.ClientSet, protocol ingressnodefwv1al panic("Unexpected protocol") } } + +func getClientServerTestPods(client *testclient.ClientSet, namespace, clientName string, clientLabel map[string]string, + serverName string, serverLabel map[string]string) (*corev1.Pod, *corev1.Pod, func(), error) { + clientPod, clientCleanupFn, err := transport.GetAndEnsureRunningClient(client, clientName, namespace, clientLabel, clientLabel, + serverLabel, retryInterval, timeout) + if err != nil { + return nil, nil, nil, err + } + serverPod, serverCleanupFn, err := transport.GetAndEnsureRunningTransportServer(client, serverName, + OperatorNameSpace, serverLabel, serverLabel, clientLabel, retryInterval, timeout) + if err != nil { + return nil, nil, nil, err + } + return clientPod, serverPod, func() { + clientCleanupFn() + serverCleanupFn() + }, nil +} + +func getPodSourceCIDRs(pod *corev1.Pod, v4, v6, singleStack bool) ([]string, error) { + sourceCIDRs := make([]string, 0) + if v4 { + _, v4CIDR, err := net.ParseCIDR(fmt.Sprintf("%s/%s", pods.GetIPV4(pod.Status.PodIPs), "32")) + if err != nil { + return nil, err + } + sourceCIDRs = append(sourceCIDRs, v4CIDR.String()) + } + if !singleStack && v6 { + _, v6CIDR, err := net.ParseCIDR(fmt.Sprintf("%s/%s", pods.GetIPV6(pod.Status.PodIPs), "128")) + if err != nil { + return nil, err + } + sourceCIDRs = append(sourceCIDRs, v6CIDR.String()) + } + return sourceCIDRs, nil +} + +func getICMPEchoBlockINF(pod *corev1.Pod, infName string, v4, v6, singleStack bool) (*ingressnodefwv1alpha1.IngressNodeFirewall, error) { + inf := &ingressnodefwv1alpha1.IngressNodeFirewall{} + inf.SetName(infName) + inf.SetLabels(testArtifactsLabelMap) + infwutils.DefineWithWorkerNodeSelector(inf) + infwutils.DefineWithInterface(inf, testInterface) + sourceCIDRs, err := getPodSourceCIDRs(pod, v4, v6, singleStack) + if err != nil { + return nil, err + } + protoRules := make([]ingressnodefwv1alpha1.IngressNodeFirewallProtocolRule, 0) + if v4 { + protoRules = append(protoRules, infwutils.GetICMPBlockRule(ingressnodefwv1alpha1.ProtocolTypeICMP, 1, 8, 0)) + } + if !singleStack && v6 { + protoRules = append(protoRules, infwutils.GetICMPBlockRule(ingressnodefwv1alpha1.ProtocolTypeICMP6, 2, 128, 0)) + } + inf.Spec.Ingress = append(inf.Spec.Ingress, ingressnodefwv1alpha1.IngressNodeFirewallRules{ + SourceCIDRs: sourceCIDRs, + FirewallProtocolRules: protoRules, + }) + return inf, nil +} diff --git a/test/e2e/ingress-node-firewall/ingress-node-firewall.go b/test/e2e/ingress-node-firewall/ingress-node-firewall.go index 5407cff1..24bc31a8 100644 --- a/test/e2e/ingress-node-firewall/ingress-node-firewall.go +++ b/test/e2e/ingress-node-firewall/ingress-node-firewall.go @@ -76,6 +76,16 @@ func EnsureIngressNodeFirewallConfigExists(client *testclient.ClientSet, config return err } +func GetIngressNodeFirewallConfigs(client *testclient.ClientSet, timeout time.Duration) ([]ingressnodefwv1alpha1.IngressNodeFirewallConfig, error) { + var infcList ingressnodefwv1alpha1.IngressNodeFirewallConfigList + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + if err := client.List(ctx, &infcList); err != nil { + return nil, err + } + return infcList.Items, nil +} + func CreateIngressNodeFirewall(client *testclient.ClientSet, inf *ingressnodefwv1alpha1.IngressNodeFirewall, timeout time.Duration) error { ctx, cancel := context.WithTimeout(context.Background(), timeout) diff --git a/test/e2e/pods/pods.go b/test/e2e/pods/pods.go index 10686a22..3258652f 100644 --- a/test/e2e/pods/pods.go +++ b/test/e2e/pods/pods.go @@ -53,8 +53,8 @@ func EnsureDeleted(client *testclient.ClientSet, pod *corev1.Pod, timeout time.D return nil } -func EnsureDeletedWithLabel(client *testclient.ClientSet, ns, label string, timeout time.Duration) error { - podList, err := client.Pods(ns).List(context.TODO(), metav1.ListOptions{ +func EnsureDeletedWithLabel(client *testclient.ClientSet, namespace, label string, timeout time.Duration) error { + podList, err := client.Pods(namespace).List(context.TODO(), metav1.ListOptions{ LabelSelector: label, }) if err != nil { @@ -68,6 +68,23 @@ func EnsureDeletedWithLabel(client *testclient.ClientSet, ns, label string, time return nil } +func GetPodWithLabelRestartCount(client *testclient.ClientSet, namespace, label string, timeout time.Duration) (int, error) { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + pods, err := client.Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: label}) + if err != nil { + return 0, err + } + var count int + for _, pod := range pods.Items { + for _, containerStatus := range pod.Status.ContainerStatuses { + count += int(containerStatus.RestartCount) + } + } + return count, nil +} + func GetIPV4(ips []corev1.PodIP) string { for _, ip := range ips { parsedIP := net.ParseIP(ip.IP) From 02a69990b00c3c417e4f021095585ef25d8846f9 Mon Sep 17 00:00:00 2001 From: Martin Kennelly Date: Sun, 30 Oct 2022 09:53:35 +0000 Subject: [PATCH 3/4] Daemon: add finalizer to IngressNodeFirewallNodeState Daemon may miss deletion of this object and therefore not handle any cleanup operations that needs to be performed. Wait for daemon to consume deletion update and only then allow this object to be deleted. Signed-off-by: Martin Kennelly --- ...ingressnodefirewallnodestate_controller.go | 45 +++++++++++++++---- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/controllers/ingressnodefirewallnodestate_controller.go b/controllers/ingressnodefirewallnodestate_controller.go index 6fa219a7..d0a822f2 100644 --- a/controllers/ingressnodefirewallnodestate_controller.go +++ b/controllers/ingressnodefirewallnodestate_controller.go @@ -26,6 +26,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ) // IngressNodeFirewallNodeStateReconciler reconciles a IngressNodeFirewallNodeState object @@ -38,6 +39,8 @@ type IngressNodeFirewallNodeStateReconciler struct { Stats *metrics.Statistics } +var ingressNodeFirewallFinalizer = "ingressnodefirewall.openshift.io/finalizer" + //+kubebuilder:rbac:groups=ingressnodefirewall.openshift.io,resources=ingressnodefirewallnodestates,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=ingressnodefirewall.openshift.io,namespace=ingress-node-firewall-system,resources=ingressnodefirewallnodestates,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=ingressnodefirewall.openshift.io,namespace=ingress-node-firewall-system,resources=ingressnodefirewallnodestates/status,verbs=get;update;patch @@ -62,15 +65,37 @@ func (r *IngressNodeFirewallNodeStateReconciler) Reconcile(ctx context.Context, nodeState := &infv1alpha1.IngressNodeFirewallNodeState{} err := r.Get(ctx, req.NamespacedName, nodeState) if err != nil { - if apierrors.IsNotFound(err) { - // Request object not found, could have been deleted after reconcile request. - // Owned objects are automatically garbage collected. For additional cleanup logic use finalizers. - // Return and don't requeue - return r.reconcileResource(ctx, req, nodeState, true) + // Expect not to find node state that has been deleted. Handling of deletion should have previously occurred, + // therefore we only ignore errors with reason 'StatusReasonNotFound'. + if !apierrors.IsNotFound(err) { + r.Log.Error(err, "unable to get IngressNodeFirewallNodeState") + return ctrl.Result{}, err + } + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + // check if node state is being deleted + if isNodeStateDeletionInProgress(nodeState) { + if controllerutil.ContainsFinalizer(nodeState, ingressNodeFirewallFinalizer) { + if result, err := r.reconcileResource(ctx, req, nodeState, true); err != nil { + r.Log.Error(err, "failed to reconcile IngressNodeFirewallNodeState that is being deleted") + return result, err + } + controllerutil.RemoveFinalizer(nodeState, ingressNodeFirewallFinalizer) + if err = r.Update(ctx, nodeState); err != nil { + return ctrl.Result{}, err + } + } + // stop reconciliation as node state is being deleted + return ctrl.Result{}, nil + } else { + // ensure node state contains finalizer if object is not being deleted + if !controllerutil.ContainsFinalizer(nodeState, ingressNodeFirewallFinalizer) { + controllerutil.AddFinalizer(nodeState, ingressNodeFirewallFinalizer) + if err = r.Update(ctx, nodeState); err != nil { + return ctrl.Result{}, err + } } - // Error reading the object - requeue the request. - r.Log.Error(err, "Failed to get IngressNodeFirewallNodeState") - return ctrl.Result{}, err } r.Log.Info("Reconciling resource and programming bpf", "name", nodeState.Name, "namespace", nodeState.Namespace) @@ -96,3 +121,7 @@ func (r *IngressNodeFirewallNodeStateReconciler) reconcileResource( } return ctrl.Result{}, nil } + +func isNodeStateDeletionInProgress(nodeState *infv1alpha1.IngressNodeFirewallNodeState) bool { + return !nodeState.ObjectMeta.DeletionTimestamp.IsZero() +} From 957b7a99144bb50e984c6a8b93c186976aedf91c Mon Sep 17 00:00:00 2001 From: Martin Kennelly Date: Sun, 30 Oct 2022 09:56:48 +0000 Subject: [PATCH 4/4] Daemon: ensure we know about all managed interfaces If the daemon is restarted, we start not knowing about any of the managed interfaces. If we perform activities like reseting everything due to IngressNodeFirewallNodeState deletion for example, we will not cleanup all XDP programs. IngNodeFwController knows all pinned interfaces and ensure we know about them in ebpfSingleton. Future work is needed to refactor this to not have two seperate sources of information about the same thing. Signed-off-by: Martin Kennelly --- pkg/ebpf/ingress_node_firewall_loader.go | 9 +++++++++ pkg/ebpfsyncer/ebpfsyncer.go | 6 ++++++ 2 files changed, 15 insertions(+) diff --git a/pkg/ebpf/ingress_node_firewall_loader.go b/pkg/ebpf/ingress_node_firewall_loader.go index d3475694..c9cec316 100644 --- a/pkg/ebpf/ingress_node_firewall_loader.go +++ b/pkg/ebpf/ingress_node_firewall_loader.go @@ -376,6 +376,15 @@ func (infc *IngNodeFwController) loadPinnedLinks() error { return nil } +func (infc *IngNodeFwController) GetPinnedLinkNames() []string { + linkNames := make([]string, 0, len(infc.links)) + + for linkName, _ := range infc.links { + linkNames = append(linkNames, linkName) + } + return linkNames +} + // cleanup will delete an interface's eBPF objects. func (infc *IngNodeFwController) cleanup(ifName string) error { l, ok := infc.links[ifName] diff --git a/pkg/ebpfsyncer/ebpfsyncer.go b/pkg/ebpfsyncer/ebpfsyncer.go index 770db749..080a8a21 100644 --- a/pkg/ebpfsyncer/ebpfsyncer.go +++ b/pkg/ebpfsyncer/ebpfsyncer.go @@ -87,6 +87,12 @@ func (e *ebpfSingleton) SyncInterfaceIngressRules( return err } + // Ensure IngNodeFwController's pinned links and our managed interfaces align + // TODO: refactor to not have managed interfaces names from two sources + for _, linkName := range e.c.GetPinnedLinkNames() { + e.managedInterfaces[linkName] = struct{}{} + } + // For delete operations, detach all interfaces and run a cleanup, set managed interfaces and the // manager to empty / nil values, then return. if isDelete {