From 08b8105edc3a7da9316a77c1f149a9121665018c Mon Sep 17 00:00:00 2001 From: Periyasamy Palanisamy Date: Sun, 22 Jan 2023 17:03:28 +0100 Subject: [PATCH] Delete stale egress ip snat entries by node There is an unknown scenario where stale pod snat entries are found for egress ip which refers to an unassigned node. This change ensures those stale entries are deleted in sync stale logic and also making sure that egress ip reconcile does not break because of this entry, otherwise it may lead to "unexpectedly found multiple results for provided predicate" error in DeleteNATsWithPredicateOps. Signed-off-by: Periyasamy Palanisamy (cherry picked from commit 87fb483e211cc0e3cd409af5a5d5916e87200539) Signed-off-by: Periyasamy Palanisamy --- go-controller/pkg/libovsdbops/router.go | 2 +- go-controller/pkg/ovn/egressip.go | 23 ++- go-controller/pkg/ovn/egressip_test.go | 218 ++++++++++++++++++++++-- 3 files changed, 224 insertions(+), 19 deletions(-) diff --git a/go-controller/pkg/libovsdbops/router.go b/go-controller/pkg/libovsdbops/router.go index e1584d7e98..9470c4fc98 100644 --- a/go-controller/pkg/libovsdbops/router.go +++ b/go-controller/pkg/libovsdbops/router.go @@ -1007,7 +1007,7 @@ func DeleteNATsWithPredicateOps(nbClient libovsdbclient.Client, ops []libovsdb.O ModelPredicate: func(lr *nbdb.LogicalRouter) bool { return natUUIDs.HasAny(lr.Nat...) }, OnModelMutations: []interface{}{&router.Nat}, ErrNotFound: false, - BulkOp: false, + BulkOp: true, }, } diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 0874903b15..953ab300e5 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -1293,7 +1293,7 @@ func (oc *Controller) isEgressNodeReachable(egressNode *kapi.Node) bool { type egressIPCacheEntry struct { egressPods map[string]sets.String gatewayRouterIPs sets.String - egressIPs sets.String + egressIPs map[string]string } func (oc *Controller) syncEgressIPs(namespaces []interface{}) error { @@ -1454,8 +1454,9 @@ func (oc *Controller) syncStaleSNATRules(egressIPCache map[string]egressIPCacheE klog.Infof("syncStaleSNATRules will delete %s due to logical ip: %v", egressIPName, item) return true } - if !cacheEntry.egressIPs.Has(item.ExternalIP) { - klog.Infof("syncStaleSNATRules will delete %s due to external ip: %v", egressIPName, item) + if node, ok := cacheEntry.egressIPs[item.ExternalIP]; !ok || + item.LogicalPort == nil || *item.LogicalPort != types.K8sPrefix+node { + klog.Infof("syncStaleSNATRules will delete %s due to external ip or stale logical port: %v", egressIPName, item) return true } return false @@ -1495,6 +1496,18 @@ func (oc *Controller) syncStaleSNATRules(egressIPCache map[string]egressIPCacheE if len(errors) > 0 { return utilerrors.NewAggregate(errors) } + // The routers length 0 check is needed because some of ovnk master restart unit tests have + // router object referring to SNAT's UUID string instead of actual UUID (though it may not + // happen in real scenario). Hence this check is needed to delete those stale SNATs as well. + if len(routers) == 0 { + predicate := func(item *nbdb.NAT) bool { + return natIds.Has(item.UUID) + } + ops, err = libovsdbops.DeleteNATsWithPredicateOps(oc.nbClient, ops, predicate) + if err != nil { + return fmt.Errorf("unable to delete stale SNATs err: %v", err) + } + } _, err = libovsdbops.TransactAndCheck(oc.nbClient, ops) if err != nil { @@ -1518,7 +1531,7 @@ func (oc *Controller) generateCacheForEgressIP() (map[string]egressIPCacheEntry, egressIPCache[egressIP.Name] = egressIPCacheEntry{ egressPods: make(map[string]sets.String), gatewayRouterIPs: sets.NewString(), - egressIPs: sets.NewString(), + egressIPs: map[string]string{}, } for _, status := range egressIP.Status.Items { isEgressIPv6 := utilnet.IsIPv6String(status.EgressIP) @@ -1528,7 +1541,7 @@ func (oc *Controller) generateCacheForEgressIP() (map[string]egressIPCacheEntry, continue } egressIPCache[egressIP.Name].gatewayRouterIPs.Insert(gatewayRouterIP.String()) - egressIPCache[egressIP.Name].egressIPs.Insert(status.EgressIP) + egressIPCache[egressIP.Name].egressIPs[status.EgressIP] = status.Node } namespaces, err := oc.watchFactory.GetNamespacesBySelector(egressIP.Spec.NamespaceSelector) if err != nil { diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index b095e2c649..c087e892e5 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -6152,19 +6152,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", Nat: []string{"egressip-nat-UUID"}, }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV4IP, - ExternalIP: egressIP1, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, - Options: map[string]string{ - "stateless": "false", - }, - }, &nbdb.LogicalSwitchPort{ UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, @@ -6192,6 +6179,211 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) + ginkgo.It("should remove stale pod SNAT referring to wrong logical port after ovnkube-master is started", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.DisableSNATMultipleGWs = true + egressIP := "192.168.126.25" + node1IPv4 := "192.168.126.12/24" + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + node1Switch := &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + } + node1GR := &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + } + node1LSP := &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + } + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + node1GR, + node1LSP, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + node1Switch, + // This is unexpected snat entry where its logical port refers to an unavailable node + // and ensure this entry is removed as soon as ovnk master is up and running. + &nbdb.NAT{ + UUID: "egressip-nat-UUID2", + LogicalIP: podV4IP, + ExternalIP: egressIP, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: utilpointer.StringPtr("k8s-node2"), + Options: map[string]string{ + "stateless": "false", + }, + }, + }, + }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1}, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }, + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) + + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add("", util.GetLogicalPortName(egressPod.Namespace, egressPod.Name), "", nil, []*net.IPNet{n}) + + err := fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(util.GetLogicalPortName(egressPod.Namespace, egressPod.Name)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ePod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Get(context.TODO(), egressPod.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + egressPodIP, err := util.GetAllPodIPs(ePod) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + egressNetPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(egressNetPodIP.String()).To(gomega.Equal(egressPodIP[0].String())) + gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) + gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + podEIPSNAT := &nbdb.NAT{ + UUID: "egressip-nat-UUID1", + LogicalIP: podV4IP, + ExternalIP: egressIP, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: utilpointer.StringPtr("k8s-node1"), + Options: map[string]string{ + "stateless": "false", + }, + } + podReRoutePolicy := &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPodIP[0].String()), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: nodeLogicalRouterIPv4, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + UUID: "reroute-UUID1", + } + node1GR.Nat = []string{"egressip-nat-UUID1"} + node1LSP.Options = map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + } + expectedDatabaseStatewithPod := []libovsdbtest.TestData{ + podEIPSNAT, &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, podReRoutePolicy, &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, + }, node1GR, node1LSP, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, node1Switch} + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewithPod)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("should only get assigned EgressIPs which matches their subnet when the node is tagged", func() { app.Action = func(ctx *cli.Context) error {