From a6a00440b8e6609f4f3480cf878534286ec32248 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Tue, 23 May 2023 17:17:40 +0200 Subject: [PATCH 01/31] multi-homing, localnet: update localnet port network name This change attempts to simplify how the network admin configures the underlay of the network to which the secondary network overlays connect to; the user must configure the OVN bridge mappings by associating the name of the physical network to the name of the desired OVS bridge. Currently that is done via the following pattern: `_br-localnet:` An example would be: `tenantblue_br-localnet:ovsbr1` This commit simplifies that by removing the suffix; the bridge mappings would be: `:` And an example would be: `tenantblue:ovsbr1` Signed-off-by: Miguel Duarte Barroso --- go-controller/pkg/ovn/secondary_localnet_network_controller.go | 2 +- go-controller/pkg/types/const.go | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/go-controller/pkg/ovn/secondary_localnet_network_controller.go b/go-controller/pkg/ovn/secondary_localnet_network_controller.go index ba51023448..f0f014ed36 100644 --- a/go-controller/pkg/ovn/secondary_localnet_network_controller.go +++ b/go-controller/pkg/ovn/secondary_localnet_network_controller.go @@ -89,7 +89,7 @@ func (oc *SecondaryLocalnetNetworkController) Init() error { Addresses: []string{"unknown"}, Type: "localnet", Options: map[string]string{ - "network_name": oc.GetNetworkScopedName(types.LocalNetBridgeName), + "network_name": oc.GetNetworkName(), }, } intVlanID := int(oc.Vlan()) diff --git a/go-controller/pkg/types/const.go b/go-controller/pkg/types/const.go index b5e1309214..73c6a19eac 100644 --- a/go-controller/pkg/types/const.go +++ b/go-controller/pkg/types/const.go @@ -52,8 +52,6 @@ const ( OVNLocalnetSwitch = "ovn_localnet_switch" // types.OVNLocalnetPort is the name of localnet topology localnet port OVNLocalnetPort = "ovn_localnet_port" - // Local Bridge used for localnet topology network access - LocalNetBridgeName = "br-localnet" TransitSwitch = "transit_switch" TransitSwitchToRouterPrefix = "tstor-" From 374349423dc4443b48067e922b6e77ca5fe1b6e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Fri, 9 Jun 2023 10:57:23 +0000 Subject: [PATCH 02/31] e2e multi-homing: fix network name changing throughout the test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every time attachamentName() was being used, a new network name was being generated. If used more than once throughout a single test, the different parts of that test would use a different network name. Specifically for localnet tests, this meant that the bridge mapping would not be set with the same network name as the NAD network name. 
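For context, a minimal self-contained sketch of the failure mode this commit describes and of the constructor-based fix it introduces. This is not the exact test-suite code: `uniqueNadName` here is a stand-in (its real suffix scheme is not reproduced), and the types are trimmed to the two fields that matter for the bug.

```go
package main

import (
	"fmt"
	"math/rand"
)

// uniqueNadName stands in for the e2e helper of the same name: it derives a
// unique NAD network name from a base name. A random suffix is assumed here
// purely for illustration.
func uniqueNadName(originalNetName string) string {
	return fmt.Sprintf("%s-%d", originalNetName, rand.Intn(1_000_000))
}

type networkAttachmentConfigParams struct {
	name        string
	networkName string
}

// Before the fix: the network name is derived on every call, so two callers
// of attachmentName() on the same config can disagree (e.g. the NAD spec vs.
// the localnet bridge mapping).
func (p networkAttachmentConfigParams) attachmentName() string {
	if p.networkName != "" {
		return p.networkName
	}
	return uniqueNadName(p.name)
}

// After the fix: the name is resolved exactly once when the config is built,
// so every consumer of networkName sees the same value.
type networkAttachmentConfig struct {
	networkAttachmentConfigParams
}

func newNetworkAttachmentConfig(params networkAttachmentConfigParams) networkAttachmentConfig {
	cfg := networkAttachmentConfig{networkAttachmentConfigParams: params}
	if cfg.networkName == "" {
		cfg.networkName = uniqueNadName(cfg.name)
	}
	return cfg
}

func main() {
	params := networkAttachmentConfigParams{name: "tenantblue"}

	// Buggy pattern: almost certainly prints two different names.
	fmt.Println(params.attachmentName(), params.attachmentName())

	// Fixed pattern: prints the same name twice.
	cfg := newNetworkAttachmentConfig(params)
	fmt.Println(cfg.networkName, cfg.networkName)
}
```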
Signed-off-by: Jaime Caamaño Ruiz --- test/e2e/localnet-underlay.go | 2 +- test/e2e/multihoming.go | 87 +++++++++++++++++++---------------- test/e2e/multihoming_utils.go | 35 ++++++++------ 3 files changed, 71 insertions(+), 53 deletions(-) diff --git a/test/e2e/localnet-underlay.go b/test/e2e/localnet-underlay.go index 97e9edaa3f..d09600cbd7 100644 --- a/test/e2e/localnet-underlay.go +++ b/test/e2e/localnet-underlay.go @@ -35,7 +35,7 @@ func setupUnderlay(ovsPods []v1.Pod, portName string, nadConfig networkAttachmen if err := configureBridgeMappings( ovsPod.Name, defaultNetworkBridgeMapping(), - bridgeMapping(nadConfig.attachmentName(), bridgeName), + bridgeMapping(nadConfig.networkName, bridgeName), ); err != nil { return err } diff --git a/test/e2e/multihoming.go b/test/e2e/multihoming.go index 721e035fae..48a0a21956 100644 --- a/test/e2e/multihoming.go +++ b/test/e2e/multihoming.go @@ -53,10 +53,13 @@ var _ = Describe("Multi Homing", func() { nadClient, err = nadclient.NewForConfig(f.ClientConfig()) Expect(err).NotTo(HaveOccurred()) mnpClient, err = mnpclient.NewForConfig(f.ClientConfig()) + Expect(err).NotTo(HaveOccurred()) }) Context("A single pod with an OVN-K secondary network", func() { - table.DescribeTable("is able to get to the Running phase", func(netConfig networkAttachmentConfig, podConfig podConfiguration) { + table.DescribeTable("is able to get to the Running phase", func(netConfigParams networkAttachmentConfigParams, podConfig podConfiguration) { + netConfig := newNetworkAttachmentConfig(netConfigParams) + if netConfig.topology != "layer3" { if isInterconnectEnabled() { e2eskipper.Skipf( @@ -106,7 +109,7 @@ var _ = Describe("Multi Homing", func() { }, table.Entry( "when attaching to an L3 - routed - network", - networkAttachmentConfig{ + networkAttachmentConfigParams{ cidr: netCIDR(secondaryNetworkCIDR, netPrefixLengthPerNode), name: secondaryNetworkName, topology: "layer3", @@ -118,7 +121,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "when attaching to an L3 - routed - network with IPv6 network", - networkAttachmentConfig{ + networkAttachmentConfigParams{ cidr: netCIDR(secondaryIPv6CIDR, netPrefixLengthIPv6PerNode), name: secondaryNetworkName, topology: "layer3", @@ -130,7 +133,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "when attaching to an L2 - switched - network", - networkAttachmentConfig{ + networkAttachmentConfigParams{ cidr: secondaryFlatL2NetworkCIDR, name: secondaryNetworkName, topology: "layer2", @@ -142,7 +145,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "when attaching to an L2 - switched - network featuring `excludeCIDR`s", - networkAttachmentConfig{ + networkAttachmentConfigParams{ cidr: secondaryFlatL2NetworkCIDR, name: secondaryNetworkName, topology: "layer2", @@ -155,7 +158,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "when attaching to an L2 - switched - network without IPAM", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "layer2", }, @@ -166,7 +169,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "when attaching to an L2 - switched - network with an IPv6 subnet", - networkAttachmentConfig{ + networkAttachmentConfigParams{ cidr: secondaryIPv6CIDR, name: secondaryNetworkName, topology: "layer2", @@ -178,7 +181,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "when attaching to an L2 - switched - network with a dual stack configuration", - networkAttachmentConfig{ + networkAttachmentConfigParams{ cidr: 
strings.Join([]string{secondaryFlatL2NetworkCIDR, secondaryIPv6CIDR}, ","), name: secondaryNetworkName, topology: "layer2", @@ -190,7 +193,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "when attaching to an localnet - switched - network", - networkAttachmentConfig{ + networkAttachmentConfigParams{ cidr: secondaryLocalnetNetworkCIDR, name: secondaryNetworkName, topology: "localnet", @@ -203,7 +206,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "when attaching to an Localnet - switched - network featuring `excludeCIDR`s", - networkAttachmentConfig{ + networkAttachmentConfigParams{ cidr: secondaryLocalnetNetworkCIDR, name: secondaryNetworkName, topology: "localnet", @@ -217,7 +220,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "when attaching to an localnet - switched - network without IPAM", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "localnet", vlanID: localnetVLANID, @@ -229,7 +232,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "when attaching to an localnet - switched - network with an IPv6 subnet", - networkAttachmentConfig{ + networkAttachmentConfigParams{ cidr: secondaryIPv6CIDR, name: secondaryNetworkName, topology: "localnet", @@ -242,7 +245,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "when attaching to an L2 - switched - network with a dual stack configuration", - networkAttachmentConfig{ + networkAttachmentConfigParams{ cidr: strings.Join([]string{secondaryLocalnetNetworkCIDR, secondaryIPv6CIDR}, ","), name: secondaryNetworkName, topology: "localnet", @@ -269,7 +272,9 @@ var _ = Describe("Multi Homing", func() { table.DescribeTable( "can communicate over the secondary network", - func(netConfig networkAttachmentConfig, clientPodConfig podConfiguration, serverPodConfig podConfiguration) { + func(netConfigParams networkAttachmentConfigParams, clientPodConfig podConfiguration, serverPodConfig podConfiguration) { + netConfig := newNetworkAttachmentConfig(netConfigParams) + // Skip the test if the netConfig topology is not layer3 and the deployment is multi zone if netConfig.topology != "layer3" { if isInterconnectEnabled() { @@ -284,9 +289,11 @@ var _ = Describe("Multi Homing", func() { serverPodConfig.namespace = f.Namespace.Name if netConfig.topology == "localnet" { + By("setting up the localnet underlay") nodes := ovsPods(cs) Expect(nodes).NotTo(BeEmpty()) defer func() { + By("tearing down the localnet underlay") Expect(teardownUnderlay(nodes)).To(Succeed()) }() @@ -372,7 +379,7 @@ var _ = Describe("Multi Homing", func() { }, table.Entry( "can communicate over an L2 secondary network when the pods are scheduled in different nodes", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "layer2", cidr: secondaryNetworkCIDR, @@ -391,7 +398,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "can communicate over an L2 - switched - secondary network with `excludeCIDR`s", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "layer2", cidr: secondaryNetworkCIDR, @@ -409,7 +416,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "can communicate over an L3 - routed - secondary network", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "layer3", cidr: netCIDR(secondaryNetworkCIDR, netPrefixLengthPerNode), @@ -426,7 +433,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "can 
communicate over an L3 - routed - secondary network with IPv6 subnet", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "layer3", cidr: netCIDR(secondaryIPv6CIDR, netPrefixLengthIPv6PerNode), @@ -443,7 +450,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "can communicate over an L3 - routed - secondary network with a dual stack configuration", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "layer3", cidr: strings.Join([]string{netCIDR(secondaryNetworkCIDR, netPrefixLengthPerNode), netCIDR(secondaryIPv6CIDR, netPrefixLengthIPv6PerNode)}, ","), @@ -462,7 +469,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "can communicate over an L2 - switched - secondary network without IPAM", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "layer2", }, @@ -480,7 +487,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "can communicate over an L2 secondary network without IPAM, with static IPs configured via network selection elements", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "layer2", }, @@ -502,7 +509,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "can communicate over an L2 secondary network with an IPv6 subnet when pods are scheduled in different nodes", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "layer2", cidr: secondaryIPv6CIDR, @@ -521,7 +528,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "can communicate over an L2 secondary network with a dual stack configuration", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "layer2", cidr: strings.Join([]string{secondaryFlatL2NetworkCIDR, secondaryIPv6CIDR}, ","), @@ -540,7 +547,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "can communicate over an Localnet secondary network when the pods are scheduled on different nodes", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "localnet", cidr: secondaryLocalnetNetworkCIDR, @@ -560,7 +567,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "can communicate over an Localnet secondary network without IPAM when the pods are scheduled on different nodes", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "localnet", vlanID: localnetVLANID, @@ -581,7 +588,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "can communicate over an localnet secondary network without IPAM when the pods are scheduled on different nodes, with static IPs configured via network selection elements", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "localnet", vlanID: localnetVLANID, @@ -606,7 +613,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "can communicate over an localnet secondary network with an IPv6 subnet when pods are scheduled on different nodes", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "localnet", cidr: secondaryIPv6CIDR, @@ -626,7 +633,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "can communicate over an localnet secondary network with a dual stack configuration when pods are scheduled on different nodes", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: 
secondaryNetworkName, topology: "localnet", cidr: strings.Join([]string{secondaryLocalnetNetworkCIDR, secondaryIPv6CIDR}, ","), @@ -684,7 +691,9 @@ var _ = Describe("Multi Homing", func() { table.DescribeTable( "multi-network policies configure traffic allow lists", - func(netConfig networkAttachmentConfig, allowedClientPodConfig podConfiguration, blockedClientPodConfig podConfiguration, serverPodConfig podConfiguration, policy *mnpapi.MultiNetworkPolicy) { + func(netConfigParams networkAttachmentConfigParams, allowedClientPodConfig podConfiguration, blockedClientPodConfig podConfiguration, serverPodConfig podConfiguration, policy *mnpapi.MultiNetworkPolicy) { + netConfig := newNetworkAttachmentConfig(netConfigParams) + // Skip the test if the netConfig topology is not layer3 and the deployment is multi zone if netConfig.topology != "layer3" { if isInterconnectEnabled() { @@ -759,7 +768,7 @@ var _ = Describe("Multi Homing", func() { }, table.Entry( "for a pure L2 overlay when the multi-net policy describes the allow-list using pod selectors", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "layer2", cidr: secondaryFlatL2NetworkCIDR, @@ -796,7 +805,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "for a routed topology when the multi-net policy describes the allow-list using pod selectors", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "layer3", cidr: netCIDR(secondaryNetworkCIDR, netPrefixLengthPerNode), @@ -833,7 +842,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "for a localnet topology when the multi-net policy describes the allow-list using pod selectors", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "localnet", cidr: secondaryLocalnetNetworkCIDR, @@ -870,7 +879,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "for a pure L2 overlay when the multi-net policy describes the allow-list using IPBlock", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "layer2", cidr: secondaryFlatL2NetworkCIDR, @@ -902,7 +911,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "for a routed topology when the multi-net policy describes the allow-list using IPBlock", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "layer3", cidr: netCIDR(secondaryNetworkCIDR, netPrefixLengthPerNode), @@ -934,7 +943,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "for a localnet topology when the multi-net policy describes the allow-list using IPBlock", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "localnet", cidr: secondaryLocalnetNetworkCIDR, @@ -966,7 +975,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "for a pure L2 overlay when the multi-net policy describes the allow-list via namespace selectors", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "layer2", cidr: secondaryFlatL2NetworkCIDR, @@ -1000,7 +1009,7 @@ var _ = Describe("Multi Homing", func() { ), table.Entry( "for a routed topology when the multi-net policy describes the allow-list via namespace selectors", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "layer3", cidr: netCIDR(secondaryNetworkCIDR, netPrefixLengthPerNode), @@ -1034,7 +1043,7 @@ var _ = Describe("Multi Homing", 
func() { ), table.Entry( "for a localnet topology when the multi-net policy describes the allow-list via namespace selectors", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "localnet", cidr: secondaryLocalnetNetworkCIDR, @@ -1069,7 +1078,7 @@ var _ = Describe("Multi Homing", func() { table.Entry( "for an IPAMless pure L2 overlay when the multi-net policy describes the allow-list using IPBlock", - networkAttachmentConfig{ + networkAttachmentConfigParams{ name: secondaryNetworkName, topology: "layer2", }, diff --git a/test/e2e/multihoming_utils.go b/test/e2e/multihoming_utils.go index 0103a7d2e2..c1bef996cc 100644 --- a/test/e2e/multihoming_utils.go +++ b/test/e2e/multihoming_utils.go @@ -32,7 +32,7 @@ func getNetCIDRSubnet(netCIDR string) (string, error) { return "", fmt.Errorf("invalid network cidr: %q", netCIDR) } -type networkAttachmentConfig struct { +type networkAttachmentConfigParams struct { cidr string excludeCIDRs []string namespace string @@ -42,11 +42,18 @@ type networkAttachmentConfig struct { vlanID int } -func (nac networkAttachmentConfig) attachmentName() string { - if nac.networkName != "" { - return nac.networkName +type networkAttachmentConfig struct { + networkAttachmentConfigParams +} + +func newNetworkAttachmentConfig(params networkAttachmentConfigParams) networkAttachmentConfig { + networkAttachmentConfig := networkAttachmentConfig{ + networkAttachmentConfigParams: params, } - return uniqueNadName(nac.name) + if networkAttachmentConfig.networkName == "" { + networkAttachmentConfig.networkName = uniqueNadName(networkAttachmentConfig.name) + } + return networkAttachmentConfig } func uniqueNadName(originalNetName string) string { @@ -69,7 +76,7 @@ func generateNAD(config networkAttachmentConfig) *nadapi.NetworkAttachmentDefini "vlanID": %d } `, - config.attachmentName(), + config.networkName, config.topology, config.cidr, strings.Join(config.excludeCIDRs, ","), @@ -181,13 +188,15 @@ func connectToServer(clientPodConfig podConfiguration, serverIP string, port int } func newAttachmentConfigWithOverriddenName(name, namespace, networkName, topology, cidr string) networkAttachmentConfig { - return networkAttachmentConfig{ - cidr: cidr, - name: name, - namespace: namespace, - networkName: networkName, - topology: topology, - } + return newNetworkAttachmentConfig( + networkAttachmentConfigParams{ + cidr: cidr, + name: name, + namespace: namespace, + networkName: networkName, + topology: topology, + }, + ) } func configurePodStaticIP(podNamespace string, podName string, staticIP string) error { From d5a53d7b3af5298dd54066da02923dc8490573bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Fri, 9 Jun 2023 10:59:34 +0000 Subject: [PATCH 03/31] e2e multi-homing: fix bridge mapping physnet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bridge mapping was not being generated with the physnet expected by OVN-K, which replaces `-` and `/` with `.` in the network name. 
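To make the normalization concrete, a small sketch of the physnet derivation this commit aligns the test helper with; the network name and OVS bridge below are illustrative values, not taken from the patch, and the helper name `physnetName` is invented for the example.

```go
package main

import (
	"fmt"
	"strings"
)

// physnetName mirrors the normalization described above: OVN-Kubernetes
// replaces "-" and "/" with "." when turning a secondary network name into
// the physnet key used in the bridge mappings, so the e2e helper has to do
// the same before writing the mapping.
func physnetName(networkName string) string {
	physnet := strings.ReplaceAll(networkName, "-", ".")
	return strings.ReplaceAll(physnet, "/", ".")
}

func main() {
	// "default/tenant-blue" is a made-up NAD network name chosen to exercise
	// both substitutions; the mapping value pairs the normalized physnet with
	// an OVS bridge name.
	networkName := "default/tenant-blue"
	ovsBridge := "ovsbr1"
	fmt.Printf("%s:%s\n", physnetName(networkName), ovsBridge) // default.tenant.blue:ovsbr1
}
```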
Signed-off-by: Jaime Caamaño Ruiz --- test/e2e/localnet-underlay.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/e2e/localnet-underlay.go b/test/e2e/localnet-underlay.go index d09600cbd7..2e54f8d8de 100644 --- a/test/e2e/localnet-underlay.go +++ b/test/e2e/localnet-underlay.go @@ -156,6 +156,8 @@ func defaultNetworkBridgeMapping() BridgeMapping { } func bridgeMapping(physnet, ovsBridge string) BridgeMapping { + physnet = strings.ReplaceAll(physnet, "-", ".") + physnet = strings.ReplaceAll(physnet, "/", ".") return BridgeMapping{ physnet: physnet, ovsBridge: ovsBridge, From 20ebbf20c4e0d8b4975281a2628e4cbbc6246047 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Fri, 9 Jun 2023 11:38:13 +0000 Subject: [PATCH 04/31] e2e multi-homing: fix missing underlay for localnet policy tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jaime Caamaño Ruiz --- test/e2e/multihoming.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/e2e/multihoming.go b/test/e2e/multihoming.go index 48a0a21956..92f4f97b9d 100644 --- a/test/e2e/multihoming.go +++ b/test/e2e/multihoming.go @@ -716,6 +716,19 @@ var _ = Describe("Multi Homing", func() { serverPodConfig.namespace = f.Namespace.Name + if netConfig.topology == "localnet" { + By("setting up the localnet underlay") + nodes := ovsPods(cs) + Expect(nodes).NotTo(BeEmpty()) + defer func() { + By("tearing down the localnet underlay") + Expect(teardownUnderlay(nodes)).To(Succeed()) + }() + + const secondaryInterfaceName = "eth1" + Expect(setupUnderlay(nodes, secondaryInterfaceName, netConfig)).To(Succeed()) + } + for _, ns := range []v1.Namespace{*f.Namespace, *extraNamespace} { stepInfo := fmt.Sprintf("creating the attachment configuration for namespace %q", ns.Name) By(stepInfo) From 77c9268e2a5062510963e6caf8d25be98a09567b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Mon, 12 Jun 2023 14:45:25 +0000 Subject: [PATCH 05/31] e2e multi-homing: remove --internal from docker localnet overlay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit docker and podman do different things with the --internal parameter: - docker installs iptables rules to drop traffic on a different subnet than the bridge and we don't want that. - podman does not set the bridge as default gateway and we want that. So we need it with podman but not with docker. Neither allows us to create a bridge network without IPAM which would be ideal, so perhaps the best option would be a manual setup. Signed-off-by: Jaime Caamaño Ruiz --- contrib/kind.sh | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/contrib/kind.sh b/contrib/kind.sh index c59c81f3a7..8084abd019 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -1168,8 +1168,19 @@ docker_create_second_disconnected_interface() { local bridge_name="${1:-kindexgw}" echo "bridge: $bridge_name" + if [ "${OCI_BIN}" = "podman" ]; then + # docker and podman do different things with the --internal parameter: + # - docker installs iptables rules to drop traffic on a different subnet + # than the bridge and we don't want that. + # - podman does not set the bridge as default gateway and we want that. + # So we need it with podman but not with docker. Neither allows us to create + # a bridge network without IPAM which would be ideal, so perhaps the best + # option would be a manual setup. 
+ local podman_params="--internal" + fi + # Create the network without subnets; ignore if already exists. - "$OCI_BIN" network create --internal --driver=bridge "$bridge_name" || true + "$OCI_BIN" network create --driver=bridge ${podman_params-} "$bridge_name" || true KIND_NODES=$(kind get nodes --name "${KIND_CLUSTER_NAME}") for n in $KIND_NODES; do @@ -1287,4 +1298,4 @@ if [ "$KIND_INSTALL_METALLB" == true ]; then fi if [ "$KIND_INSTALL_PLUGINS" == true ]; then install_plugins -fi \ No newline at end of file +fi From cf892131363ba648221a07f24836a683bcca5b49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Wed, 24 May 2023 17:46:09 +0000 Subject: [PATCH 06/31] Remove cache of logical switch UUID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have the logical switch name as client index so we no longer need to cache the UUID. Signed-off-by: Jaime Caamaño Ruiz --- .../pkg/ovn/base_network_controller.go | 2 +- .../pkg/ovn/base_network_controller_pods.go | 9 +--- ...ase_secondary_layer2_network_controller.go | 2 +- go-controller/pkg/ovn/egressgw_test.go | 42 +++++++-------- go-controller/pkg/ovn/egressip_test.go | 15 ++---- .../pkg/ovn/external_gateway_apb_test.go | 30 +++++------ .../logical_switch_manager.go | 17 +----- .../logical_switch_manager_test.go | 26 ++++----- go-controller/pkg/ovn/multicast_test.go | 4 +- go-controller/pkg/ovn/multipolicy_test.go | 11 ++-- .../pkg/ovn/pod_selector_address_set_test.go | 2 +- go-controller/pkg/ovn/pods_test.go | 54 +++++++++---------- go-controller/pkg/ovn/policy_test.go | 2 +- 13 files changed, 92 insertions(+), 124 deletions(-) diff --git a/go-controller/pkg/ovn/base_network_controller.go b/go-controller/pkg/ovn/base_network_controller.go index b89699094b..1b6b770f24 100644 --- a/go-controller/pkg/ovn/base_network_controller.go +++ b/go-controller/pkg/ovn/base_network_controller.go @@ -387,7 +387,7 @@ func (bnc *BaseNetworkController) createNodeLogicalSwitch(nodeName string, hostS } // Add the switch to the logical switch cache - return bnc.lsManager.AddSwitch(logicalSwitch.Name, logicalSwitch.UUID, hostSubnets) + return bnc.lsManager.AddSwitch(logicalSwitch.Name, hostSubnets) } // UpdateNodeAnnotationWithRetry update node's annotation with the given node annotations. 
diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go index 65729bc3b5..5db6606700 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods.go +++ b/go-controller/pkg/ovn/base_network_controller_pods.go @@ -118,7 +118,6 @@ func (bnc *BaseNetworkController) deleteStaleLogicalSwitchPortsOnSwitches(switch sw := nbdb.LogicalSwitch{ Name: switchName, } - sw.UUID, _ = bnc.lsManager.GetUUID(switchName) ops, err = libovsdbops.DeleteLogicalSwitchPortsWithPredicateOps(bnc.nbClient, ops, &sw, p) if err != nil { @@ -350,12 +349,10 @@ func (bnc *BaseNetworkController) waitForNodeLogicalSwitch(switchName string) (* // is created by the node watch ls := &nbdb.LogicalSwitch{Name: switchName} if err := wait.PollUntilContextTimeout(context.Background(), 30*time.Millisecond, 30*time.Second, true, func(ctx context.Context) (bool, error) { - if lsUUID, ok := bnc.lsManager.GetUUID(switchName); !ok { + if subnets := bnc.lsManager.GetSwitchSubnets(switchName); subnets == nil { return false, fmt.Errorf("error getting logical switch %s: %s", switchName, "switch not in logical switch cache") - } else { - ls.UUID = lsUUID - return true, nil } + return true, nil }); err != nil { return nil, fmt.Errorf("timed out waiting for logical switch in logical switch cache %q subnet: %v", switchName, err) } @@ -851,9 +848,7 @@ func (bnc *BaseNetworkController) getPortAddresses(switchName string, existingLS // delLSPOps returns the ovsdb operations required to delete the given logical switch port (LSP) func (bnc *BaseNetworkController) delLSPOps(logicalPort, switchName, lspUUID string) ([]ovsdb.Operation, error) { - lsUUID, _ := bnc.lsManager.GetUUID(switchName) lsw := nbdb.LogicalSwitch{ - UUID: lsUUID, Name: switchName, } lsp := nbdb.LogicalSwitchPort{ diff --git a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go index e2112cda32..07ab52c1dd 100644 --- a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go @@ -298,7 +298,7 @@ func (oc *BaseSecondaryLayer2NetworkController) InitializeLogicalSwitch(switchNa return nil, fmt.Errorf("failed to create logical switch %+v: %v", logicalSwitch, err) } - if err = oc.lsManager.AddSwitch(switchName, logicalSwitch.UUID, hostSubnets); err != nil { + if err = oc.lsManager.AddSwitch(switchName, hostSubnets); err != nil { return nil, err } diff --git a/go-controller/pkg/ovn/egressgw_test.go b/go-controller/pkg/ovn/egressgw_test.go index 1196b1d671..e5d21de4a6 100644 --- a/go-controller/pkg/ovn/egressgw_test.go +++ b/go-controller/pkg/ovn/egressgw_test.go @@ -107,7 +107,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() @@ -244,7 +244,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() @@ -390,7 +390,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + 
t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() @@ -555,7 +555,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() @@ -717,7 +717,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, ) config.IPv6Mode = true - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -833,7 +833,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() @@ -1043,8 +1043,8 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) - fakeOvn.controller.lsManager.AddSwitch("node2", getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node2"), []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) + t.populateLogicalSwitchCache(fakeOvn) + fakeOvn.controller.lsManager.AddSwitch("node2", []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1205,8 +1205,8 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) - fakeOvn.controller.lsManager.AddSwitch("node2", getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node2"), []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) + t.populateLogicalSwitchCache(fakeOvn) + fakeOvn.controller.lsManager.AddSwitch("node2", []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1373,8 +1373,8 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) - fakeOvn.controller.lsManager.AddSwitch("node2", getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node2"), []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) + t.populateLogicalSwitchCache(fakeOvn) + fakeOvn.controller.lsManager.AddSwitch("node2", []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) injectNode(fakeOvn) err = fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1542,8 +1542,8 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, apbExternalRouteCRList, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) - fakeOvn.controller.lsManager.AddSwitch("node2", getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node2"), []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) + t.populateLogicalSwitchCache(fakeOvn) + fakeOvn.controller.lsManager.AddSwitch("node2", []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1875,8 +1875,8 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) - fakeOvn.controller.lsManager.AddSwitch("node2", getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node2"), []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) + t.populateLogicalSwitchCache(fakeOvn) + fakeOvn.controller.lsManager.AddSwitch("node2", []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -2008,8 +2008,8 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) - fakeOvn.controller.lsManager.AddSwitch("node2", getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node2"), []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) + t.populateLogicalSwitchCache(fakeOvn) + fakeOvn.controller.lsManager.AddSwitch("node2", []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -2148,7 +2148,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() @@ -2314,7 +2314,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() @@ -2479,7 +2479,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index 761d40acef..b78c224240 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -3508,9 +3508,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Items: []v1.Pod{egressPod1}, }, ) - // we don't know the real switch UUID in the db, but it can be found by name - swUUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node1.Name) - fakeOvn.controller.lsManager.AddSwitch(node1.Name, swUUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) + fakeOvn.controller.lsManager.AddSwitch(node1.Name, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) err := fakeOvn.controller.WatchPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -3780,9 +3778,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, ) - // we don't know the real switch UUID in the db, but it can be found by name - swUUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node1.Name) - fakeOvn.controller.lsManager.AddSwitch(node1.Name, swUUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) + fakeOvn.controller.lsManager.AddSwitch(node1.Name, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) fakeOvn.controller.WatchPods() fakeOvn.controller.WatchEgressIPNamespaces() 
fakeOvn.controller.WatchEgressIPPods() @@ -4142,11 +4138,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, ) - // we don't know the real switch UUID in the db, but it can be found by name - sw1UUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node1.Name) - sw2UUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node2.Name) - fakeOvn.controller.lsManager.AddSwitch(node1.Name, sw1UUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) - fakeOvn.controller.lsManager.AddSwitch(node2.Name, sw2UUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) + fakeOvn.controller.lsManager.AddSwitch(node1.Name, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) + fakeOvn.controller.lsManager.AddSwitch(node2.Name, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) err := fakeOvn.controller.WatchPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPNamespaces() diff --git a/go-controller/pkg/ovn/external_gateway_apb_test.go b/go-controller/pkg/ovn/external_gateway_apb_test.go index 899de98edb..bfbdd99949 100644 --- a/go-controller/pkg/ovn/external_gateway_apb_test.go +++ b/go-controller/pkg/ovn/external_gateway_apb_test.go @@ -108,7 +108,7 @@ var _ = ginkgo.XDescribe("OVN for APB External Route Operations", func() { }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() @@ -244,7 +244,7 @@ var _ = ginkgo.XDescribe("OVN for APB External Route Operations", func() { }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() @@ -388,7 +388,7 @@ var _ = ginkgo.XDescribe("OVN for APB External Route Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() @@ -532,7 +532,7 @@ var _ = ginkgo.XDescribe("OVN for APB External Route Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() @@ -691,7 +691,7 @@ var _ = ginkgo.XDescribe("OVN for APB External Route Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() @@ -906,7 +906,7 @@ var _ = ginkgo.XDescribe("OVN for APB External Route Operations", func() { }, ) config.IPv6Mode = true - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1054,7 +1054,7 @@ var _ = ginkgo.XDescribe("OVN for APB External Route Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() @@ -1260,7 +1260,7 @@ var _ = ginkgo.XDescribe("OVN for APB External Route Operations", func() { }, }, ) - 
t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1415,7 +1415,7 @@ var _ = ginkgo.XDescribe("OVN for APB External Route Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1570,7 +1570,7 @@ var _ = ginkgo.XDescribe("OVN for APB External Route Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err = fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1728,7 +1728,7 @@ var _ = ginkgo.XDescribe("OVN for APB External Route Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1944,7 +1944,7 @@ var _ = ginkgo.XDescribe("OVN for APB External Route Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() @@ -2071,7 +2071,7 @@ var _ = ginkgo.XDescribe("OVN for APB External Route Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() @@ -2199,7 +2199,7 @@ var _ = ginkgo.XDescribe("OVN for APB External Route Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() @@ -2476,7 +2476,7 @@ var _ = ginkgo.XDescribe("OVN for APB External Route Operations", func() { }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) injectNode(fakeOvn) err := fakeOvn.controller.WatchPods() diff --git a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go index 9645996944..b723e220fd 100644 --- a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go +++ b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go @@ -26,8 +26,6 @@ type logicalSwitchInfo struct { hostSubnets []*net.IPNet ipams []ipam.Interface noHostSubnet bool - // the uuid of the logicalSwitch described by this struct - uuid string } type ipamFactoryFunc func(*net.IPNet) (ipam.Interface, error) @@ -40,16 +38,6 @@ type LogicalSwitchManager struct { ipamFunc ipamFactoryFunc } -// GetUUID returns the UUID for the given logical switch name if -func (manager *LogicalSwitchManager) GetUUID(switchName string) (string, bool) { - manager.RLock() - defer manager.RUnlock() - if _, ok := manager.cache[switchName]; !ok { - return "", ok - } - return manager.cache[switchName].uuid, true -} - // 
NewIPAMAllocator provides an ipam interface which can be used for IPAM // allocations for a given cidr using a contiguous allocation strategy. // It also pre-allocates certain special subnet IPs such as the .1, .2, and .3 @@ -97,7 +85,7 @@ func NewLogicalSwitchManager() *LogicalSwitchManager { // AddSwitch adds/updates a switch to the logical switch manager for subnet // and IPAM management. -func (manager *LogicalSwitchManager) AddSwitch(switchName, uuid string, hostSubnets []*net.IPNet) error { +func (manager *LogicalSwitchManager) AddSwitch(switchName string, hostSubnets []*net.IPNet) error { manager.Lock() defer manager.Unlock() if lsi, ok := manager.cache[switchName]; ok && !reflect.DeepEqual(lsi.hostSubnets, hostSubnets) { @@ -117,7 +105,6 @@ func (manager *LogicalSwitchManager) AddSwitch(switchName, uuid string, hostSubn hostSubnets: hostSubnets, ipams: ipams, noHostSubnet: len(hostSubnets) == 0, - uuid: uuid, } return nil @@ -129,7 +116,7 @@ func (manager *LogicalSwitchManager) AddNoHostSubnetSwitch(switchName string) er // setting the hostSubnets slice argument to nil in the cache means an object // exists for the switch but it was not assigned a hostSubnet by ovn-kubernetes // this will be true for switches created on nodes that are marked as host-subnet only. - return manager.AddSwitch(switchName, "", nil) + return manager.AddSwitch(switchName, nil) } // Remove a switch from the the logical switch manager diff --git a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager_test.go b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager_test.go index 1fdc91f8f8..f72870b437 100644 --- a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager_test.go +++ b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager_test.go @@ -92,7 +92,7 @@ var _ = ginkgo.Describe("OVN Logical Switch Manager operations", func() { expectedIPs := []string{"10.1.1.3", "2000::3"} - err = lsManager.AddSwitch(testNode.switchName, "", ovntest.MustParseIPNets(testNode.subnets...)) + err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) gomega.Expect(err).NotTo(gomega.HaveOccurred()) ips, err := lsManager.AllocateNextIPs(testNode.switchName) @@ -118,7 +118,7 @@ var _ = ginkgo.Describe("OVN Logical Switch Manager operations", func() { "2000::/64", }, } - err = lsManager.AddSwitch(testNode.switchName, "", ovntest.MustParseIPNets(testNode.subnets...)) + err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) gomega.Expect(err).NotTo(gomega.HaveOccurred()) allocatedHybridOverlayDRIP, err := lsManager.AllocateHybridOverlay(testNode.switchName, []string{"10.1.1.53"}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -143,7 +143,7 @@ var _ = ginkgo.Describe("OVN Logical Switch Manager operations", func() { }, } - err = lsManager.AddSwitch(testNode.switchName, "", ovntest.MustParseIPNets(testNode.subnets...)) + err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) gomega.Expect(err).NotTo(gomega.HaveOccurred()) allocatedHybridOverlayDRIP, err := lsManager.AllocateHybridOverlay(testNode.switchName, []string{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -170,9 +170,10 @@ var _ = ginkgo.Describe("OVN Logical Switch Manager operations", func() { }, } - err = lsManager.AddSwitch(testNode.switchName, "", ovntest.MustParseIPNets(testNode.subnets...)) + err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = lsManager.AllocateIPs(testNode.switchName, []*net.IPNet{ - {net.ParseIP("10.1.1.3").To4(), net.CIDRMask(32, 32)}, + {IP: net.ParseIP("10.1.1.3").To4(), Mask: net.CIDRMask(32, 32)}, }) gomega.Expect(err).NotTo(gomega.HaveOccurred()) allocatedHybridOverlayDRIP, err := lsManager.AllocateHybridOverlay(testNode.switchName, []string{}) @@ -198,7 +199,7 @@ var _ = ginkgo.Describe("OVN Logical Switch Manager operations", func() { subnets: []string{}, } - err = lsManager.AddSwitch(testNode.switchName, "", ovntest.MustParseIPNets(testNode.subnets...)) + err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) gomega.Expect(err).NotTo(gomega.HaveOccurred()) noHostSubnet := lsManager.IsNonHostSubnetSwitch(testNode.switchName) gomega.Expect(noHostSubnet).To(gomega.BeTrue()) @@ -222,16 +223,17 @@ var _ = ginkgo.Describe("OVN Logical Switch Manager operations", func() { expectedIPs := []string{"10.1.1.3", "2000::3"} - err = lsManager.AddSwitch(testNode.switchName, "", ovntest.MustParseIPNets(testNode.subnets...)) + err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) gomega.Expect(err).NotTo(gomega.HaveOccurred()) ips, err := lsManager.AllocateNextIPs(testNode.switchName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) for i, ip := range ips { gomega.Expect(ip.IP.String()).To(gomega.Equal(expectedIPs[i])) } testNode.subnets = []string{"10.1.2.0/24"} expectedIPs = []string{"10.1.2.3"} - err = lsManager.AddSwitch(testNode.switchName, "", ovntest.MustParseIPNets(testNode.subnets...)) + err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) gomega.Expect(err).NotTo(gomega.HaveOccurred()) ips, err = lsManager.AllocateNextIPs(testNode.switchName) @@ -265,7 +267,7 @@ var _ = ginkgo.Describe("OVN Logical Switch Manager operations", func() { {"10.1.1.4", "2000::4"}, } - err = lsManager.AddSwitch(testNode.switchName, "", ovntest.MustParseIPNets(testNode.subnets...)) + err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) gomega.Expect(err).NotTo(gomega.HaveOccurred()) for _, expectedIPs := range expectedIPAllocations { ips, err := lsManager.AllocateNextIPs(testNode.switchName) @@ -296,7 +298,7 @@ var _ = ginkgo.Describe("OVN Logical Switch Manager operations", func() { {"10.1.1.4"}, } - err = lsManager.AddSwitch(testNode.switchName, "", ovntest.MustParseIPNets(testNode.subnets...)) + err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) gomega.Expect(err).NotTo(gomega.HaveOccurred()) for _, expectedIPs := range expectedIPAllocations { ips, err := lsManager.AllocateNextIPs(testNode.switchName) @@ -333,7 +335,7 @@ var _ = ginkgo.Describe("OVN Logical Switch Manager operations", func() { {"10.1.1.5", "10.1.2.5"}, } - err = lsManager.AddSwitch(testNode.switchName, "", ovntest.MustParseIPNets(testNode.subnets...)) + err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) gomega.Expect(err).NotTo(gomega.HaveOccurred()) // exhaust valid ips in second subnet for _, expectedIPs := range expectedIPAllocations { @@ -381,7 +383,7 @@ var _ = ginkgo.Describe("OVN Logical Switch Manager operations", func() { "2000::2/64", } allocatedIPNets := ovntest.MustParseIPNets(allocatedIPs...) 
- err = lsManager.AddSwitch(testNode.switchName, "", ovntest.MustParseIPNets(testNode.subnets...)) + err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = lsManager.AllocateIPs(testNode.switchName, allocatedIPNets) klog.Errorf("Error: %v", err) diff --git a/go-controller/pkg/ovn/multicast_test.go b/go-controller/pkg/ovn/multicast_test.go index 485521b57c..90f525fd04 100644 --- a/go-controller/pkg/ovn/multicast_test.go +++ b/go-controller/pkg/ovn/multicast_test.go @@ -595,7 +595,7 @@ var _ = ginkgo.Describe("OVN Multicast with IP Address Family", func() { setIpMode(m) for _, tPod := range tPods { - tPod.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, nodeName)) + tPod.populateLogicalSwitchCache(fakeOvn) } err := fakeOvn.controller.WatchNamespaces() @@ -716,7 +716,7 @@ var _ = ginkgo.Describe("OVN Multicast with IP Address Family", func() { // Create pods for _, tPod := range tPods { - tPod.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, nodeName)) + tPod.populateLogicalSwitchCache(fakeOvn) _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(tPod.namespace).Create(context.TODO(), newPod( tPod.namespace, tPod.podName, tPod.nodeName, tPod.podIP), metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) diff --git a/go-controller/pkg/ovn/multipolicy_test.go b/go-controller/pkg/ovn/multipolicy_test.go index 2f339f5634..3067139407 100644 --- a/go-controller/pkg/ovn/multipolicy_test.go +++ b/go-controller/pkg/ovn/multipolicy_test.go @@ -141,16 +141,13 @@ func (p testPod) populateSecondaryNetworkLogicalSwitchCache(fakeOvn *FakeOVN, oc switch ocInfo.bnc.TopologyType() { case ovntypes.Layer3Topology: podInfo := p.secondaryPodInfos[ocInfo.bnc.GetNetworkName()] - uuid := getLogicalSwitchUUID(fakeOvn.controller.nbClient, ocInfo.bnc.GetNetworkScopedName(p.nodeName)) - err = ocInfo.bnc.lsManager.AddSwitch(ocInfo.bnc.GetNetworkScopedName(p.nodeName), uuid, []*net.IPNet{ovntest.MustParseIPNet(podInfo.nodeSubnet)}) + err = ocInfo.bnc.lsManager.AddSwitch(ocInfo.bnc.GetNetworkScopedName(p.nodeName), []*net.IPNet{ovntest.MustParseIPNet(podInfo.nodeSubnet)}) case ovntypes.Layer2Topology: - uuid := getLogicalSwitchUUID(fakeOvn.controller.nbClient, ocInfo.bnc.GetNetworkScopedName(ovntypes.OVNLayer2Switch)) subnet := ocInfo.bnc.Subnets()[0] - err = ocInfo.bnc.lsManager.AddSwitch(ocInfo.bnc.GetNetworkScopedName(ovntypes.OVNLayer2Switch), uuid, []*net.IPNet{subnet.CIDR}) + err = ocInfo.bnc.lsManager.AddSwitch(ocInfo.bnc.GetNetworkScopedName(ovntypes.OVNLayer2Switch), []*net.IPNet{subnet.CIDR}) case ovntypes.LocalnetTopology: - uuid := getLogicalSwitchUUID(fakeOvn.controller.nbClient, ocInfo.bnc.GetNetworkScopedName(ovntypes.OVNLayer2Switch)) subnet := ocInfo.bnc.Subnets()[0] - err = ocInfo.bnc.lsManager.AddSwitch(ocInfo.bnc.GetNetworkScopedName(ovntypes.OVNLocalnetSwitch), uuid, []*net.IPNet{subnet.CIDR}) + err = ocInfo.bnc.lsManager.AddSwitch(ocInfo.bnc.GetNetworkScopedName(ovntypes.OVNLocalnetSwitch), []*net.IPNet{subnet.CIDR}) } gomega.Expect(err).NotTo(gomega.HaveOccurred()) } @@ -344,7 +341,7 @@ var _ = ginkgo.Describe("OVN MultiNetworkPolicy Operations", func() { gomega.Expect(ok).To(gomega.Equal(true)) for _, testPod := range pods { - testPod.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, nodeName)) + testPod.populateLogicalSwitchCache(fakeOvn) } var err error if namespaces != nil { diff 
--git a/go-controller/pkg/ovn/pod_selector_address_set_test.go b/go-controller/pkg/ovn/pod_selector_address_set_test.go index 3295a9016c..3d92fb1889 100644 --- a/go-controller/pkg/ovn/pod_selector_address_set_test.go +++ b/go-controller/pkg/ovn/pod_selector_address_set_test.go @@ -81,7 +81,7 @@ var _ = ginkgo.Describe("OVN PodSelectorAddressSet", func() { }, ) for _, testPod := range pods { - testPod.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, nodeName)) + testPod.populateLogicalSwitchCache(fakeOvn) } var err error if namespaces != nil { diff --git a/go-controller/pkg/ovn/pods_test.go b/go-controller/pkg/ovn/pods_test.go index dbda9c9be2..a79bfe3a9d 100644 --- a/go-controller/pkg/ovn/pods_test.go +++ b/go-controller/pkg/ovn/pods_test.go @@ -202,9 +202,9 @@ func newTPod(nodeName, nodeSubnet, nodeMgtIP, nodeGWIP, podName, podIP, podMAC, return to } -func (p testPod) populateLogicalSwitchCache(fakeOvn *FakeOVN, uuid string) { +func (p testPod) populateLogicalSwitchCache(fakeOvn *FakeOVN) { gomega.Expect(p.nodeName).NotTo(gomega.Equal("")) - err := fakeOvn.controller.lsManager.AddSwitch(p.nodeName, uuid, []*net.IPNet{ovntest.MustParseIPNet(p.nodeSubnet)}) + err := fakeOvn.controller.lsManager.AddSwitch(p.nodeName, []*net.IPNet{ovntest.MustParseIPNet(p.nodeSubnet)}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) } @@ -404,7 +404,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { // Assign it and perform the update t.nodeName = "node1" t.portName = util.GetLogicalPortName(t.namespace, t.podName) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(t.namespace).Update(context.TODO(), newPod(t.namespace, t.podName, t.nodeName, t.podIP), metav1.UpdateOptions{}) @@ -452,7 +452,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchPods() @@ -507,7 +507,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchPods() @@ -619,7 +619,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchPods() @@ -793,7 +793,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -841,7 +841,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = 
fakeOvn.controller.WatchPods() @@ -916,7 +916,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, ) - podTest.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + podTest.populateLogicalSwitchCache(fakeOvn) err = fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) fakeOvn.asf.ExpectAddressSetWithIPs(podTest.namespace, []string{podTest.podIP}) @@ -992,7 +992,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, ) - podTest.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + podTest.populateLogicalSwitchCache(fakeOvn) err = fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchPods() @@ -1071,7 +1071,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, ) - podTest.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + podTest.populateLogicalSwitchCache(fakeOvn) err = fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1183,9 +1183,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, ) - podTest.populateLogicalSwitchCache( - fakeOvn, - getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + podTest.populateLogicalSwitchCache(fakeOvn) err = fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchPods() @@ -1296,7 +1294,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, ) - podTest.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + podTest.populateLogicalSwitchCache(fakeOvn) err = fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchPods() @@ -1427,7 +1425,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1488,7 +1486,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1544,7 +1542,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchPods() @@ -1603,7 +1601,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) // pod exists, networks annotations don't pod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(t.namespace).Get(context.TODO(), t.podName, metav1.GetOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1663,7 +1661,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, }, ) - t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t.populateLogicalSwitchCache(fakeOvn) // pod annotations exist, lsp doesn't annotations := 
getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) gomega.Expect(annotations).To(gomega.MatchJSON(t.getAnnotationsJson())) @@ -1733,9 +1731,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, ) - // we don't know the real switch UUID in the db, but it can be found by name - swUUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, testNode.Name) - fakeOvn.controller.lsManager.AddSwitch(testNode.Name, swUUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) + fakeOvn.controller.lsManager.AddSwitch(testNode.Name, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchPods() @@ -1889,9 +1885,9 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, }, ) - t1.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) - t2.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node2")) - t3.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t1.populateLogicalSwitchCache(fakeOvn) + t2.populateLogicalSwitchCache(fakeOvn) + t3.populateLogicalSwitchCache(fakeOvn) // pod annotations and lsp exist now err := fakeOvn.controller.WatchNamespaces() @@ -1966,9 +1962,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { Items: []v1.Pod{}, }, ) - // we don't know the real switch UUID in the db, but it can be found by name - swUUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, testNodeWithLS.Name) - fakeOvn.controller.lsManager.AddSwitch(testNodeWithLS.Name, swUUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) + fakeOvn.controller.lsManager.AddSwitch(testNodeWithLS.Name, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) err := fakeOvn.controller.WatchPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) // expect stale logical switch port removed if reconciliation is successful @@ -2051,7 +2045,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, }, ) - t1.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + t1.populateLogicalSwitchCache(fakeOvn) // pod annotations and lsp exist now err := fakeOvn.controller.WatchNamespaces() diff --git a/go-controller/pkg/ovn/policy_test.go b/go-controller/pkg/ovn/policy_test.go index 467df8e662..aa0b09ae85 100644 --- a/go-controller/pkg/ovn/policy_test.go +++ b/go-controller/pkg/ovn/policy_test.go @@ -681,7 +681,7 @@ var _ = ginkgo.Describe("OVN NetworkPolicy Operations", func() { }, ) for _, testPod := range pods { - testPod.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, nodeName)) + testPod.populateLogicalSwitchCache(fakeOvn) } var err error if namespaces != nil { From 05a522cdfc7f89ffe9126c5a876def411cd59ec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Wed, 24 May 2023 18:26:25 +0000 Subject: [PATCH 07/31] Move ipallocator pkg out of the ovn pkg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ipallocator package will be extensively used from cluster manager so it makes sense for it to be a top level package rather than sit in the ovn package. 
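A minimal consumer sketch of the relocated package (illustrative only: the throwaway main package and the 192.168.1.0/24 range are assumptions; the import path, NewCIDRRange, AllocateNext and the new Reserved helper come from the moved code and its tests in this patch):

    package main

    import (
        "fmt"
        "net"

        "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ipallocator"
    )

    func main() {
        // example range; the allocator is built per CIDR
        _, cidr, err := net.ParseCIDR("192.168.1.0/24")
        if err != nil {
            panic(err)
        }
        r, err := ipallocator.NewCIDRRange(cidr)
        if err != nil {
            panic(err)
        }
        ip, err := r.AllocateNext() // next free address in the range
        if err != nil {
            panic(err)
        }
        // the network and broadcast addresses can never be handed out
        fmt.Println(ip, r.Reserved(net.ParseIP("192.168.1.0")))
    }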
Signed-off-by: Jaime Caamaño Ruiz --- .../pkg/clustermanager/id_allocator.go | 2 +- .../pkg/{ovn => }/ipallocator/allocator.go | 14 +++++++++- .../{ovn => }/ipallocator/allocator/bitmap.go | 0 .../ipallocator/allocator/bitmap_test.go | 0 .../ipallocator/allocator/interfaces.go | 0 .../{ovn => }/ipallocator/allocator/utils.go | 0 .../ipallocator/allocator/utils_test.go | 0 .../{ovn => }/ipallocator/allocator_test.go | 27 +++++++++++++++++++ .../pkg/ovn/base_network_controller_pods.go | 2 +- .../logical_switch_manager.go | 7 +++-- go-controller/pkg/ovn/pods_test.go | 2 +- 11 files changed, 46 insertions(+), 8 deletions(-) rename go-controller/pkg/{ovn => }/ipallocator/allocator.go (93%) rename go-controller/pkg/{ovn => }/ipallocator/allocator/bitmap.go (100%) rename go-controller/pkg/{ovn => }/ipallocator/allocator/bitmap_test.go (100%) rename go-controller/pkg/{ovn => }/ipallocator/allocator/interfaces.go (100%) rename go-controller/pkg/{ovn => }/ipallocator/allocator/utils.go (100%) rename go-controller/pkg/{ovn => }/ipallocator/allocator/utils_test.go (100%) rename go-controller/pkg/{ovn => }/ipallocator/allocator_test.go (90%) diff --git a/go-controller/pkg/clustermanager/id_allocator.go b/go-controller/pkg/clustermanager/id_allocator.go index 4de49baabb..4592366086 100644 --- a/go-controller/pkg/clustermanager/id_allocator.go +++ b/go-controller/pkg/clustermanager/id_allocator.go @@ -4,7 +4,7 @@ import ( "fmt" "sync" - bitmapallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/ipallocator/allocator" + bitmapallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ipallocator/allocator" ) const ( diff --git a/go-controller/pkg/ovn/ipallocator/allocator.go b/go-controller/pkg/ipallocator/allocator.go similarity index 93% rename from go-controller/pkg/ovn/ipallocator/allocator.go rename to go-controller/pkg/ipallocator/allocator.go index 849f15f048..56f8c5fded 100644 --- a/go-controller/pkg/ovn/ipallocator/allocator.go +++ b/go-controller/pkg/ipallocator/allocator.go @@ -22,7 +22,7 @@ import ( "math/big" "net" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/ipallocator/allocator" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ipallocator/allocator" utilnet "k8s.io/utils/net" ) @@ -35,6 +35,7 @@ type Interface interface { ForEach(func(net.IP)) CIDR() net.IPNet Has(ip net.IP) bool + Reserved(ip net.IP) bool } var ( @@ -199,6 +200,17 @@ func (r *Range) Has(ip net.IP) bool { return r.alloc.Has(offset) } +// Reserved returns true if the provided IP can't be allocated. This is *only* +// true for the network and broadcast addresses. +func (r *Range) Reserved(ip net.IP) bool { + if !r.net.Contains(ip) { + return false + } + + offset := calculateIPOffset(r.base, ip) + return offset == -1 || offset == r.max +} + // contains returns true and the offset if the ip is in the range, and false // and nil otherwise. The first and last addresses of the CIDR are omitted. 
func (r *Range) contains(ip net.IP) (bool, int) { diff --git a/go-controller/pkg/ovn/ipallocator/allocator/bitmap.go b/go-controller/pkg/ipallocator/allocator/bitmap.go similarity index 100% rename from go-controller/pkg/ovn/ipallocator/allocator/bitmap.go rename to go-controller/pkg/ipallocator/allocator/bitmap.go diff --git a/go-controller/pkg/ovn/ipallocator/allocator/bitmap_test.go b/go-controller/pkg/ipallocator/allocator/bitmap_test.go similarity index 100% rename from go-controller/pkg/ovn/ipallocator/allocator/bitmap_test.go rename to go-controller/pkg/ipallocator/allocator/bitmap_test.go diff --git a/go-controller/pkg/ovn/ipallocator/allocator/interfaces.go b/go-controller/pkg/ipallocator/allocator/interfaces.go similarity index 100% rename from go-controller/pkg/ovn/ipallocator/allocator/interfaces.go rename to go-controller/pkg/ipallocator/allocator/interfaces.go diff --git a/go-controller/pkg/ovn/ipallocator/allocator/utils.go b/go-controller/pkg/ipallocator/allocator/utils.go similarity index 100% rename from go-controller/pkg/ovn/ipallocator/allocator/utils.go rename to go-controller/pkg/ipallocator/allocator/utils.go diff --git a/go-controller/pkg/ovn/ipallocator/allocator/utils_test.go b/go-controller/pkg/ipallocator/allocator/utils_test.go similarity index 100% rename from go-controller/pkg/ovn/ipallocator/allocator/utils_test.go rename to go-controller/pkg/ipallocator/allocator/utils_test.go diff --git a/go-controller/pkg/ovn/ipallocator/allocator_test.go b/go-controller/pkg/ipallocator/allocator_test.go similarity index 90% rename from go-controller/pkg/ovn/ipallocator/allocator_test.go rename to go-controller/pkg/ipallocator/allocator_test.go index 65b98179ab..75fa789604 100644 --- a/go-controller/pkg/ovn/ipallocator/allocator_test.go +++ b/go-controller/pkg/ipallocator/allocator_test.go @@ -258,3 +258,30 @@ func TestForEach(t *testing.T) { } } } + +func TestReserved(t *testing.T) { + _, cidr, err := net.ParseCIDR("192.168.1.0/24") + if err != nil { + t.Fatal(err) + } + r, err := NewCIDRRange(cidr) + if err != nil { + t.Fatal(err) + } + + if !r.Reserved(net.ParseIP("192.168.1.0")) { + t.Errorf("should be a reserved address: %s", "192.168.1.0") + } + + if !r.Reserved(net.ParseIP("192.168.1.255")) { + t.Errorf("should be a reserved address: %s", "192.168.1.255") + } + + if r.Reserved(net.ParseIP("192.168.1.1")) { + t.Errorf("should not be a reserved address: %s", "192.168.1.1") + } + + if r.Reserved(net.ParseIP("192.168.1.254")) { + t.Errorf("should not be a reserved address: %s", "192.168.1.254") + } +} diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go index 5db6606700..f464040150 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods.go +++ b/go-controller/pkg/ovn/base_network_controller_pods.go @@ -9,7 +9,7 @@ import ( nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/ipallocator" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ipallocator" logicalswitchmanager "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/logical_switch_manager" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" diff --git a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go 
b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go index b723e220fd..60a9aac41e 100644 --- a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go +++ b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go @@ -8,9 +8,8 @@ import ( "reflect" "sync" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/ipallocator" - ipam "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/ipallocator" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/ipallocator/allocator" + ipam "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ipallocator" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ipallocator/allocator" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" "k8s.io/klog/v2" utilnet "k8s.io/utils/net" @@ -281,7 +280,7 @@ func (manager *LogicalSwitchManager) AllocateHybridOverlay(switchName string, hy // attempt to allocate the IP address that is annotated on the node. The only way there would be a collision is if the annotations of podIP or hybridOverlayDRIP // where manually edited and we do not support that err := manager.AllocateIPs(switchName, allocateAddresses) - if err != nil && err != ipallocator.ErrAllocated { + if err != nil && err != ipam.ErrAllocated { return nil, err } return allocateAddresses, nil diff --git a/go-controller/pkg/ovn/pods_test.go b/go-controller/pkg/ovn/pods_test.go index a79bfe3a9d..4c0c4060cd 100644 --- a/go-controller/pkg/ovn/pods_test.go +++ b/go-controller/pkg/ovn/pods_test.go @@ -11,8 +11,8 @@ import ( "github.com/urfave/cli/v2" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ipallocator" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/ipallocator" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" From aec6733be8477ce8f7601038bcca97260050a251 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Wed, 24 May 2023 19:07:28 +0000 Subject: [PATCH 08/31] Re-structure ipallocator package MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move pkg/ipallocator to pkg/allocator/ip Move pkg/ipallocator/allocator to pkg/allocator/bitmap This makes sense since other components are using the allocator package without it actually being associated with allocating IPs. It also facilitates proper placement of a future subnet IP allocator. Had to fix an issue in hack/test-go.sh where it was using a recursive grep to find ginkgo references and pass ginkgo arguments. This grep should not be recursive, otherwise it will pass ginkgo arguments if a subpackage has ginkgo references but not the root package, which was the case for pkg/allocator after this change.
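A minimal sketch of the layout after the restructure (illustrative only: the main package and the 10.1.1.0/24 subnet are assumptions; the import paths and the bitmap-backed constructor mirror the code below):

    package main

    import (
        "fmt"
        "net"

        bitmapallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/bitmap"
        ipallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip"
    )

    func main() {
        _, cidr, err := net.ParseCIDR("10.1.1.0/24")
        if err != nil {
            panic(err)
        }
        // a round-robin bitmap backs the per-CIDR IP allocator, the same
        // pattern the tree uses after this change
        ipam, err := ipallocator.NewAllocatorCIDRRange(cidr, func(max int, rangeSpec string) (bitmapallocator.Interface, error) {
            return bitmapallocator.NewRoundRobinAllocationMap(max, rangeSpec), nil
        })
        if err != nil {
            panic(err)
        }
        ip, err := ipam.AllocateNext()
        if err != nil {
            panic(err)
        }
        fmt.Println(ip)
    }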
Signed-off-by: Jaime Caamaño Ruiz --- go-controller/hack/test-go.sh | 2 +- .../pkg/{ipallocator/allocator => allocator/bitmap}/bitmap.go | 2 +- .../allocator => allocator/bitmap}/bitmap_test.go | 2 +- .../{ipallocator/allocator => allocator/bitmap}/interfaces.go | 2 +- .../pkg/{ipallocator/allocator => allocator/bitmap}/utils.go | 2 +- .../{ipallocator/allocator => allocator/bitmap}/utils_test.go | 2 +- go-controller/pkg/{ipallocator => allocator/ip}/allocator.go | 4 ++-- .../pkg/{ipallocator => allocator/ip}/allocator_test.go | 2 +- go-controller/pkg/clustermanager/id_allocator.go | 2 +- go-controller/pkg/ovn/base_network_controller_pods.go | 2 +- .../pkg/ovn/logical_switch_manager/logical_switch_manager.go | 4 ++-- go-controller/pkg/ovn/pods_test.go | 2 +- 12 files changed, 14 insertions(+), 14 deletions(-) rename go-controller/pkg/{ipallocator/allocator => allocator/bitmap}/bitmap.go (99%) rename go-controller/pkg/{ipallocator/allocator => allocator/bitmap}/bitmap_test.go (99%) rename go-controller/pkg/{ipallocator/allocator => allocator/bitmap}/interfaces.go (98%) rename go-controller/pkg/{ipallocator/allocator => allocator/bitmap}/utils.go (98%) rename go-controller/pkg/{ipallocator/allocator => allocator/bitmap}/utils_test.go (98%) rename go-controller/pkg/{ipallocator => allocator/ip}/allocator.go (98%) rename go-controller/pkg/{ipallocator => allocator/ip}/allocator_test.go (99%) diff --git a/go-controller/hack/test-go.sh b/go-controller/hack/test-go.sh index 241e1a972d..b5ee712a61 100755 --- a/go-controller/hack/test-go.sh +++ b/go-controller/hack/test-go.sh @@ -59,7 +59,7 @@ function testrun { echo "Increasing timeout to 20m for package ${pkg}" args="${args} -test.timeout=20m" fi - if grep -q -r "ginkgo" ."${path}"; then + if grep -q "ginkgo" ."${path}"/*_test.go; then prefix=$(echo "${path}" | cut -c 2- | sed 's,/,_,g') ginkgoargs="-ginkgo.v ${ginkgo_focus} -ginkgo.reportFile ${TEST_REPORT_DIR}/junit-${prefix}.xml" fi diff --git a/go-controller/pkg/ipallocator/allocator/bitmap.go b/go-controller/pkg/allocator/bitmap/bitmap.go similarity index 99% rename from go-controller/pkg/ipallocator/allocator/bitmap.go rename to go-controller/pkg/allocator/bitmap/bitmap.go index 397dafed1b..7b90ee977f 100644 --- a/go-controller/pkg/ipallocator/allocator/bitmap.go +++ b/go-controller/pkg/allocator/bitmap/bitmap.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package allocator +package bitmap import ( "errors" diff --git a/go-controller/pkg/ipallocator/allocator/bitmap_test.go b/go-controller/pkg/allocator/bitmap/bitmap_test.go similarity index 99% rename from go-controller/pkg/ipallocator/allocator/bitmap_test.go rename to go-controller/pkg/allocator/bitmap/bitmap_test.go index ad57752bf2..a0904e129f 100644 --- a/go-controller/pkg/ipallocator/allocator/bitmap_test.go +++ b/go-controller/pkg/allocator/bitmap/bitmap_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package allocator +package bitmap import ( "testing" diff --git a/go-controller/pkg/ipallocator/allocator/interfaces.go b/go-controller/pkg/allocator/bitmap/interfaces.go similarity index 98% rename from go-controller/pkg/ipallocator/allocator/interfaces.go rename to go-controller/pkg/allocator/bitmap/interfaces.go index b078f38075..6072b34086 100644 --- a/go-controller/pkg/ipallocator/allocator/interfaces.go +++ b/go-controller/pkg/allocator/bitmap/interfaces.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package allocator +package bitmap // Interface manages the allocation of items out of a range. Interface // should be threadsafe. diff --git a/go-controller/pkg/ipallocator/allocator/utils.go b/go-controller/pkg/allocator/bitmap/utils.go similarity index 98% rename from go-controller/pkg/ipallocator/allocator/utils.go rename to go-controller/pkg/allocator/bitmap/utils.go index 4691f57a15..766fc28eee 100644 --- a/go-controller/pkg/ipallocator/allocator/utils.go +++ b/go-controller/pkg/allocator/bitmap/utils.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package allocator +package bitmap import "math/big" diff --git a/go-controller/pkg/ipallocator/allocator/utils_test.go b/go-controller/pkg/allocator/bitmap/utils_test.go similarity index 98% rename from go-controller/pkg/ipallocator/allocator/utils_test.go rename to go-controller/pkg/allocator/bitmap/utils_test.go index 5f87cb1831..fb47dc5a84 100644 --- a/go-controller/pkg/ipallocator/allocator/utils_test.go +++ b/go-controller/pkg/allocator/bitmap/utils_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package allocator +package bitmap import ( "math/big" diff --git a/go-controller/pkg/ipallocator/allocator.go b/go-controller/pkg/allocator/ip/allocator.go similarity index 98% rename from go-controller/pkg/ipallocator/allocator.go rename to go-controller/pkg/allocator/ip/allocator.go index 56f8c5fded..936027cdf6 100644 --- a/go-controller/pkg/ipallocator/allocator.go +++ b/go-controller/pkg/allocator/ip/allocator.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package ipallocator +package ip import ( "errors" @@ -22,7 +22,7 @@ import ( "math/big" "net" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ipallocator/allocator" + allocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/bitmap" utilnet "k8s.io/utils/net" ) diff --git a/go-controller/pkg/ipallocator/allocator_test.go b/go-controller/pkg/allocator/ip/allocator_test.go similarity index 99% rename from go-controller/pkg/ipallocator/allocator_test.go rename to go-controller/pkg/allocator/ip/allocator_test.go index 75fa789604..8aff2b4cbe 100644 --- a/go-controller/pkg/ipallocator/allocator_test.go +++ b/go-controller/pkg/allocator/ip/allocator_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package ipallocator +package ip import ( "net" diff --git a/go-controller/pkg/clustermanager/id_allocator.go b/go-controller/pkg/clustermanager/id_allocator.go index 4592366086..62be425f03 100644 --- a/go-controller/pkg/clustermanager/id_allocator.go +++ b/go-controller/pkg/clustermanager/id_allocator.go @@ -4,7 +4,7 @@ import ( "fmt" "sync" - bitmapallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ipallocator/allocator" + bitmapallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/bitmap" ) const ( diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go index f464040150..606c7998b1 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods.go +++ b/go-controller/pkg/ovn/base_network_controller_pods.go @@ -8,8 +8,8 @@ import ( "time" nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + ipallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ipallocator" logicalswitchmanager "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/logical_switch_manager" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" diff --git a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go index 60a9aac41e..378b591a69 100644 --- a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go +++ b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go @@ -8,8 +8,8 @@ import ( "reflect" "sync" - ipam "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ipallocator" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ipallocator/allocator" + allocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/bitmap" + ipam "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" "k8s.io/klog/v2" utilnet "k8s.io/utils/net" diff --git a/go-controller/pkg/ovn/pods_test.go b/go-controller/pkg/ovn/pods_test.go index 4c0c4060cd..9040d01d8d 100644 --- a/go-controller/pkg/ovn/pods_test.go +++ b/go-controller/pkg/ovn/pods_test.go @@ -10,8 +10,8 @@ import ( "github.com/urfave/cli/v2" + ipallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ipallocator" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" From 15d7c7ac0349356350c92cae76391c65bee4e7be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Thu, 25 May 2023 10:16:47 +0000 Subject: [PATCH 09/31] Add a subnet ip allocator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move most of the logical switch manager functionality to an independent subnet ip allocator so it can be used from different places without it being associated with the concept of switches or nodes. Logical switch manager retains the functionality of reserving the gateway, management and hybrid overlay IPs for every allocated subnet.
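A rough usage sketch of the new allocator (illustrative only: the subnet-set name and CIDRs are made up; the constructor and methods match the new package and its unit tests added below):

    package main

    import (
        "fmt"

        "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip/subnet"
        ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing"
    )

    func main() {
        a := subnet.NewAllocator()
        // register a named (here dual-stack) set of subnets, then hand out one IP per subnet
        if err := a.AddOrUpdateSubnet("node1", ovntest.MustParseIPNets("10.1.1.0/24", "2000::/64")); err != nil {
            panic(err)
        }
        ips, err := a.AllocateNextIPs("node1")
        if err != nil {
            panic(err)
        }
        fmt.Println(ips) // 10.1.1.1/24 and 2000::1/64, per the unit tests
        // return the addresses to the pool when their owner goes away
        if err := a.ReleaseIPs("node1", ips); err != nil {
            panic(err)
        }
    }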
Everything is functionally equivalent except when reserving the hybrid overlay IP. An issue was fixed where it could be releasing an IP it had not allocated in the first place. Signed-off-by: Jaime Caamaño Ruiz --- .../pkg/allocator/ip/subnet/allocator.go | 292 +++++++++++++ .../pkg/allocator/ip/subnet/allocator_test.go | 195 +++++++++ .../pkg/ovn/base_network_controller.go | 2 +- .../pkg/ovn/base_network_controller_pods.go | 2 +- ...ase_secondary_layer2_network_controller.go | 2 +- go-controller/pkg/ovn/egressgw_test.go | 12 +- go-controller/pkg/ovn/egressip_test.go | 8 +- .../logical_switch_manager.go | 393 +++--------------- .../logical_switch_manager_test.go | 265 +----------- go-controller/pkg/ovn/multipolicy_test.go | 6 +- go-controller/pkg/ovn/pods_test.go | 6 +- go-controller/pkg/util/net.go | 15 + go-controller/pkg/util/util.go | 10 + 13 files changed, 601 insertions(+), 607 deletions(-) create mode 100644 go-controller/pkg/allocator/ip/subnet/allocator.go create mode 100644 go-controller/pkg/allocator/ip/subnet/allocator_test.go diff --git a/go-controller/pkg/allocator/ip/subnet/allocator.go b/go-controller/pkg/allocator/ip/subnet/allocator.go new file mode 100644 index 0000000000..469144703f --- /dev/null +++ b/go-controller/pkg/allocator/ip/subnet/allocator.go @@ -0,0 +1,292 @@ +package subnet + +import ( + "errors" + "fmt" + "net" + "reflect" + "sync" + + bitmapallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/bitmap" + ipallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "k8s.io/klog/v2" +) + +// Allocator manages the allocation of IP within specific set of subnets +// identified by a name. Allocator should be threadsafe. +type Allocator interface { + AddOrUpdateSubnet(name string, subnets []*net.IPNet) error + DeleteSubnet(name string) + GetSubnets(name string) ([]*net.IPNet, error) + AllocateUntilFull(name string) error + AllocateIPs(name string, ips []*net.IPNet) error + AllocateNextIPs(name string) ([]*net.IPNet, error) + ReleaseIPs(name string, ips []*net.IPNet) error + ConditionalIPRelease(name string, ips []*net.IPNet, predicate func() (bool, error)) (bool, error) +} + +// ErrSubnetNotFound is used to inform the subnet is not being managed +var ErrSubnetNotFound = errors.New("subnet not found") + +// subnetInfo contains information corresponding to the subnet. It holds the +// allocations (v4 and v6) as well as the IPAM allocator instances for each +// of the managed subnets +type subnetInfo struct { + subnets []*net.IPNet + ipams []ipallocator.Interface +} + +type ipamFactoryFunc func(*net.IPNet) (ipallocator.Interface, error) + +// allocator provides IPAM for different sets of subnets. Each set is +// identified with a subnet name. +type allocator struct { + cache map[string]subnetInfo + // A RW mutex which holds subnet information + sync.RWMutex + ipamFunc ipamFactoryFunc +} + +// newIPAMAllocator provides an ipam interface which can be used for IPAM +// allocations for a given cidr using a contiguous allocation strategy. +// It also pre-allocates certain special subnet IPs such as the .1, .2, and .3 +// addresses as reserved. 
+func newIPAMAllocator(cidr *net.IPNet) (ipallocator.Interface, error) { + return ipallocator.NewAllocatorCIDRRange(cidr, func(max int, rangeSpec string) (bitmapallocator.Interface, error) { + return bitmapallocator.NewRoundRobinAllocationMap(max, rangeSpec), nil + }) +} + +// Initializes a new subnet IP allocator +func NewAllocator() *allocator { + return &allocator{ + cache: make(map[string]subnetInfo), + RWMutex: sync.RWMutex{}, + ipamFunc: newIPAMAllocator, + } +} + +// AddOrUpdateSubnet set to the allocator for IPAM management, or update it. +func (allocator *allocator) AddOrUpdateSubnet(name string, subnets []*net.IPNet) error { + allocator.Lock() + defer allocator.Unlock() + if subnetInfo, ok := allocator.cache[name]; ok && !reflect.DeepEqual(subnetInfo.subnets, subnets) { + klog.Warningf("Replacing subnets %v with %v for %s", util.StringSlice(subnetInfo.subnets), util.StringSlice(subnets), name) + } + var ipams []ipallocator.Interface + for _, subnet := range subnets { + ipam, err := allocator.ipamFunc(subnet) + if err != nil { + return fmt.Errorf("failed to initialize IPAM of subnet %s for %s: %w", subnet, name, err) + } + ipams = append(ipams, ipam) + } + allocator.cache[name] = subnetInfo{ + subnets: subnets, + ipams: ipams, + } + + return nil +} + +// DeleteSubnet from the allocator +func (allocator *allocator) DeleteSubnet(name string) { + allocator.Lock() + defer allocator.Unlock() + delete(allocator.cache, name) +} + +// GetSubnets of a given subnet set +func (allocator *allocator) GetSubnets(name string) ([]*net.IPNet, error) { + allocator.RLock() + defer allocator.RUnlock() + subnetInfo, ok := allocator.cache[name] + // make a deep-copy of the underlying slice and return so that there is no + // resource contention + if ok { + subnets := make([]*net.IPNet, len(subnetInfo.subnets)) + for i, subnet := range subnetInfo.subnets { + subnet := *subnet + subnets[i] = &subnet + } + return subnets, nil + } + return nil, ErrSubnetNotFound +} + +// AllocateUntilFull used for unit testing only, allocates the rest of the subnet +func (allocator *allocator) AllocateUntilFull(name string) error { + allocator.RLock() + defer allocator.RUnlock() + subnetInfo, ok := allocator.cache[name] + if !ok { + return fmt.Errorf("failed to allocate IPs for subnet %s: %w", name, ErrSubnetNotFound) + } else if len(subnetInfo.ipams) == 0 { + return fmt.Errorf("failed to allocate IPs for subnet %s: has no IPAM", name) + } + var err error + for err != ipallocator.ErrFull { + for _, ipam := range subnetInfo.ipams { + _, err = ipam.AllocateNext() + } + } + return nil +} + +// AllocateIPs will block off IPs in the ipnets slice as already allocated +// for a given subnet set +func (allocator *allocator) AllocateIPs(name string, ips []*net.IPNet) error { + if len(ips) == 0 { + return fmt.Errorf("failed to allocate IPs for %s: no IPs provided", name) + } + allocator.RLock() + defer allocator.RUnlock() + subnetInfo, ok := allocator.cache[name] + if !ok { + return fmt.Errorf("failed to allocate IPs %v for %s: %w", util.StringSlice(ips), name, ErrSubnetNotFound) + } else if len(subnetInfo.ipams) == 0 { + return fmt.Errorf("failed to allocate IPs %v for subnet %s: has no IPAM", util.StringSlice(ips), name) + } + + var err error + allocated := make(map[int]*net.IPNet) + defer func() { + if err != nil { + // iterate over range of already allocated indices and release + // ips allocated before the error occurred. 
+ for relIdx, relIPNet := range allocated { + subnetInfo.ipams[relIdx].Release(relIPNet.IP) + if relIPNet.IP != nil { + klog.Warningf("Reserved IP %s was released for %s", relIPNet.IP, name) + } + } + } + }() + + for _, ipnet := range ips { + for idx, ipam := range subnetInfo.ipams { + cidr := ipam.CIDR() + if cidr.Contains(ipnet.IP) { + if _, ok = allocated[idx]; ok { + err = fmt.Errorf("failed to allocate IP %s for %s: attempted to reserve multiple IPs in the same IPAM instance", ipnet.IP, name) + return err + } + if err = ipam.Allocate(ipnet.IP); err != nil { + return err + } + allocated[idx] = ipnet + break + } + } + } + return nil +} + +// AllocateNextIPs allocates IP addresses from the given subnet set +func (allocator *allocator) AllocateNextIPs(name string) ([]*net.IPNet, error) { + allocator.RLock() + defer allocator.RUnlock() + var ipnets []*net.IPNet + var ip net.IP + var err error + subnetInfo, ok := allocator.cache[name] + + if !ok { + return nil, fmt.Errorf("failed to allocate new IPs for %s: %w", name, ErrSubnetNotFound) + } + + if len(subnetInfo.ipams) == 0 { + return nil, fmt.Errorf("failed to allocate new IPs for %s: has no IPAM", name) + } + + if len(subnetInfo.ipams) != len(subnetInfo.subnets) { + return nil, fmt.Errorf("failed to allocate new IPs for %s: number of subnets %d"+ + " don't match number of ipam instances %d", name, len(subnetInfo.subnets), len(subnetInfo.ipams)) + } + + defer func() { + if err != nil { + // iterate over range of already allocated indices and release + // ips allocated before the error occurred. + for relIdx, relIPNet := range ipnets { + subnetInfo.ipams[relIdx].Release(relIPNet.IP) + if relIPNet.IP != nil { + klog.Warningf("Reserved IP %s was released for %s", relIPNet.IP, name) + } + } + } + }() + + for idx, ipam := range subnetInfo.ipams { + ip, err = ipam.AllocateNext() + if err != nil { + return nil, err + } + ipnet := &net.IPNet{ + IP: ip, + Mask: subnetInfo.subnets[idx].Mask, + } + ipnets = append(ipnets, ipnet) + } + return ipnets, nil +} + +// ReleaseIPs marks the IPs in ipnets slice as available for allocation by +// releasing them from the IPAM pool of allocated IPs of the given subnet set. +// If there aren't IPs to release the method does not return an error. +func (allocator *allocator) ReleaseIPs(name string, ips []*net.IPNet) error { + allocator.RLock() + defer allocator.RUnlock() + if ips == nil || name == "" { + return nil + } + subnetInfo, ok := allocator.cache[name] + if !ok { + return fmt.Errorf("failed to release ips for %s: %w", name, ErrSubnetNotFound) + } + + for _, ipnet := range ips { + for _, ipam := range subnetInfo.ipams { + cidr := ipam.CIDR() + if cidr.Contains(ipnet.IP) { + ipam.Release(ipnet.IP) + break + } + } + } + return nil +} + +// ConditionalIPRelease determines if any IP is available to be released from an IPAM conditionally if func is true. 
+// It guarantees state of the allocator will not change while executing the predicate function +// TODO(trozet): add unit testing for this function +func (allocator *allocator) ConditionalIPRelease(name string, ips []*net.IPNet, predicate func() (bool, error)) (bool, error) { + allocator.RLock() + defer allocator.RUnlock() + if ips == nil || name == "" { + return false, nil + } + subnetInfo, ok := allocator.cache[name] + if !ok { + return false, nil + } + if len(subnetInfo.ipams) == 0 { + return false, nil + } + + // check if ipam has one of the ip addresses, and then execute the predicate function to determine + // if this IP should be released or not + for _, ipnet := range ips { + for _, ipam := range subnetInfo.ipams { + cidr := ipam.CIDR() + if cidr.Contains(ipnet.IP) { + if ipam.Has(ipnet.IP) { + return predicate() + } + } + } + } + + return false, nil +} diff --git a/go-controller/pkg/allocator/ip/subnet/allocator_test.go b/go-controller/pkg/allocator/ip/subnet/allocator_test.go new file mode 100644 index 0000000000..c6b5ae87b9 --- /dev/null +++ b/go-controller/pkg/allocator/ip/subnet/allocator_test.go @@ -0,0 +1,195 @@ +package subnet + +import ( + "testing" + + ipam "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" + ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" + + "github.com/onsi/ginkgo" + "github.com/onsi/gomega" +) + +var _ = ginkgo.Describe("Subnet IP allocator operations", func() { + var ( + allocator Allocator + ) + + ginkgo.BeforeEach(func() { + allocator = NewAllocator() + }) + + ginkgo.Context("when adding subnets", func() { + ginkgo.It("creates each IPAM and reserves IPs correctly", func() { + subnetName := "subnet1" + subnets := []string{ + "10.1.1.0/24", + "2000::/64", + } + + expectedIPs := []string{"10.1.1.1", "2000::1"} + + err := allocator.AddOrUpdateSubnet(subnetName, ovntest.MustParseIPNets(subnets...)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ips, err := allocator.AllocateNextIPs(subnetName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + for i, ip := range ips { + gomega.Expect(ip.IP.String()).To(gomega.Equal(expectedIPs[i])) + } + }) + + ginkgo.It("handles updates to the subnets correctly", func() { + subnetName := "subnet1" + subnets := []string{ + "10.1.1.0/24", + "2000::/64", + } + + expectedIPs := []string{"10.1.1.1", "2000::1"} + + err := allocator.AddOrUpdateSubnet(subnetName, ovntest.MustParseIPNets(subnets...)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ips, err := allocator.AllocateNextIPs(subnetName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + for i, ip := range ips { + gomega.Expect(ip.IP.String()).To(gomega.Equal(expectedIPs[i])) + } + subnets = []string{"10.1.2.0/24"} + expectedIPs = []string{"10.1.2.1"} + err = allocator.AddOrUpdateSubnet(subnetName, ovntest.MustParseIPNets(subnets...)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ips, err = allocator.AllocateNextIPs(subnetName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + for i, ip := range ips { + gomega.Expect(ip.IP.String()).To(gomega.Equal(expectedIPs[i])) + } + }) + }) + + ginkgo.Context("when allocating IP addresses", func() { + ginkgo.It("IPAM for each subnet allocates IPs contiguously", func() { + subnetName := "subnet1" + subnets := []string{ + "10.1.1.0/24", + "2000::/64", + } + + expectedIPAllocations := [][]string{ + {"10.1.1.1", "2000::1"}, + {"10.1.1.2", "2000::2"}, + } + + err := allocator.AddOrUpdateSubnet(subnetName, ovntest.MustParseIPNets(subnets...)) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + for _, expectedIPs := range expectedIPAllocations { + ips, err := allocator.AllocateNextIPs(subnetName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + for i, ip := range ips { + gomega.Expect(ip.IP.String()).To(gomega.Equal(expectedIPs[i])) + } + } + }) + + ginkgo.It("IPAM allocates, releases, and reallocates IPs correctly", func() { + subnetName := "subnet1" + subnets := []string{ + "10.1.1.0/24", + } + + expectedIPAllocations := [][]string{ + {"10.1.1.1"}, + {"10.1.1.2"}, + } + + err := allocator.AddOrUpdateSubnet(subnetName, ovntest.MustParseIPNets(subnets...)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + for _, expectedIPs := range expectedIPAllocations { + ips, err := allocator.AllocateNextIPs(subnetName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + for i, ip := range ips { + gomega.Expect(ip.IP.String()).To(gomega.Equal(expectedIPs[i])) + } + err = allocator.ReleaseIPs(subnetName, ips) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = allocator.AllocateIPs(subnetName, ips) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } + }) + + ginkgo.It("releases IPs for other subnets when any other subnet allocation fails", func() { + subnetName := "subnet1" + subnets := []string{ + "10.1.1.0/24", + "10.1.2.0/29", + } + + expectedIPAllocations := [][]string{ + {"10.1.1.1", "10.1.2.1"}, + {"10.1.1.2", "10.1.2.2"}, + {"10.1.1.3", "10.1.2.3"}, + {"10.1.1.4", "10.1.2.4"}, + {"10.1.1.5", "10.1.2.5"}, + } + + err := allocator.AddOrUpdateSubnet(subnetName, ovntest.MustParseIPNets(subnets...)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // exhaust valid ips in second subnet + for _, expectedIPs := range expectedIPAllocations { + ips, err := allocator.AllocateNextIPs(subnetName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + for i, ip := range ips { + gomega.Expect(ip.IP.String()).To(gomega.Equal(expectedIPs[i])) + } + } + ips, err := allocator.AllocateNextIPs(subnetName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + expectedIPAllocation := [][]string{ + {"10.1.1.6", "10.1.2.6"}, + } + for _, expectedIPs := range expectedIPAllocation { + for i, ip := range ips { + gomega.Expect(ip.IP.String()).To(gomega.Equal(expectedIPs[i])) + } + } + + // now try one more allocation and expect it to fail + ips, err = allocator.AllocateNextIPs(subnetName) + gomega.Expect(err).To(gomega.MatchError(ipam.ErrFull)) + gomega.Expect(ips).To(gomega.BeEmpty()) + }) + + ginkgo.It("fails correctly when trying to block a previously allocated IP", func() { + subnetName := "subnet1" + subnets := []string{ + "10.1.1.0/24", + "2000::/64", + } + + expectedIPs := []string{ + "10.1.1.1/24", + "2000::1/64", + } + + err := allocator.AddOrUpdateSubnet(subnetName, ovntest.MustParseIPNets(subnets...)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ips, err := allocator.AllocateNextIPs(subnetName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + for i, ip := range ips { + gomega.Expect(ip.String()).To(gomega.Equal(expectedIPs[i])) + } + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = allocator.AllocateIPs(subnetName, ovntest.MustParseIPNets(expectedIPs...)) + gomega.Expect(err).To(gomega.MatchError(ipam.ErrAllocated)) + }) + + }) + +}) + +func TestSubnetIPAllocator(t *testing.T) { + gomega.RegisterFailHandler(ginkgo.Fail) + ginkgo.RunSpecs(t, "Subnet IP allocator Operations Suite") +} diff --git a/go-controller/pkg/ovn/base_network_controller.go b/go-controller/pkg/ovn/base_network_controller.go index 1b6b770f24..dbe2e50eb9 
100644 --- a/go-controller/pkg/ovn/base_network_controller.go +++ b/go-controller/pkg/ovn/base_network_controller.go @@ -387,7 +387,7 @@ func (bnc *BaseNetworkController) createNodeLogicalSwitch(nodeName string, hostS } // Add the switch to the logical switch cache - return bnc.lsManager.AddSwitch(logicalSwitch.Name, hostSubnets) + return bnc.lsManager.AddOrUpdateSwitch(logicalSwitch.Name, hostSubnets) } // UpdateNodeAnnotationWithRetry update node's annotation with the given node annotations. diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go index 606c7998b1..1bf4e9a85b 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods.go +++ b/go-controller/pkg/ovn/base_network_controller_pods.go @@ -800,7 +800,7 @@ func (bnc *BaseNetworkController) assignPodAddresses(switchName string) (net.Har if !bnc.doesNetworkRequireIPAM() { klog.V(5).Infof("layer2 topology without subnet; will only generate the MAC address for the pod NIC") - mac, err := logicalswitchmanager.GenerateRandMAC() + mac, err := util.GenerateRandMAC() if err != nil { return nil, nil, err } diff --git a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go index 07ab52c1dd..28354facb3 100644 --- a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go @@ -298,7 +298,7 @@ func (oc *BaseSecondaryLayer2NetworkController) InitializeLogicalSwitch(switchNa return nil, fmt.Errorf("failed to create logical switch %+v: %v", logicalSwitch, err) } - if err = oc.lsManager.AddSwitch(switchName, hostSubnets); err != nil { + if err = oc.lsManager.AddOrUpdateSwitch(switchName, hostSubnets); err != nil { return nil, err } diff --git a/go-controller/pkg/ovn/egressgw_test.go b/go-controller/pkg/ovn/egressgw_test.go index e5d21de4a6..b4394ff12a 100644 --- a/go-controller/pkg/ovn/egressgw_test.go +++ b/go-controller/pkg/ovn/egressgw_test.go @@ -1044,7 +1044,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, ) t.populateLogicalSwitchCache(fakeOvn) - fakeOvn.controller.lsManager.AddSwitch("node2", []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) + fakeOvn.controller.lsManager.AddOrUpdateSwitch("node2", []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1206,7 +1206,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, ) t.populateLogicalSwitchCache(fakeOvn) - fakeOvn.controller.lsManager.AddSwitch("node2", []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) + fakeOvn.controller.lsManager.AddOrUpdateSwitch("node2", []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1374,7 +1374,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, ) t.populateLogicalSwitchCache(fakeOvn) - fakeOvn.controller.lsManager.AddSwitch("node2", []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) + fakeOvn.controller.lsManager.AddOrUpdateSwitch("node2", []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) injectNode(fakeOvn) err = fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1543,7 +1543,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { apbExternalRouteCRList, ) 
t.populateLogicalSwitchCache(fakeOvn) - fakeOvn.controller.lsManager.AddSwitch("node2", []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) + fakeOvn.controller.lsManager.AddOrUpdateSwitch("node2", []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1876,7 +1876,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, ) t.populateLogicalSwitchCache(fakeOvn) - fakeOvn.controller.lsManager.AddSwitch("node2", []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) + fakeOvn.controller.lsManager.AddOrUpdateSwitch("node2", []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -2009,7 +2009,7 @@ var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { }, ) t.populateLogicalSwitchCache(fakeOvn) - fakeOvn.controller.lsManager.AddSwitch("node2", []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) + fakeOvn.controller.lsManager.AddOrUpdateSwitch("node2", []*net.IPNet{ovntest.MustParseIPNet("10.128.2.0/24")}) injectNode(fakeOvn) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index b78c224240..8197d28b8f 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -3508,7 +3508,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Items: []v1.Pod{egressPod1}, }, ) - fakeOvn.controller.lsManager.AddSwitch(node1.Name, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) + fakeOvn.controller.lsManager.AddOrUpdateSwitch(node1.Name, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) err := fakeOvn.controller.WatchPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -3778,7 +3778,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, ) - fakeOvn.controller.lsManager.AddSwitch(node1.Name, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) + fakeOvn.controller.lsManager.AddOrUpdateSwitch(node1.Name, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) fakeOvn.controller.WatchPods() fakeOvn.controller.WatchEgressIPNamespaces() fakeOvn.controller.WatchEgressIPPods() @@ -4138,8 +4138,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, ) - fakeOvn.controller.lsManager.AddSwitch(node1.Name, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) - fakeOvn.controller.lsManager.AddSwitch(node2.Name, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) + fakeOvn.controller.lsManager.AddOrUpdateSwitch(node1.Name, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) + fakeOvn.controller.lsManager.AddOrUpdateSwitch(node2.Name, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) err := fakeOvn.controller.WatchPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPNamespaces() diff --git a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go index 378b591a69..cfc1eaa911 100644 --- a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go +++ b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go @@ -1,109 +1,54 @@ package logicalswitchmanager import ( - "crypto/rand" - "errors" "fmt" "net" - "reflect" - "sync" - allocator 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/bitmap" ipam "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip/subnet" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" - "k8s.io/klog/v2" - utilnet "k8s.io/utils/net" ) -// SwitchNotFound is used to inform the logical switch was not found in the cache -var SwitchNotFound = errors.New("switch not found") - -// logicalSwitchInfo contains information corresponding to the switch. It holds the -// subnet allocations (v4 and v6) as well as the IPAM allocator instances for each -// subnet managed for this switch -type logicalSwitchInfo struct { - hostSubnets []*net.IPNet - ipams []ipam.Interface - noHostSubnet bool -} - -type ipamFactoryFunc func(*net.IPNet) (ipam.Interface, error) +var SwitchNotFound = subnet.ErrSubnetNotFound // LogicalSwitchManager provides switch info management APIs including IPAM for the host subnets type LogicalSwitchManager struct { - cache map[string]logicalSwitchInfo - // A RW mutex for LogicalSwitchManager which holds logicalSwitch information - sync.RWMutex - ipamFunc ipamFactoryFunc + allocator subnet.Allocator + reserveIPs bool } -// NewIPAMAllocator provides an ipam interface which can be used for IPAM -// allocations for a given cidr using a contiguous allocation strategy. -// It also pre-allocates certain special subnet IPs such as the .1, .2, and .3 -// addresses as reserved. -func NewIPAMAllocator(cidr *net.IPNet) (ipam.Interface, error) { - subnetRange, err := ipam.NewAllocatorCIDRRange(cidr, func(max int, rangeSpec string) (allocator.Interface, error) { - return allocator.NewRoundRobinAllocationMap(max, rangeSpec), nil - }) - if err != nil { - return nil, err - } - if err := reserveIPs(cidr, subnetRange); err != nil { - klog.Errorf("Failed reserving IPs for subnet %s, err: %v", cidr, err) - return nil, err - } - return subnetRange, nil -} - -// Helper function to reserve certain subnet IPs as special -// These are the .1, .2 and .3 addresses in particular -func reserveIPs(subnet *net.IPNet, ipam ipam.Interface) error { - gwIfAddr := util.GetNodeGatewayIfAddr(subnet) - err := ipam.Allocate(gwIfAddr.IP) - if err != nil { - klog.Errorf("Unable to allocate subnet's gateway IP: %s", gwIfAddr.IP) - return err - } - mgmtIfAddr := util.GetNodeManagementIfAddr(subnet) - err = ipam.Allocate(mgmtIfAddr.IP) - if err != nil { - klog.Errorf("Unable to allocate subnet's management IP: %s", mgmtIfAddr.IP) - return err +// Initializes a new logical switch manager for L3 networks +func NewLogicalSwitchManager() *LogicalSwitchManager { + return &LogicalSwitchManager{ + allocator: subnet.NewAllocator(), + reserveIPs: true, } - return nil } -// Initializes a new logical switch manager -func NewLogicalSwitchManager() *LogicalSwitchManager { +func NewL2SwitchManager() *LogicalSwitchManager { return &LogicalSwitchManager{ - cache: make(map[string]logicalSwitchInfo), - RWMutex: sync.RWMutex{}, - ipamFunc: NewIPAMAllocator, + allocator: subnet.NewAllocator(), } } -// AddSwitch adds/updates a switch to the logical switch manager for subnet -// and IPAM management. 
-func (manager *LogicalSwitchManager) AddSwitch(switchName string, hostSubnets []*net.IPNet) error { - manager.Lock() - defer manager.Unlock() - if lsi, ok := manager.cache[switchName]; ok && !reflect.DeepEqual(lsi.hostSubnets, hostSubnets) { - klog.Warningf("Logical switch %s already in cache with subnet %s; replacing with %s", switchName, - util.JoinIPNets(lsi.hostSubnets, ","), util.JoinIPNets(hostSubnets, ",")) +// AddOrUpdateSwitch adds/updates a switch to the logical switch manager for +// subnet and IPAM management. +func (manager *LogicalSwitchManager) AddOrUpdateSwitch(switchName string, hostSubnets []*net.IPNet) error { + err := manager.allocator.AddOrUpdateSubnet(switchName, hostSubnets) + if err != nil { + return err } - var ipams []ipam.Interface - for _, subnet := range hostSubnets { - ipam, err := manager.ipamFunc(subnet) - if err != nil { - klog.Errorf("IPAM for subnet %s was not initialized for switch %q", subnet, switchName) - return err + if manager.reserveIPs { + for _, hostSubnet := range hostSubnets { + err = manager.allocator.AllocateIPs(switchName, []*net.IPNet{util.GetNodeGatewayIfAddr(hostSubnet)}) + if err != nil { + return err + } + err = manager.allocator.AllocateIPs(switchName, []*net.IPNet{util.GetNodeManagementIfAddr(hostSubnet)}) + if err != nil { + return err + } } - ipams = append(ipams, ipam) - } - manager.cache[switchName] = logicalSwitchInfo{ - hostSubnets: hostSubnets, - ipams: ipams, - noHostSubnet: len(hostSubnets) == 0, } return nil @@ -115,215 +60,82 @@ func (manager *LogicalSwitchManager) AddNoHostSubnetSwitch(switchName string) er // setting the hostSubnets slice argument to nil in the cache means an object // exists for the switch but it was not assigned a hostSubnet by ovn-kubernetes // this will be true for switches created on nodes that are marked as host-subnet only. 
- return manager.AddSwitch(switchName, nil) + return manager.allocator.AddOrUpdateSubnet(switchName, nil) } // Remove a switch from the the logical switch manager func (manager *LogicalSwitchManager) DeleteSwitch(switchName string) { - manager.Lock() - defer manager.Unlock() - delete(manager.cache, switchName) + manager.allocator.DeleteSubnet(switchName) } // Given a switch name, checks if the switch is a noHostSubnet switch func (manager *LogicalSwitchManager) IsNonHostSubnetSwitch(switchName string) bool { - manager.RLock() - defer manager.RUnlock() - lsi, ok := manager.cache[switchName] - return ok && lsi.noHostSubnet + subnets, err := manager.allocator.GetSubnets(switchName) + return err == nil && len(subnets) == 0 } // Given a switch name, get all its host-subnets func (manager *LogicalSwitchManager) GetSwitchSubnets(switchName string) []*net.IPNet { - manager.RLock() - defer manager.RUnlock() - lsi, ok := manager.cache[switchName] - // make a deep-copy of the underlying slice and return so that there is no - // resource contention - if ok && len(lsi.hostSubnets) > 0 { - subnets := make([]*net.IPNet, len(lsi.hostSubnets)) - for i, hsn := range lsi.hostSubnets { - subnet := *hsn - subnets[i] = &subnet - } - return subnets - } - return nil + subnets, _ := manager.allocator.GetSubnets(switchName) + return subnets } // AllocateUntilFull used for unit testing only, allocates the rest of the switch subnet func (manager *LogicalSwitchManager) AllocateUntilFull(switchName string) error { - manager.RLock() - defer manager.RUnlock() - lsi, ok := manager.cache[switchName] - if !ok { - return fmt.Errorf("unable to allocate IPs for switch: %s: %w", switchName, SwitchNotFound) - } else if len(lsi.ipams) == 0 { - return fmt.Errorf("unable to allocate IPs for switch: %s because logical switch manager has no IPAM", switchName) - } - var err error - for err != ipam.ErrFull { - for _, ipam := range lsi.ipams { - _, err = ipam.AllocateNext() - } - } - return nil + return manager.allocator.AllocateUntilFull(switchName) } // AllocateIPs will block off IPs in the ipnets slice as already allocated // for a given switch func (manager *LogicalSwitchManager) AllocateIPs(switchName string, ipnets []*net.IPNet) error { - if len(ipnets) == 0 { - return fmt.Errorf("unable to allocate empty IPs") - } - manager.RLock() - defer manager.RUnlock() - lsi, ok := manager.cache[switchName] - if !ok { - return fmt.Errorf("unable to allocate IPs: %v for switch %s: %w", ipnets, switchName, SwitchNotFound) - } else if len(lsi.ipams) == 0 { - return fmt.Errorf("unable to allocate IPs: %v for switch: %s: logical switch manager has no IPAM", - ipnets, switchName) - - } - - var err error - allocated := make(map[int]*net.IPNet) - defer func() { - if err != nil { - // iterate over range of already allocated indices and release - // ips allocated before the error occurred. 
- for relIdx, relIPNet := range allocated { - lsi.ipams[relIdx].Release(relIPNet.IP) - if relIPNet.IP != nil { - klog.Warningf("Reserved IP: %s was released", relIPNet.IP.String()) - } - } - } - }() - - for _, ipnet := range ipnets { - for idx, ipam := range lsi.ipams { - cidr := ipam.CIDR() - if cidr.Contains(ipnet.IP) { - if _, ok = allocated[idx]; ok { - err = fmt.Errorf("error attempting to reserve multiple IPs in the same IPAM instance") - return err - } - if err = ipam.Allocate(ipnet.IP); err != nil { - return err - } - allocated[idx] = ipnet - break - } - } - } - return nil + return manager.allocator.AllocateIPs(switchName, ipnets) } // AllocateNextIPs allocates IP addresses from each of the host subnets // for a given switch func (manager *LogicalSwitchManager) AllocateNextIPs(switchName string) ([]*net.IPNet, error) { - manager.RLock() - defer manager.RUnlock() - var ipnets []*net.IPNet - var ip net.IP - var err error - lsi, ok := manager.cache[switchName] - - if !ok { - return nil, fmt.Errorf("failed to allocate IPs for switch %s: %w", switchName, SwitchNotFound) - } - - if len(lsi.ipams) == 0 { - return nil, fmt.Errorf("failed to allocate IPs for switch %s because there is no IPAM instance", switchName) - } - - if len(lsi.ipams) != len(lsi.hostSubnets) { - return nil, fmt.Errorf("failed to allocate IPs for switch %s because host subnet instances: %d"+ - " don't match ipam instances: %d", switchName, len(lsi.hostSubnets), len(lsi.ipams)) - } - - defer func() { - if err != nil { - // iterate over range of already allocated indices and release - // ips allocated before the error occurred. - for relIdx, relIPNet := range ipnets { - lsi.ipams[relIdx].Release(relIPNet.IP) - if relIPNet.IP != nil { - klog.Warningf("Reserved IP: %s was released", relIPNet.IP.String()) - } - } - } - }() - - for idx, ipam := range lsi.ipams { - ip, err = ipam.AllocateNext() - if err != nil { - return nil, err - } - ipnet := &net.IPNet{ - IP: ip, - Mask: lsi.hostSubnets[idx].Mask, - } - ipnets = append(ipnets, ipnet) - } - return ipnets, nil + return manager.allocator.AllocateNextIPs(switchName) } func (manager *LogicalSwitchManager) AllocateHybridOverlay(switchName string, hybridOverlayAnnotation []string) ([]*net.IPNet, error) { + var err error + var allocatedAddresses []*net.IPNet + if len(hybridOverlayAnnotation) > 0 { - var allocateAddresses []*net.IPNet for _, ip := range hybridOverlayAnnotation { - allocateAddresses = append(allocateAddresses, &net.IPNet{IP: net.ParseIP(ip).To4(), Mask: net.CIDRMask(32, 32)}) + allocatedAddresses = append(allocatedAddresses, &net.IPNet{IP: net.ParseIP(ip).To4(), Mask: net.CIDRMask(32, 32)}) } // attempt to allocate the IP address that is annotated on the node. 
The only way there would be a collision is if the annotations of podIP or hybridOverlayDRIP // where manually edited and we do not support that - err := manager.AllocateIPs(switchName, allocateAddresses) + err = manager.AllocateIPs(switchName, allocatedAddresses) if err != nil && err != ipam.ErrAllocated { return nil, err } - return allocateAddresses, nil + return allocatedAddresses, nil } - // if we are not provided with any addresses - manager.RLock() - defer manager.RUnlock() - lsi, ok := manager.cache[switchName] - if !ok { - return nil, fmt.Errorf("unable to allocate hybrid overlay for switch %s: %w", switchName, SwitchNotFound) + // if we are not provided with any addresses, try to allocate the well known address + hostSubnets := manager.GetSwitchSubnets(switchName) + for _, hostSubnet := range hostSubnets { + allocatedAddresses = append(allocatedAddresses, util.GetNodeHybridOverlayIfAddr(hostSubnet)) } - // determine if ipams are ipv4 - var ipv4IPAMS []ipam.Interface - for _, ipam := range lsi.ipams { - if utilnet.IsIPv4(ipam.CIDR().IP) { - ipv4IPAMS = append(ipv4IPAMS, ipam) - } + err = manager.AllocateIPs(switchName, allocatedAddresses) + if err != nil && err != ipam.ErrAllocated { + return nil, fmt.Errorf("cannot allocate hybrid overlay interface addresses %s for switch %s: %w", + util.StringSlice(allocatedAddresses), + switchName, + err) } - var allocatedAddresses []*net.IPNet - for _, ipv4IPAM := range ipv4IPAMS { - hostSubnet := ipv4IPAM.CIDR() - potentialHybridIFAddress := util.GetNodeHybridOverlayIfAddr(&hostSubnet) - err := ipv4IPAM.Allocate(potentialHybridIFAddress.IP) - if err == ipam.ErrAllocated { - // allocate NextIP - allocatedipv4, err := ipv4IPAM.AllocateNext() - if err != nil { - _ = manager.ReleaseIPs(switchName, allocatedAddresses) - return nil, fmt.Errorf("cannot allocate hybrid overlay interface address for switch/subnet %s/%s (%+v)", switchName, hostSubnet, err) - } - if err == nil { - allocatedAddresses = append(allocatedAddresses, &net.IPNet{IP: allocatedipv4.To4(), Mask: net.CIDRMask(32, 32)}) + // otherwise try to allocate any IP + if err == ipam.ErrAllocated { + allocatedAddresses, err = manager.AllocateNextIPs(switchName) + } - } - } else if err != nil { - _ = manager.ReleaseIPs(switchName, allocatedAddresses) - return nil, fmt.Errorf("cannot allocate hybrid overlay interface address for switch/subnet %s/%s (%+v)", switchName, hostSubnet, err) - } else { - allocatedAddresses = append(allocatedAddresses, &net.IPNet{IP: potentialHybridIFAddress.IP.To4(), Mask: net.CIDRMask(32, 32)}) - } + if err != nil { + return nil, fmt.Errorf("cannot allocate new hybrid overlay interface addresses for switch %s: %w", switchName, err) } + return allocatedAddresses, nil } @@ -331,95 +143,12 @@ func (manager *LogicalSwitchManager) AllocateHybridOverlay(switchName string, hy // by releasing them from the IPAM pool of allocated IPs. // If there aren't IPs to release the method does not return an error. 
func (manager *LogicalSwitchManager) ReleaseIPs(switchName string, ipnets []*net.IPNet) error { - manager.RLock() - defer manager.RUnlock() - if ipnets == nil || switchName == "" { - klog.V(5).Infof("Switch name is empty or ip slice to release is nil") - return nil - } - lsi, ok := manager.cache[switchName] - if !ok { - return fmt.Errorf("unable to release ips for switch %s: %w", switchName, SwitchNotFound) - } - - for _, ipnet := range ipnets { - for _, ipam := range lsi.ipams { - cidr := ipam.CIDR() - if cidr.Contains(ipnet.IP) { - ipam.Release(ipnet.IP) - break - } - } - } - return nil + return manager.allocator.ReleaseIPs(switchName, ipnets) } // ConditionalIPRelease determines if any IP is available to be released from an IPAM conditionally if func is true. // It guarantees state of the allocator will not change while executing the predicate function // TODO(trozet): add unit testing for this function func (manager *LogicalSwitchManager) ConditionalIPRelease(switchName string, ipnets []*net.IPNet, predicate func() (bool, error)) (bool, error) { - manager.RLock() - defer manager.RUnlock() - if ipnets == nil || switchName == "" { - klog.V(5).Infof("Switch name is empty or ip slice to release is nil") - return false, nil - } - lsi, ok := manager.cache[switchName] - if !ok { - return false, nil - } - if len(lsi.ipams) == 0 { - return false, nil - } - - // check if ipam has one of the ip addresses, and then execute the predicate function to determine - // if this IP should be released or not - for _, ipnet := range ipnets { - for _, ipam := range lsi.ipams { - cidr := ipam.CIDR() - if cidr.Contains(ipnet.IP) { - if ipam.Has(ipnet.IP) { - return predicate() - } - } - } - } - - return false, nil -} - -// NewL2SwitchManager initializes a new layer2 logical switch manager, -// only manage subnet for the one specified switch -func NewL2SwitchManager() *LogicalSwitchManager { - return &LogicalSwitchManager{ - cache: make(map[string]logicalSwitchInfo), - RWMutex: sync.RWMutex{}, - ipamFunc: NewL2IPAMAllocator, - } -} - -// NewLayer2IPAMAllocator provides an ipam interface which can be used for layer2 switch IPAM -// allocations for the specified cidr using a contiguous allocation strategy. -func NewL2IPAMAllocator(cidr *net.IPNet) (ipam.Interface, error) { - subnetRange, err := ipam.NewAllocatorCIDRRange(cidr, func(max int, rangeSpec string) (allocator.Interface, error) { - return allocator.NewRoundRobinAllocationMap(max, rangeSpec), nil - }) - if err != nil { - return nil, err - } - return subnetRange, nil -} - -// GenerateRandMAC generates a random unicast and locally administered MAC address. 
-// LOOTED FROM https://github.com/cilium/cilium/blob/v1.12.6/pkg/mac/mac.go#L106 -func GenerateRandMAC() (net.HardwareAddr, error) { - buf := make([]byte, 6) - if _, err := rand.Read(buf); err != nil { - return nil, fmt.Errorf("unable to retrieve 6 rnd bytes: %s", err) - } - - // Set locally administered addresses bit and reset multicast bit - buf[0] = (buf[0] | 0x02) & 0xfe - - return buf, nil + return manager.allocator.ConditionalIPRelease(switchName, ipnets, predicate) } diff --git a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager_test.go b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager_test.go index f72870b437..d8998fee7a 100644 --- a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager_test.go +++ b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager_test.go @@ -4,9 +4,9 @@ import ( "net" "github.com/urfave/cli/v2" - "k8s.io/klog/v2" utilnet "k8s.io/utils/net" + ipallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" @@ -16,19 +16,7 @@ import ( // test function that returns if an IP address is allocated func (manager *LogicalSwitchManager) isAllocatedIP(switchName, ip string) bool { - manager.RLock() - defer manager.RUnlock() - - lsi, ok := manager.cache[switchName] - if !ok { - return false - } - for _, ipam := range lsi.ipams { - if ipam.Has(net.ParseIP(ip)) { - return true - } - } - return false + return manager.AllocateIPs(switchName, []*net.IPNet{ovntest.MustParseIPNet(ip)}) == ipallocator.ErrAllocated } // AllocateNextIPv4s will allocate the next IPv4 addresses from each of the host subnets @@ -77,35 +65,6 @@ var _ = ginkgo.Describe("OVN Logical Switch Manager operations", func() { }) ginkgo.Context("when adding node", func() { - ginkgo.It("creates IPAM for each subnet and reserves IPs correctly", func() { - app.Action = func(ctx *cli.Context) error { - _, err := config.InitConfig(ctx, fexec, nil) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - testNode := testNodeSubnetData{ - switchName: "testNode1", - subnets: []string{ - "10.1.1.0/24", - "2000::/64", - }, - } - - expectedIPs := []string{"10.1.1.3", "2000::3"} - - err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - ips, err := lsManager.AllocateNextIPs(testNode.switchName) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - for i, ip := range ips { - gomega.Expect(ip.IP.String()).To(gomega.Equal(expectedIPs[i])) - } - - return nil - } - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) ginkgo.It("creates IPAM for each subnet and reserves IPs correctly when HybridOverlay is enabled and address is passed", func() { app.Action = func(ctx *cli.Context) error { _, err := config.InitConfig(ctx, fexec, nil) @@ -118,12 +77,12 @@ var _ = ginkgo.Describe("OVN Logical Switch Manager operations", func() { "2000::/64", }, } - err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) + err = lsManager.AddOrUpdateSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) gomega.Expect(err).NotTo(gomega.HaveOccurred()) allocatedHybridOverlayDRIP, err := lsManager.AllocateHybridOverlay(testNode.switchName, []string{"10.1.1.53"}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) 
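				// the hybrid overlay address annotated on the node (10.1.1.53) should come back unchanged and stay marked as allocated in the switch IPAM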
gomega.Expect(net.ParseIP("10.1.1.53").To4()).To(gomega.Equal(allocatedHybridOverlayDRIP[0].IP)) - gomega.Expect(true).To(gomega.Equal(lsManager.isAllocatedIP(testNode.switchName, "10.1.1.53"))) + gomega.Expect(true).To(gomega.Equal(lsManager.isAllocatedIP(testNode.switchName, "10.1.1.53/32"))) return nil } @@ -143,14 +102,14 @@ var _ = ginkgo.Describe("OVN Logical Switch Manager operations", func() { }, } - err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) + err = lsManager.AddOrUpdateSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) gomega.Expect(err).NotTo(gomega.HaveOccurred()) allocatedHybridOverlayDRIP, err := lsManager.AllocateHybridOverlay(testNode.switchName, []string{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) gomega.Expect(net.ParseIP("10.1.1.3").To4()).To(gomega.Equal(allocatedHybridOverlayDRIP[0].IP)) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(true).To(gomega.Equal(lsManager.isAllocatedIP(testNode.switchName, "10.1.1.3"))) + gomega.Expect(true).To(gomega.Equal(lsManager.isAllocatedIP(testNode.switchName, "10.1.1.3/32"))) return nil } @@ -170,7 +129,7 @@ var _ = ginkgo.Describe("OVN Logical Switch Manager operations", func() { }, } - err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) + err = lsManager.AddOrUpdateSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = lsManager.AllocateIPs(testNode.switchName, []*net.IPNet{ {IP: net.ParseIP("10.1.1.3").To4(), Mask: net.CIDRMask(32, 32)}, @@ -179,221 +138,15 @@ var _ = ginkgo.Describe("OVN Logical Switch Manager operations", func() { allocatedHybridOverlayDRIP, err := lsManager.AllocateHybridOverlay(testNode.switchName, []string{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) // 10.1.1.4 is the next ip address - gomega.Expect(net.ParseIP("10.1.1.4").To4()).To(gomega.Equal(allocatedHybridOverlayDRIP[0].IP)) - - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(true).To(gomega.Equal(lsManager.isAllocatedIP(testNode.switchName, "10.1.1.3"))) - - return nil - } - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("manages no host subnet nodes correctly", func() { - app.Action = func(ctx *cli.Context) error { - _, err := config.InitConfig(ctx, fexec, nil) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - testNode := testNodeSubnetData{ - switchName: "testNode1", - subnets: []string{}, - } - - err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - noHostSubnet := lsManager.IsNonHostSubnetSwitch(testNode.switchName) - gomega.Expect(noHostSubnet).To(gomega.BeTrue()) - return nil - } - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("handles updates to the host subnets correctly", func() { - app.Action = func(ctx *cli.Context) error { - _, err := config.InitConfig(ctx, fexec, nil) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - testNode := testNodeSubnetData{ - switchName: "testNode1", - subnets: []string{ - "10.1.1.0/24", - "2000::/64", - }, - } - - expectedIPs := []string{"10.1.1.3", "2000::3"} - - err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - ips, err := lsManager.AllocateNextIPs(testNode.switchName) - 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) - for i, ip := range ips { - gomega.Expect(ip.IP.String()).To(gomega.Equal(expectedIPs[i])) - } - testNode.subnets = []string{"10.1.2.0/24"} - expectedIPs = []string{"10.1.2.3"} - err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - ips, err = lsManager.AllocateNextIPs(testNode.switchName) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - for i, ip := range ips { - gomega.Expect(ip.IP.String()).To(gomega.Equal(expectedIPs[i])) - } - return nil - } - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - }) + gomega.Expect("10.1.1.4").To(gomega.Equal(allocatedHybridOverlayDRIP[0].IP.String())) - ginkgo.Context("when allocating IP addresses", func() { - ginkgo.It("IPAM for each subnet allocates IPs contiguously", func() { - app.Action = func(ctx *cli.Context) error { - _, err := config.InitConfig(ctx, fexec, nil) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - testNode := testNodeSubnetData{ - switchName: "testNode1", - subnets: []string{ - "10.1.1.0/24", - "2000::/64", - }, - } - - expectedIPAllocations := [][]string{ - {"10.1.1.3", "2000::3"}, - {"10.1.1.4", "2000::4"}, - } + gomega.Expect(true).To(gomega.Equal(lsManager.isAllocatedIP(testNode.switchName, "10.1.1.3/32"))) - err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - for _, expectedIPs := range expectedIPAllocations { - ips, err := lsManager.AllocateNextIPs(testNode.switchName) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - for i, ip := range ips { - gomega.Expect(ip.IP.String()).To(gomega.Equal(expectedIPs[i])) - } - } return nil } err := app.Run([]string{app.Name}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - - ginkgo.It("IPAM allocates, releases, and reallocates IPs correctly", func() { - app.Action = func(ctx *cli.Context) error { - _, err := config.InitConfig(ctx, fexec, nil) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - testNode := testNodeSubnetData{ - switchName: "testNode1", - subnets: []string{ - "10.1.1.0/24", - }, - } - - expectedIPAllocations := [][]string{ - {"10.1.1.3"}, - {"10.1.1.4"}, - } - - err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - for _, expectedIPs := range expectedIPAllocations { - ips, err := lsManager.AllocateNextIPs(testNode.switchName) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - for i, ip := range ips { - gomega.Expect(ip.IP.String()).To(gomega.Equal(expectedIPs[i])) - } - err = lsManager.ReleaseIPs(testNode.switchName, ips) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = lsManager.AllocateIPs(testNode.switchName, ips) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - } - return nil - } - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("releases IPs for other host subnet nodes when any host subnets allocation fails", func() { - app.Action = func(ctx *cli.Context) error { - _, err := config.InitConfig(ctx, fexec, nil) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - testNode := testNodeSubnetData{ - switchName: "testNode1", - subnets: []string{ - "10.1.1.0/24", - "10.1.2.0/29", - }, - } - config.HybridOverlay.Enabled = true - expectedIPAllocations := [][]string{ - {"10.1.1.3", "10.1.2.3"}, - {"10.1.1.4", "10.1.2.4"}, - {"10.1.1.5", 
"10.1.2.5"}, - } - - err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // exhaust valid ips in second subnet - for _, expectedIPs := range expectedIPAllocations { - ips, err := lsManager.AllocateNextIPs(testNode.switchName) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - for i, ip := range ips { - gomega.Expect(ip.IP.String()).To(gomega.Equal(expectedIPs[i])) - } - } - ips, err := lsManager.AllocateNextIPv4s(testNode.switchName) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedIPAllocation := [][]string{ - {"10.1.1.6", "10.1.2.6"}, - } - for _, expectedIPs := range expectedIPAllocation { - for i, ip := range ips { - gomega.Expect(ip.IP.String()).To(gomega.Equal(expectedIPs[i])) - } - } - - // now try one more allocation and expect it to fail - ips, err = lsManager.AllocateNextIPs(testNode.switchName) - gomega.Expect(err).To(gomega.HaveOccurred()) - gomega.Expect(len(ips)).To(gomega.Equal(0)) - return nil - } - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("fails correctly when trying to block a previously allocated IP", func() { - app.Action = func(ctx *cli.Context) error { - _, err := config.InitConfig(ctx, fexec, nil) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - testNode := testNodeSubnetData{ - switchName: "testNode1", - subnets: []string{ - "10.1.1.0/24", - "2000::/64", - }, - } - - allocatedIPs := []string{ - "10.1.1.2/24", - "2000::2/64", - } - allocatedIPNets := ovntest.MustParseIPNets(allocatedIPs...) - err = lsManager.AddSwitch(testNode.switchName, ovntest.MustParseIPNets(testNode.subnets...)) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = lsManager.AllocateIPs(testNode.switchName, allocatedIPNets) - klog.Errorf("Error: %v", err) - gomega.Expect(err).To(gomega.HaveOccurred()) - return nil - } - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - }) - }) diff --git a/go-controller/pkg/ovn/multipolicy_test.go b/go-controller/pkg/ovn/multipolicy_test.go index 3067139407..7f053971db 100644 --- a/go-controller/pkg/ovn/multipolicy_test.go +++ b/go-controller/pkg/ovn/multipolicy_test.go @@ -141,13 +141,13 @@ func (p testPod) populateSecondaryNetworkLogicalSwitchCache(fakeOvn *FakeOVN, oc switch ocInfo.bnc.TopologyType() { case ovntypes.Layer3Topology: podInfo := p.secondaryPodInfos[ocInfo.bnc.GetNetworkName()] - err = ocInfo.bnc.lsManager.AddSwitch(ocInfo.bnc.GetNetworkScopedName(p.nodeName), []*net.IPNet{ovntest.MustParseIPNet(podInfo.nodeSubnet)}) + err = ocInfo.bnc.lsManager.AddOrUpdateSwitch(ocInfo.bnc.GetNetworkScopedName(p.nodeName), []*net.IPNet{ovntest.MustParseIPNet(podInfo.nodeSubnet)}) case ovntypes.Layer2Topology: subnet := ocInfo.bnc.Subnets()[0] - err = ocInfo.bnc.lsManager.AddSwitch(ocInfo.bnc.GetNetworkScopedName(ovntypes.OVNLayer2Switch), []*net.IPNet{subnet.CIDR}) + err = ocInfo.bnc.lsManager.AddOrUpdateSwitch(ocInfo.bnc.GetNetworkScopedName(ovntypes.OVNLayer2Switch), []*net.IPNet{subnet.CIDR}) case ovntypes.LocalnetTopology: subnet := ocInfo.bnc.Subnets()[0] - err = ocInfo.bnc.lsManager.AddSwitch(ocInfo.bnc.GetNetworkScopedName(ovntypes.OVNLocalnetSwitch), []*net.IPNet{subnet.CIDR}) + err = ocInfo.bnc.lsManager.AddOrUpdateSwitch(ocInfo.bnc.GetNetworkScopedName(ovntypes.OVNLocalnetSwitch), []*net.IPNet{subnet.CIDR}) } gomega.Expect(err).NotTo(gomega.HaveOccurred()) } diff --git a/go-controller/pkg/ovn/pods_test.go 
b/go-controller/pkg/ovn/pods_test.go index 9040d01d8d..9f2c5ba638 100644 --- a/go-controller/pkg/ovn/pods_test.go +++ b/go-controller/pkg/ovn/pods_test.go @@ -204,7 +204,7 @@ func newTPod(nodeName, nodeSubnet, nodeMgtIP, nodeGWIP, podName, podIP, podMAC, func (p testPod) populateLogicalSwitchCache(fakeOvn *FakeOVN) { gomega.Expect(p.nodeName).NotTo(gomega.Equal("")) - err := fakeOvn.controller.lsManager.AddSwitch(p.nodeName, []*net.IPNet{ovntest.MustParseIPNet(p.nodeSubnet)}) + err := fakeOvn.controller.lsManager.AddOrUpdateSwitch(p.nodeName, []*net.IPNet{ovntest.MustParseIPNet(p.nodeSubnet)}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) } @@ -1731,7 +1731,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { }, ) - fakeOvn.controller.lsManager.AddSwitch(testNode.Name, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) + fakeOvn.controller.lsManager.AddOrUpdateSwitch(testNode.Name, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) err := fakeOvn.controller.WatchNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchPods() @@ -1962,7 +1962,7 @@ var _ = ginkgo.Describe("OVN Pod Operations", func() { Items: []v1.Pod{}, }, ) - fakeOvn.controller.lsManager.AddSwitch(testNodeWithLS.Name, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) + fakeOvn.controller.lsManager.AddOrUpdateSwitch(testNodeWithLS.Name, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) err := fakeOvn.controller.WatchPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) // expect stale logical switch port removed if reconciliation is successful diff --git a/go-controller/pkg/util/net.go b/go-controller/pkg/util/net.go index 9ae3623a52..841ef143c5 100644 --- a/go-controller/pkg/util/net.go +++ b/go-controller/pkg/util/net.go @@ -1,6 +1,7 @@ package util import ( + "crypto/rand" "crypto/sha256" "errors" "fmt" @@ -302,3 +303,17 @@ func ContainsCIDR(ipnet1, ipnet2 *net.IPNet) bool { mask2, _ := ipnet2.Mask.Size() return mask1 <= mask2 && ipnet1.Contains(ipnet2.IP) } + +// GenerateRandMAC generates a random unicast and locally administered MAC address. 
+// LOOTED FROM https://github.com/cilium/cilium/blob/v1.12.6/pkg/mac/mac.go#L106 +func GenerateRandMAC() (net.HardwareAddr, error) { + buf := make([]byte, 6) + if _, err := rand.Read(buf); err != nil { + return nil, fmt.Errorf("unable to retrieve 6 rnd bytes: %s", err) + } + + // Set locally administered addresses bit and reset multicast bit + buf[0] = (buf[0] | 0x02) & 0xfe + + return buf, nil +} diff --git a/go-controller/pkg/util/util.go b/go-controller/pkg/util/util.go index a91be09533..99ca0b8cde 100644 --- a/go-controller/pkg/util/util.go +++ b/go-controller/pkg/util/util.go @@ -431,3 +431,13 @@ func GetNBZone(nbClient libovsdbclient.Client) (string, error) { return nbGlobal.Name, nil } + +// StringSlice converts to a slice of the string representation of the input +// items +func StringSlice[T fmt.Stringer](items []T) []string { + s := make([]string, len(items)) + for i := range items { + s[i] = items[i].String() + } + return s +} From 18f9dd6e32be39d6911e8146647291cab3af3930 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Thu, 25 May 2023 11:51:04 +0000 Subject: [PATCH 10/31] Pre-allocate subnets in subnet ip allocator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the ability to pre-allocate subnets to the subnet ip allocator as it is a common need: we use this for a variety of pruposes, including reserving well known addresses in the default network or L3 secondary networks, as well as reserving user requested address for L2 secondary networks. Note that this is just a pre-allocation. The subnet ip allocator does not distinguish between pre-allocated IPs vs normally allocated IPs and will return the same ErrAllocated error on re-allocation. In the process, refactor GetIPFullMask to be based on net.IPMask and avoided parsing in a few places. Signed-off-by: Jaime Caamaño Ruiz --- .../pkg/allocator/ip/subnet/allocator.go | 36 +++++++++++++++++-- .../pkg/allocator/ip/subnet/allocator_test.go | 22 ++++++++++++ ...ase_secondary_layer2_network_controller.go | 16 +-------- .../ovn/controller/apbroute/network_client.go | 26 ++++++-------- go-controller/pkg/ovn/egressgw.go | 25 +++++-------- go-controller/pkg/ovn/egressip.go | 8 ++--- go-controller/pkg/ovn/gateway_init.go | 2 +- .../logical_switch_manager.go | 20 +++++------ .../ovn/zone_interconnect/zone_ic_handler.go | 2 +- go-controller/pkg/util/net.go | 12 +++++++ go-controller/pkg/util/pod_annotation.go | 2 +- .../pkg/util/pod_annotation_unit_test.go | 2 +- go-controller/pkg/util/util.go | 20 ++++------- 13 files changed, 110 insertions(+), 83 deletions(-) diff --git a/go-controller/pkg/allocator/ip/subnet/allocator.go b/go-controller/pkg/allocator/ip/subnet/allocator.go index 469144703f..5d2d98dfab 100644 --- a/go-controller/pkg/allocator/ip/subnet/allocator.go +++ b/go-controller/pkg/allocator/ip/subnet/allocator.go @@ -7,6 +7,7 @@ import ( "reflect" "sync" + iputils "github.com/containernetworking/plugins/pkg/ip" bitmapallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/bitmap" ipallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -16,7 +17,7 @@ import ( // Allocator manages the allocation of IP within specific set of subnets // identified by a name. Allocator should be threadsafe. 
type Allocator interface { - AddOrUpdateSubnet(name string, subnets []*net.IPNet) error + AddOrUpdateSubnet(name string, subnets []*net.IPNet, excludeSubnets ...*net.IPNet) error DeleteSubnet(name string) GetSubnets(name string) ([]*net.IPNet, error) AllocateUntilFull(name string) error @@ -68,7 +69,7 @@ func NewAllocator() *allocator { } // AddOrUpdateSubnet set to the allocator for IPAM management, or update it. -func (allocator *allocator) AddOrUpdateSubnet(name string, subnets []*net.IPNet) error { +func (allocator *allocator) AddOrUpdateSubnet(name string, subnets []*net.IPNet, excludeSubnets ...*net.IPNet) error { allocator.Lock() defer allocator.Unlock() if subnetInfo, ok := allocator.cache[name]; ok && !reflect.DeepEqual(subnetInfo.subnets, subnets) { @@ -87,6 +88,22 @@ func (allocator *allocator) AddOrUpdateSubnet(name string, subnets []*net.IPNet) ipams: ipams, } + for _, excludeSubnet := range excludeSubnets { + var excluded bool + for i, subnet := range subnets { + if util.ContainsCIDR(subnet, excludeSubnet) { + err := reserveSubnets(excludeSubnet, ipams[i]) + if err != nil { + return fmt.Errorf("failed to exclude subnet %s for %s: %w", excludeSubnet, name, err) + } + } + excluded = true + } + if !excluded { + return fmt.Errorf("failed to exclude subnet %s for %s: not contained in any of the subnets", excludeSubnet, name) + } + } + return nil } @@ -183,6 +200,21 @@ func (allocator *allocator) AllocateIPs(name string, ips []*net.IPNet) error { return nil } +// reserveSubnets reserves subnet IPs +func reserveSubnets(subnet *net.IPNet, ipam ipallocator.Interface) error { + // FIXME: allocate IP ranges when https://github.com/ovn-org/ovn-kubernetes/issues/3369 is fixed + for ip := subnet.IP; subnet.Contains(ip); ip = iputils.NextIP(ip) { + if ipam.Reserved(ip) { + continue + } + err := ipam.Allocate(ip) + if err != nil { + return fmt.Errorf("failed to reserve IP %s: %w", ip, err) + } + } + return nil +} + // AllocateNextIPs allocates IP addresses from the given subnet set func (allocator *allocator) AllocateNextIPs(name string) ([]*net.IPNet, error) { allocator.RLock() diff --git a/go-controller/pkg/allocator/ip/subnet/allocator_test.go b/go-controller/pkg/allocator/ip/subnet/allocator_test.go index c6b5ae87b9..4f035a2534 100644 --- a/go-controller/pkg/allocator/ip/subnet/allocator_test.go +++ b/go-controller/pkg/allocator/ip/subnet/allocator_test.go @@ -67,6 +67,28 @@ var _ = ginkgo.Describe("Subnet IP allocator operations", func() { gomega.Expect(ip.IP.String()).To(gomega.Equal(expectedIPs[i])) } }) + + ginkgo.It("excludes subnets correctly", func() { + subnetName := "subnet1" + subnets := []string{ + "10.1.1.0/24", + } + excludes := []string{ + "10.1.1.0/29", + } + + expectedIPs := []string{"10.1.1.8"} + + err := allocator.AddOrUpdateSubnet(subnetName, ovntest.MustParseIPNets(subnets...), ovntest.MustParseIPNets(excludes...)...) 
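+			// excluding 10.1.1.0/29 reserves 10.1.1.0-10.1.1.7 up front, so the first address AllocateNextIPs returns below is 10.1.1.8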
+ gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ips, err := allocator.AllocateNextIPs(subnetName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + for i, ip := range ips { + gomega.Expect(ip.IP.String()).To(gomega.Equal(expectedIPs[i])) + } + }) + }) ginkgo.Context("when allocating IP addresses", func() { diff --git a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go index 28354facb3..cc9949e1ed 100644 --- a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go @@ -7,7 +7,6 @@ import ( "strconv" "time" - iputils "github.com/containernetworking/plugins/pkg/ip" mnpapi "github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/apis/k8s.cni.cncf.io/v1beta1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" @@ -298,22 +297,9 @@ func (oc *BaseSecondaryLayer2NetworkController) InitializeLogicalSwitch(switchNa return nil, fmt.Errorf("failed to create logical switch %+v: %v", logicalSwitch, err) } - if err = oc.lsManager.AddOrUpdateSwitch(switchName, hostSubnets); err != nil { + if err = oc.lsManager.AddOrUpdateSwitch(switchName, hostSubnets, excludeSubnets...); err != nil { return nil, err } - // FIXME: allocate IP ranges when https://github.com/ovn-org/ovn-kubernetes/issues/3369 is fixed - for _, excludeSubnet := range excludeSubnets { - for excludeIP := excludeSubnet.IP; excludeSubnet.Contains(excludeIP); excludeIP = iputils.NextIP(excludeIP) { - var ipMask net.IPMask - if excludeIP.To4() != nil { - ipMask = net.CIDRMask(32, 32) - } else { - ipMask = net.CIDRMask(128, 128) - } - _ = oc.lsManager.AllocateIPs(switchName, []*net.IPNet{{IP: excludeIP, Mask: ipMask}}) - } - } - return &logicalSwitch, nil } diff --git a/go-controller/pkg/ovn/controller/apbroute/network_client.go b/go-controller/pkg/ovn/controller/apbroute/network_client.go index c47ca2bbac..47841e135f 100644 --- a/go-controller/pkg/ovn/controller/apbroute/network_client.go +++ b/go-controller/pkg/ovn/controller/apbroute/network_client.go @@ -194,12 +194,12 @@ func (nb *northBoundClient) addGatewayIPs(pod *v1.Pod, egress gatewayInfoList) e } podIPs := make([]*net.IPNet, 0) for _, podIP := range pod.Status.PodIPs { - podIPStr := utilnet.ParseIPSloppy(podIP.IP).String() - cidr := podIPStr + util.GetIPFullMask(podIPStr) - _, ipNet, err := net.ParseCIDR(cidr) - if err != nil { - return fmt.Errorf("failed to parse CIDR: %s, error: %v", cidr, err) + ip := utilnet.ParseIPSloppy(podIP.IP) + ipNet := &net.IPNet{ + IP: ip, + Mask: util.GetIPFullMask(ip), } + ipNet = util.IPsToNetworkIPs(ipNet)[0] podIPs = append(podIPs, ipNet) } if len(podIPs) == 0 { @@ -289,7 +289,7 @@ func (nb *northBoundClient) addGWRoutesForPod(gateways []*gatewayInfo, podIfAddr routesAdded++ continue } - mask := util.GetIPFullMask(podIP) + mask := util.GetIPFullMaskString(podIP) if err := nb.createOrUpdateBFDStaticRoute(gateway.BFDEnabled, gw, podIP, gr, port, mask); err != nil { return err } @@ -445,7 +445,7 @@ func (nb *northBoundClient) updateExternalGWInfoCacheForPodIPWithGatewayIP(podIP if foundGR, ok := routeInfo.PodExternalRoutes[podIP][gwIP]; ok && foundGR == gr { return nil } - mask := util.GetIPFullMask(podIP) + mask := util.GetIPFullMaskString(podIP) portPrefix, err := nb.extSwitchPrefix(nodeName) if err != nil { @@ -519,7 +519,7 @@ func (nb *northBoundClient) deletePodGWRoute(routeInfo *ExternalRouteInfo, podIP return 
nil } - mask := util.GetIPFullMask(podIP) + mask := util.GetIPFullMaskString(podIP) if err := nb.deleteLogicalRouterStaticRoute(podIP, mask, gw, gr); err != nil { return fmt.Errorf("unable to delete pod %s ECMP route to GR %s, GW: %s: %w", routeInfo.PodName, gr, gw, err) @@ -699,18 +699,12 @@ func buildPodSNAT(extIPs, podIPNets []*net.IPNet) ([]*nbdb.NAT, error) { var nat *nbdb.NAT for _, podIPNet := range podIPNets { - podIP := podIPNet.IP.String() - mask := util.GetIPFullMask(podIP) - _, fullMaskPodNet, err := net.ParseCIDR(podIP + mask) - if err != nil { - return nil, fmt.Errorf("invalid IP: %s and mask: %s combination, error: %v", podIP, mask, err) - } + fullMaskPodNet := util.IPsToNetworkIPs(podIPNet)[0] if len(extIPs) == 0 { nat = libovsdbops.BuildSNAT(nil, fullMaskPodNet, "", nil) } else { for _, gwIPNet := range extIPs { - gwIP := gwIPNet.IP.String() - if utilnet.IsIPv6String(gwIP) != utilnet.IsIPv6String(podIP) { + if utilnet.IsIPv6CIDR(gwIPNet) != utilnet.IsIPv6CIDR(podIPNet) { continue } nat = libovsdbops.BuildSNAT(&gwIPNet.IP, fullMaskPodNet, "", nil) diff --git a/go-controller/pkg/ovn/egressgw.go b/go-controller/pkg/ovn/egressgw.go index 6d1da10b6a..30dba82cd7 100644 --- a/go-controller/pkg/ovn/egressgw.go +++ b/go-controller/pkg/ovn/egressgw.go @@ -224,13 +224,9 @@ func (oc *DefaultNetworkController) addGWRoutesForNamespace(namespace string, eg } podIPs := make([]*net.IPNet, 0) for _, podIP := range pod.Status.PodIPs { - podIPStr := utilnet.ParseIPSloppy(podIP.IP).String() - cidr := podIPStr + util.GetIPFullMask(podIPStr) - _, ipNet, err := net.ParseCIDR(cidr) - if err != nil { - return fmt.Errorf("failed to parse CIDR: %s, error: %v", cidr, err) - } - podIPs = append(podIPs, ipNet) + podIP := &net.IPNet{IP: utilnet.ParseIPSloppy(podIP.IP)} + podIP.Mask = util.GetIPFullMask(podIP.IP) + podIPs = append(podIPs, podIP) } if len(podIPs) == 0 { klog.Warningf("Will not add gateway routes pod %s/%s. 
IPs not found!", pod.Namespace, pod.Name) @@ -330,7 +326,7 @@ func (oc *DefaultNetworkController) deletePodGWRoute(routeInfo *apbroutecontroll return nil } } - mask := util.GetIPFullMask(podIP) + mask := util.GetIPFullMaskString(podIP) if err := oc.deleteLogicalRouterStaticRoute(podIP, mask, gw, gr); err != nil { return fmt.Errorf("unable to delete pod %s ECMP route to GR %s, GW: %s: %w", routeInfo.PodName, gr, gw, err) @@ -548,7 +544,7 @@ func (oc *DefaultNetworkController) addGWRoutesForPod(gateways []*gatewayInfo, p routesAdded++ continue } - mask := util.GetIPFullMask(podIP) + mask := util.GetIPFullMaskString(podIP) if err := oc.createBFDStaticRoute(gateway.bfdEnabled, gw, podIP, gr, port, mask); err != nil { return err @@ -607,18 +603,15 @@ func buildPodSNAT(extIPs, podIPNets []*net.IPNet) ([]*nbdb.NAT, error) { var nat *nbdb.NAT for _, podIPNet := range podIPNets { - podIP := podIPNet.IP.String() - mask := util.GetIPFullMask(podIP) - _, fullMaskPodNet, err := net.ParseCIDR(podIP + mask) - if err != nil { - return nil, fmt.Errorf("invalid IP: %s and mask: %s combination, error: %v", podIP, mask, err) + fullMaskPodNet := &net.IPNet{ + IP: podIPNet.IP, + Mask: util.GetIPFullMask(podIPNet.IP), } if len(extIPs) == 0 { nat = libovsdbops.BuildSNAT(nil, fullMaskPodNet, "", nil) } else { for _, gwIPNet := range extIPs { - gwIP := gwIPNet.IP.String() - if utilnet.IsIPv6String(gwIP) != utilnet.IsIPv6String(podIP) { + if utilnet.IsIPv6CIDR(gwIPNet) != utilnet.IsIPv6CIDR(podIPNet) { continue } nat = libovsdbops.BuildSNAT(&gwIPNet.IP, fullMaskPodNet, "", nil) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 26641b31c7..2fc207cde3 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -2074,11 +2074,9 @@ func DeleteLegacyDefaultNoRerouteNodePolicies(nbClient libovsdbclient.Client, no } func buildSNATFromEgressIPStatus(podIP net.IP, status egressipv1.EgressIPStatusItem, egressIPName string) (*nbdb.NAT, error) { - podIPStr := podIP.String() - mask := util.GetIPFullMask(podIPStr) - _, logicalIP, err := net.ParseCIDR(podIPStr + mask) - if err != nil { - return nil, fmt.Errorf("failed to parse podIP: %s, error: %v", podIP.String(), err) + logicalIP := &net.IPNet{ + IP: podIP, + Mask: util.GetIPFullMask(podIP), } externalIP := net.ParseIP(status.EgressIP) logicalPort := types.K8sPrefix + status.Node diff --git a/go-controller/pkg/ovn/gateway_init.go b/go-controller/pkg/ovn/gateway_init.go index 9b163d6c90..8e81cb2d7d 100644 --- a/go-controller/pkg/ovn/gateway_init.go +++ b/go-controller/pkg/ovn/gateway_init.go @@ -163,7 +163,7 @@ func (oc *DefaultNetworkController) gatewayInit(nodeName string, clusterIPSubnet oldExternalIPs := strings.Split(physicalIPs, ",") oldExtIPs = make([]net.IP, len(oldExternalIPs)) for i, oldExternalIP := range oldExternalIPs { - cidr := oldExternalIP + util.GetIPFullMask(oldExternalIP) + cidr := oldExternalIP + util.GetIPFullMaskString(oldExternalIP) ip, _, err := net.ParseCIDR(cidr) if err != nil { return fmt.Errorf("invalid cidr:%s error: %v", cidr, err) diff --git a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go index cfc1eaa911..0efe677d90 100644 --- a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go +++ b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go @@ -31,27 +31,23 @@ func NewL2SwitchManager() *LogicalSwitchManager { } } -// AddOrUpdateSwitch adds/updates a 
switch to the logical switch manager for -// subnet and IPAM management. -func (manager *LogicalSwitchManager) AddOrUpdateSwitch(switchName string, hostSubnets []*net.IPNet) error { +// AddOrUpdateSwitch adds/updates a switch to the logical switch manager for subnet +// and IPAM management. +func (manager *LogicalSwitchManager) AddOrUpdateSwitch(switchName string, hostSubnets []*net.IPNet, excludeSubnets ...*net.IPNet) error { err := manager.allocator.AddOrUpdateSubnet(switchName, hostSubnets) if err != nil { return err } if manager.reserveIPs { for _, hostSubnet := range hostSubnets { - err = manager.allocator.AllocateIPs(switchName, []*net.IPNet{util.GetNodeGatewayIfAddr(hostSubnet)}) - if err != nil { - return err - } - err = manager.allocator.AllocateIPs(switchName, []*net.IPNet{util.GetNodeManagementIfAddr(hostSubnet)}) - if err != nil { - return err + for _, ip := range []*net.IPNet{util.GetNodeGatewayIfAddr(hostSubnet), util.GetNodeManagementIfAddr(hostSubnet)} { + excludeSubnets = append(excludeSubnets, + &net.IPNet{IP: ip.IP, Mask: util.GetIPFullMask(ip.IP)}, + ) } } } - - return nil + return manager.allocator.AddOrUpdateSubnet(switchName, hostSubnets, excludeSubnets...) } // AddNoHostSubnetSwitch adds/updates a switch without any host subnets diff --git a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go index f553686f2e..147feaaa70 100644 --- a/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go +++ b/go-controller/pkg/ovn/zone_interconnect/zone_ic_handler.go @@ -627,7 +627,7 @@ func (zic *ZoneInterconnectHandler) getStaticRoutes(ipPrefixes []*net.IPNet, nex } p := "" if fullMask { - p = prefix.IP.String() + util.GetIPFullMask(prefix.IP.String()) + p = prefix.IP.String() + util.GetIPFullMaskString(prefix.IP.String()) } else { p = prefix.String() } diff --git a/go-controller/pkg/util/net.go b/go-controller/pkg/util/net.go index 841ef143c5..0d61e838a8 100644 --- a/go-controller/pkg/util/net.go +++ b/go-controller/pkg/util/net.go @@ -317,3 +317,15 @@ func GenerateRandMAC() (net.HardwareAddr, error) { return buf, nil } + +// IPsToNetworkIPs returns the network CIDRs of the provided IP CIDRs +func IPsToNetworkIPs(ips ...*net.IPNet) []*net.IPNet { + nets := make([]*net.IPNet, len(ips)) + for i := range ips { + nets[i] = &net.IPNet{ + IP: ips[i].IP.Mask(ips[i].Mask), + Mask: ips[i].Mask, + } + } + return nets +} diff --git a/go-controller/pkg/util/pod_annotation.go b/go-controller/pkg/util/pod_annotation.go index d148b31e33..400bbf6e0e 100644 --- a/go-controller/pkg/util/pod_annotation.go +++ b/go-controller/pkg/util/pod_annotation.go @@ -259,7 +259,7 @@ func GetPodCIDRsWithFullMask(pod *v1.Pod, nInfo NetInfo) ([]*net.IPNet, error) { for _, podIP := range podIPs { ipNet := net.IPNet{ IP: podIP, - Mask: GetFullNetMask(podIP), + Mask: GetIPFullMask(podIP), } ips = append(ips, &ipNet) } diff --git a/go-controller/pkg/util/pod_annotation_unit_test.go b/go-controller/pkg/util/pod_annotation_unit_test.go index 5e2b8f1011..98b4c160c6 100644 --- a/go-controller/pkg/util/pod_annotation_unit_test.go +++ b/go-controller/pkg/util/pod_annotation_unit_test.go @@ -345,7 +345,7 @@ func TestGetPodIPsOfNetwork(t *testing.T) { expectedIP := tc.outExp[0] ipNet := net.IPNet{ IP: expectedIP, - Mask: GetFullNetMask(expectedIP), + Mask: GetIPFullMask(expectedIP), } assert.Equal(t, []*net.IPNet{&ipNet}, res2) } diff --git a/go-controller/pkg/util/util.go b/go-controller/pkg/util/util.go index 99ca0b8cde..4f17fe08a5 100644 --- 
a/go-controller/pkg/util/util.go +++ b/go-controller/pkg/util/util.go @@ -59,8 +59,8 @@ func StringArg(context *cli.Context, name string) (string, error) { return val, nil } -// GetIPFullMask returns /32 if ip is IPV4 family and /128 if ip is IPV6 family -func GetIPFullMask(ip string) string { +// GetIPFullMaskString returns /32 if ip is IPV4 family and /128 if ip is IPV6 family +func GetIPFullMaskString(ip string) string { const ( // IPv4FullMask is the maximum prefix mask for an IPv4 address IPv4FullMask = "/32" @@ -74,19 +74,13 @@ func GetIPFullMask(ip string) string { return IPv4FullMask } -// GetFullNetMask returns a 32 bit netmask for IPv4 addresses and a 128 bit netmask for IPv6 addresses -func GetFullNetMask(ip net.IP) net.IPMask { - const ( - // IPv4FullMask is the maximum prefix mask for an IPv4 address - IPv4FullMask = 32 - // IPv6FullMask is the maximum prefix mask for an IPv6 address - IPv6FullMask = 128 - ) - +// GetIPFullMask returns a full IPv4 IPMask if ip is IPV4 family or a full IPv6 +// IPMask otherwise +func GetIPFullMask(ip net.IP) net.IPMask { if utilnet.IsIPv6(ip) { - return net.CIDRMask(IPv6FullMask, IPv6FullMask) + return net.CIDRMask(128, 128) } - return net.CIDRMask(IPv4FullMask, IPv4FullMask) + return net.CIDRMask(32, 32) } // GetLegacyK8sMgmtIntfName returns legacy management ovs-port name From 819473c1abe40153f268e0b9ec3061a4d38f6a92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Mon, 29 May 2023 13:34:24 +0000 Subject: [PATCH 11/31] Annotation errors compatible with errors pkg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jaime Caamaño Ruiz --- go-controller/pkg/util/util.go | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/go-controller/pkg/util/util.go b/go-controller/pkg/util/util.go index 4f17fe08a5..8bcc0e5236 100644 --- a/go-controller/pkg/util/util.go +++ b/go-controller/pkg/util/util.go @@ -1,6 +1,7 @@ package util import ( + "errors" "fmt" "hash/fnv" "net" @@ -174,38 +175,38 @@ type annotationNotSetError struct { msg string } -func (anse annotationNotSetError) Error() string { +func (anse *annotationNotSetError) Error() string { return anse.msg } // newAnnotationNotSetError returns an error for an annotation that is not set func newAnnotationNotSetError(format string, args ...interface{}) error { - return annotationNotSetError{msg: fmt.Sprintf(format, args...)} + return &annotationNotSetError{msg: fmt.Sprintf(format, args...)} } // IsAnnotationNotSetError returns true if the error indicates that an annotation is not set func IsAnnotationNotSetError(err error) bool { - _, ok := err.(annotationNotSetError) - return ok + var annotationNotSetError *annotationNotSetError + return errors.As(err, &annotationNotSetError) } type annotationAlreadySetError struct { msg string } -func (aase annotationAlreadySetError) Error() string { +func (aase *annotationAlreadySetError) Error() string { return aase.msg } // newAnnotationAlreadySetError returns an error for an annotation that is not set func newAnnotationAlreadySetError(format string, args ...interface{}) error { - return annotationAlreadySetError{msg: fmt.Sprintf(format, args...)} + return &annotationAlreadySetError{msg: fmt.Sprintf(format, args...)} } // IsAnnotationAlreadySetError returns true if the error indicates that an annotation is already set func IsAnnotationAlreadySetError(err error) bool { - _, ok := err.(annotationAlreadySetError) - return ok + var annotationAlreadySetError 
*annotationAlreadySetError + return errors.As(err, &annotationAlreadySetError) } // HashforOVN hashes the provided input to make it a valid addressSet or portGroup name. From 7345cd4cefdca7d57d450d147d686a9366dd4fc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Wed, 5 Jul 2023 18:15:38 +0000 Subject: [PATCH 12/31] Refactor convertMultiNetPolicyToNetPolicy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So that it can be tested independantly without have to setup a controller test harness. Signed-off-by: Jaime Caamaño Ruiz --- .../base_network_controller_multipolicy.go | 29 +-- ...ase_network_controller_multipolicy_test.go | 235 +++++++++++------- 2 files changed, 163 insertions(+), 101 deletions(-) diff --git a/go-controller/pkg/ovn/base_network_controller_multipolicy.go b/go-controller/pkg/ovn/base_network_controller_multipolicy.go index ebb57ad1e1..01af59aace 100644 --- a/go-controller/pkg/ovn/base_network_controller_multipolicy.go +++ b/go-controller/pkg/ovn/base_network_controller_multipolicy.go @@ -60,7 +60,7 @@ func (bsnc *BaseSecondaryNetworkController) shouldApplyMultiPolicy(mpolicy *mnpa return false } -func convertMultiNetPolicyToNetPolicy(mpolicy *mnpapi.MultiNetworkPolicy) *knet.NetworkPolicy { +func convertMultiNetPolicyToNetPolicy(mpolicy *mnpapi.MultiNetworkPolicy, allowPeerSelectors bool) (*knet.NetworkPolicy, error) { var policy knet.NetworkPolicy var ipb *knet.IPBlock @@ -80,6 +80,9 @@ func convertMultiNetPolicyToNetPolicy(mpolicy *mnpapi.MultiNetworkPolicy) *knet. } ingress.From = make([]knet.NetworkPolicyPeer, len(mingress.From)) for j, mfrom := range mingress.From { + if !allowPeerSelectors && isPeerSelector(mfrom) { + return nil, fmt.Errorf("invalid ingress peer %v in multi-network policy %s; IPAM-less networks can only have `ipBlock` peers", mfrom, mpolicy.Name) + } ipb = nil if mfrom.IPBlock != nil { ipb = &knet.IPBlock{CIDR: mfrom.IPBlock.CIDR, Except: mfrom.IPBlock.Except} @@ -104,6 +107,9 @@ func convertMultiNetPolicyToNetPolicy(mpolicy *mnpapi.MultiNetworkPolicy) *knet. } egress.To = make([]knet.NetworkPolicyPeer, len(megress.To)) for j, mto := range megress.To { + if !allowPeerSelectors && isPeerSelector(mto) { + return nil, fmt.Errorf("invalid egress peer %v in multi-network policy %s; IPAM-less networks can only have `ipBlock` peers", mto, mpolicy.Name) + } ipb = nil if mto.IPBlock != nil { ipb = &knet.IPBlock{CIDR: mto.IPBlock.CIDR, Except: mto.IPBlock.Except} @@ -120,27 +126,14 @@ func convertMultiNetPolicyToNetPolicy(mpolicy *mnpapi.MultiNetworkPolicy) *knet. for i, mpolicytype := range mpolicy.Spec.PolicyTypes { policy.Spec.PolicyTypes[i] = knet.PolicyType(mpolicytype) } - return &policy + return &policy, nil } func (bnc *BaseNetworkController) convertMultiNetPolicyToNetPolicy(mpolicy *mnpapi.MultiNetworkPolicy) (*knet.NetworkPolicy, error) { - if !bnc.doesNetworkRequireIPAM() { - var peers []mnpapi.MultiNetworkPolicyPeer - for _, rule := range mpolicy.Spec.Ingress { - peers = append(peers, rule.From...) - } - for _, rule := range mpolicy.Spec.Egress { - peers = append(peers, rule.To...) 
- } - for _, peer := range peers { - if doesPeerRequireNetworkIPAM(peer) { - return nil, fmt.Errorf("invalid peer %v in multi-network policy %s; IPAM-less networks can only have `ipBlock` peers", peer, mpolicy.Name) - } - } - } - return convertMultiNetPolicyToNetPolicy(mpolicy), nil + allowPeerSelectors := bnc.doesNetworkRequireIPAM() + return convertMultiNetPolicyToNetPolicy(mpolicy, allowPeerSelectors) } -func doesPeerRequireNetworkIPAM(peer mnpapi.MultiNetworkPolicyPeer) bool { +func isPeerSelector(peer mnpapi.MultiNetworkPolicyPeer) bool { return peer.PodSelector != nil || peer.NamespaceSelector != nil } diff --git a/go-controller/pkg/ovn/base_network_controller_multipolicy_test.go b/go-controller/pkg/ovn/base_network_controller_multipolicy_test.go index 4692b73fc7..971b30f88e 100644 --- a/go-controller/pkg/ovn/base_network_controller_multipolicy_test.go +++ b/go-controller/pkg/ovn/base_network_controller_multipolicy_test.go @@ -1,8 +1,6 @@ package ovn import ( - "fmt" - . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" @@ -10,25 +8,14 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/apis/k8s.cni.cncf.io/v1beta1" - netplumbersv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" - - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) var _ = Describe("convertMultiNetPolicyToNetPolicy", func() { const policyName = "pol33" - var nci *CommonNetworkControllerInfo - - BeforeEach(func() { - nci = &CommonNetworkControllerInfo{nbClient: nil} - }) - - It("translates an IPAM policy with namespace selectors", func() { - nInfo, err := util.ParseNADInfo(ipamNetAttachDef()) - Expect(err).NotTo(HaveOccurred()) - bnc := NewSecondaryLayer2NetworkController(nci, nInfo) - Expect(bnc.convertMultiNetPolicyToNetPolicy(multiNetPolicyWithNamespaceSelector(policyName))).To( + It("translates an IPAM policy with ingress namespace selectors", func() { + allowPeerSelectors := true + Expect(convertMultiNetPolicyToNetPolicy(multiNetPolicyWithIngressNamespaceSelector(policyName), allowPeerSelectors)).To( Equal( &netv1.NetworkPolicy{ ObjectMeta: metav1.ObjectMeta{Name: policyName}, @@ -45,11 +32,28 @@ var _ = Describe("convertMultiNetPolicyToNetPolicy", func() { })) }) - It("translates an IPAM policy with pod selectors", func() { - nInfo, err := util.ParseNADInfo(ipamNetAttachDef()) - Expect(err).NotTo(HaveOccurred()) - bnc := NewSecondaryLayer2NetworkController(nci, nInfo) - Expect(bnc.convertMultiNetPolicyToNetPolicy(multiNetPolicyWithPodSelector(policyName))).To( + It("translates an IPAM policy with egress namespace selectors", func() { + allowPeerSelectors := true + Expect(convertMultiNetPolicyToNetPolicy(multiNetPolicyWithEgressNamespaceSelector(policyName), allowPeerSelectors)).To( + Equal( + &netv1.NetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{Name: policyName}, + Spec: netv1.NetworkPolicySpec{ + Ingress: []netv1.NetworkPolicyIngressRule{}, + Egress: []netv1.NetworkPolicyEgressRule{ + { + To: []netv1.NetworkPolicyPeer{{NamespaceSelector: sameLabelsEverywhere()}}, + Ports: []netv1.NetworkPolicyPort{}, + }, + }, + PolicyTypes: []netv1.PolicyType{}, + }, + })) + }) + + It("translates an IPAM policy with ingress pod selectors", func() { + allowPeerSelectors := true + Expect(convertMultiNetPolicyToNetPolicy(multiNetPolicyWithIngressPodSelector(policyName), allowPeerSelectors)).To( Equal( &netv1.NetworkPolicy{ ObjectMeta: metav1.ObjectMeta{Name: policyName}, @@ -66,11 +70,28 @@ var _ = 
Describe("convertMultiNetPolicyToNetPolicy", func() { })) }) - It("translates an IPAM policy with `ipBlock` selectors", func() { - nInfo, err := util.ParseNADInfo(ipamNetAttachDef()) - Expect(err).NotTo(HaveOccurred()) - bnc := NewSecondaryLayer2NetworkController(nci, nInfo) - Expect(bnc.convertMultiNetPolicyToNetPolicy(multiNetPolicyWithIPBlock())).To(Equal( + It("translates an IPAM policy with egress pod selectors", func() { + allowPeerSelectors := true + Expect(convertMultiNetPolicyToNetPolicy(multiNetPolicyWithEgressPodSelector(policyName), allowPeerSelectors)).To( + Equal( + &netv1.NetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{Name: policyName}, + Spec: netv1.NetworkPolicySpec{ + Ingress: []netv1.NetworkPolicyIngressRule{}, + Egress: []netv1.NetworkPolicyEgressRule{ + { + To: []netv1.NetworkPolicyPeer{{PodSelector: sameLabelsEverywhere()}}, + Ports: []netv1.NetworkPolicyPort{}, + }, + }, + PolicyTypes: []netv1.PolicyType{}, + }, + })) + }) + + It("translates an IPAM policy with ingress `ipBlock` rule", func() { + allowPeerSelectors := true + Expect(convertMultiNetPolicyToNetPolicy(multiNetPolicyWithIngressIPBlock(), allowPeerSelectors)).To(Equal( &netv1.NetworkPolicy{ Spec: netv1.NetworkPolicySpec{ Ingress: []netv1.NetworkPolicyIngressRule{ @@ -86,11 +107,27 @@ var _ = Describe("convertMultiNetPolicyToNetPolicy", func() { )) }) - It("translates an IPAM-less policy with `ipBlock` selectors", func() { - nInfo, err := util.ParseNADInfo(ipamlessNetAttachDef()) - Expect(err).NotTo(HaveOccurred()) - bnc := NewSecondaryLayer2NetworkController(nci, nInfo) - Expect(bnc.convertMultiNetPolicyToNetPolicy(multiNetPolicyWithIPBlock())).To( + It("translates an IPAM policy with egress `ipBlock` rule", func() { + allowPeerSelectors := true + Expect(convertMultiNetPolicyToNetPolicy(multiNetPolicyWithEgressIPBlock(), allowPeerSelectors)).To(Equal( + &netv1.NetworkPolicy{ + Spec: netv1.NetworkPolicySpec{ + Ingress: []netv1.NetworkPolicyIngressRule{}, + Egress: []netv1.NetworkPolicyEgressRule{ + { + To: []netv1.NetworkPolicyPeer{{IPBlock: &netv1.IPBlock{CIDR: "10.10.0.0/16"}}}, + Ports: []netv1.NetworkPolicyPort{}, + }, + }, + PolicyTypes: []netv1.PolicyType{}, + }, + }, + )) + }) + + It("translates an IPAM-less policy with ingress `ipBlock` rule", func() { + allowPeerSelectors := false + Expect(convertMultiNetPolicyToNetPolicy(multiNetPolicyWithIngressIPBlock(), allowPeerSelectors)).To( Equal( &netv1.NetworkPolicy{ Spec: netv1.NetworkPolicySpec{ @@ -107,23 +144,47 @@ var _ = Describe("convertMultiNetPolicyToNetPolicy", func() { )) }) - It("*fails* to translate an IPAM-less policy with pod selector peers", func() { - nInfo, err := util.ParseNADInfo(ipamlessNetAttachDef()) - Expect(err).NotTo(HaveOccurred()) - bnc := NewSecondaryLayer2NetworkController(nci, nInfo) - _, err = bnc.convertMultiNetPolicyToNetPolicy(multiNetPolicyWithPodSelector(policyName)) - Expect(err).To( - MatchError( - MatchRegexp(fmt.Sprintf("invalid peer .* in multi-network policy %s; IPAM-less networks can only have `ipBlock` peers", policyName)))) + It("translates an IPAM-less policy with egress `ipBlock` rule", func() { + allowPeerSelectors := false + Expect(convertMultiNetPolicyToNetPolicy(multiNetPolicyWithEgressIPBlock(), allowPeerSelectors)).To( + Equal( + &netv1.NetworkPolicy{ + Spec: netv1.NetworkPolicySpec{ + Ingress: []netv1.NetworkPolicyIngressRule{}, + Egress: []netv1.NetworkPolicyEgressRule{ + { + To: []netv1.NetworkPolicyPeer{{IPBlock: &netv1.IPBlock{CIDR: "10.10.0.0/16"}}}, + Ports: []netv1.NetworkPolicyPort{}, + }, + 
}, + PolicyTypes: []netv1.PolicyType{}, + }, + }, + )) + }) + + It("*fails* to translate an IPAM-less policy with ingress pod selector peers", func() { + allowPeerSelectors := false + _, err := convertMultiNetPolicyToNetPolicy(multiNetPolicyWithIngressPodSelector(policyName), allowPeerSelectors) + Expect(err).To(HaveOccurred()) }) - It("translates an IPAM-less policy with namespace selector peers", func() { - nInfo, err := util.ParseNADInfo(ipamlessNetAttachDef()) - Expect(err).NotTo(HaveOccurred()) - bnc := NewSecondaryLayer2NetworkController(nci, nInfo) - _, err = bnc.convertMultiNetPolicyToNetPolicy(multiNetPolicyWithNamespaceSelector(policyName)) - Expect(err).To(MatchError( - MatchRegexp(fmt.Sprintf("invalid peer .* in multi-network policy %s; IPAM-less networks can only have `ipBlock` peers", policyName)))) + It("*fails* to translate an IPAM-less policy with egress pod selector peers", func() { + allowPeerSelectors := false + _, err := convertMultiNetPolicyToNetPolicy(multiNetPolicyWithEgressPodSelector(policyName), allowPeerSelectors) + Expect(err).To(HaveOccurred()) + }) + + It("*fails* translates an IPAM-less policy with ingress namespace selector peers", func() { + allowPeerSelectors := false + _, err := convertMultiNetPolicyToNetPolicy(multiNetPolicyWithIngressNamespaceSelector(policyName), allowPeerSelectors) + Expect(err).To(HaveOccurred()) + }) + + It("*fails* translates an IPAM-less policy with egress namespace selector peers", func() { + allowPeerSelectors := false + _, err := convertMultiNetPolicyToNetPolicy(multiNetPolicyWithEgressNamespaceSelector(policyName), allowPeerSelectors) + Expect(err).To(HaveOccurred()) }) }) @@ -133,48 +194,30 @@ func sameLabelsEverywhere() *metav1.LabelSelector { } } -func ipamNetAttachDef() *netplumbersv1.NetworkAttachmentDefinition { - return &netplumbersv1.NetworkAttachmentDefinition{ - ObjectMeta: metav1.ObjectMeta{ - Name: "flatl2", - Namespace: "default", - }, - Spec: netplumbersv1.NetworkAttachmentDefinitionSpec{ - Config: `{ - "cniVersion": "0.4.0", - "name": "flatl2", - "netAttachDefName": "default/flatl2", - "topology": "layer2", - "type": "ovn-k8s-cni-overlay", - "subnets": "192.100.200.0/24" - }`, +func multiNetPolicyWithIngressIPBlock() *v1beta1.MultiNetworkPolicy { + return &v1beta1.MultiNetworkPolicy{ + Spec: v1beta1.MultiNetworkPolicySpec{ + Ingress: []v1beta1.MultiNetworkPolicyIngressRule{ + { + From: []v1beta1.MultiNetworkPolicyPeer{ + { + IPBlock: &v1beta1.IPBlock{ + CIDR: "10.10.0.0/16", + }, + }, + }, + }, + }, }, } } -func ipamlessNetAttachDef() *netplumbersv1.NetworkAttachmentDefinition { - return &netplumbersv1.NetworkAttachmentDefinition{ - ObjectMeta: metav1.ObjectMeta{ - Name: "flatl2", - Namespace: "default", - }, - Spec: netplumbersv1.NetworkAttachmentDefinitionSpec{ - Config: `{ - "cniVersion": "0.4.0", - "name": "flatl2", - "netAttachDefName": "default/flatl2", - "topology": "layer2", - "type": "ovn-k8s-cni-overlay" - }`, - }, - } -} -func multiNetPolicyWithIPBlock() *v1beta1.MultiNetworkPolicy { +func multiNetPolicyWithEgressIPBlock() *v1beta1.MultiNetworkPolicy { return &v1beta1.MultiNetworkPolicy{ Spec: v1beta1.MultiNetworkPolicySpec{ - Ingress: []v1beta1.MultiNetworkPolicyIngressRule{ + Egress: []v1beta1.MultiNetworkPolicyEgressRule{ { - From: []v1beta1.MultiNetworkPolicyPeer{ + To: []v1beta1.MultiNetworkPolicyPeer{ { IPBlock: &v1beta1.IPBlock{ CIDR: "10.10.0.0/16", @@ -187,7 +230,7 @@ func multiNetPolicyWithIPBlock() *v1beta1.MultiNetworkPolicy { } } -func multiNetPolicyWithPodSelector(policyName string) 
*v1beta1.MultiNetworkPolicy { +func multiNetPolicyWithIngressPodSelector(policyName string) *v1beta1.MultiNetworkPolicy { return &v1beta1.MultiNetworkPolicy{ ObjectMeta: metav1.ObjectMeta{Name: policyName}, Spec: v1beta1.MultiNetworkPolicySpec{ @@ -200,7 +243,20 @@ func multiNetPolicyWithPodSelector(policyName string) *v1beta1.MultiNetworkPolic } } -func multiNetPolicyWithNamespaceSelector(policyName string) *v1beta1.MultiNetworkPolicy { +func multiNetPolicyWithEgressPodSelector(policyName string) *v1beta1.MultiNetworkPolicy { + return &v1beta1.MultiNetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{Name: policyName}, + Spec: v1beta1.MultiNetworkPolicySpec{ + Egress: []v1beta1.MultiNetworkPolicyEgressRule{ + { + To: []v1beta1.MultiNetworkPolicyPeer{{PodSelector: sameLabelsEverywhere()}}, + }, + }, + }, + } +} + +func multiNetPolicyWithIngressNamespaceSelector(policyName string) *v1beta1.MultiNetworkPolicy { return &v1beta1.MultiNetworkPolicy{ ObjectMeta: metav1.ObjectMeta{Name: policyName}, Spec: v1beta1.MultiNetworkPolicySpec{ @@ -212,3 +268,16 @@ func multiNetPolicyWithNamespaceSelector(policyName string) *v1beta1.MultiNetwor }, } } + +func multiNetPolicyWithEgressNamespaceSelector(policyName string) *v1beta1.MultiNetworkPolicy { + return &v1beta1.MultiNetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{Name: policyName}, + Spec: v1beta1.MultiNetworkPolicySpec{ + Egress: []v1beta1.MultiNetworkPolicyEgressRule{ + { + To: []v1beta1.MultiNetworkPolicyPeer{{NamespaceSelector: sameLabelsEverywhere()}}, + }, + }, + }, + } +} From 379d78564304cd4a5c5a43931b1b4794cb163187 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Tue, 4 Jul 2023 11:59:03 +0000 Subject: [PATCH 13/31] Add PodAnnotation update utility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moves most of the PodAnnotation setting logic from the base network controller to an utility so that it can be used from cluster manager as well. 
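
For reference, a minimal sketch of how a network controller is expected to wire
up and call the new utility (variable names below are illustrative only; the
constructor and call signatures are the ones added in this patch):

    allocator := pod.NewPodAnnotationAllocator(
        netInfo,                                 // util.NetInfo of this network
        watchFactory.PodCoreInformer().Lister(), // pod lister
        kube)                                    // kube.Interface

    // Pass a per-switch NamedAllocator for networks with IPAM, or nil for
    // IPAM-less networks; reallocateIP controls whether an IP request that
    // cannot be honored may fall back to newly allocated IPs.
    updatedPod, podAnnotation, err := allocator.AllocatePodAnnotation(
        lsManager.ForSwitch(switchName),
        pod,
        network, // *nadapi.NetworkSelectionElement
        reallocateIP)
    if err != nil {
        return err
    }
    // updatedPod is nil when the existing annotation already satisfied the
    // request; podAnnotation carries the effective IPs, MAC, gateways and
    // routes either way.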
Signed-off-by: Jaime Caamaño Ruiz --- go-controller/pkg/allocator/ip/allocator.go | 5 + .../pkg/allocator/ip/subnet/allocator.go | 36 ++ .../pkg/allocator/pod/pod_annotation.go | 257 ++++++++++ .../pkg/allocator/pod/pod_annotation_test.go | 471 +++++++++++++++++ .../pkg/ovn/base_network_controller.go | 6 +- .../pkg/ovn/base_network_controller_pods.go | 483 ++++++++---------- .../logical_switch_manager.go | 5 + .../secondary_layer2_network_controller.go | 7 + .../secondary_layer3_network_controller.go | 8 + .../secondary_localnet_network_controller.go | 10 +- go-controller/pkg/util/multi_network.go | 4 + go-controller/pkg/util/net.go | 24 + go-controller/pkg/util/pod.go | 64 +++ go-controller/pkg/util/pod_annotation.go | 157 ++++++ .../pkg/util/pod_annotation_unit_test.go | 5 +- go-controller/pkg/util/pod_test.go | 116 +++++ 16 files changed, 1388 insertions(+), 270 deletions(-) create mode 100644 go-controller/pkg/allocator/pod/pod_annotation.go create mode 100644 go-controller/pkg/allocator/pod/pod_annotation_test.go create mode 100644 go-controller/pkg/util/pod.go create mode 100644 go-controller/pkg/util/pod_test.go diff --git a/go-controller/pkg/allocator/ip/allocator.go b/go-controller/pkg/allocator/ip/allocator.go index 936027cdf6..e644cb0a29 100644 --- a/go-controller/pkg/allocator/ip/allocator.go +++ b/go-controller/pkg/allocator/ip/allocator.go @@ -43,6 +43,11 @@ var ( ErrAllocated = errors.New("provided IP is already allocated") ) +// IsErrAllocated returns true if err is of type ErrAllocated +func IsErrAllocated(err error) bool { + return errors.Is(err, ErrAllocated) +} + type ErrNotInRange struct { ValidRange string } diff --git a/go-controller/pkg/allocator/ip/subnet/allocator.go b/go-controller/pkg/allocator/ip/subnet/allocator.go index 5d2d98dfab..9a1788c3bf 100644 --- a/go-controller/pkg/allocator/ip/subnet/allocator.go +++ b/go-controller/pkg/allocator/ip/subnet/allocator.go @@ -25,6 +25,14 @@ type Allocator interface { AllocateNextIPs(name string) ([]*net.IPNet, error) ReleaseIPs(name string, ips []*net.IPNet) error ConditionalIPRelease(name string, ips []*net.IPNet, predicate func() (bool, error)) (bool, error) + ForSubnet(name string) NamedAllocator +} + +// NamedAllocator manages the allocation of IPs within a specific subnet +type NamedAllocator interface { + AllocateIPs(ips []*net.IPNet) error + AllocateNextIPs() ([]*net.IPNet, error) + ReleaseIPs(ips []*net.IPNet) error } // ErrSubnetNotFound is used to inform the subnet is not being managed @@ -322,3 +330,31 @@ func (allocator *allocator) ConditionalIPRelease(name string, ips []*net.IPNet, return false, nil } + +// ForSubnet returns an IP allocator for the specified subnet +func (allocator *allocator) ForSubnet(name string) NamedAllocator { + return &IPAllocator{ + name: name, + allocator: allocator, + } +} + +type IPAllocator struct { + allocator *allocator + name string +} + +// AllocateIPs allocates the requested IPs +func (ipAllocator *IPAllocator) AllocateIPs(ips []*net.IPNet) error { + return ipAllocator.allocator.AllocateIPs(ipAllocator.name, ips) +} + +// AllocateNextIPs allocates the next available IPs +func (ipAllocator *IPAllocator) AllocateNextIPs() ([]*net.IPNet, error) { + return ipAllocator.allocator.AllocateNextIPs(ipAllocator.name) +} + +// ReleaseIPs release the provided IPs +func (ipAllocator *IPAllocator) ReleaseIPs(ips []*net.IPNet) error { + return ipAllocator.allocator.ReleaseIPs(ipAllocator.name, ips) +} diff --git a/go-controller/pkg/allocator/pod/pod_annotation.go 
b/go-controller/pkg/allocator/pod/pod_annotation.go new file mode 100644 index 0000000000..e61c42bef7 --- /dev/null +++ b/go-controller/pkg/allocator/pod/pod_annotation.go @@ -0,0 +1,257 @@ +package pod + +import ( + "fmt" + "net" + + v1 "k8s.io/api/core/v1" + listers "k8s.io/client-go/listers/core/v1" + "k8s.io/klog/v2" + + nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip/subnet" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +// PodAnnotationAllocator is an utility to handle allocation of the PodAnnotation to Pods. +type PodAnnotationAllocator struct { + podLister listers.PodLister + kube kube.Interface + + netInfo util.NetInfo +} + +func NewPodAnnotationAllocator(netInfo util.NetInfo, podLister listers.PodLister, kube kube.Interface) *PodAnnotationAllocator { + return &PodAnnotationAllocator{ + podLister: podLister, + kube: kube, + netInfo: netInfo, + } +} + +// AllocatePodAnnotation allocates the PodAnnotation which includes IPs, a mac +// address, routes and gateways. Returns the allocated pod annotation and the +// updated pod. Returns a nil pod and the existing PodAnnotation if no updates +// are warranted to the pod. +// +// The allocation can be requested through the network selection element or +// derived from the allocator provided IPs. If the requested IPs cannot be +// honored, a new set of IPs will be allocated unless reallocateIP is set to +// false. +func (allocator *PodAnnotationAllocator) AllocatePodAnnotation( + ipAllocator subnet.NamedAllocator, + pod *v1.Pod, + network *nadapi.NetworkSelectionElement, + reallocateIP bool) ( + *v1.Pod, + *util.PodAnnotation, + error) { + + return allocatePodAnnotation( + allocator.podLister, + allocator.kube, + ipAllocator, + allocator.netInfo, + pod, + network, + reallocateIP, + ) + +} + +func allocatePodAnnotation( + podLister listers.PodLister, + kube kube.Interface, + ipAllocator subnet.NamedAllocator, + netInfo util.NetInfo, + pod *v1.Pod, + network *nadapi.NetworkSelectionElement, + reallocateIP bool) ( + updatedPod *v1.Pod, + podAnnotation *util.PodAnnotation, + err error) { + + allocateToPodWithRollback := func(pod *v1.Pod) (*v1.Pod, func(), error) { + var rollback func() + pod, podAnnotation, rollback, err = allocatePodAnnotationWithRollback( + ipAllocator, + netInfo, + pod, + network, + reallocateIP) + return pod, rollback, err + } + + err = util.UpdatePodWithRetryOrRollback( + podLister, + kube, + pod, + allocateToPodWithRollback, + ) + + if err != nil { + return nil, nil, err + } + + return pod, podAnnotation, nil +} + +// allocatePodAnnotationWithRollback allocates the PodAnnotation which includes +// IPs, a mac address, routes and gateways. Returns the allocated pod annotation +// and a pod with that annotation set. Returns a nil pod and the existing +// PodAnnotation if no updates are warranted to the pod. + +// The allocation can be requested through the network selection element or +// derived from the allocator provided IPs. If no IP allocation is required, set +// allocateIP to false. If the requested IPs cannot be honored, a new set of IPs +// will be allocated unless reallocateIP is set to false. 
+ +// A rollback function is returned to rollback the IP allocation if there was +// any. + +// This function is designed to be used in AllocateToPodWithRollbackFunc +// implementations. Use an inlined implementation if you want to extract +// information from it as a side-effect. +func allocatePodAnnotationWithRollback( + ipAllocator subnet.NamedAllocator, + netInfo util.NetInfo, + pod *v1.Pod, + network *nadapi.NetworkSelectionElement, + reallocateIP bool) ( + updatedPod *v1.Pod, + podAnnotation *util.PodAnnotation, + rollback func(), + err error) { + + nadName := types.DefaultNetworkName + if netInfo.IsSecondary() { + nadName = util.GetNADName(network.Namespace, network.Name) + } + podDesc := fmt.Sprintf("%s/%s/%s", nadName, pod.Namespace, pod.Name) + + // the IPs we allocate in this function need to be released back to the IPAM + // pool if there is some error in any step past the point the IPs were + // assigned via the IPAM manager. Note we are using a named return variable + // for defer to work correctly. + var releaseIPs []*net.IPNet + rollback = func() { + if len(releaseIPs) == 0 { + return + } + err := ipAllocator.ReleaseIPs(releaseIPs) + if err != nil { + klog.Errorf("Error when releasing IPs %v: %w", util.StringSlice(releaseIPs), err) + releaseIPs = nil + return + } + klog.V(5).Infof("Released IPs %v", util.StringSlice(releaseIPs)) + releaseIPs = nil + } + defer func() { + if err != nil { + rollback() + } + }() + + hasIPAM := util.DoesNetworkRequireIPAM(netInfo) + hasIPRequest := network != nil && len(network.IPRequest) > 0 + hasStaticIPRequest := hasIPRequest && !reallocateIP + + if hasIPAM && hasStaticIPRequest { + // for now we can't tell apart already allocated IPs from IPs excluded + // from allocation so we can't really honor static IP requests when + // there is IPAM as we don't really know if the requested IP should not + // be allocated or was already allocated by the same pod + err = fmt.Errorf("cannot allocate a static IP request with IPAM for pod %s", podDesc) + return + } + + podAnnotation, _ = util.UnmarshalPodAnnotation(pod.Annotations, nadName) + if podAnnotation == nil { + podAnnotation = &util.PodAnnotation{} + } + + // work on a tentative pod annotation based on the existing one + tentative := &util.PodAnnotation{ + IPs: podAnnotation.IPs, + MAC: podAnnotation.MAC, + } + + // we need to update the annotation if it is missing IPs or MAC + needsAnnotationUpdate := len(tentative.IPs) == 0 && (hasIPAM || hasIPRequest) + needsAnnotationUpdate = needsAnnotationUpdate || len(tentative.MAC) == 0 + reallocateOnNonStaticIPRequest := len(tentative.IPs) == 0 && hasIPRequest && !hasStaticIPRequest + + if len(tentative.IPs) == 0 { + if hasIPRequest { + tentative.IPs, err = util.ParseIPNets(network.IPRequest) + if err != nil { + return + } + } + } + + if hasIPAM { + if len(tentative.IPs) > 0 { + if err = ipAllocator.AllocateIPs(tentative.IPs); err != nil && !ip.IsErrAllocated(err) { + err = fmt.Errorf("failed to ensure requested or annotated IPs %v for pod %s: %w", + util.StringSlice(tentative.IPs), podDesc, err) + if !reallocateOnNonStaticIPRequest { + return + } + klog.Warning(err.Error()) + needsAnnotationUpdate = true + tentative.IPs = nil + } + + if err == nil { + // copy the IPs that would need to be released + releaseIPs = util.CopyIPNets(tentative.IPs) + } + + // IPs allocated or we will allocate a new set of IPs, reset the error + err = nil + } + + if len(tentative.IPs) == 0 { + tentative.IPs, err = ipAllocator.AllocateNextIPs() + if err != nil { + err = 
fmt.Errorf("failed to assign pod addresses for pod %s: %w", podDesc, err) + return + } + + // copy the IPs that would need to be released + releaseIPs = util.CopyIPNets(tentative.IPs) + } + } + + if needsAnnotationUpdate { + // handle mac address + if network != nil && network.MacRequest != "" { + tentative.MAC, err = net.ParseMAC(network.MacRequest) + } else if len(tentative.IPs) > 0 { + tentative.MAC = util.IPAddrToHWAddr(tentative.IPs[0].IP) + } else { + tentative.MAC, err = util.GenerateRandMAC() + } + if err != nil { + return + } + + // handle routes & gateways + err = util.AddRoutesGatewayIP(netInfo, pod, tentative, network) + if err != nil { + return + } + + updatedPod = pod + updatedPod.Annotations, err = util.MarshalPodAnnotation(updatedPod.Annotations, tentative, nadName) + podAnnotation = tentative + } + + return +} diff --git a/go-controller/pkg/allocator/pod/pod_annotation_test.go b/go-controller/pkg/allocator/pod/pod_annotation_test.go new file mode 100644 index 0000000000..3b18d6bb9d --- /dev/null +++ b/go-controller/pkg/allocator/pod/pod_annotation_test.go @@ -0,0 +1,471 @@ +package pod + +import ( + "errors" + "net" + "testing" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + cnitypes "github.com/containernetworking/cni/pkg/types" + nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + + ipam "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip/subnet" + ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" + ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + + "github.com/onsi/gomega" +) + +type allocatorStub struct { + netxtIPs []*net.IPNet + allocateIPsError error + releasedIPs []*net.IPNet +} + +func (a *allocatorStub) AllocateIPs(ips []*net.IPNet) error { + return a.allocateIPsError +} + +func (a *allocatorStub) AllocateNextIPs() ([]*net.IPNet, error) { + return a.netxtIPs, nil +} + +func (a *allocatorStub) ReleaseIPs(ips []*net.IPNet) error { + a.releasedIPs = ips + return nil +} + +func (a *allocatorStub) IsErrAllocated(err error) bool { + return errors.Is(err, ipam.ErrAllocated) +} + +func Test_allocatePodAnnotationWithRollback(t *testing.T) { + randomMac, err := util.GenerateRandMAC() + if err != nil { + t.Fatalf("failed to generate random mac") + } + + requestedMAC := "01:02:03:04:05:06" + requestedMACParsed, err := net.ParseMAC(requestedMAC) + if err != nil { + t.Fatalf("failed to generate random mac") + } + + type args struct { + ipAllocator subnet.NamedAllocator + network *nadapi.NetworkSelectionElement + reallocate bool + } + tests := []struct { + name string + args args + ipam bool + podAnnotation *util.PodAnnotation + invalidNetworkAnnotation bool + wantUpdatedPod bool + wantGeneratedMac bool + wantPodAnnotation *util.PodAnnotation + wantReleasedIPs []*net.IPNet + wantReleasedIPsOnRollback []*net.IPNet + wantErr bool + }{ + { + // on secondary L2 networks with no IPAM, we expect to generate a + // random mac + name: "expect generated mac, no IPAM", + wantUpdatedPod: true, + wantGeneratedMac: true, + }, + { + // on secondary L2 networks with no IPAM, if the pod is already + // annotated with a random MAC, we expect no further changes + name: "expect no updates, has mac, no IPAM", + podAnnotation: &util.PodAnnotation{ + MAC: 
randomMac, + }, + wantPodAnnotation: &util.PodAnnotation{ + MAC: randomMac, + }, + }, + { + // on secondary L2 network with no IPAM, honor static IP requests + // present in the network selection annotation + name: "expect requested static IP, no gateway, no IPAM", + args: args{ + network: &nadapi.NetworkSelectionElement{ + IPRequest: []string{"192.168.0.4/24"}, + }, + ipAllocator: &allocatorStub{ + netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), + }, + }, + wantUpdatedPod: true, + wantPodAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.4/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.4/24")[0].IP), + }, + }, + { + // on secondary L2 network with no IPAM, honor static IP and gateway + // requests present in the network selection annotation + name: "expect requested static IP, with gateway, no IPAM", + args: args{ + network: &nadapi.NetworkSelectionElement{ + IPRequest: []string{"192.168.0.4/24"}, + GatewayRequest: ovntest.MustParseIPs("192.168.0.1"), + }, + ipAllocator: &allocatorStub{ + netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), + }, + }, + wantUpdatedPod: true, + wantPodAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.4/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.4/24")[0].IP), + Gateways: ovntest.MustParseIPs("192.168.0.1"), + }, + }, + { + // on networks with IPAM, expect error if static IP request present + // in the network selection annotation + name: "expect error, static ip request, IPAM", + ipam: true, + args: args{ + network: &nadapi.NetworkSelectionElement{ + IPRequest: []string{"192.168.0.3/24"}, + }, + }, + wantUpdatedPod: true, + wantErr: true, + }, + { + // on networks with IPAM, expect a normal IP, MAC and gateway + // allocation + name: "expect new IP", + ipam: true, + args: args{ + ipAllocator: &allocatorStub{ + netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), + }, + }, + wantUpdatedPod: true, + wantPodAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.3/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.3/24")[0].IP), + Gateways: []net.IP{ovntest.MustParseIP("192.168.0.1").To4()}, + Routes: []util.PodRoute{ + { + Dest: ovntest.MustParseIPNet("100.64.0.0/16"), + NextHop: ovntest.MustParseIP("192.168.0.1").To4(), + }, + }, + }, + wantReleasedIPsOnRollback: ovntest.MustParseIPNets("192.168.0.3/24"), + }, + { + // on networks with IPAM, if pod is already annotated, expect no + // further updates but do allocate the IP + name: "expect no updates, annotated, IPAM", + ipam: true, + podAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.3/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.3/24")[0].IP), + }, + args: args{ + ipAllocator: &allocatorStub{}, + }, + wantPodAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.3/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.3/24")[0].IP), + }, + wantReleasedIPsOnRollback: ovntest.MustParseIPNets("192.168.0.3/24"), + }, + { + // on networks with IPAM, if pod is already annotated, expect no + // further updates and no error if the IP is already allocated + name: "expect no updates, annotated, already allocated, IPAM", + ipam: true, + podAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.3/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.3/24")[0].IP), + }, + args: args{ + ipAllocator: &allocatorStub{ + allocateIPsError: ipam.ErrAllocated, + }, + }, + 
wantPodAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.3/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.3/24")[0].IP), + }, + }, + { + // on networks with IPAM, if pod is already annotated, expect error + // if allocation fails + name: "expect error, annotated, allocation fails, IPAM", + ipam: true, + podAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.3/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.3/24")[0].IP), + }, + args: args{ + ipAllocator: &allocatorStub{ + allocateIPsError: errors.New("Allocate IPs failed"), + }, + }, + wantErr: true, + }, + { + // on networks with IPAM, try to honor IP request allowing to + // re-allocater on error + name: "expect requested non-static IP, IPAM", + ipam: true, + args: args{ + reallocate: true, + network: &nadapi.NetworkSelectionElement{ + IPRequest: []string{"192.168.0.4/24"}, + }, + ipAllocator: &allocatorStub{ + netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), + }, + }, + wantUpdatedPod: true, + wantPodAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.4/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.4/24")[0].IP), + Gateways: []net.IP{ovntest.MustParseIP("192.168.0.1").To4()}, + Routes: []util.PodRoute{ + { + Dest: ovntest.MustParseIPNet("100.64.0.0/16"), + NextHop: ovntest.MustParseIP("192.168.0.1").To4(), + }, + }, + }, + wantReleasedIPsOnRollback: ovntest.MustParseIPNets("192.168.0.4/24"), + }, + { + // on networks with IPAM, try to honor IP request that is already + // allocated + name: "expect requested non-static IP, already allocated, IPAM", + ipam: true, + args: args{ + reallocate: true, + network: &nadapi.NetworkSelectionElement{ + IPRequest: []string{"192.168.0.4/24"}, + }, + ipAllocator: &allocatorStub{ + netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), + allocateIPsError: ipam.ErrAllocated, + }, + }, + wantUpdatedPod: true, + wantPodAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.4/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.4/24")[0].IP), + Gateways: []net.IP{ovntest.MustParseIP("192.168.0.1").To4()}, + Routes: []util.PodRoute{ + { + Dest: ovntest.MustParseIPNet("100.64.0.0/16"), + NextHop: ovntest.MustParseIP("192.168.0.1").To4(), + }, + }, + }, + }, + { + // on networks with IPAM, trying to honor IP request but + // re-allocating on error + name: "expect reallocate to new IP, error on requested non-static IP, IPAM", + ipam: true, + args: args{ + reallocate: true, + network: &nadapi.NetworkSelectionElement{ + IPRequest: []string{"192.168.0.4/24"}, + }, + ipAllocator: &allocatorStub{ + netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), + allocateIPsError: errors.New("Allocate IPs failed"), + }, + }, + wantUpdatedPod: true, + wantPodAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.3/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.3/24")[0].IP), + Gateways: []net.IP{ovntest.MustParseIP("192.168.0.1").To4()}, + Routes: []util.PodRoute{ + { + Dest: ovntest.MustParseIPNet("100.64.0.0/16"), + NextHop: ovntest.MustParseIP("192.168.0.1").To4(), + }, + }, + }, + wantReleasedIPsOnRollback: ovntest.MustParseIPNets("192.168.0.3/24"), + }, + { + // on networks with IPAM, expect error on an invalid IP request + name: "expect error, invalid requested IP, no IPAM", + ipam: false, + args: args{ + network: &nadapi.NetworkSelectionElement{ + IPRequest: []string{"ivalid"}, + }, + ipAllocator: &allocatorStub{ 
+ netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), + }, + }, + wantErr: true, + }, + { + // on networks with IPAM, expect error on an invalid MAC request + name: "expect error, invalid requested MAC, IPAM", + ipam: true, + args: args{ + network: &nadapi.NetworkSelectionElement{ + MacRequest: "ivalid", + }, + ipAllocator: &allocatorStub{ + netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), + }, + }, + wantErr: true, + wantReleasedIPs: ovntest.MustParseIPNets("192.168.0.3/24"), + }, + { + // on networks with IPAM, honor a MAC request through the network + // selection element + name: "expect requested MAC", + ipam: true, + args: args{ + network: &nadapi.NetworkSelectionElement{ + MacRequest: requestedMAC, + }, + ipAllocator: &allocatorStub{ + netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), + }, + }, + wantUpdatedPod: true, + wantPodAnnotation: &util.PodAnnotation{ + IPs: ovntest.MustParseIPNets("192.168.0.3/24"), + MAC: requestedMACParsed, + Gateways: []net.IP{ovntest.MustParseIP("192.168.0.1").To4()}, + Routes: []util.PodRoute{ + { + Dest: ovntest.MustParseIPNet("100.64.0.0/16"), + NextHop: ovntest.MustParseIP("192.168.0.1").To4(), + }, + }, + }, + wantReleasedIPsOnRollback: ovntest.MustParseIPNets("192.168.0.3/24"), + }, + { + // on networks with IPAM, expect error on an invalid network + // selection element + name: "expect error, invalid network annotation, IPAM", + ipam: true, + args: args{ + network: &nadapi.NetworkSelectionElement{ + MacRequest: "ivalid", + }, + ipAllocator: &allocatorStub{ + netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), + }, + }, + invalidNetworkAnnotation: true, + wantErr: true, + wantReleasedIPs: ovntest.MustParseIPNets("192.168.0.3/24"), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var err error + + g := gomega.NewWithT(t) + + network := tt.args.network + if network == nil { + network = &nadapi.NetworkSelectionElement{} + } + network.Name = "network" + network.Namespace = "namespace" + + var netInfo util.NetInfo + netInfo = &util.DefaultNetInfo{} + nadName := types.DefaultNetworkName + if !tt.ipam { + nadName = util.GetNADName(network.Namespace, network.Name) + netInfo, err = util.NewNetInfo(&ovncnitypes.NetConf{ + Topology: types.LocalnetTopology, + NetConf: cnitypes.NetConf{ + Name: network.Name, + }, + NADName: nadName, + }) + if err != nil { + t.Fatalf("failed to create NetInfo: %v", err) + } + } + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod", + Namespace: "namespace", + }, + } + if tt.podAnnotation != nil { + pod.Annotations, err = util.MarshalPodAnnotation(nil, tt.podAnnotation, nadName) + if err != nil { + t.Fatalf("failed to set pod annotations: %v", err) + } + } + + if tt.invalidNetworkAnnotation { + pod.ObjectMeta.Annotations = map[string]string{ + nadapi.NetworkAttachmentAnnot: "", + } + } + + pod, podAnnotation, rollback, err := allocatePodAnnotationWithRollback( + tt.args.ipAllocator, + netInfo, + pod, + network, + tt.args.reallocate, + ) + + if tt.args.ipAllocator != nil { + releasedIPs := tt.args.ipAllocator.(*allocatorStub).releasedIPs + g.Expect(releasedIPs).To(gomega.Equal(tt.wantReleasedIPs), "Release on error behaved unexpectedly") + tt.args.ipAllocator.(*allocatorStub).releasedIPs = nil + + rollback() + releasedIPs = tt.args.ipAllocator.(*allocatorStub).releasedIPs + g.Expect(releasedIPs).To(gomega.Equal(tt.wantReleasedIPsOnRollback), "Release on rollback behaved unexpectedly") + } + + if tt.wantErr { + // check the expected error after we have checked above that the + // 
rollback has behaved as expected + g.Expect(err).To(gomega.HaveOccurred(), "Expected error") + return + } + g.Expect(err).NotTo(gomega.HaveOccurred(), "Did not expect error") + + if tt.wantGeneratedMac { + g.Expect(podAnnotation).NotTo(gomega.BeNil(), "Expected updated pod annotation") + g.Expect(podAnnotation.IPs).To(gomega.BeNil(), "Did not expect IPs") + g.Expect(podAnnotation.MAC[0]&2).To(gomega.BeEquivalentTo(2), "Expected local MAC") + return + } + + g.Expect(podAnnotation).To(gomega.Equal(tt.wantPodAnnotation)) + + if tt.wantUpdatedPod { + g.Expect(pod).NotTo(gomega.BeNil(), "Expected an updated pod") + } + }) + } +} diff --git a/go-controller/pkg/ovn/base_network_controller.go b/go-controller/pkg/ovn/base_network_controller.go index dbe2e50eb9..d162a6205c 100644 --- a/go-controller/pkg/ovn/base_network_controller.go +++ b/go-controller/pkg/ovn/base_network_controller.go @@ -11,6 +11,7 @@ import ( libovsdbclient "github.com/ovn-org/libovsdb/client" "github.com/ovn-org/libovsdb/ovsdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/pod" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" @@ -96,6 +97,9 @@ type BaseNetworkController struct { // A cache of all logical switches seen by the watcher and their subnets lsManager *lsm.LogicalSwitchManager + // An utility to allocate the PodAnnotation to pods + podAnnotationAllocator *pod.PodAnnotationAllocator + // A cache of all logical ports known to the controller logicalPortCache *portCache @@ -694,7 +698,7 @@ func (bnc *BaseNetworkController) recordNodeErrorEvent(node *kapi.Node, nodeErr } func (bnc *BaseNetworkController) doesNetworkRequireIPAM() bool { - return !((bnc.TopologyType() == types.Layer2Topology || bnc.TopologyType() == types.LocalnetTopology) && len(bnc.Subnets()) == 0) + return util.DoesNetworkRequireIPAM(bnc.NetInfo) } func (bnc *BaseNetworkController) buildPortGroup(hashName, name string, ports []*nbdb.LogicalSwitchPort, acls []*nbdb.ACL) *nbdb.PortGroup { diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go index 1bf4e9a85b..30b1928927 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods.go +++ b/go-controller/pkg/ovn/base_network_controller_pods.go @@ -9,7 +9,7 @@ import ( nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" ipallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + subnetipallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip/subnet" logicalswitchmanager "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/logical_switch_manager" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -17,9 +17,7 @@ import ( kapi "k8s.io/api/core/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/util/retry" "k8s.io/klog/v2" - utilnet "k8s.io/utils/net" libovsdbclient "github.com/ovn-org/libovsdb/client" "github.com/ovn-org/libovsdb/ovsdb" @@ -374,119 +372,6 @@ func (bnc *BaseNetworkController) waitForNodeLogicalSwitchSubnetsInCache(switchN return nil } -func joinSubnetToRoute(isIPv6 bool, gatewayIP net.IP) util.PodRoute { - joinSubnet := config.Gateway.V4JoinSubnet - if isIPv6 { - joinSubnet = 
config.Gateway.V6JoinSubnet - } - _, subnet, err := net.ParseCIDR(joinSubnet) - if err != nil { - // Join subnet should have been validated already by config - panic(fmt.Sprintf("Failed to parse join subnet %q: %v", joinSubnet, err)) - } - - return util.PodRoute{ - Dest: subnet, - NextHop: gatewayIP, - } -} - -func (bnc *BaseNetworkController) addRoutesGatewayIP(pod *kapi.Pod, network *nadapi.NetworkSelectionElement, - podAnnotation *util.PodAnnotation, nodeSubnets []*net.IPNet) error { - if bnc.IsSecondary() { - // for secondary network, see if its network-attachment's annotation has default-route key. - // If present, then we need to add default route for it - podAnnotation.Gateways = append(podAnnotation.Gateways, network.GatewayRequest...) - topoType := bnc.TopologyType() - switch topoType { - case ovntypes.Layer2Topology, ovntypes.LocalnetTopology: - // no route needed for directly connected subnets - return nil - case ovntypes.Layer3Topology: - for _, podIfAddr := range podAnnotation.IPs { - isIPv6 := utilnet.IsIPv6CIDR(podIfAddr) - nodeSubnet, err := util.MatchFirstIPNetFamily(isIPv6, nodeSubnets) - if err != nil { - return err - } - gatewayIPnet := util.GetNodeGatewayIfAddr(nodeSubnet) - for _, clusterSubnet := range bnc.Subnets() { - if isIPv6 == utilnet.IsIPv6CIDR(clusterSubnet.CIDR) { - podAnnotation.Routes = append(podAnnotation.Routes, util.PodRoute{ - Dest: clusterSubnet.CIDR, - NextHop: gatewayIPnet.IP, - }) - } - } - } - return nil - } - return fmt.Errorf("topology type %s not supported", topoType) - } - - // if there are other network attachments for the pod, then check if those network-attachment's - // annotation has default-route key. If present, then we need to skip adding default route for - // OVN interface - networks, err := util.GetK8sPodAllNetworkSelections(pod) - if err != nil { - return fmt.Errorf("error while getting network attachment definition for [%s/%s]: %v", - pod.Namespace, pod.Name, err) - } - otherDefaultRouteV4 := false - otherDefaultRouteV6 := false - for _, network := range networks { - for _, gatewayRequest := range network.GatewayRequest { - if utilnet.IsIPv6(gatewayRequest) { - otherDefaultRouteV6 = true - } else { - otherDefaultRouteV4 = true - } - } - } - - for _, podIfAddr := range podAnnotation.IPs { - isIPv6 := utilnet.IsIPv6CIDR(podIfAddr) - nodeSubnet, err := util.MatchFirstIPNetFamily(isIPv6, nodeSubnets) - if err != nil { - return err - } - - gatewayIPnet := util.GetNodeGatewayIfAddr(nodeSubnet) - - // Ensure default pod network traffic always goes to OVN - for _, clusterSubnet := range config.Default.ClusterSubnets { - if isIPv6 == utilnet.IsIPv6CIDR(clusterSubnet.CIDR) { - podAnnotation.Routes = append(podAnnotation.Routes, util.PodRoute{ - Dest: clusterSubnet.CIDR, - NextHop: gatewayIPnet.IP, - }) - } - } - - // Ensure default service network traffic always goes to OVN - for _, serviceSubnet := range config.Kubernetes.ServiceCIDRs { - if isIPv6 == utilnet.IsIPv6CIDR(serviceSubnet) { - podAnnotation.Routes = append(podAnnotation.Routes, util.PodRoute{ - Dest: serviceSubnet, - NextHop: gatewayIPnet.IP, - }) - } - } - - otherDefaultRoute := otherDefaultRouteV4 - if isIPv6 { - otherDefaultRoute = otherDefaultRouteV6 - } - if !otherDefaultRoute { - podAnnotation.Gateways = append(podAnnotation.Gateways, gatewayIPnet.IP) - } - - // Ensure default join subnet traffic always goes to OVN - podAnnotation.Routes = append(podAnnotation.Routes, joinSubnetToRoute(isIPv6, gatewayIPnet.IP)) - } - return nil -} - // podExpectedInLogicalCache returns 
true if pod should be added to oc.logicalPortCache. // For some pods, like hostNetwork pods, overlay node pods, or completed pods waiting for them to be added // to oc.logicalPortCache will never succeed. @@ -551,12 +436,8 @@ func (bnc *BaseNetworkController) addLogicalPortToNetwork(pod *kapi.Pod, nadName portName := bnc.GetLogicalPortName(pod, nadName) klog.Infof("[%s] creating logical port %s for pod on switch %s", podDesc, portName, switchName) - var podMac net.HardwareAddr - var podIfAddrs []*net.IPNet var addresses []string - var releaseIPs bool lspExist := false - needsIP := true // Check if the pod's logical switch port already exists. If it // does don't re-add the port to OVN as this will change its @@ -611,138 +492,27 @@ func (bnc *BaseNetworkController) addLogicalPortToNetwork(pod *kapi.Pod, nadName // rescheduled. lsp.Options["requested-chassis"] = pod.Spec.NodeName - podAnnotation, err = util.UnmarshalPodAnnotation(pod.Annotations, nadName) - - // the IPs we allocate in this function need to be released back to the - // IPAM pool if there is some error in any step of addLogicalPort past - // the point the IPs were assigned via the IPAM manager. - // this needs to be done only when releaseIPs is set to true (the case where - // we truly have assigned podIPs in this call) AND when there is no error in - // the rest of the functionality of addLogicalPort. It is important to use a - // named return variable for defer to work correctly. - - defer func() { - if releaseIPs && err != nil { - if relErr := bnc.lsManager.ReleaseIPs(switchName, podIfAddrs); relErr != nil { - klog.Errorf("Error when releasing IPs %s for switch: %s, err: %q", - util.JoinIPNetIPs(podIfAddrs, " "), switchName, relErr) - } else { - klog.Infof("Released IPs: %s for node: %s", util.JoinIPNetIPs(podIfAddrs, " "), switchName) - } - } - }() - - if err == nil { - podMac = podAnnotation.MAC - podIfAddrs = podAnnotation.IPs - - // If the pod already has annotations use the existing static - // IP/MAC from the annotation. - lsp.DynamicAddresses = nil - - if bnc.doesNetworkRequireIPAM() { - // ensure we have reserved the IPs in the annotation - if err = bnc.lsManager.AllocateIPs(switchName, podIfAddrs); err != nil && err != ipallocator.ErrAllocated { - return nil, nil, nil, false, fmt.Errorf("unable to ensure IPs allocated for already annotated pod: %s, IPs: %s, error: %v", - podDesc, util.JoinIPNetIPs(podIfAddrs, " "), err) - } else { - needsIP = false - } - } else if len(podIfAddrs) > 0 { - return nil, nil, nil, false, fmt.Errorf("IPAMless network with IPs present in the annotations; rejecting to handle this request") - } + // Although we have different code to allocate the pod annotation for the + // default network and secondary networks, at the time of this writing they + // are functionally equivalent and the only reason to keep them separated is + // to make sure the secondary network code has no bugs before we switch to + // it for the default network as well. If at all possible, keep them + // functionally equivalent going forward. + var annotationUpdated bool + if bnc.IsSecondary() { + podAnnotation, annotationUpdated, err = bnc.allocatePodAnnotationForSecondaryNetwork(pod, existingLSP, nadName, network) + } else { + podAnnotation, annotationUpdated, err = bnc.allocatePodAnnotation(pod, existingLSP, podDesc, nadName, network) } - // It is possible that IPs have already been allocated for this pod and annotation has been updated, then the last - // addLogicalPortToNetwork() failed afterwards. 
In the current retry attempt, if the input pod argument got from - // the informer cache still lags behind, we would fail to get the updated pod annotation. Just continue to allocate - // new IPs and this function will eventually fail in updatePodAnnotationWithRetry() with ErrOverridePodIPs - // when it tries to override the pod IP annotation. Newly allocated IPs will be released then. - if needsIP { - if existingLSP != nil { - // try to get the MAC and IPs from existing OVN port first - podMac, podIfAddrs, err = bnc.getPortAddresses(switchName, existingLSP) - if err != nil { - return nil, nil, nil, false, fmt.Errorf("failed to get pod addresses for pod %s on node: %s, err: %v", - podDesc, switchName, err) - } - } - needsNewMacOrIPAllocation := false - - // ensure we have reserved the IPs found in OVN - if len(podIfAddrs) == 0 { - needsNewMacOrIPAllocation = true - } else if bnc.doesNetworkRequireIPAM() { - if err = bnc.lsManager.AllocateIPs(switchName, podIfAddrs); err != nil && err != ipallocator.ErrAllocated { - klog.Warningf("Unable to allocate IPs %s found on existing OVN port: %s, for pod %s on switch: %s"+ - " error: %v", util.JoinIPNetIPs(podIfAddrs, " "), portName, podDesc, switchName, err) - - needsNewMacOrIPAllocation = true - } - } - if needsNewMacOrIPAllocation { - if network != nil && network.IPRequest != nil && !bnc.doesNetworkRequireIPAM() { - klog.V(5).Infof("Will use static IP addresses for pod %s on a flatL2 topology without subnet defined", podDesc) - podIfAddrs, err = calculateStaticIPs(podDesc, network.IPRequest) - if err != nil { - return nil, nil, nil, false, err - } - podMac = util.IPAddrToHWAddr(podIfAddrs[0].IP) - } else { - // Previous attempts to use already configured IPs failed, need to assign new - generatedPodMac, generatedPodIfAddrs, err := bnc.assignPodAddresses(switchName) - if err != nil { - return nil, nil, nil, false, fmt.Errorf("failed to assign pod addresses for pod %s on switch: %s, err: %v", - podDesc, switchName, err) - } - if podMac == nil { - podMac = generatedPodMac - } - if len(generatedPodIfAddrs) > 0 { - podIfAddrs = generatedPodIfAddrs - } - } - } - - releaseIPs = true - // handle error cases separately first to ensure binding to err, otherwise the - // defer will fail - if network != nil && network.MacRequest != "" { - podMac, err = calculateStaticMAC(podDesc, network.MacRequest) - if err != nil { - return nil, nil, nil, false, err - } - } - podAnnotation = &util.PodAnnotation{ - IPs: podIfAddrs, - MAC: podMac, - } - var nodeSubnets []*net.IPNet - if nodeSubnets = bnc.lsManager.GetSwitchSubnets(switchName); nodeSubnets == nil && bnc.doesNetworkRequireIPAM() { - return nil, nil, nil, false, fmt.Errorf("cannot retrieve subnet for assigning gateway routes for pod %s, switch: %s", - podDesc, switchName) - } - err = bnc.addRoutesGatewayIP(pod, network, podAnnotation, nodeSubnets) - if err != nil { - return nil, nil, nil, false, err - } - - klog.V(5).Infof("Annotation values: ip=%v ; mac=%s ; gw=%s", - podIfAddrs, podMac, podAnnotation.Gateways) - annoStart := time.Now() - err = bnc.updatePodAnnotationWithRetry(pod, podAnnotation, nadName) - podAnnoTime := time.Since(annoStart) - klog.Infof("[%s] addLogicalPort annotation time took %v", podDesc, podAnnoTime) - if err != nil { - return nil, nil, nil, false, err - } - releaseIPs = false + if err != nil { + return nil, nil, nil, false, err } // set addresses on the port // LSP addresses in OVN are a single space-separated value - addresses = []string{podMac.String()} - for _, podIfAddr := range 
podIfAddrs { + addresses = []string{podAnnotation.MAC.String()} + for _, podIfAddr := range podAnnotation.IPs { addresses[0] = addresses[0] + " " + podIfAddr.IP.String() } @@ -765,28 +535,17 @@ func (bnc *BaseNetworkController) addLogicalPortToNetwork(pod *kapi.Pod, nadName fmt.Errorf("error creating logical switch port %+v on switch %+v: %+v", *lsp, *ls, err) } - return ops, lsp, podAnnotation, needsIP && !lspExist, nil + return ops, lsp, podAnnotation, annotationUpdated && !lspExist, nil } func (bnc *BaseNetworkController) updatePodAnnotationWithRetry(origPod *kapi.Pod, podInfo *util.PodAnnotation, nadName string) error { - resultErr := retry.RetryOnConflict(util.OvnConflictBackoff, func() error { - // Informer cache should not be mutated, so get a copy of the object - pod, err := bnc.watchFactory.GetPod(origPod.Namespace, origPod.Name) - if err != nil { - return err - } - - cpod := pod.DeepCopy() - cpod.Annotations, err = util.MarshalPodAnnotation(cpod.Annotations, podInfo, nadName) - if err != nil { - return err - } - return bnc.kube.UpdatePod(cpod) - }) - if resultErr != nil { - return fmt.Errorf("failed to update annotation on pod %s/%s: %v", origPod.Namespace, origPod.Name, resultErr) - } - return nil + return util.UpdatePodAnnotationWithRetry( + bnc.watchFactory.PodCoreInformer().Lister(), + bnc.kube, + origPod, + podInfo, + nadName, + ) } // Given a switch, gets the next set of addresses (from the IPAM) for each of the node's @@ -947,3 +706,197 @@ func calculateStaticMAC(podDesc string, mac string) (net.HardwareAddr, error) { } return podMac, nil } + +// allocatePodAnnotation and update the corresponding pod annotation. +func (bnc *BaseNetworkController) allocatePodAnnotation(pod *kapi.Pod, existingLSP *nbdb.LogicalSwitchPort, podDesc, nadName string, network *nadapi.NetworkSelectionElement) (*util.PodAnnotation, bool, error) { + var releaseIPs bool + var podMac net.HardwareAddr + var podIfAddrs []*net.IPNet + needsIP := true + + switchName := pod.Spec.NodeName + + podAnnotation, err := util.UnmarshalPodAnnotation(pod.Annotations, nadName) + + // the IPs we allocate in this function need to be released back to the + // IPAM pool if there is some error in any step of addLogicalPort past + // the point the IPs were assigned via the IPAM manager. + // this needs to be done only when releaseIPs is set to true (the case where + // we truly have assigned podIPs in this call) AND when there is no error in + // the rest of the functionality of addLogicalPort. It is important to use a + // named return variable for defer to work correctly. 
+ + defer func() { + if releaseIPs && err != nil { + if relErr := bnc.lsManager.ReleaseIPs(switchName, podIfAddrs); relErr != nil { + klog.Errorf("Error when releasing IPs %s for switch: %s, err: %q", + util.JoinIPNetIPs(podIfAddrs, " "), switchName, relErr) + } else { + klog.Infof("Released IPs: %s for node: %s", util.JoinIPNetIPs(podIfAddrs, " "), switchName) + } + } + }() + + if err == nil { + podMac = podAnnotation.MAC + podIfAddrs = podAnnotation.IPs + + if bnc.doesNetworkRequireIPAM() { + // ensure we have reserved the IPs in the annotation + if err = bnc.lsManager.AllocateIPs(switchName, podIfAddrs); err != nil && err != ipallocator.ErrAllocated { + return nil, false, fmt.Errorf("unable to ensure IPs allocated for already annotated pod: %s, IPs: %s, error: %v", + podDesc, util.JoinIPNetIPs(podIfAddrs, " "), err) + } else { + needsIP = false + } + } else if len(podIfAddrs) > 0 { + return nil, false, fmt.Errorf("IPAMless network with IPs present in the annotations; rejecting to handle this request") + } + } + + // It is possible that IPs have already been allocated for this pod and annotation has been updated, then the last + // addLogicalPortToNetwork() failed afterwards. In the current retry attempt, if the input pod argument got from + // the informer cache still lags behind, we would fail to get the updated pod annotation. Just continue to allocate + // new IPs and this function will eventually fail in updatePodAnnotationWithRetry() with ErrOverridePodIPs + // when it tries to override the pod IP annotation. Newly allocated IPs will be released then. + if needsIP { + if existingLSP != nil { + // try to get the MAC and IPs from existing OVN port first + podMac, podIfAddrs, err = bnc.getPortAddresses(switchName, existingLSP) + if err != nil { + return nil, false, fmt.Errorf("failed to get pod addresses for pod %s on node: %s, err: %v", + podDesc, switchName, err) + } + } + needsNewMacOrIPAllocation := false + + // ensure we have reserved the IPs found in OVN + if len(podIfAddrs) == 0 { + needsNewMacOrIPAllocation = true + } else if bnc.doesNetworkRequireIPAM() { + if err = bnc.lsManager.AllocateIPs(switchName, podIfAddrs); err != nil && err != ipallocator.ErrAllocated { + klog.Warningf("Unable to allocate IPs %s found on existing OVN port: %s, for pod %s on switch: %s"+ + " error: %v", util.JoinIPNetIPs(podIfAddrs, " "), bnc.GetLogicalPortName(pod, nadName), podDesc, switchName, err) + + needsNewMacOrIPAllocation = true + } + } + if needsNewMacOrIPAllocation { + if network != nil && network.IPRequest != nil && !bnc.doesNetworkRequireIPAM() { + klog.V(5).Infof("Will use static IP addresses for pod %s on a flatL2 topology without subnet defined", podDesc) + podIfAddrs, err = calculateStaticIPs(podDesc, network.IPRequest) + if err != nil { + return nil, false, err + } + podMac = util.IPAddrToHWAddr(podIfAddrs[0].IP) + } else { + // Previous attempts to use already configured IPs failed, need to assign new + generatedPodMac, generatedPodIfAddrs, err := bnc.assignPodAddresses(switchName) + if err != nil { + return nil, false, fmt.Errorf("failed to assign pod addresses for pod %s on switch: %s, err: %v", + podDesc, switchName, err) + } + if podMac == nil { + podMac = generatedPodMac + } + if len(generatedPodIfAddrs) > 0 { + podIfAddrs = generatedPodIfAddrs + } + } + } + + releaseIPs = true + // handle error cases separately first to ensure binding to err, otherwise the + // defer will fail + if network != nil && network.MacRequest != "" { + podMac, err = calculateStaticMAC(podDesc, 
network.MacRequest) + if err != nil { + return nil, false, err + } + } + podAnnotation = &util.PodAnnotation{ + IPs: podIfAddrs, + MAC: podMac, + } + var nodeSubnets []*net.IPNet + if nodeSubnets = bnc.lsManager.GetSwitchSubnets(switchName); nodeSubnets == nil && bnc.doesNetworkRequireIPAM() { + return nil, false, fmt.Errorf("cannot retrieve subnet for assigning gateway routes for pod %s, switch: %s", + podDesc, switchName) + } + err = util.AddRoutesGatewayIP(bnc.NetInfo, pod, podAnnotation, network) + if err != nil { + return nil, false, err + } + + klog.V(5).Infof("Annotation values: ip=%v ; mac=%s ; gw=%s", + podIfAddrs, podMac, podAnnotation.Gateways) + annoStart := time.Now() + err = bnc.updatePodAnnotationWithRetry(pod, podAnnotation, nadName) + podAnnoTime := time.Since(annoStart) + klog.Infof("[%s] addLogicalPort annotation time took %v", podDesc, podAnnoTime) + if err != nil { + return nil, false, err + } + releaseIPs = false + } + + return podAnnotation, needsIP, nil +} + +// allocatePodAnnotationForSecondaryNetwork and update the corresponding pod +// annotation. +func (bnc *BaseNetworkController) allocatePodAnnotationForSecondaryNetwork(pod *kapi.Pod, lsp *nbdb.LogicalSwitchPort, nadName string, network *nadapi.NetworkSelectionElement) (*util.PodAnnotation, bool, error) { + switchName, err := bnc.getExpectedSwitchName(pod) + if err != nil { + return nil, false, err + } + + if network == nil { + network = &nadapi.NetworkSelectionElement{} + } + + var reallocate bool + if lsp != nil && len(network.IPRequest) == 0 { + mac, ips, err := bnc.getPortAddresses(switchName, lsp) + if err != nil { + return nil, false, fmt.Errorf("failed to get pod addresses for pod %s/%s/%s on node %s, err: %v", + nadName, pod.Namespace, pod.Name, switchName, err) + } + network.MacRequest = mac.String() + network.IPRequest = util.StringSlice(ips) + reallocate = true + + klog.V(5).Infof("Will attempt to use LSP IP addresses %v and mac %s for pod %s/%s/%s", + network.IPRequest, network.MacRequest, nadName, pod.Namespace, pod.Name) + } + + var ipAllocator subnetipallocator.NamedAllocator + if bnc.doesNetworkRequireIPAM() { + ipAllocator = bnc.lsManager.ForSwitch(switchName) + } + + updatedPod, podAnnotation, err := bnc.podAnnotationAllocator.AllocatePodAnnotation( + ipAllocator, + pod, + network, + reallocate, + ) + + if err != nil { + return nil, false, err + } + + if updatedPod != nil { + klog.V(5).Infof("Allocated IP addresses %v, mac address %s, gateways %v and routes %s for pod %s/%s on nad %s", + util.StringSlice(podAnnotation.IPs), + podAnnotation.MAC, + util.StringSlice(podAnnotation.Gateways), + util.StringSlice(podAnnotation.Routes), + pod.Namespace, pod.Name, nadName, + ) + + return podAnnotation, true, nil + } + + return podAnnotation, false, nil +} diff --git a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go index 0efe677d90..ef2f1c1a4f 100644 --- a/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go +++ b/go-controller/pkg/ovn/logical_switch_manager/logical_switch_manager.go @@ -148,3 +148,8 @@ func (manager *LogicalSwitchManager) ReleaseIPs(switchName string, ipnets []*net func (manager *LogicalSwitchManager) ConditionalIPRelease(switchName string, ipnets []*net.IPNet, predicate func() (bool, error)) (bool, error) { return manager.allocator.ConditionalIPRelease(switchName, ipnets, predicate) } + +// ForSubnet return an IP allocator for the specified switch +func (manager 
*LogicalSwitchManager) ForSwitch(switchName string) subnet.NamedAllocator { + return manager.allocator.ForSubnet(switchName) +} diff --git a/go-controller/pkg/ovn/secondary_layer2_network_controller.go b/go-controller/pkg/ovn/secondary_layer2_network_controller.go index 4e5a8c4f85..a9dbd74803 100644 --- a/go-controller/pkg/ovn/secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/secondary_layer2_network_controller.go @@ -4,6 +4,7 @@ import ( "context" "sync" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/pod" addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" lsm "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/logical_switch_manager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/syncmap" @@ -48,6 +49,12 @@ func NewSecondaryLayer2NetworkController(cnci *CommonNetworkControllerInfo, netI }, } + podAnnotationAllocator := pod.NewPodAnnotationAllocator( + netInfo, + cnci.watchFactory.PodCoreInformer().Lister(), + cnci.kube) + oc.podAnnotationAllocator = podAnnotationAllocator + // disable multicast support for secondary networks // TBD: changes needs to be made to support multicast in secondary networks oc.multicastSupport = false diff --git a/go-controller/pkg/ovn/secondary_layer3_network_controller.go b/go-controller/pkg/ovn/secondary_layer3_network_controller.go index 2f381aef6a..f5b801e06e 100644 --- a/go-controller/pkg/ovn/secondary_layer3_network_controller.go +++ b/go-controller/pkg/ovn/secondary_layer3_network_controller.go @@ -10,6 +10,7 @@ import ( mnpapi "github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/apis/k8s.cni.cncf.io/v1beta1" "github.com/ovn-org/libovsdb/ovsdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/pod" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" @@ -287,6 +288,13 @@ func NewSecondaryLayer3NetworkController(cnci *CommonNetworkControllerInfo, netI syncZoneICFailed: sync.Map{}, zoneICHandler: zoneICHandler, } + + podAnnotationAllocator := pod.NewPodAnnotationAllocator( + netInfo, + cnci.watchFactory.PodCoreInformer().Lister(), + cnci.kube) + oc.podAnnotationAllocator = podAnnotationAllocator + // disable multicast support for secondary networks // TBD: changes needs to be made to support multicast in secondary networks oc.multicastSupport = false diff --git a/go-controller/pkg/ovn/secondary_localnet_network_controller.go b/go-controller/pkg/ovn/secondary_localnet_network_controller.go index f0f014ed36..a5a259d233 100644 --- a/go-controller/pkg/ovn/secondary_localnet_network_controller.go +++ b/go-controller/pkg/ovn/secondary_localnet_network_controller.go @@ -2,6 +2,9 @@ package ovn import ( "context" + "sync" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/pod" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" @@ -9,7 +12,6 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/syncmap" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" - "sync" "k8s.io/klog/v2" ) @@ -49,6 +51,12 @@ func NewSecondaryLocalnetNetworkController(cnci *CommonNetworkControllerInfo, ne }, } + podAnnotationAllocator := pod.NewPodAnnotationAllocator( + netInfo, + 
cnci.watchFactory.PodCoreInformer().Lister(), + cnci.kube) + oc.podAnnotationAllocator = podAnnotationAllocator + // disable multicast support for secondary networks // TBD: changes needs to be made to support multicast in secondary networks oc.multicastSupport = false diff --git a/go-controller/pkg/util/multi_network.go b/go-controller/pkg/util/multi_network.go index b2a8461306..47d8acedd4 100644 --- a/go-controller/pkg/util/multi_network.go +++ b/go-controller/pkg/util/multi_network.go @@ -479,3 +479,7 @@ func GetPodNADToNetworkMapping(pod *kapi.Pod, nInfo NetInfo) (bool, map[string]* func IsMultiNetworkPoliciesSupportEnabled() bool { return config.OVNKubernetesFeature.EnableMultiNetwork && config.OVNKubernetesFeature.EnableMultiNetworkPolicy } + +func DoesNetworkRequireIPAM(netInfo NetInfo) bool { + return !((netInfo.TopologyType() == types.Layer2Topology || netInfo.TopologyType() == types.LocalnetTopology) && len(netInfo.Subnets()) == 0) +} diff --git a/go-controller/pkg/util/net.go b/go-controller/pkg/util/net.go index 0d61e838a8..21c39386a2 100644 --- a/go-controller/pkg/util/net.go +++ b/go-controller/pkg/util/net.go @@ -304,6 +304,20 @@ func ContainsCIDR(ipnet1, ipnet2 *net.IPNet) bool { return mask1 <= mask2 && ipnet1.Contains(ipnet2.IP) } +// ParseIPNets parses the provided string formatted CIDRs +func ParseIPNets(strs []string) ([]*net.IPNet, error) { + ipnets := make([]*net.IPNet, len(strs)) + for i := range strs { + ip, ipnet, err := utilnet.ParseCIDRSloppy(strs[i]) + if err != nil { + return nil, err + } + ipnet.IP = ip + ipnets[i] = ipnet + } + return ipnets, nil +} + // GenerateRandMAC generates a random unicast and locally administered MAC address. // LOOTED FROM https://github.com/cilium/cilium/blob/v1.12.6/pkg/mac/mac.go#L106 func GenerateRandMAC() (net.HardwareAddr, error) { @@ -318,6 +332,16 @@ func GenerateRandMAC() (net.HardwareAddr, error) { return buf, nil } +// CopyIPNets copies the provided slice of IPNet +func CopyIPNets(ipnets []*net.IPNet) []*net.IPNet { + copy := make([]*net.IPNet, len(ipnets)) + for i := range ipnets { + ipnet := *ipnets[i] + copy[i] = &ipnet + } + return copy +} + // IPsToNetworkIPs returns the network CIDRs of the provided IP CIDRs func IPsToNetworkIPs(ips ...*net.IPNet) []*net.IPNet { nets := make([]*net.IPNet, len(ips)) diff --git a/go-controller/pkg/util/pod.go b/go-controller/pkg/util/pod.go new file mode 100644 index 0000000000..13028f312f --- /dev/null +++ b/go-controller/pkg/util/pod.go @@ -0,0 +1,64 @@ +package util + +import ( + "fmt" + "time" + + v1 "k8s.io/api/core/v1" + listers "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/util/retry" + "k8s.io/klog/v2" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" +) + +// AllocateToPodWithRollbackFunc is a function used to allocate a resource to a +// pod that depends on the current state of the pod, and possibly updating it. +// To be used with UpdatePodWithAllocationOrRollback. Implementations can return +// a nil pod if no update is warranted. Implementations can also return a +// rollback function that will be invoked if the pod update fails. +type AllocateToPodWithRollbackFunc func(pod *v1.Pod) (*v1.Pod, func(), error) + +// UpdatePodWithRetryOrRollback updates the pod with the result of the +// allocate function. If the pod update fails, it applies the rollback provided by +// the allocate function. 
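+//
+// A minimal usage sketch (illustrative only; the annotation key and the
+// rollback body below are hypothetical, not part of this change):
+//
+//	err := util.UpdatePodWithRetryOrRollback(podLister, kube, pod,
+//		func(pod *v1.Pod) (*v1.Pod, func(), error) {
+//			pod.Annotations["example.io/claim"] = "value" // assumed key, for illustration
+//			rollback := func() { /* release whatever was reserved above */ }
+//			return pod, rollback, nil
+//		})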
+func UpdatePodWithRetryOrRollback(podLister listers.PodLister, kube kube.Interface, pod *v1.Pod, allocate AllocateToPodWithRollbackFunc) error { + start := time.Now() + var updated bool + + err := retry.RetryOnConflict(OvnConflictBackoff, func() error { + pod, err := podLister.Pods(pod.Namespace).Get(pod.Name) + if err != nil { + return err + } + + // Informer cache should not be mutated, so copy the object + pod = pod.DeepCopy() + pod, rollback, err := allocate(pod) + if err != nil { + return err + } + + if pod == nil { + return nil + } + + updated = true + err = kube.UpdatePod(pod) + if err != nil && rollback != nil { + rollback() + } + + return err + }) + + if err != nil { + return fmt.Errorf("failed to update pod %s/%s: %w", pod.Namespace, pod.Name, err) + } + + if updated { + klog.Infof("[%s/%s] pod update took %v", pod.Namespace, pod.Name, time.Since(start)) + } + + return nil +} diff --git a/go-controller/pkg/util/pod_annotation.go b/go-controller/pkg/util/pod_annotation.go index 400bbf6e0e..28046c5464 100644 --- a/go-controller/pkg/util/pod_annotation.go +++ b/go-controller/pkg/util/pod_annotation.go @@ -8,9 +8,12 @@ import ( nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" nadutils "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/utils" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" v1 "k8s.io/api/core/v1" + listers "k8s.io/client-go/listers/core/v1" utilnet "k8s.io/utils/net" ) @@ -75,6 +78,10 @@ type PodRoute struct { NextHop net.IP } +func (r PodRoute) String() string { + return fmt.Sprintf("%s %s", r.Dest, r.NextHop) +} + // Internal struct used to marshal PodAnnotation to the pod annotation type podAnnotation struct { IPs []string `json:"ip_addresses"` @@ -379,3 +386,153 @@ func GetK8sPodAllNetworkSelections(pod *v1.Pod) ([]*nadapi.NetworkSelectionEleme } return networks, nil } + +// UpdatePodAnnotationWithRetry updates the pod annotation on the pod retrying +// on conflict +func UpdatePodAnnotationWithRetry(podLister listers.PodLister, kube kube.Interface, pod *v1.Pod, podAnnotation *PodAnnotation, nadName string) error { + updatePodAnnotationNoRollback := func(pod *v1.Pod) (*v1.Pod, func(), error) { + var err error + pod.Annotations, err = MarshalPodAnnotation(pod.Annotations, podAnnotation, nadName) + if err != nil { + return nil, nil, err + } + return pod, nil, nil + } + + return UpdatePodWithRetryOrRollback( + podLister, + kube, + pod, + updatePodAnnotationNoRollback, + ) +} + +// IsValidPodAnnotation tests whether the PodAnnotation is valid, currently true +// for any PodAnnotation with a MAC which is the only thing required to attach a +// pod. 
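+//
+// For example, an annotation carrying only a MAC address (no IPs, gateways or
+// routes) is still considered valid; that is typically the shape produced for
+// pods attached to IPAM-less layer2/localnet secondary networks.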
+func IsValidPodAnnotation(podAnnotation *PodAnnotation) bool { + return podAnnotation != nil && len(podAnnotation.MAC) > 0 +} + +func joinSubnetToRoute(isIPv6 bool, gatewayIP net.IP) PodRoute { + joinSubnet := config.Gateway.V4JoinSubnet + if isIPv6 { + joinSubnet = config.Gateway.V6JoinSubnet + } + _, subnet, err := net.ParseCIDR(joinSubnet) + if err != nil { + // Join subnet should have been validated already by config + panic(fmt.Sprintf("Failed to parse join subnet %q: %v", joinSubnet, err)) + } + + return PodRoute{ + Dest: subnet, + NextHop: gatewayIP, + } +} + +// addRoutesGatewayIP updates the provided pod annotation for the provided pod +// with the gateways derived from the allocated IPs +func AddRoutesGatewayIP( + netinfo NetInfo, + pod *v1.Pod, + podAnnotation *PodAnnotation, + network *nadapi.NetworkSelectionElement) error { + + // generate the nodeSubnets from the allocated IPs + nodeSubnets := IPsToNetworkIPs(podAnnotation.IPs...) + + if netinfo.IsSecondary() { + // for secondary network, see if its network-attachment's annotation has default-route key. + // If present, then we need to add default route for it + podAnnotation.Gateways = append(podAnnotation.Gateways, network.GatewayRequest...) + topoType := netinfo.TopologyType() + switch topoType { + case types.Layer2Topology, types.LocalnetTopology: + // no route needed for directly connected subnets + return nil + case types.Layer3Topology: + for _, podIfAddr := range podAnnotation.IPs { + isIPv6 := utilnet.IsIPv6CIDR(podIfAddr) + nodeSubnet, err := MatchFirstIPNetFamily(isIPv6, nodeSubnets) + if err != nil { + return err + } + gatewayIPnet := GetNodeGatewayIfAddr(nodeSubnet) + for _, clusterSubnet := range netinfo.Subnets() { + if isIPv6 == utilnet.IsIPv6CIDR(clusterSubnet.CIDR) { + podAnnotation.Routes = append(podAnnotation.Routes, PodRoute{ + Dest: clusterSubnet.CIDR, + NextHop: gatewayIPnet.IP, + }) + } + } + } + return nil + } + return fmt.Errorf("topology type %s not supported", topoType) + } + + // if there are other network attachments for the pod, then check if those network-attachment's + // annotation has default-route key. 
If present, then we need to skip adding default route for + // OVN interface + networks, err := GetK8sPodAllNetworkSelections(pod) + if err != nil { + return fmt.Errorf("error while getting network attachment definition for [%s/%s]: %v", + pod.Namespace, pod.Name, err) + } + otherDefaultRouteV4 := false + otherDefaultRouteV6 := false + for _, network := range networks { + for _, gatewayRequest := range network.GatewayRequest { + if utilnet.IsIPv6(gatewayRequest) { + otherDefaultRouteV6 = true + } else { + otherDefaultRouteV4 = true + } + } + } + + for _, podIfAddr := range podAnnotation.IPs { + isIPv6 := utilnet.IsIPv6CIDR(podIfAddr) + nodeSubnet, err := MatchFirstIPNetFamily(isIPv6, nodeSubnets) + if err != nil { + return err + } + + gatewayIPnet := GetNodeGatewayIfAddr(nodeSubnet) + + // Ensure default pod network traffic always goes to OVN + for _, clusterSubnet := range config.Default.ClusterSubnets { + if isIPv6 == utilnet.IsIPv6CIDR(clusterSubnet.CIDR) { + podAnnotation.Routes = append(podAnnotation.Routes, PodRoute{ + Dest: clusterSubnet.CIDR, + NextHop: gatewayIPnet.IP, + }) + } + } + + // Ensure default service network traffic always goes to OVN + for _, serviceSubnet := range config.Kubernetes.ServiceCIDRs { + if isIPv6 == utilnet.IsIPv6CIDR(serviceSubnet) { + podAnnotation.Routes = append(podAnnotation.Routes, PodRoute{ + Dest: serviceSubnet, + NextHop: gatewayIPnet.IP, + }) + } + } + + otherDefaultRoute := otherDefaultRouteV4 + if isIPv6 { + otherDefaultRoute = otherDefaultRouteV6 + } + if !otherDefaultRoute { + podAnnotation.Gateways = append(podAnnotation.Gateways, gatewayIPnet.IP) + } + + // Ensure default join subnet traffic always goes to OVN + podAnnotation.Routes = append(podAnnotation.Routes, joinSubnetToRoute(isIPv6, gatewayIPnet.IP)) + } + + return nil +} diff --git a/go-controller/pkg/util/pod_annotation_unit_test.go b/go-controller/pkg/util/pod_annotation_unit_test.go index 98b4c160c6..bc9addfdba 100644 --- a/go-controller/pkg/util/pod_annotation_unit_test.go +++ b/go-controller/pkg/util/pod_annotation_unit_test.go @@ -8,14 +8,13 @@ import ( "testing" cnitypes "github.com/containernetworking/cni/pkg/types" - "github.com/stretchr/testify/assert" - - v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" ) func TestMarshalPodAnnotation(t *testing.T) { diff --git a/go-controller/pkg/util/pod_test.go b/go-controller/pkg/util/pod_test.go new file mode 100644 index 0000000000..9d5e8d54af --- /dev/null +++ b/go-controller/pkg/util/pod_test.go @@ -0,0 +1,116 @@ +package util + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/mock" + + v1mocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1" + v1 "k8s.io/api/core/v1" + + kubemocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube/mocks" +) + +func TestUpdatePodWithAllocationOrRollback(t *testing.T) { + tests := []struct { + name string + allocateRollback bool + getPodErr bool + allocateErr bool + updatePodErr bool + expectAllocation bool + expectRollback bool + expectUpdate bool + expectErr bool + }{ + { + name: "normal operation", + allocateRollback: true, + expectAllocation: true, + expectUpdate: true, + }, + { + name: "pod get fails", + allocateRollback: true, + 
getPodErr: true, + expectErr: true, + }, + { + name: "allocate fails", + expectAllocation: true, + allocateRollback: true, + allocateErr: true, + expectErr: true, + }, + { + name: "update pod fails", + expectAllocation: true, + allocateRollback: true, + updatePodErr: true, + expectRollback: true, + expectErr: true, + }, + { + name: "update pod fails no rollback", + expectAllocation: true, + updatePodErr: true, + expectErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + + podListerMock := &v1mocks.PodLister{} + kubeMock := &kubemocks.Interface{} + podNamespaceLister := &v1mocks.PodNamespaceLister{} + + podListerMock.On("Pods", mock.AnythingOfType("string")).Return(podNamespaceLister) + + var rollbackDone bool + rollback := func() { + rollbackDone = true + } + + pod := &v1.Pod{} + + var allocated bool + allocate := func(pod *v1.Pod) (*v1.Pod, func(), error) { + allocated = true + if tt.allocateErr { + return pod, rollback, errors.New("Allocate error") + } + if tt.allocateRollback { + return pod, rollback, nil + } + return pod, nil, nil + } + + if tt.getPodErr { + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(nil, errors.New("Get pod error")) + } else { + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) + } + + if tt.updatePodErr { + kubeMock.On("UpdatePod", pod).Return(errors.New("Update pod error")) + } else if tt.expectUpdate { + kubeMock.On("UpdatePod", pod).Return(nil) + } + + err := UpdatePodWithRetryOrRollback(podListerMock, kubeMock, &v1.Pod{}, allocate) + + if (err != nil) != tt.expectErr { + t.Errorf("UpdatePodWithAllocationOrRollback() error = %v, expectErr %v", err, tt.expectErr) + } + + if allocated != tt.expectAllocation { + t.Errorf("UpdatePodWithAllocationOrRollback() allocated = %v, expectAllocation %v", allocated, tt.expectAllocation) + } + + if rollbackDone != tt.expectRollback { + t.Errorf("UpdatePodWithAllocationOrRollback() rollbackDone = %v, expectRollback %v", rollbackDone, tt.expectRollback) + } + }) + } +} From 9b53357e08a4545e9a0ed436640173bc92f8f2ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Mon, 29 May 2023 13:36:24 +0000 Subject: [PATCH 14/31] Refactor DPU annotations to use UpdatePodWithRetryOrRollback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jaime Caamaño Ruiz --- go-controller/pkg/cni/cni_dpu.go | 37 +++++------ .../node/base_node_network_controller_dpu.go | 31 ++++------ .../base_node_network_controller_dpu_test.go | 15 +++-- .../informers/core/v1/PodInformer.go | 62 +++++++++++++++++++ go-controller/pkg/util/dpu_annotations.go | 45 ++++++++++++++ 5 files changed, 144 insertions(+), 46 deletions(-) create mode 100644 go-controller/pkg/testing/mocks/k8s.io/client-go/informers/core/v1/PodInformer.go diff --git a/go-controller/pkg/cni/cni_dpu.go b/go-controller/pkg/cni/cni_dpu.go index 85fcf34464..bbca7fcb92 100644 --- a/go-controller/pkg/cni/cni_dpu.go +++ b/go-controller/pkg/cni/cni_dpu.go @@ -7,34 +7,27 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" corev1listers "k8s.io/client-go/listers/core/v1" - "k8s.io/client-go/util/retry" ) // updatePodDPUConnDetailsWithRetry update the pod annotation with the given connection details for the NAD in // the PodRequest. If the dpuConnDetails argument is nil, delete the NAD's DPU connection details annotation instead. 
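// For example (illustrative), a teardown path can call
// pr.updatePodDPUConnDetailsWithRetry(kube, podLister, nil) to clear the
// annotation for pr.nadName; "annotation already set" errors are treated as success.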
func (pr *PodRequest) updatePodDPUConnDetailsWithRetry(kube kube.Interface, podLister corev1listers.PodLister, dpuConnDetails *util.DPUConnectionDetails) error { - resultErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - // Informer cache should not be mutated, so get a copy of the object - pod, err := podLister.Pods(pr.PodNamespace).Get(pr.PodName) - if err != nil { - return err - } - - cpod := pod.DeepCopy() - cpod.Annotations, err = util.MarshalPodDPUConnDetails(cpod.Annotations, dpuConnDetails, pr.nadName) - if err != nil { - if util.IsAnnotationAlreadySetError(err) { - return nil - } - return err - } - return kube.UpdatePod(cpod) - }) - if resultErr != nil { - return fmt.Errorf("failed to update %s annotation dpuConnDetails %+v on pod %s/%s for NAD %s: %v", - util.DPUConnectionDetailsAnnot, dpuConnDetails, pr.PodNamespace, pr.PodName, pr.nadName, resultErr) + pod, err := podLister.Pods(pr.PodNamespace).Get(pr.PodName) + if err != nil { + return err } - return nil + err = util.UpdatePodDPUConnDetailsWithRetry( + podLister, + kube, + pod, + dpuConnDetails, + pr.nadName, + ) + if util.IsAnnotationAlreadySetError(err) { + return nil + } + + return err } func (pr *PodRequest) addDPUConnectionDetailsAnnot(k kube.Interface, podLister corev1listers.PodLister, vfNetdevName string) error { diff --git a/go-controller/pkg/node/base_node_network_controller_dpu.go b/go-controller/pkg/node/base_node_network_controller_dpu.go index bd1fd2bdfe..1b94a9808c 100644 --- a/go-controller/pkg/node/base_node_network_controller_dpu.go +++ b/go-controller/pkg/node/base_node_network_controller_dpu.go @@ -10,7 +10,6 @@ import ( "k8s.io/apimachinery/pkg/util/wait" corev1listers "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/tools/cache" - "k8s.io/client-go/util/retry" "k8s.io/klog/v2" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni" @@ -216,26 +215,18 @@ func (bnnc *BaseNodeNetworkController) updatePodDPUConnStatusWithRetry(origPod * dpuConnStatus *util.DPUConnectionStatus, nadName string) error { podDesc := fmt.Sprintf("pod %s/%s", origPod.Namespace, origPod.Name) klog.Infof("Updating pod %s with connection status (%+v) for NAD %s", podDesc, dpuConnStatus, nadName) - resultErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - pod, err := bnnc.watchFactory.GetPod(origPod.Namespace, origPod.Name) - if err != nil { - return err - } - // Informer cache should not be mutated, so get a copy of the object - cpod := pod.DeepCopy() - cpod.Annotations, err = util.MarshalPodDPUConnStatus(cpod.Annotations, dpuConnStatus, nadName) - if err != nil { - if util.IsAnnotationAlreadySetError(err) { - return nil - } - return err - } - return bnnc.Kube.UpdatePod(cpod) - }) - if resultErr != nil { - return fmt.Errorf("failed to update %s annotation for %s: %v", util.DPUConnetionStatusAnnot, podDesc, resultErr) + err := util.UpdatePodDPUConnStatusWithRetry( + bnnc.watchFactory.PodCoreInformer().Lister(), + bnnc.Kube, + origPod, + dpuConnStatus, + nadName, + ) + if util.IsAnnotationAlreadySetError(err) { + return nil } - return nil + + return err } // addRepPort adds the representor of the VF to the ovs bridge diff --git a/go-controller/pkg/node/base_node_network_controller_dpu_test.go b/go-controller/pkg/node/base_node_network_controller_dpu_test.go index 006c77944e..9d72c5d3af 100644 --- a/go-controller/pkg/node/base_node_network_controller_dpu_test.go +++ b/go-controller/pkg/node/base_node_network_controller_dpu_test.go @@ -15,6 +15,7 @@ import ( kubemocks 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube/mocks" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" linkMock "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/mocks/github.com/vishvananda/netlink" + coreinformermocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/mocks/k8s.io/client-go/informers/core/v1" v1mocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -86,6 +87,7 @@ var _ = Describe("Node DPU tests", func() { var factoryMock factorymocks.NodeWatchFactory var pod v1.Pod var dnnc *DefaultNodeNetworkController + var podInformer coreinformermocks.PodInformer var podLister v1mocks.PodLister var podNamespaceLister v1mocks.PodNamespaceLister var clientset *cni.ClientSet @@ -111,6 +113,7 @@ var _ = Describe("Node DPU tests", func() { cnnci := newCommonNodeNetworkControllerInfo(nil, &kubeMock, apbExternalRouteClient, &factoryMock, nil, "") dnnc = newDefaultNodeNetworkController(cnnci, nil, nil) + podInformer = coreinformermocks.PodInformer{} podNamespaceLister = v1mocks.PodNamespaceLister{} podLister = v1mocks.PodLister{} podLister.On("Pods", mock.AnythingOfType("string")).Return(&podNamespaceLister) @@ -328,13 +331,15 @@ var _ = Describe("Node DPU tests", func() { dcs := util.DPUConnectionStatus{ Status: "Ready", } - factoryMock.On("GetPod", pod.Namespace, pod.Name).Return(&pod, nil) cpod := pod.DeepCopy() cpod.Annotations, err = util.MarshalPodDPUConnStatus(cpod.Annotations, &dcs, types.DefaultNetworkName) Expect(err).ToNot(HaveOccurred()) - kubeMock.On("UpdatePod", cpod).Return(nil) + factoryMock.On("PodCoreInformer").Return(&podInformer) + podInformer.On("Lister").Return(&podLister) + podLister.On("Pods", mock.AnythingOfType("string")).Return(&podNamespaceLister) podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(&pod, nil) + kubeMock.On("UpdatePod", cpod).Return(nil) err = dnnc.addRepPort(&pod, &scd, ifInfo, clientset) Expect(err).ToNot(HaveOccurred()) @@ -349,11 +354,9 @@ var _ = Describe("Node DPU tests", func() { dcs := util.DPUConnectionStatus{ Status: "Ready", } - factoryMock.On("GetPod", pod.Namespace, pod.Name).Return(&pod, nil) cpod := pod.DeepCopy() cpod.Annotations, err = util.MarshalPodDPUConnStatus(cpod.Annotations, &dcs, types.DefaultNetworkName) Expect(err).ToNot(HaveOccurred()) - kubeMock.On("UpdatePod", cpod).Return(fmt.Errorf("failed to set pod annotations")) // Mock netlink/ovs calls for cleanup checkOVSPortPodInfo(execMock, vfRep, true, "15", "a8d09931", "default") netlinkOpsMock.On("LinkSetDown", vfLink).Return(nil) @@ -361,7 +364,11 @@ var _ = Describe("Node DPU tests", func() { Cmd: genOVSDelPortCmd("pf0vf9"), }) + factoryMock.On("PodCoreInformer").Return(&podInformer) + podInformer.On("Lister").Return(&podLister) + podLister.On("Pods", mock.AnythingOfType("string")).Return(&podNamespaceLister) podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(&pod, nil) + kubeMock.On("UpdatePod", cpod).Return(fmt.Errorf("failed to set pod annotations")) err = dnnc.addRepPort(&pod, &scd, ifInfo, clientset) Expect(err).To(HaveOccurred()) diff --git a/go-controller/pkg/testing/mocks/k8s.io/client-go/informers/core/v1/PodInformer.go b/go-controller/pkg/testing/mocks/k8s.io/client-go/informers/core/v1/PodInformer.go new file mode 100644 index 0000000000..f2cff40c1a --- /dev/null +++ 
b/go-controller/pkg/testing/mocks/k8s.io/client-go/informers/core/v1/PodInformer.go @@ -0,0 +1,62 @@ +// Code generated by mockery v2.28.1. DO NOT EDIT. + +package mocks + +import ( + corev1 "k8s.io/client-go/listers/core/v1" + cache "k8s.io/client-go/tools/cache" + + mock "github.com/stretchr/testify/mock" +) + +// PodInformer is an autogenerated mock type for the PodInformer type +type PodInformer struct { + mock.Mock +} + +// Informer provides a mock function with given fields: +func (_m *PodInformer) Informer() cache.SharedIndexInformer { + ret := _m.Called() + + var r0 cache.SharedIndexInformer + if rf, ok := ret.Get(0).(func() cache.SharedIndexInformer); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(cache.SharedIndexInformer) + } + } + + return r0 +} + +// Lister provides a mock function with given fields: +func (_m *PodInformer) Lister() corev1.PodLister { + ret := _m.Called() + + var r0 corev1.PodLister + if rf, ok := ret.Get(0).(func() corev1.PodLister); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(corev1.PodLister) + } + } + + return r0 +} + +type mockConstructorTestingTNewPodInformer interface { + mock.TestingT + Cleanup(func()) +} + +// NewPodInformer creates a new instance of PodInformer. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewPodInformer(t mockConstructorTestingTNewPodInformer) *PodInformer { + mock := &PodInformer{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/go-controller/pkg/util/dpu_annotations.go b/go-controller/pkg/util/dpu_annotations.go index d4c67321f6..00f8b6b1e5 100644 --- a/go-controller/pkg/util/dpu_annotations.go +++ b/go-controller/pkg/util/dpu_annotations.go @@ -3,7 +3,12 @@ package util import ( "encoding/json" "fmt" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + + v1 "k8s.io/api/core/v1" + listers "k8s.io/client-go/listers/core/v1" ) /* @@ -201,3 +206,43 @@ func UnmarshalPodDPUConnStatus(annotations map[string]string, nadName string) (* } return &scs, nil } + +// UpdatePodDPUConnStatusWithRetry updates the DPU connection status annotation +// on the pod retrying on conflict +func UpdatePodDPUConnStatusWithRetry(podLister listers.PodLister, kube kube.Interface, pod *v1.Pod, dpuConnStatus *DPUConnectionStatus, nadName string) error { + updatePodAnnotationNoRollback := func(pod *v1.Pod) (*v1.Pod, func(), error) { + var err error + pod.Annotations, err = MarshalPodDPUConnStatus(pod.Annotations, dpuConnStatus, nadName) + if err != nil { + return nil, nil, err + } + return pod, nil, nil + } + + return UpdatePodWithRetryOrRollback( + podLister, + kube, + pod, + updatePodAnnotationNoRollback, + ) +} + +// UpdatePodDPUConnDetailsWithRetry updates the DPU connection details +// annotation on the pod retrying on conflict +func UpdatePodDPUConnDetailsWithRetry(podLister listers.PodLister, kube kube.Interface, pod *v1.Pod, dpuConnDetails *DPUConnectionDetails, nadName string) error { + updatePodAnnotationNoRollback := func(pod *v1.Pod) (*v1.Pod, func(), error) { + var err error + pod.Annotations, err = MarshalPodDPUConnDetails(pod.Annotations, dpuConnDetails, nadName) + if err != nil { + return nil, nil, err + } + return pod, nil, nil + } + + return UpdatePodWithRetryOrRollback( + podLister, + kube, + pod, + updatePodAnnotationNoRollback, + ) +} From d86604bf2074ca3a63600c4102e576c1f283bc5f Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Fri, 26 May 2023 08:25:49 +0000 Subject: [PATCH 15/31] Move subnet handling out of network cluster controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Network cluster controller will be in charge of coordinating more than just host subnets so move that functionality to the host subnet allocator itself. That functionality fits in the host subnet allocator which it was almost only a façade to the base subnet allocator. Functionaly equivalent. Fixes subnet count metric being used for all networks instead of just the default network. Signed-off-by: Jaime Caamaño Ruiz --- .../pkg/clustermanager/clustermanager.go | 4 +- .../network_cluster_controller.go | 326 +++-------------- .../network_cluster_controller_test.go | 9 +- .../secondary_network_cluster_manager.go | 6 +- .../secondary_network_unit_test.go | 2 +- .../subnetallocator/host_subnet_allocator.go | 344 +++++++++++++++--- .../host_subnet_allocator_test.go | 185 +++------- 7 files changed, 401 insertions(+), 475 deletions(-) diff --git a/go-controller/pkg/clustermanager/clustermanager.go b/go-controller/pkg/clustermanager/clustermanager.go index ab933bd248..1ee7c62c38 100644 --- a/go-controller/pkg/clustermanager/clustermanager.go +++ b/go-controller/pkg/clustermanager/clustermanager.go @@ -15,7 +15,6 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" - ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) @@ -49,8 +48,7 @@ type ClusterManager struct { // NewClusterManager creates a new cluster manager to manage the cluster nodes. 
func NewClusterManager(ovnClient *util.OVNClusterManagerClientset, wf *factory.WatchFactory, identity string, wg *sync.WaitGroup, recorder record.EventRecorder) (*ClusterManager, error) { - defaultNetClusterController := newNetworkClusterController(ovntypes.DefaultNetworkName, defaultNetworkID, config.Default.ClusterSubnets, - ovnClient, wf, config.HybridOverlay.Enabled, &util.DefaultNetInfo{}) + defaultNetClusterController := newNetworkClusterController(defaultNetworkID, &util.DefaultNetInfo{}, ovnClient, wf) zoneClusterController, err := newZoneClusterController(ovnClient, wf) if err != nil { diff --git a/go-controller/pkg/clustermanager/network_cluster_controller.go b/go-controller/pkg/clustermanager/network_cluster_controller.go index 6596c5d6c6..ccf8aeb551 100644 --- a/go-controller/pkg/clustermanager/network_cluster_controller.go +++ b/go-controller/pkg/clustermanager/network_cluster_controller.go @@ -3,35 +3,26 @@ package clustermanager import ( "context" "fmt" - "net" "reflect" "sync" corev1 "k8s.io/api/core/v1" cache "k8s.io/client-go/tools/cache" - "k8s.io/client-go/util/retry" "k8s.io/klog/v2" - hotypes "github.com/ovn-org/ovn-kubernetes/go-controller/hybrid-overlay/pkg/types" - houtil "github.com/ovn-org/ovn-kubernetes/go-controller/hybrid-overlay/pkg/util" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/subnetallocator" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" objretry "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) -// networkClusterController is the cluster controller for the networks. -// An instance of this struct is expected to be created for each network. -// A network is identified by its name and its unique id. -// It listens to the node events and does the following. -// - allocates subnet from the cluster subnet pool. It also allocates subnets -// from the hybrid overlay subnet pool if hybrid overlay is enabled. -// It stores these allocated subnets in the node annotation -// - stores the network id in each node's annotation. +// networkClusterController is the cluster controller for the networks. An +// instance of this struct is expected to be created for each network. A network +// is identified by its name and its unique id. It handles events at a cluster +// level to support the necessary configuration for the cluster networks. 
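+//
+// Node host-subnet handling is delegated to a HostSubnetAllocator (see the
+// subnetallocator package); this controller only wires it up to node events
+// for the topologies that need per-node subnets.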
type networkClusterController struct { - kube kube.Interface watchFactory *factory.WatchFactory stopChan chan struct{} wg *sync.WaitGroup @@ -42,80 +33,66 @@ type networkClusterController struct { // retry framework for nodes retryNodes *objretry.RetryFramework - // name of the network - networkName string // unique id of the network networkID int - clusterSubnetAllocator *subnetallocator.HostSubnetAllocator - clusterSubnets []config.CIDRNetworkEntry - - enableHybridOverlaySubnetAllocator bool - hybridOverlaySubnetAllocator *subnetallocator.HostSubnetAllocator + hostSubnetAllocator *subnetallocator.HostSubnetAllocator util.NetInfo } -func newNetworkClusterController(networkName string, networkID int, clusterSubnets []config.CIDRNetworkEntry, - ovnClient *util.OVNClusterManagerClientset, wf *factory.WatchFactory, - enableHybridOverlaySubnetAllocator bool, netInfo util.NetInfo) *networkClusterController { - +func newNetworkClusterController(networkID int, netInfo util.NetInfo, ovnClient *util.OVNClusterManagerClientset, wf *factory.WatchFactory) *networkClusterController { kube := &kube.Kube{ KClient: ovnClient.KubeClient, } wg := &sync.WaitGroup{} - var hybridOverlaySubnetAllocator *subnetallocator.HostSubnetAllocator - if enableHybridOverlaySubnetAllocator { - hybridOverlaySubnetAllocator = subnetallocator.NewHostSubnetAllocator() - } ncc := &networkClusterController{ - kube: kube, - watchFactory: wf, - stopChan: make(chan struct{}), - wg: wg, - networkName: networkName, - networkID: networkID, - clusterSubnetAllocator: subnetallocator.NewHostSubnetAllocator(), - clusterSubnets: clusterSubnets, - hybridOverlaySubnetAllocator: hybridOverlaySubnetAllocator, - enableHybridOverlaySubnetAllocator: enableHybridOverlaySubnetAllocator, - NetInfo: netInfo, + NetInfo: netInfo, + watchFactory: wf, + stopChan: make(chan struct{}), + wg: wg, + networkID: networkID, + } + + if ncc.hasNodeSubnetAllocation() { + ncc.hostSubnetAllocator = subnetallocator.NewHostSubnetAllocator(networkID, netInfo, wf.NodeCoreInformer().Lister(), kube) } ncc.initRetryFramework() return ncc } -func (ncc *networkClusterController) initRetryFramework() { - ncc.retryNodes = ncc.newRetryFramework(factory.NodeType, true) +func (ncc *networkClusterController) hasNodeSubnetAllocation() bool { + // we only do node subnet allocation on L3 topologies or default network + return ncc.TopologyType() == types.Layer3Topology || !ncc.IsSecondary() } -// Start the network cluster controller -// It does the following -// - initializes the network subnet allocator ranges -// and hybrid network subnet allocator ranges if hybrid overlay is enabled. -// - Starts watching the kubernetes nodes -func (ncc *networkClusterController) Start(ctx context.Context) error { - if err := ncc.clusterSubnetAllocator.InitRanges(ncc.clusterSubnets); err != nil { - return fmt.Errorf("failed to initialize cluster subnet allocator ranges: %w", err) +func (ncc *networkClusterController) initRetryFramework() { + if ncc.hasNodeSubnetAllocation() { + ncc.retryNodes = ncc.newRetryFramework(factory.NodeType, true) } +} - if ncc.enableHybridOverlaySubnetAllocator { - if err := ncc.hybridOverlaySubnetAllocator.InitRanges(config.HybridOverlay.ClusterSubnets); err != nil { - return fmt.Errorf("failed to initialize hybrid overlay subnet allocator ranges: %w", err) +// Start the network cluster controller. 
Depending on the cluster configuration +// and type of network, it does the following: +// - initializes the host subnet allocator and starts listening to node events +func (ncc *networkClusterController) Start(ctx context.Context) error { + if ncc.hasNodeSubnetAllocation() { + err := ncc.hostSubnetAllocator.InitRanges() + if err != nil { + return fmt.Errorf("failed to initialize host subnet ip allocator: %w", err) } - } - - nodeHandler, err := ncc.retryNodes.WatchResource() - if err != nil { - return fmt.Errorf("unable to watch nodes: %w", err) + nodeHandler, err := ncc.retryNodes.WatchResource() + if err != nil { + return fmt.Errorf("unable to watch pods: %w", err) + } + ncc.nodeHandler = nodeHandler } - ncc.nodeHandler = nodeHandler - return err + return nil } func (ncc *networkClusterController) Stop() { @@ -141,228 +118,14 @@ func (ncc *networkClusterController) newRetryFramework(objectType reflect.Type, return objretry.NewRetryFramework(ncc.stopChan, ncc.wg, ncc.watchFactory, resourceHandler) } -// hybridOverlayNodeEnsureSubnet allocates a subnet and sets the -// hybrid overlay subnet annotation. It returns any newly allocated subnet -// or an error. If an error occurs, the newly allocated subnet will be released. -func (ncc *networkClusterController) hybridOverlayNodeEnsureSubnet(node *corev1.Node, annotator kube.Annotator) (*net.IPNet, error) { - var existingSubnets []*net.IPNet - // Do not allocate a subnet if the node already has one - subnet, err := houtil.ParseHybridOverlayHostSubnet(node) - if err != nil { - // Log the error and try to allocate new subnets - klog.Warningf("Failed to get node %s hybrid overlay subnet annotation: %v", node.Name, err) - } else if subnet != nil { - existingSubnets = []*net.IPNet{subnet} - } - - // Allocate a new host subnet for this node - // FIXME: hybrid overlay is only IPv4 for now due to limitations on the Windows side - hostSubnets, allocatedSubnets, err := ncc.hybridOverlaySubnetAllocator.AllocateNodeSubnets(node.Name, existingSubnets, true, false) - if err != nil { - return nil, fmt.Errorf("error allocating hybrid overlay HostSubnet for node %s: %v", node.Name, err) - } - - if err := annotator.Set(hotypes.HybridOverlayNodeSubnet, hostSubnets[0].String()); err != nil { - if e := ncc.hybridOverlaySubnetAllocator.ReleaseNodeSubnets(node.Name, allocatedSubnets...); e != nil { - klog.Warningf("Failed to release hybrid over subnet for the node %s from the allocator : %w", node.Name, e) - } - return nil, fmt.Errorf("error setting hybrid overlay host subnet: %w", err) - } - - return hostSubnets[0], nil -} - -func (ncc *networkClusterController) releaseHybridOverlayNodeSubnet(nodeName string) { - ncc.hybridOverlaySubnetAllocator.ReleaseAllNodeSubnets(nodeName) - klog.Infof("Deleted hybrid overlay HostSubnets for node %s", nodeName) -} - -// handleAddUpdateNodeEvent handles the add or update node event -func (ncc *networkClusterController) handleAddUpdateNodeEvent(node *corev1.Node) error { - if util.NoHostSubnet(node) { - if ncc.enableHybridOverlaySubnetAllocator && houtil.IsHybridOverlayNode(node) { - annotator := kube.NewNodeAnnotator(ncc.kube, node.Name) - allocatedSubnet, err := ncc.hybridOverlayNodeEnsureSubnet(node, annotator) - if err != nil { - return fmt.Errorf("failed to update node %s hybrid overlay subnet annotation: %v", node.Name, err) - } - if err := annotator.Run(); err != nil { - // Release allocated subnet if any errors occurred - if allocatedSubnet != nil { - ncc.releaseHybridOverlayNodeSubnet(node.Name) - } - return 
fmt.Errorf("failed to set hybrid overlay annotations for node %s: %v", node.Name, err) - } - } - return nil - } - - return ncc.syncNodeNetworkAnnotations(node) -} - -// syncNodeNetworkAnnotations does 2 things -// - syncs the node's allocated subnets in the node subnet annotation -// - syncs the network id in the node network id annotation -func (ncc *networkClusterController) syncNodeNetworkAnnotations(node *corev1.Node) error { - ncc.clusterSubnetAllocator.Lock() - defer ncc.clusterSubnetAllocator.Unlock() - - existingSubnets, err := util.ParseNodeHostSubnetAnnotation(node, ncc.networkName) - if err != nil && !util.IsAnnotationNotSetError(err) { - // Log the error and try to allocate new subnets - klog.Warningf("Failed to get node %s host subnets annotations for network %s : %v", node.Name, ncc.networkName, err) - } - - networkID, err := util.ParseNetworkIDAnnotation(node, ncc.networkName) - if err != nil && !util.IsAnnotationNotSetError(err) { - // Log the error and try to allocate new subnets - klog.Warningf("Failed to get node %s network id annotations for network %s : %v", node.Name, ncc.networkName, err) - } - - // On return validExistingSubnets will contain any valid subnets that - // were already assigned to the node. allocatedSubnets will contain - // any newly allocated subnets required to ensure that the node has one subnet - // from each enabled IP family. - ipv4Mode, ipv6Mode := ncc.IPMode() - validExistingSubnets, allocatedSubnets, err := ncc.clusterSubnetAllocator.AllocateNodeSubnets(node.Name, existingSubnets, ipv4Mode, ipv6Mode) - if err != nil { - return err - } - - // If the existing subnets weren't OK, or new ones were allocated, update the node annotation. - // This happens in a couple cases: - // 1) new node: no existing subnets and one or more new subnets were allocated - // 2) dual-stack to single-stack conversion: two existing subnets but only one will be valid, and no allocated subnets - // 3) bad subnet annotation: one more existing subnets will be invalid and might have allocated a correct one - // Also update the node annotation if the networkID doesn't match - if len(existingSubnets) != len(validExistingSubnets) || len(allocatedSubnets) > 0 || ncc.networkID != networkID { - updatedSubnetsMap := map[string][]*net.IPNet{ncc.networkName: validExistingSubnets} - err = ncc.updateNodeNetworkAnnotationsWithRetry(node.Name, updatedSubnetsMap, ncc.networkID) - if err != nil { - if errR := ncc.clusterSubnetAllocator.ReleaseNodeSubnets(node.Name, allocatedSubnets...); errR != nil { - klog.Warningf("Error releasing node %s subnets: %v", node.Name, errR) - } - return err - } - } - - return nil -} - -// handleDeleteNode handles the delete node event -func (ncc *networkClusterController) handleDeleteNode(node *corev1.Node) error { - if ncc.enableHybridOverlaySubnetAllocator { - ncc.releaseHybridOverlayNodeSubnet(node.Name) - return nil - } - - ncc.clusterSubnetAllocator.Lock() - defer ncc.clusterSubnetAllocator.Unlock() - ncc.clusterSubnetAllocator.ReleaseAllNodeSubnets(node.Name) - return nil -} - -func (ncc *networkClusterController) syncNodes(nodes []interface{}) error { - ncc.clusterSubnetAllocator.Lock() - defer ncc.clusterSubnetAllocator.Unlock() - - for _, tmp := range nodes { - node, ok := tmp.(*corev1.Node) - if !ok { - return fmt.Errorf("spurious object in syncNodes: %v", tmp) - } - - if util.NoHostSubnet(node) { - if ncc.enableHybridOverlaySubnetAllocator && houtil.IsHybridOverlayNode(node) { - // this is a hybrid overlay node so mark as allocated from the hybrid 
overlay subnet allocator - hostSubnet, err := houtil.ParseHybridOverlayHostSubnet(node) - if err != nil { - klog.Errorf("Failed to parse hybrid overlay for node %s: %w", node.Name, err) - } else if hostSubnet != nil { - klog.V(5).Infof("Node %s contains subnets: %v", node.Name, hostSubnet) - if err := ncc.hybridOverlaySubnetAllocator.MarkSubnetsAllocated(node.Name, hostSubnet); err != nil { - klog.Errorf("Failed to mark the subnet %v as allocated in the hybrid subnet allocator for node %s: %v", hostSubnet, node.Name, err) - } - } - } - } else { - hostSubnets, _ := util.ParseNodeHostSubnetAnnotation(node, ncc.networkName) - if len(hostSubnets) > 0 { - klog.V(5).Infof("Node %s contains subnets: %v for network : %s", node.Name, hostSubnets, ncc.networkName) - if err := ncc.clusterSubnetAllocator.MarkSubnetsAllocated(node.Name, hostSubnets...); err != nil { - klog.Errorf("Failed to mark the subnet %v as allocated in the cluster subnet allocator for node %s: %v", hostSubnets, node.Name, err) - } - } else { - klog.V(5).Infof("Node %s contains no subnets for network : %s", node.Name, ncc.networkName) - } - } - } - - return nil -} - -// updateNodeNetworkAnnotationsWithRetry will update the node's subnet annotation and network id annotation -func (ncc *networkClusterController) updateNodeNetworkAnnotationsWithRetry(nodeName string, hostSubnetsMap map[string][]*net.IPNet, networkId int) error { - // Retry if it fails because of potential conflict which is transient. Return error in the - // case of other errors (say temporary API server down), and it will be taken care of by the - // retry mechanism. - resultErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - // Informer cache should not be mutated, so get a copy of the object - node, err := ncc.watchFactory.GetNode(nodeName) - if err != nil { - return err - } - - cnode := node.DeepCopy() - for netName, hostSubnets := range hostSubnetsMap { - cnode.Annotations, err = util.UpdateNodeHostSubnetAnnotation(cnode.Annotations, hostSubnets, netName) - if err != nil { - return fmt.Errorf("failed to update node %q annotation subnet %s", - node.Name, util.JoinIPNets(hostSubnets, ",")) - } - } - - cnode.Annotations, err = util.UpdateNetworkIDAnnotation(cnode.Annotations, ncc.networkName, networkId) - if err != nil { - return fmt.Errorf("failed to update node %q network id annotation %d for network %s", - node.Name, networkId, ncc.networkName) - } - return ncc.kube.UpdateNode(cnode) - }) - if resultErr != nil { - return fmt.Errorf("failed to update node %s annotation", nodeName) - } - return nil -} - // Cleanup the subnet annotations from the node for the secondary networks func (ncc *networkClusterController) Cleanup(netName string) error { if !ncc.IsSecondary() { return fmt.Errorf("default network can't be cleaned up") } - // remove hostsubnet annotation for this network - klog.Infof("Remove node-subnets annotation for network %s on all nodes", ncc.networkName) - existingNodes, err := ncc.watchFactory.GetNodes() - if err != nil { - return fmt.Errorf("error in retrieving the nodes: %v", err) - } - - for _, node := range existingNodes { - if util.NoHostSubnet(node) { - // Secondary network subnet is not allocated for a nohost subnet node - klog.V(5).Infof("Node %s is not managed by OVN", node.Name) - continue - } - hostSubnetsMap := map[string][]*net.IPNet{ncc.networkName: nil} - // passing util.InvalidNetworkID deletes the network id annotation for the network. 
- err = ncc.updateNodeNetworkAnnotationsWithRetry(node.Name, hostSubnetsMap, util.InvalidNetworkID) - if err != nil { - return fmt.Errorf("failed to clear node %q subnet annotation for network %s", - node.Name, ncc.networkName) - } - - ncc.clusterSubnetAllocator.ReleaseAllNodeSubnets(node.Name) + if ncc.hasNodeSubnetAllocation() { + return ncc.hostSubnetAllocator.Cleanup(netName) } return nil @@ -391,7 +154,7 @@ func (h *networkClusterControllerEventHandler) AddResource(obj interface{}, from if !ok { return fmt.Errorf("could not cast %T object to *corev1.Node", obj) } - if err = h.ncc.handleAddUpdateNodeEvent(node); err != nil { + if err = h.ncc.hostSubnetAllocator.HandleAddUpdateNodeEvent(node); err != nil { klog.Infof("Node add failed for %s, will try again later: %v", node.Name, err) return err @@ -414,7 +177,7 @@ func (h *networkClusterControllerEventHandler) UpdateResource(oldObj, newObj int if !ok { return fmt.Errorf("could not cast %T object to *corev1.Node", newObj) } - if err = h.ncc.handleAddUpdateNodeEvent(node); err != nil { + if err = h.ncc.hostSubnetAllocator.HandleAddUpdateNodeEvent(node); err != nil { klog.Infof("Node update failed for %s, will try again later: %v", node.Name, err) return err @@ -434,7 +197,7 @@ func (h *networkClusterControllerEventHandler) DeleteResource(obj, cachedObj int if !ok { return fmt.Errorf("could not cast obj of type %T to *knet.Node", obj) } - return h.ncc.handleDeleteNode(node) + return h.ncc.hostSubnetAllocator.HandleDeleteNode(node) } return nil } @@ -448,7 +211,7 @@ func (h *networkClusterControllerEventHandler) SyncFunc(objs []interface{}) erro } else { switch h.objType { case factory.NodeType: - syncFunc = h.ncc.syncNodes + syncFunc = h.ncc.hostSubnetAllocator.Sync default: return fmt.Errorf("no sync function for object type %s", h.objType) @@ -527,8 +290,7 @@ func (h *networkClusterControllerEventHandler) GetResourceFromInformerCache(key } switch h.objType { - case factory.NodeType, - factory.EgressNodeType: + case factory.NodeType: obj, err = h.ncc.watchFactory.GetNode(name) default: diff --git a/go-controller/pkg/clustermanager/network_cluster_controller_test.go b/go-controller/pkg/clustermanager/network_cluster_controller_test.go index ddf88a98ed..1cb1fae2cc 100644 --- a/go-controller/pkg/clustermanager/network_cluster_controller_test.go +++ b/go-controller/pkg/clustermanager/network_cluster_controller_test.go @@ -75,8 +75,7 @@ var _ = ginkgo.Describe("Network Cluster Controller", func() { err = f.Start() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ncc := newNetworkClusterController(ovntypes.DefaultNetworkName, defaultNetworkID, config.Default.ClusterSubnets, - fakeClient, f, false, &util.DefaultNetInfo{}) + ncc := newNetworkClusterController(defaultNetworkID, &util.DefaultNetInfo{}, fakeClient, f) ncc.Start(ctx.Context) defer ncc.Stop() @@ -124,8 +123,7 @@ var _ = ginkgo.Describe("Network Cluster Controller", func() { err = f.Start() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ncc := newNetworkClusterController(ovntypes.DefaultNetworkName, defaultNetworkID, config.Default.ClusterSubnets, - fakeClient, f, false, &util.DefaultNetInfo{}) + ncc := newNetworkClusterController(defaultNetworkID, &util.DefaultNetInfo{}, fakeClient, f) ncc.Start(ctx.Context) defer ncc.Stop() @@ -176,8 +174,7 @@ var _ = ginkgo.Describe("Network Cluster Controller", func() { err = f.Start() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ncc := newNetworkClusterController(ovntypes.DefaultNetworkName, defaultNetworkID, 
config.Default.ClusterSubnets, - fakeClient, f, false, &util.DefaultNetInfo{}) + ncc := newNetworkClusterController(defaultNetworkID, &util.DefaultNetInfo{}, fakeClient, f) ncc.Start(ctx.Context) defer ncc.Stop() diff --git a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go index cc8c55250e..9c61df1d80 100644 --- a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go +++ b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go @@ -100,8 +100,7 @@ func (sncm *secondaryNetworkClusterManager) NewNetworkController(nInfo util.NetI return nil, fmt.Errorf("failed to create NetworkController for secondary layer3 network %s : %w", nInfo.GetNetworkName(), err) } - sncc := newNetworkClusterController(nInfo.GetNetworkName(), networkId, nInfo.Subnets(), - sncm.ovnClient, sncm.watchFactory, false, nInfo) + sncc := newNetworkClusterController(networkId, nInfo, sncm.ovnClient, sncm.watchFactory) return sncc, nil } @@ -163,6 +162,5 @@ func (sncm *secondaryNetworkClusterManager) CleanupDeletedNetworks(allController // newDummyNetworkController creates a dummy network controller used to clean up specific network func (sncm *secondaryNetworkClusterManager) newDummyLayer3NetworkController(netName string) nad.NetworkController { netInfo, _ := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: netName}, Topology: ovntypes.Layer3Topology}) - return newNetworkClusterController(netInfo.GetNetworkName(), util.InvalidNetworkID, nil, sncm.ovnClient, sncm.watchFactory, - false, netInfo) + return newNetworkClusterController(util.InvalidNetworkID, netInfo, sncm.ovnClient, sncm.watchFactory) } diff --git a/go-controller/pkg/clustermanager/secondary_network_unit_test.go b/go-controller/pkg/clustermanager/secondary_network_unit_test.go index 3fe855a100..7bfda11e47 100644 --- a/go-controller/pkg/clustermanager/secondary_network_unit_test.go +++ b/go-controller/pkg/clustermanager/secondary_network_unit_test.go @@ -224,7 +224,7 @@ var _ = ginkgo.Describe("Secondary Layer3 Cluster Controller Manager", func() { // So testing the cleanup one at a time. 
netInfo, err := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: "blue"}, Topology: ovntypes.Layer3Topology}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - oc := newNetworkClusterController(netInfo.GetNetworkName(), util.InvalidNetworkID, nil, sncm.ovnClient, sncm.watchFactory, false, netInfo) + oc := newNetworkClusterController(util.InvalidNetworkID, netInfo, sncm.ovnClient, sncm.watchFactory) nadControllers := []nad.NetworkController{oc} err = sncm.CleanupDeletedNetworks(nadControllers) diff --git a/go-controller/pkg/clustermanager/subnetallocator/host_subnet_allocator.go b/go-controller/pkg/clustermanager/subnetallocator/host_subnet_allocator.go index 718b6d69a9..85368ff56b 100644 --- a/go-controller/pkg/clustermanager/subnetallocator/host_subnet_allocator.go +++ b/go-controller/pkg/clustermanager/subnetallocator/host_subnet_allocator.go @@ -3,57 +3,329 @@ package subnetallocator import ( "fmt" "net" - "sync" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/labels" + listers "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/util/retry" "k8s.io/klog/v2" utilnet "k8s.io/utils/net" + hotypes "github.com/ovn-org/ovn-kubernetes/go-controller/hybrid-overlay/pkg/types" + houtil "github.com/ovn-org/ovn-kubernetes/go-controller/hybrid-overlay/pkg/util" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) +// HostSubnetAllocator acts on node events handed off by the cluster network +// controller and does the following: +// - allocates subnet from the cluster subnet pool. It also allocates subnets +// from the hybrid overlay subnet pool if hybrid overlay is enabled. +// It stores these allocated subnets in the node annotation +// - stores the network id in each node's annotation. 
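+//
+// A rough lifecycle sketch (illustrative, mirroring how the network cluster
+// controller drives it): construct with NewHostSubnetAllocator, call
+// InitRanges to seed the subnet pools, replay existing nodes through Sync,
+// then feed node events to HandleAddUpdateNodeEvent / HandleDeleteNode.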
type HostSubnetAllocator struct { - sync.Mutex - // Don't inherit from BaseSubnetAllocator to ensure users of - // hostSubnetAllocator can't directly call the underlying methods - base SubnetAllocator + kube kube.Interface + nodeLister listers.NodeLister + + clusterSubnetAllocator SubnetAllocator + hybridOverlaySubnetAllocator SubnetAllocator + + // unique id of the network + networkID int + + netInfo util.NetInfo } -func NewHostSubnetAllocator() *HostSubnetAllocator { +func NewHostSubnetAllocator(networkID int, netInfo util.NetInfo, nodeLister listers.NodeLister, kube kube.Interface) *HostSubnetAllocator { return &HostSubnetAllocator{ - base: NewSubnetAllocator(), + kube: kube, + nodeLister: nodeLister, + networkID: networkID, + netInfo: netInfo, + clusterSubnetAllocator: NewSubnetAllocator(), + hybridOverlaySubnetAllocator: NewSubnetAllocator(), } } -func (sna *HostSubnetAllocator) InitRanges(subnets []config.CIDRNetworkEntry) error { - for _, entry := range subnets { - if err := sna.base.AddNetworkRange(entry.CIDR, entry.HostSubnetLength); err != nil { +func (sna *HostSubnetAllocator) InitRanges() error { + clusterSubnets := sna.netInfo.Subnets() + + for _, clusterSubnet := range clusterSubnets { + if err := sna.clusterSubnetAllocator.AddNetworkRange(clusterSubnet.CIDR, clusterSubnet.HostSubnetLength); err != nil { return err } - klog.V(5).Infof("Added network range %s to host subnet allocator", entry.CIDR) + klog.V(5).Infof("Added network range %s to cluster subnet allocator", clusterSubnet.CIDR) } - // update metrics for host subnets - v4count, _, v6count, _ := sna.base.Usage() - metrics.RecordSubnetCount(float64(v4count), float64(v6count)) + if sna.hasHybridOverlayAllocation() { + for _, hoSubnet := range config.HybridOverlay.ClusterSubnets { + if err := sna.hybridOverlaySubnetAllocator.AddNetworkRange(hoSubnet.CIDR, hoSubnet.HostSubnetLength); err != nil { + return err + } + klog.V(5).Infof("Added network range %s to hybrid overlay subnet allocator", hoSubnet.CIDR) + } + } + + // update metrics for cluster subnets + sna.recordSubnetCount() + return nil } -// MarkSubnetsAllocated will mark the given subnets as already allocated by -// the given owner. Marking is all-or-nothing; if marking one of the subnets -// fails then none of them are marked as allocated. -func (sna *HostSubnetAllocator) MarkSubnetsAllocated(nodeName string, subnets ...*net.IPNet) error { - if err := sna.base.MarkAllocatedNetworks(nodeName, subnets...); err != nil { +func (sna *HostSubnetAllocator) hasHybridOverlayAllocation() bool { + return config.HybridOverlay.Enabled && !sna.netInfo.IsSecondary() +} + +func (sna *HostSubnetAllocator) recordSubnetCount() { + // only for the default network + if !sna.netInfo.IsSecondary() { + v4count, _, v6count, _ := sna.clusterSubnetAllocator.Usage() + metrics.RecordSubnetCount(float64(v4count), float64(v6count)) + } +} + +func (sna *HostSubnetAllocator) recordSubnetUsage() { + // only for the default network + if !sna.netInfo.IsSecondary() { + v4count, _, v6count, _ := sna.clusterSubnetAllocator.Usage() + metrics.RecordSubnetUsage(float64(v4count), float64(v6count)) + } +} + +// hybridOverlayNodeEnsureSubnet allocates a subnet and sets the +// hybrid overlay subnet annotation. It returns any newly allocated subnet +// or an error. If an error occurs, the newly allocated subnet will be released. 
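+//
+// Note that this path is only exercised for the default network when hybrid
+// overlay is enabled (see hasHybridOverlayAllocation); secondary networks
+// never allocate from the hybrid overlay pool.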
+func (sna *HostSubnetAllocator) hybridOverlayNodeEnsureSubnet(node *corev1.Node, annotator kube.Annotator) (*net.IPNet, error) { + var existingSubnets []*net.IPNet + // Do not allocate a subnet if the node already has one + subnet, err := houtil.ParseHybridOverlayHostSubnet(node) + if err != nil { + // Log the error and try to allocate new subnets + klog.Warningf("Failed to get node %s hybrid overlay subnet annotation: %v", node.Name, err) + } else if subnet != nil { + existingSubnets = []*net.IPNet{subnet} + } + + // Allocate a new host subnet for this node + // FIXME: hybrid overlay is only IPv4 for now due to limitations on the Windows side + hostSubnets, allocatedSubnets, err := sna.allocateNodeSubnets(sna.hybridOverlaySubnetAllocator, node.Name, existingSubnets, true, false) + if err != nil { + return nil, fmt.Errorf("error allocating hybrid overlay HostSubnet for node %s: %v", node.Name, err) + } + + if err := annotator.Set(hotypes.HybridOverlayNodeSubnet, hostSubnets[0].String()); err != nil { + if e := sna.hybridOverlaySubnetAllocator.ReleaseNetworks(node.Name, allocatedSubnets...); e != nil { + klog.Warningf("Failed to release hybrid over subnet for the node %s from the allocator : %w", node.Name, e) + } + return nil, fmt.Errorf("error setting hybrid overlay host subnet: %w", err) + } + + return hostSubnets[0], nil +} + +func (sna *HostSubnetAllocator) releaseHybridOverlayNodeSubnet(nodeName string) { + sna.hybridOverlaySubnetAllocator.ReleaseAllNetworks(nodeName) + klog.Infof("Deleted hybrid overlay HostSubnets for node %s", nodeName) +} + +// HandleAddUpdateNodeEvent handles the add or update node event +func (sna *HostSubnetAllocator) HandleAddUpdateNodeEvent(node *corev1.Node) error { + if util.NoHostSubnet(node) { + if sna.hasHybridOverlayAllocation() && houtil.IsHybridOverlayNode(node) { + annotator := kube.NewNodeAnnotator(sna.kube, node.Name) + allocatedSubnet, err := sna.hybridOverlayNodeEnsureSubnet(node, annotator) + if err != nil { + return fmt.Errorf("failed to update node %s hybrid overlay subnet annotation: %v", node.Name, err) + } + if err := annotator.Run(); err != nil { + // Release allocated subnet if any errors occurred + if allocatedSubnet != nil { + sna.releaseHybridOverlayNodeSubnet(node.Name) + } + return fmt.Errorf("failed to set hybrid overlay annotations for node %s: %v", node.Name, err) + } + } + return nil + } + + err := sna.syncNodeNetworkAnnotations(node) + sna.recordSubnetUsage() + return err +} + +// syncNodeNetworkAnnotations does 2 things +// - syncs the node's allocated subnets in the node subnet annotation +// - syncs the network id in the node network id annotation +func (sna *HostSubnetAllocator) syncNodeNetworkAnnotations(node *corev1.Node) error { + networkName := sna.netInfo.GetNetworkName() + + existingSubnets, err := util.ParseNodeHostSubnetAnnotation(node, networkName) + if err != nil && !util.IsAnnotationNotSetError(err) { + // Log the error and try to allocate new subnets + klog.Warningf("Failed to get node %s host subnets annotations for network %s : %v", node.Name, networkName, err) + } + + networkID, err := util.ParseNetworkIDAnnotation(node, networkName) + if err != nil && !util.IsAnnotationNotSetError(err) { + // Log the error and try to allocate new subnets + klog.Warningf("Failed to get node %s network id annotations for network %s : %v", node.Name, networkName, err) + } + + // On return validExistingSubnets will contain any valid subnets that + // were already assigned to the node. 
allocatedSubnets will contain + any newly allocated subnets required to ensure that the node has one subnet + from each enabled IP family. + ipv4Mode, ipv6Mode := sna.netInfo.IPMode() + validExistingSubnets, allocatedSubnets, err := sna.allocateNodeSubnets(sna.clusterSubnetAllocator, node.Name, existingSubnets, ipv4Mode, ipv6Mode) + if err != nil { return err } - _, v4used, _, v6used := sna.base.Usage() - metrics.RecordSubnetUsage(float64(v4used), float64(v6used)) + + // If the existing subnets weren't OK, or new ones were allocated, update the node annotation. + // This happens in a couple of cases: + // 1) new node: no existing subnets and one or more new subnets were allocated + // 2) dual-stack to single-stack conversion: two existing subnets but only one will be valid, and no allocated subnets + // 3) bad subnet annotation: one or more existing subnets will be invalid and a correct one might have been allocated + // Also update the node annotation if the networkID doesn't match + if len(existingSubnets) != len(validExistingSubnets) || len(allocatedSubnets) > 0 || sna.networkID != networkID { + updatedSubnetsMap := map[string][]*net.IPNet{networkName: validExistingSubnets} + err = sna.updateNodeNetworkAnnotationsWithRetry(node.Name, updatedSubnetsMap, sna.networkID) + if err != nil { + if errR := sna.clusterSubnetAllocator.ReleaseNetworks(node.Name, allocatedSubnets...); errR != nil { + klog.Warningf("Error releasing node %s subnets: %v", node.Name, errR) + } + return err + } + } + return nil } -// AllocateNodeSubnets either validates existing node subnets against the allocators +// HandleDeleteNode handles the delete node event +func (sna *HostSubnetAllocator) HandleDeleteNode(node *corev1.Node) error { + if sna.hasHybridOverlayAllocation() { + sna.releaseHybridOverlayNodeSubnet(node.Name) + return nil + } + + sna.clusterSubnetAllocator.ReleaseAllNetworks(node.Name) + sna.recordSubnetUsage() + return nil +} + +func (sna *HostSubnetAllocator) Sync(nodes []interface{}) error { + defer sna.recordSubnetUsage() + + networkName := sna.netInfo.GetNetworkName() + + for _, tmp := range nodes { + node, ok := tmp.(*corev1.Node) + if !ok { + return fmt.Errorf("spurious object in syncNodes: %v", tmp) + } + + if util.NoHostSubnet(node) { + if sna.hasHybridOverlayAllocation() && houtil.IsHybridOverlayNode(node) { + // this is a hybrid overlay node so mark its subnet as allocated in the hybrid overlay subnet allocator + hostSubnet, err := houtil.ParseHybridOverlayHostSubnet(node) + if err != nil { + klog.Errorf("Failed to parse hybrid overlay subnet for node %s: %v", node.Name, err) + } else if hostSubnet != nil { + klog.V(5).Infof("Node %s contains subnets: %v", node.Name, hostSubnet) + if err := sna.hybridOverlaySubnetAllocator.MarkAllocatedNetworks(node.Name, hostSubnet); err != nil { + klog.Errorf("Failed to mark the subnet %v as allocated in the hybrid subnet allocator for node %s: %v", hostSubnet, node.Name, err) + } + } + } + } else { + hostSubnets, _ := util.ParseNodeHostSubnetAnnotation(node, networkName) + if len(hostSubnets) > 0 { + klog.V(5).Infof("Node %s contains subnets: %v for network %s", node.Name, hostSubnets, networkName) + if err := sna.clusterSubnetAllocator.MarkAllocatedNetworks(node.Name, hostSubnets...); err != nil { + klog.Errorf("Failed to mark the subnet %v as allocated in the cluster subnet allocator for node %s: %v", hostSubnets, node.Name, err) + } + } else { + klog.V(5).Infof("Node %s contains no subnets for network %s", node.Name, networkName) + } + } + } + + return nil +} + +//
updateNodeNetworkAnnotationsWithRetry will update the node's subnet annotation and network id annotation +func (sna *HostSubnetAllocator) updateNodeNetworkAnnotationsWithRetry(nodeName string, hostSubnetsMap map[string][]*net.IPNet, networkId int) error { + // Retry if it fails because of potential conflict which is transient. Return error in the + // case of other errors (say temporary API server down), and it will be taken care of by the + // retry mechanism. + resultErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + // Informer cache should not be mutated, so get a copy of the object + node, err := sna.nodeLister.Get(nodeName) + if err != nil { + return err + } + + cnode := node.DeepCopy() + for netName, hostSubnets := range hostSubnetsMap { + cnode.Annotations, err = util.UpdateNodeHostSubnetAnnotation(cnode.Annotations, hostSubnets, netName) + if err != nil { + return fmt.Errorf("failed to update node %q annotation subnet %s", + node.Name, util.JoinIPNets(hostSubnets, ",")) + } + } + + networkName := sna.netInfo.GetNetworkName() + + cnode.Annotations, err = util.UpdateNetworkIDAnnotation(cnode.Annotations, networkName, networkId) + if err != nil { + return fmt.Errorf("failed to update node %q network id annotation %d for network %s", + node.Name, networkId, networkName) + } + return sna.kube.UpdateNode(cnode) + }) + if resultErr != nil { + return fmt.Errorf("failed to update node %s annotation", nodeName) + } + return nil +} + +// Cleanup the subnet annotations from the node +func (sna *HostSubnetAllocator) Cleanup(netName string) error { + networkName := sna.netInfo.GetNetworkName() + + // remove hostsubnet annotation for this network + klog.Infof("Remove node-subnets annotation for network %s on all nodes", networkName) + existingNodes, err := sna.nodeLister.List(labels.Everything()) + if err != nil { + return fmt.Errorf("error in retrieving the nodes: %v", err) + } + + for _, node := range existingNodes { + if util.NoHostSubnet(node) { + // Secondary network subnet is not allocated for a nohost subnet node + klog.V(5).Infof("Node %s is not managed by OVN", node.Name) + continue + } + + hostSubnetsMap := map[string][]*net.IPNet{networkName: nil} + // passing util.InvalidNetworkID deletes the network id annotation for the network. + err = sna.updateNodeNetworkAnnotationsWithRetry(node.Name, hostSubnetsMap, util.InvalidNetworkID) + if err != nil { + return fmt.Errorf("failed to clear node %q subnet annotation for network %s", + node.Name, networkName) + } + + sna.clusterSubnetAllocator.ReleaseAllNetworks(node.Name) + } + + return nil +} + +// allocateNodeSubnets either validates existing node subnets against the allocators // ranges, or allocates new subnets if the node doesn't have any yet, or returns an error -func (sna *HostSubnetAllocator) AllocateNodeSubnets(nodeName string, existingSubnets []*net.IPNet, ipv4Mode, ipv6Mode bool) ([]*net.IPNet, []*net.IPNet, error) { +func (sna *HostSubnetAllocator) allocateNodeSubnets(allocator SubnetAllocator, nodeName string, existingSubnets []*net.IPNet, ipv4Mode, ipv6Mode bool) ([]*net.IPNet, []*net.IPNet, error) { allocatedSubnets := []*net.IPNet{} // OVN can work in single-stack or dual-stack only. 
@@ -76,7 +348,7 @@ func (sna *HostSubnetAllocator) AllocateNodeSubnets(nodeName string, existingSub n := 0 for _, subnet := range existingSubnets { if (ipv4Mode && utilnet.IsIPv4CIDR(subnet) && !foundIPv4) || (ipv6Mode && utilnet.IsIPv6CIDR(subnet) && !foundIPv6) { - if err := sna.MarkSubnetsAllocated(nodeName, subnet); err == nil { + if err := allocator.MarkAllocatedNetworks(nodeName, subnet); err == nil { klog.Infof("Valid subnet %v allocated on node %s", subnet, nodeName) existingSubnets[n] = subnet n++ @@ -90,7 +362,7 @@ func (sna *HostSubnetAllocator) AllocateNodeSubnets(nodeName string, existingSub } // this subnet is no longer needed; release it klog.Infof("Releasing unused or invalid subnet %v on node %s", subnet, nodeName) - if err := sna.base.ReleaseNetworks(nodeName, subnet); err != nil { + if err := allocator.ReleaseNetworks(nodeName, subnet); err != nil { klog.Warningf("Failed to release subnet %v on node %s: %v", subnet, nodeName, err) } } @@ -109,7 +381,7 @@ func (sna *HostSubnetAllocator) AllocateNodeSubnets(nodeName string, existingSub if releaseAllocatedSubnets { for _, subnet := range allocatedSubnets { klog.Warningf("Releasing subnet %v on node %s", subnet, nodeName) - if errR := sna.base.ReleaseNetworks(nodeName, subnet); errR != nil { + if errR := allocator.ReleaseNetworks(nodeName, subnet); errR != nil { klog.Warningf("Error releasing subnet %v on node %s: %v", subnet, nodeName, errR) } } @@ -132,12 +404,12 @@ func (sna *HostSubnetAllocator) AllocateNodeSubnets(nodeName string, existingSub // allocate new subnets if needed if ipv4Mode && !foundIPv4 { - if err := allocateOneSubnet(sna.base.AllocateIPv4Network(nodeName)); err != nil { + if err := allocateOneSubnet(allocator.AllocateIPv4Network(nodeName)); err != nil { return nil, nil, err } } if ipv6Mode && !foundIPv6 { - if err := allocateOneSubnet(sna.base.AllocateIPv6Network(nodeName)); err != nil { + if err := allocateOneSubnet(allocator.AllocateIPv6Network(nodeName)); err != nil { return nil, nil, err } } @@ -151,9 +423,6 @@ func (sna *HostSubnetAllocator) AllocateNodeSubnets(nodeName string, existingSub nodeName, expectedHostSubnets, len(allocatedSubnets)) } - _, v4used, _, v6used := sna.base.Usage() - metrics.RecordSubnetUsage(float64(v4used), float64(v6used)) - hostSubnets := append(existingSubnets, allocatedSubnets...) klog.Infof("Allocated Subnets %v on Node %s", hostSubnets, nodeName) @@ -161,16 +430,3 @@ func (sna *HostSubnetAllocator) AllocateNodeSubnets(nodeName string, existingSub releaseAllocatedSubnets = false return hostSubnets, allocatedSubnets, nil } - -func (sna *HostSubnetAllocator) ReleaseNodeSubnets(nodeName string, subnets ...*net.IPNet) error { - err := sna.base.ReleaseNetworks(nodeName, subnets...) 
- _, v4used, _, v6used := sna.base.Usage() - metrics.RecordSubnetUsage(float64(v4used), float64(v6used)) - return err -} - -func (sna *HostSubnetAllocator) ReleaseAllNodeSubnets(nodeName string) { - sna.base.ReleaseAllNetworks(nodeName) - _, v4used, _, v6used := sna.base.Usage() - metrics.RecordSubnetUsage(float64(v4used), float64(v6used)) -} diff --git a/go-controller/pkg/clustermanager/subnetallocator/host_subnet_allocator_test.go b/go-controller/pkg/clustermanager/subnetallocator/host_subnet_allocator_test.go index 8dd333beea..744133e133 100644 --- a/go-controller/pkg/clustermanager/subnetallocator/host_subnet_allocator_test.go +++ b/go-controller/pkg/clustermanager/subnetallocator/host_subnet_allocator_test.go @@ -6,8 +6,13 @@ import ( "reflect" "testing" + cnitypes "github.com/containernetworking/cni/pkg/types" + + ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) func rangesFromStrings(ranges []string, networkLens []int) ([]config.CIDRNetworkEntry, error) { @@ -203,25 +208,39 @@ func TestController_allocateNodeSubnets(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - sna := NewHostSubnetAllocator() - ranges, err := rangesFromStrings(tt.networkRanges, tt.networkLens) if err != nil { t.Fatal(err) } - if err := sna.InitRanges(ranges); err != nil { + config.Default.ClusterSubnets = ranges + + netInfo, err := util.NewNetInfo( + &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: types.DefaultNetworkName}, + }, + ) + if err != nil { + t.Fatal(err) + } + + sna := &HostSubnetAllocator{ + netInfo: netInfo, + clusterSubnetAllocator: NewSubnetAllocator(), + } + + if err := sna.InitRanges(); err != nil { t.Fatalf("Failed to initialize network ranges: %v", err) } if tt.alreadyOwned != nil { - err = sna.MarkSubnetsAllocated(tt.alreadyOwned.owner, ovntest.MustParseIPNets(tt.alreadyOwned.subnet)...) + err := sna.clusterSubnetAllocator.MarkAllocatedNetworks(tt.alreadyOwned.owner, ovntest.MustParseIPNets(tt.alreadyOwned.subnet)...) if err != nil { t.Fatal(err) } } // test network allocation works correctly - got, allocated, err := sna.AllocateNodeSubnets("testnode", tt.existingNets, tt.configIPv4, tt.configIPv6) + got, allocated, err := sna.allocateNodeSubnets(sna.clusterSubnetAllocator, "testnode", tt.existingNets, tt.configIPv4, tt.configIPv6) if (err != nil) != tt.wantErr { t.Fatalf("Controller.addNode() error = %v, wantErr %v", err, tt.wantErr) } @@ -244,7 +263,7 @@ func TestController_allocateNodeSubnets(t *testing.T) { // Ensure an already owned subnet isn't touched if tt.alreadyOwned != nil { - err = sna.MarkSubnetsAllocated("blahblah", ovntest.MustParseIPNets(tt.alreadyOwned.subnet)...) + err = sna.clusterSubnetAllocator.MarkAllocatedNetworks("blahblah", ovntest.MustParseIPNets(tt.alreadyOwned.subnet)...) 
if err == nil { t.Fatal("Expected subnet to already be allocated by a different node") } @@ -258,30 +277,45 @@ func TestController_allocateNodeSubnets_ReleaseOnError(t *testing.T) { if err != nil { t.Fatal(err) } - sna := NewHostSubnetAllocator() - if err := sna.InitRanges(ranges); err != nil { + config.Default.ClusterSubnets = ranges + + netInfo, err := util.NewNetInfo( + &ovncnitypes.NetConf{ + NetConf: cnitypes.NetConf{Name: types.DefaultNetworkName}, + }, + ) + if err != nil { + t.Fatal(err) + } + + sna := &HostSubnetAllocator{ + netInfo: netInfo, + clusterSubnetAllocator: NewSubnetAllocator(), + } + + if err := sna.InitRanges(); err != nil { t.Fatalf("Failed to initialize network ranges: %v", err) } // Mark all v6 subnets already allocated to force an error in AllocateNodeSubnets() - if err := sna.MarkSubnetsAllocated("blah", ovntest.MustParseIPNet("2000::/127")); err != nil { - t.Fatalf("MarkSubnetsAllocated() expected no error but got: %v", err) + if err := sna.clusterSubnetAllocator.MarkAllocatedNetworks("blah", ovntest.MustParseIPNet("2000::/127")); err != nil { + t.Fatalf("MarkAllocatedNetworks() expected no error but got: %v", err) } // test network allocation works correctly - _, v4usedBefore, _, v6usedBefore := sna.base.Usage() - got, allocated, err := sna.AllocateNodeSubnets("testNode", nil, true, true) + _, v4usedBefore, _, v6usedBefore := sna.clusterSubnetAllocator.Usage() + got, allocated, err := sna.allocateNodeSubnets(sna.clusterSubnetAllocator, "testNode", nil, true, true) if err == nil { - t.Fatalf("AllocateNodeSubnets() expected error but got success") + t.Fatalf("allocateNodeSubnets() expected error but got success") } if got != nil { - t.Fatalf("AllocateNodeSubnets() expected no existing host subnets, got %v", got) + t.Fatalf("allocateNodeSubnets() expected no existing host subnets, got %v", got) } if allocated != nil { - t.Fatalf("AllocateNodeSubnets() expected no allocated subnets, got %v", allocated) + t.Fatalf("allocateNodeSubnets() expected no allocated subnets, got %v", allocated) } - _, v4usedAfter, _, v6usedAfter := sna.base.Usage() + _, v4usedAfter, _, v6usedAfter := sna.clusterSubnetAllocator.Usage() if v4usedAfter != v4usedBefore { t.Fatalf("Expected %d v4 allocated subnets, but got %d", v4usedBefore, v4usedAfter) } @@ -289,122 +323,3 @@ func TestController_allocateNodeSubnets_ReleaseOnError(t *testing.T) { t.Fatalf("Expected %d v6 allocated subnets, but got %d", v6usedBefore, v6usedAfter) } } - -func ipnetStringsToSlice(strings []string) ([]*net.IPNet, error) { - slice := make([]*net.IPNet, 0, len(strings)) - for _, s := range strings { - _, subnet, err := net.ParseCIDR(s) - if err != nil { - return nil, fmt.Errorf("error parsing subnet %s", s) - } - slice = append(slice, subnet) - } - return slice, nil -} - -func TestController_markSubnetsAllocated(t *testing.T) { - tests := []struct { - name string - networkRanges []string - networkLens []int - markedSubnets []string - secondSubnets []string - wantErr bool - }{ - { - name: "IPv4 no conflict", - networkRanges: []string{"172.16.0.0/16"}, - networkLens: []int{24}, - markedSubnets: []string{"172.16.0.0/24"}, - secondSubnets: []string{"172.16.1.0/24"}, - wantErr: false, - }, - { - name: "IPv4 conflict", - networkRanges: []string{"172.16.0.0/16"}, - networkLens: []int{24}, - markedSubnets: []string{"172.16.0.0/24"}, - secondSubnets: []string{"172.16.0.0/24"}, - wantErr: true, - }, - { - name: "IPv6 no conflict", - networkRanges: []string{"2001:db2::/56"}, - networkLens: []int{64}, - markedSubnets: 
[]string{"2001:db2:0:1::/64"}, - secondSubnets: []string{"2001:db2:0:2::/64"}, - wantErr: false, - }, - { - name: "IPv6 conflict", - networkRanges: []string{"2001:db2::/56"}, - networkLens: []int{64}, - markedSubnets: []string{"2001:db2::/64"}, - secondSubnets: []string{"2001:db2::/64"}, - wantErr: true, - }, - { - name: "dual-stack no conflict", - networkRanges: []string{"2001:db2::/56", "172.16.0.0/16"}, - networkLens: []int{64, 24}, - markedSubnets: []string{"2001:db2:0:1::/64", "172.16.0.0/24"}, - secondSubnets: []string{"2001:db2:0:2::/64", "172.16.1.0/24"}, - wantErr: false, - }, - { - name: "dual-stack v4 conflict", - networkRanges: []string{"2001:db2::/56", "172.16.0.0/16"}, - networkLens: []int{64, 24}, - markedSubnets: []string{"2001:db2:0:1::/64", "172.16.0.0/24"}, - secondSubnets: []string{"2001:db2:0:2::/64", "172.16.0.0/24"}, - wantErr: true, - }, - { - name: "dual-stack v6 conflict", - networkRanges: []string{"2001:db2::/56", "172.16.0.0/16"}, - networkLens: []int{64, 24}, - markedSubnets: []string{"2001:db2:0:1::/64", "172.16.0.0/24"}, - secondSubnets: []string{"2001:db2:0:1::/64", "172.16.1.0/24"}, - wantErr: true, - }, - { - name: "dual-stack both conflict", - networkRanges: []string{"2001:db2::/56", "172.16.0.0/16"}, - networkLens: []int{64, 24}, - markedSubnets: []string{"2001:db2:0:1::/64", "172.16.0.0/24"}, - secondSubnets: []string{"2001:db2:0:1::/64", "172.16.0.0/24"}, - wantErr: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - sna := NewHostSubnetAllocator() - - ranges, err := rangesFromStrings(tt.networkRanges, tt.networkLens) - if err != nil { - t.Fatal(err) - } - if err := sna.InitRanges(ranges); err != nil { - t.Fatalf("Failed to initialize network ranges: %v", err) - } - - subnets, err := ipnetStringsToSlice(tt.markedSubnets) - if err != nil { - t.Fatal(err) - } - if err := sna.MarkSubnetsAllocated("node1", subnets...); err != nil { - t.Fatalf("Failed to mark allocated subnets: %v", err) - } - - subnets, err = ipnetStringsToSlice(tt.secondSubnets) - if err != nil { - t.Fatal(err) - } - err = sna.MarkSubnetsAllocated("node2", subnets...) - if (err != nil) != tt.wantErr { - t.Fatalf("Mark second subnets allocated error %v, wantErr %v", err, tt.wantErr) - } - }) - } -} From d863c56c0cd166cd28bf1146dbf5e1c7be4fedec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Fri, 26 May 2023 21:02:45 +0000 Subject: [PATCH 16/31] Add ip allocator to cluster manager MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an ip allocator to cluster manager. The allocator reconciles pods triggered by events registered by the cluster manager network controller which now listens to pod events when interconnect is enabled. The ip allocator not only allocates IPs but fully handles the PodAnnotation as well. It will derive from the allocated IPs or honor requests for the mac address and gateways and set the annotation on the Pod. This will trigger the CNI side to attach the pod interface while master network controller configures the northbound database. Only enabled for localnet secondary networks with interconnect for now. On such a scenario, the master network controller is inhibited from setting the PodAnnotation leaving it up for the cluster manager to do. 
Signed-off-by: Jaime Caamaño Ruiz --- .../network_cluster_controller.go | 88 ++++- .../pkg/clustermanager/pod/ip_allocator.go | 268 ++++++++++++++ .../clustermanager/pod/ip_allocator_test.go | 347 ++++++++++++++++++ go-controller/pkg/factory/factory.go | 7 + .../pkg/ovn/base_network_controller_pods.go | 29 ++ .../ovn/base_network_controller_secondary.go | 7 +- .../secondary_layer2_network_controller.go | 12 +- .../secondary_layer3_network_controller.go | 12 +- .../secondary_localnet_network_controller.go | 12 +- 9 files changed, 762 insertions(+), 20 deletions(-) create mode 100644 go-controller/pkg/clustermanager/pod/ip_allocator.go create mode 100644 go-controller/pkg/clustermanager/pod/ip_allocator_test.go diff --git a/go-controller/pkg/clustermanager/network_cluster_controller.go b/go-controller/pkg/clustermanager/network_cluster_controller.go index ccf8aeb551..af404c4040 100644 --- a/go-controller/pkg/clustermanager/network_cluster_controller.go +++ b/go-controller/pkg/clustermanager/network_cluster_controller.go @@ -10,7 +10,9 @@ import ( cache "k8s.io/client-go/tools/cache" "k8s.io/klog/v2" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/pod" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/subnetallocator" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" objretry "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" @@ -33,9 +35,14 @@ type networkClusterController struct { // retry framework for nodes retryNodes *objretry.RetryFramework + // retry framework for L2 pod ip allocation + podHandler *factory.Handler + retryPods *objretry.RetryFramework + // unique id of the network networkID int + podIPAllocator *pod.PodIPAllocator hostSubnetAllocator *subnetallocator.HostSubnetAllocator util.NetInfo @@ -60,10 +67,23 @@ func newNetworkClusterController(networkID int, netInfo util.NetInfo, ovnClient ncc.hostSubnetAllocator = subnetallocator.NewHostSubnetAllocator(networkID, netInfo, wf.NodeCoreInformer().Lister(), kube) } + if ncc.hasPodIPAllocation() { + ncc.podIPAllocator = pod.NewPodIPAllocator(netInfo, wf.PodCoreInformer().Lister(), kube) + } + ncc.initRetryFramework() return ncc } +func (ncc *networkClusterController) hasPodIPAllocation() bool { + // we only do pod IP allocation on L2 topologies with IPAM on interconnect + switch ncc.TopologyType() { + case types.Layer2Topology, types.LocalnetTopology: + return config.OVNKubernetesFeature.EnableInterconnect && len(ncc.Subnets()) > 0 + } + return false +} + func (ncc *networkClusterController) hasNodeSubnetAllocation() bool { // we only do node subnet allocation on L3 topologies or default network return ncc.TopologyType() == types.Layer3Topology || !ncc.IsSecondary() @@ -73,11 +93,16 @@ func (ncc *networkClusterController) initRetryFramework() { if ncc.hasNodeSubnetAllocation() { ncc.retryNodes = ncc.newRetryFramework(factory.NodeType, true) } + + if ncc.hasPodIPAllocation() { + ncc.retryPods = ncc.newRetryFramework(factory.PodType, true) + } } // Start the network cluster controller. 
Depending on the cluster configuration // and type of network, it does the following: // - initializes the host subnet allocator and starts listening to node events +// - initializes the pod ip allocator and starts listening to pod events func (ncc *networkClusterController) Start(ctx context.Context) error { if ncc.hasNodeSubnetAllocation() { err := ncc.hostSubnetAllocator.InitRanges() @@ -92,6 +117,19 @@ func (ncc *networkClusterController) Start(ctx context.Context) error { ncc.nodeHandler = nodeHandler } + if ncc.hasPodIPAllocation() { + err := ncc.podIPAllocator.InitRanges() + if err != nil { + return fmt.Errorf("failed to initialize pod ip allocator: %w", err) + } + + podHandler, err := ncc.retryPods.WatchResource() + if err != nil { + return fmt.Errorf("unable to watch pods: %w", err) + } + ncc.podHandler = podHandler + } + return nil } @@ -102,6 +140,10 @@ func (ncc *networkClusterController) Stop() { if ncc.nodeHandler != nil { ncc.watchFactory.RemoveNodeHandler(ncc.nodeHandler) } + + if ncc.podHandler != nil { + ncc.watchFactory.RemovePodHandler(ncc.podHandler) + } } func (ncc *networkClusterController) newRetryFramework(objectType reflect.Type, hasUpdateFunc bool) *objretry.RetryFramework { @@ -149,6 +191,16 @@ func (h *networkClusterControllerEventHandler) AddResource(obj interface{}, from var err error switch h.objType { + case factory.PodType: + pod, ok := obj.(*corev1.Pod) + if !ok { + return fmt.Errorf("could not cast %T object to *corev1.Pod", obj) + } + err := h.ncc.podIPAllocator.Reconcile(nil, pod) + if err != nil { + klog.Infof("Pod add failed for %s/%s, will try again later: %v", + pod.Namespace, pod.Name, err) + } case factory.NodeType: node, ok := obj.(*corev1.Node) if !ok { @@ -172,6 +224,20 @@ func (h *networkClusterControllerEventHandler) UpdateResource(oldObj, newObj int var err error switch h.objType { + case factory.PodType: + old, ok := oldObj.(*corev1.Pod) + if !ok { + return fmt.Errorf("could not cast %T old object to *corev1.Pod", oldObj) + } + new, ok := newObj.(*corev1.Pod) + if !ok { + return fmt.Errorf("could not cast %T new object to *corev1.Pod", newObj) + } + err := h.ncc.podIPAllocator.Reconcile(old, new) + if err != nil { + klog.Infof("Pod update failed for %s/%s, will try again later: %v", + new.Namespace, new.Name, err) + } case factory.NodeType: node, ok := newObj.(*corev1.Node) if !ok { @@ -192,6 +258,16 @@ func (h *networkClusterControllerEventHandler) UpdateResource(oldObj, newObj int // cachedObj is the internal cache entry for this object, used for now for pods and network policies. func (h *networkClusterControllerEventHandler) DeleteResource(obj, cachedObj interface{}) error { switch h.objType { + case factory.PodType: + pod, ok := obj.(*corev1.Pod) + if !ok { + return fmt.Errorf("could not cast %T object to *corev1.Pod", obj) + } + err := h.ncc.podIPAllocator.Reconcile(pod, nil) + if err != nil { + klog.Infof("Pod delete failed for %s/%s, will try again later: %v", + pod.Namespace, pod.Name, err) + } case factory.NodeType: node, ok := obj.(*corev1.Node) if !ok { @@ -210,6 +286,8 @@ func (h *networkClusterControllerEventHandler) SyncFunc(objs []interface{}) erro syncFunc = h.syncFunc } else { switch h.objType { + case factory.PodType: + syncFunc = h.ncc.podIPAllocator.Sync case factory.NodeType: syncFunc = h.ncc.hostSubnetAllocator.Sync @@ -247,8 +325,9 @@ func (h *networkClusterControllerEventHandler) IsResourceScheduled(obj interface return true } -// IsObjectInTerminalState returns true if the object is a in terminal state. 
Always returns true. +// IsObjectInTerminalState returns true if the object is a in terminal state. Always returns false. func (h *networkClusterControllerEventHandler) IsObjectInTerminalState(obj interface{}) bool { + // Note: the pod IP allocator needs to be aware when pods are deleted return false } @@ -281,10 +360,10 @@ func (h *networkClusterControllerEventHandler) GetInternalCacheEntry(obj interfa // given an object key and its type func (h *networkClusterControllerEventHandler) GetResourceFromInformerCache(key string) (interface{}, error) { var obj interface{} - var name string + var namespace, name string var err error - _, name, err = cache.SplitMetaNamespaceKey(key) + namespace, name, err = cache.SplitMetaNamespaceKey(key) if err != nil { return nil, fmt.Errorf("failed to split key %s: %v", key, err) } @@ -292,7 +371,8 @@ func (h *networkClusterControllerEventHandler) GetResourceFromInformerCache(key switch h.objType { case factory.NodeType: obj, err = h.ncc.watchFactory.GetNode(name) - + case factory.PodType: + obj, err = h.ncc.watchFactory.GetPod(namespace, name) default: err = fmt.Errorf("object type %s not supported, cannot retrieve it from informers cache", h.objType) diff --git a/go-controller/pkg/clustermanager/pod/ip_allocator.go b/go-controller/pkg/clustermanager/pod/ip_allocator.go new file mode 100644 index 0000000000..aada337ac5 --- /dev/null +++ b/go-controller/pkg/clustermanager/pod/ip_allocator.go @@ -0,0 +1,268 @@ +package pod + +import ( + "fmt" + "net" + "sync" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" + listers "k8s.io/client-go/listers/core/v1" + "k8s.io/klog/v2" + + nettypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip/subnet" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/pod" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +// PodIPAllocator acts on pods events handed off by the cluster network +// controller and allocates or releases IPs for them updating the pod annotation +// as necessary with all the additional information derived from those IPs. 
+type PodIPAllocator struct { + netInfo util.NetInfo + + // allocator of IPs within subnets + allocator subnet.Allocator + + // A utility to allocate the PodAnnotation to pods + podAnnotationAllocator *pod.PodAnnotationAllocator + + // track pods that have been released but not deleted yet so that we don't + // release more than once + releasedPods map[string]sets.Set[string] + releasedPodsMutex sync.Mutex +} + +// NewPodIPAllocator builds a new PodIPAllocator +func NewPodIPAllocator(netInfo util.NetInfo, podLister listers.PodLister, kube kube.Interface) *PodIPAllocator { + podAnnotationAllocator := pod.NewPodAnnotationAllocator( + netInfo, + podLister, + kube, + ) + + podIPAllocator := &PodIPAllocator{ + netInfo: netInfo, + releasedPods: map[string]sets.Set[string]{}, + releasedPodsMutex: sync.Mutex{}, + podAnnotationAllocator: podAnnotationAllocator, + } + + // this network might not have IPAM, in which case we will just allocate MAC addresses + if util.DoesNetworkRequireIPAM(netInfo) { + podIPAllocator.allocator = subnet.NewAllocator() + } + + return podIPAllocator +} + +// InitRanges initializes the allocator with the subnets configured for the +// network +func (a *PodIPAllocator) InitRanges() error { + if a.netInfo.TopologyType() != types.LocalnetTopology { + return fmt.Errorf("topology %s not supported", a.netInfo.TopologyType()) + } + + subnets := a.netInfo.Subnets() + ipNets := make([]*net.IPNet, 0, len(subnets)) + for _, subnet := range subnets { + ipNets = append(ipNets, subnet.CIDR) + } + return a.allocator.AddOrUpdateSubnet(a.netInfo.GetNetworkName(), ipNets, a.netInfo.ExcludeSubnets()...) +} + +// Reconcile allocates or releases IPs for pods, updating the pod annotation +// as necessary with all the additional information derived from those IPs +func (a *PodIPAllocator) Reconcile(old, new *corev1.Pod) error { + releaseIPsFromAllocator := true + return a.reconcile(old, new, releaseIPsFromAllocator) +} + +// Sync initializes the allocator with pods that already exist on the cluster +func (a *PodIPAllocator) Sync(objs []interface{}) error { + // on sync, we don't release IPs from the allocator, we are just trying to + // allocate annotated IPs; specifically we don't want to release IPs of + // completed pods that might still be in use by other pods + releaseIPsFromAllocator := false + + for _, obj := range objs { + pod, ok := obj.(*corev1.Pod) + if !ok { + klog.Errorf("Could not cast %T object to *corev1.Pod", obj) + continue + } + err := a.reconcile(nil, pod, releaseIPsFromAllocator) + if err != nil { + klog.Errorf("Failed to sync pod %s/%s: %v", pod.Namespace, pod.Name, err) + } + } + + return nil +} + +func (a *PodIPAllocator) reconcile(old, new *corev1.Pod, releaseIPsFromAllocator bool) error { + var pod *corev1.Pod + if old != nil { + pod = old + } + if new != nil { + pod = new + } + + podScheduled := util.PodScheduled(pod) + podWantsHostNetwork := util.PodWantsHostNetwork(pod) + + // nothing to do for unscheduled or host-network pods + if !podScheduled || podWantsHostNetwork { + return nil + } + + onNetwork, networkMap, err := util.GetPodNADToNetworkMapping(pod, a.netInfo) + if err != nil { + return fmt.Errorf("failed to get NAD to network mapping: %w", err) + } + + // nothing to do if not on this network + // Note: we are not considering a hotplug scenario where we would have to + // release IPs if the pod was unplugged from the network + if !onNetwork { + return nil + } + + // reconcile for each NAD + for nadName, network := range networkMap { + err = a.reconcileForNAD(old, new, 
nadName, network, releaseIPsFromAllocator) + if err != nil { + return err + } + } + + return nil +} + +func (a *PodIPAllocator) reconcileForNAD(old, new *corev1.Pod, nad string, network *nettypes.NetworkSelectionElement, releaseIPsFromAllocator bool) error { + var pod *corev1.Pod + if old != nil { + pod = old + } + if new != nil { + pod = new + } + podDeleted := new == nil + podCompleted := util.PodCompleted(pod) + + if podCompleted || podDeleted { + return a.releasePodOnNAD(pod, nad, podDeleted, releaseIPsFromAllocator) + } + + return a.allocatePodOnNAD(pod, nad, network) +} + +func (a *PodIPAllocator) releasePodOnNAD(pod *corev1.Pod, nad string, podDeleted, releaseIPsFromAllocator bool) error { + if !util.DoesNetworkRequireIPAM(a.netInfo) { + // no need to release if no IPAM + return nil + } + + podAnnotation, _ := util.UnmarshalPodAnnotation(pod.Annotations, nad) + if podAnnotation == nil { + // track released pods even if they have no annotation in case a user + // might have removed it manually + podAnnotation = &util.PodAnnotation{} + } + + uid := string(pod.UID) + + // do not release IPs from the allocator if not flagged to do so or if they + // were already previously released + if releaseIPsFromAllocator && !a.isPodReleased(nad, uid) { + err := a.allocator.ReleaseIPs(a.netInfo.GetNetworkName(), podAnnotation.IPs) + if err != nil { + return fmt.Errorf("failed to release ips %v for pod %s/%s and nad %s: %w", + util.StringSlice(podAnnotation.IPs), + pod.Namespace, + pod.Name, + nad, + err, + ) + } + } + + if podDeleted { + a.deleteReleasedPod(nad, string(pod.UID)) + } else { + a.addReleasedPod(nad, string(pod.UID)) + } + + return nil +} + +func (a *PodIPAllocator) allocatePodOnNAD(pod *corev1.Pod, nad string, network *nettypes.NetworkSelectionElement) error { + var ipAllocator subnet.NamedAllocator + if util.DoesNetworkRequireIPAM(a.netInfo) { + ipAllocator = a.allocator.ForSubnet(a.netInfo.GetNetworkName()) + } + + // don't reallocate to new IPs if currently annotated IPs fail to allocate + reallocate := false + + updatedPod, podAnnotation, err := a.podAnnotationAllocator.AllocatePodAnnotation( + ipAllocator, + pod, + network, + reallocate, + ) + + if err != nil { + return err + } + + if updatedPod != nil { + klog.V(5).Infof("Allocated IP addresses %v, mac address %s, gateways %v and routes %s for pod %s/%s on nad %s", + util.StringSlice(podAnnotation.IPs), + podAnnotation.MAC, + util.StringSlice(podAnnotation.Gateways), + util.StringSlice(podAnnotation.Routes), + pod.Namespace, pod.Name, nad, + ) + } + + return err +} + +func (a *PodIPAllocator) addReleasedPod(nad, uid string) { + a.releasedPodsMutex.Lock() + defer a.releasedPodsMutex.Unlock() + releasedPods := a.releasedPods[nad] + if releasedPods == nil { + a.releasedPods[nad] = sets.New(uid) + return + } + releasedPods.Insert(uid) +} + +func (a *PodIPAllocator) deleteReleasedPod(nad, uid string) { + a.releasedPodsMutex.Lock() + defer a.releasedPodsMutex.Unlock() + releasedPods := a.releasedPods[nad] + if releasedPods != nil { + releasedPods.Delete(uid) + if releasedPods.Len() == 0 { + delete(a.releasedPods, nad) + } + } +} + +func (a *PodIPAllocator) isPodReleased(nad, uid string) bool { + a.releasedPodsMutex.Lock() + defer a.releasedPodsMutex.Unlock() + releasedPods := a.releasedPods[nad] + if releasedPods != nil { + return releasedPods.Has(uid) + } + return false +} diff --git a/go-controller/pkg/clustermanager/pod/ip_allocator_test.go b/go-controller/pkg/clustermanager/pod/ip_allocator_test.go new file mode 100644 index 
0000000000..1f9d28306b --- /dev/null +++ b/go-controller/pkg/clustermanager/pod/ip_allocator_test.go @@ -0,0 +1,347 @@ +package pod + +import ( + "encoding/json" + "fmt" + "net" + "sync" + "testing" + + "github.com/stretchr/testify/mock" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip/subnet" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/pod" + ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + + nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + apitypes "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + + kubemocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube/mocks" + v1mocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1" +) + +type testPod struct { + scheduled bool + hostNetwork bool + completed bool + network *nadapi.NetworkSelectionElement +} + +func (p testPod) getPod(t *testing.T) *corev1.Pod { + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod", + UID: apitypes.UID("pod"), + Namespace: "namespace", + Annotations: map[string]string{}, + }, + Spec: corev1.PodSpec{ + HostNetwork: p.hostNetwork, + }, + Status: corev1.PodStatus{}, + } + if p.scheduled { + pod.Spec.NodeName = "node" + } + if p.completed { + pod.Status.Phase = corev1.PodSucceeded + } + + if p.network != nil { + bytes, err := json.Marshal([]*nadapi.NetworkSelectionElement{p.network}) + if err != nil { + t.Fatalf("Invalid network selection") + } + pod.ObjectMeta.Annotations[nadapi.NetworkAttachmentAnnot] = string(bytes) + } + + return pod +} + +type allocatorStub struct { + released bool +} + +func (a *allocatorStub) AddOrUpdateSubnet(name string, subnets []*net.IPNet, excludeSubnets ...*net.IPNet) error { + panic("not implemented") // TODO: Implement +} + +func (a allocatorStub) DeleteSubnet(name string) { + panic("not implemented") // TODO: Implement +} + +func (a *allocatorStub) GetSubnets(name string) ([]*net.IPNet, error) { + panic("not implemented") // TODO: Implement +} + +func (a *allocatorStub) AllocateUntilFull(name string) error { + panic("not implemented") // TODO: Implement +} + +func (a *allocatorStub) AllocateIPs(name string, ips []*net.IPNet) error { + panic("not implemented") // TODO: Implement +} + +func (a *allocatorStub) AllocateNextIPs(name string) ([]*net.IPNet, error) { + panic("not implemented") // TODO: Implement +} + +func (a *allocatorStub) ReleaseIPs(name string, ips []*net.IPNet) error { + a.released = true + return nil +} + +func (a *allocatorStub) ConditionalIPRelease(name string, ips []*net.IPNet, predicate func() (bool, error)) (bool, error) { + panic("not implemented") // TODO: Implement +} + +func (a *allocatorStub) ForSubnet(name string) subnet.NamedAllocator { + return nil +} + +func TestPodIPAllocator_reconcileForNAD(t *testing.T) { + type args struct { + old *testPod + new *testPod + release bool + } + tests := []struct { + name string + args args + ipam bool + tracked bool + expectAllocate bool + expectRelease bool + expectTracked bool + }{ + { + name: "Pod not scheduled", + args: args{ + new: &testPod{}, + }, + }, + { + name: "Pod on host network", + args: args{ + new: &testPod{ + hostNetwork: true, + }, + }, + }, + { + name: "Pod not on network", + args: 
args{ + new: &testPod{ + scheduled: true, + }, + }, + }, + { + name: "Pod on network", + args: args{ + new: &testPod{ + scheduled: true, + network: &nadapi.NetworkSelectionElement{ + Name: "nad", + }, + }, + }, + expectAllocate: true, + }, + { + name: "Pod completed, release inactive", + ipam: true, + args: args{ + new: &testPod{ + scheduled: true, + completed: true, + network: &nadapi.NetworkSelectionElement{ + Name: "nad", + }, + }, + }, + expectTracked: true, + }, + { + name: "Pod completed, release active, not previously released", + ipam: true, + args: args{ + new: &testPod{ + scheduled: true, + completed: true, + network: &nadapi.NetworkSelectionElement{ + Name: "nad", + }, + }, + release: true, + }, + expectRelease: true, + expectTracked: true, + }, + { + name: "Pod completed, release active, not previously released, no IPAM", + args: args{ + new: &testPod{ + scheduled: true, + completed: true, + network: &nadapi.NetworkSelectionElement{ + Name: "nad", + }, + }, + release: true, + }, + }, + { + name: "Pod completed, release active, previously released", + ipam: true, + args: args{ + new: &testPod{ + scheduled: true, + completed: true, + network: &nadapi.NetworkSelectionElement{ + Name: "nad", + }, + }, + release: true, + }, + tracked: true, + expectTracked: true, + }, + { + name: "Pod deleted, not scheduled", + ipam: true, + args: args{ + old: &testPod{}, + }, + }, + { + name: "Pod deleted, on host network", + ipam: true, + args: args{ + old: &testPod{ + hostNetwork: true, + }, + }, + }, + { + name: "Pod deleted, not on network", + ipam: true, + args: args{ + old: &testPod{ + scheduled: true, + }, + }, + }, + { + name: "Pod deleted, not previously released", + ipam: true, + args: args{ + old: &testPod{ + scheduled: true, + network: &nadapi.NetworkSelectionElement{ + Name: "nad", + }, + }, + release: true, + }, + expectRelease: true, + }, + { + name: "Pod deleted, previously released", + ipam: true, + args: args{ + old: &testPod{ + scheduled: true, + network: &nadapi.NetworkSelectionElement{ + Name: "nad", + }, + }, + release: true, + }, + tracked: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + + ipallocator := &allocatorStub{} + + podListerMock := &v1mocks.PodLister{} + kubeMock := &kubemocks.Interface{} + podNamespaceLister := &v1mocks.PodNamespaceLister{} + + podListerMock.On("Pods", mock.AnythingOfType("string")).Return(podNamespaceLister) + + var allocated bool + kubeMock.On("UpdatePod", mock.AnythingOfType(fmt.Sprintf("%T", &corev1.Pod{}))).Run( + func(args mock.Arguments) { + allocated = true + }, + ).Return(nil) + + netConf := &ovncnitypes.NetConf{ + Topology: types.LocalnetTopology, + } + if tt.ipam { + netConf.Subnets = "10.1.130.0/24" + } + + netInfo, err := util.NewNetInfo(netConf) + if err != nil { + t.Fatalf("Invalid netConf") + } + netInfo.AddNAD("namespace/nad") + + podAnnotationAllocator := pod.NewPodAnnotationAllocator( + netInfo, + podListerMock, + kubeMock, + ) + + a := &PodIPAllocator{ + netInfo: netInfo, + allocator: ipallocator, + podAnnotationAllocator: podAnnotationAllocator, + releasedPods: map[string]sets.Set[string]{}, + releasedPodsMutex: sync.Mutex{}, + } + + var old, new *corev1.Pod + if tt.args.old != nil { + old = tt.args.old.getPod(t) + } + if tt.args.new != nil { + new = tt.args.new.getPod(t) + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(new, nil) + } + + if tt.tracked { + a.releasedPods["namespace/nad"] = sets.New("pod") + } + + err = a.reconcile(old, new, tt.args.release) + if err != nil 
{ + t.Errorf("reconcile failed: %v", err) + } + + if tt.expectAllocate != allocated { + t.Errorf("expected pod ips allocated to be %v but it was %v", tt.expectAllocate, allocated) + } + + if tt.expectRelease != ipallocator.released { + t.Errorf("expected pod ips released to be %v but it was %v", tt.expectRelease, ipallocator.released) + } + + if tt.expectTracked != a.releasedPods["namespace/nad"].Has("pod") { + t.Errorf("expected pod tracked to be %v but it was %v", tt.expectTracked, a.releasedPods["namespace/nad"].Has("pod")) + } + }) + } +} diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index dd6c09dded..624e8a4a4d 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -536,6 +536,13 @@ func NewClusterManagerWatchFactory(ovnClientset *util.OVNClusterManagerClientset } } + if config.OVNKubernetesFeature.EnableInterconnect && config.OVNKubernetesFeature.EnableMultiNetwork { + wf.informers[PodType], err = newQueuedInformer(PodType, wf.iFactory.Core().V1().Pods().Informer(), wf.stopChan, defaultNumEventQueues) + if err != nil { + return nil, err + } + } + return wf, nil } diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go index 30b1928927..f851ba8818 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods.go +++ b/go-controller/pkg/ovn/base_network_controller_pods.go @@ -10,6 +10,7 @@ import ( nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" ipallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" subnetipallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip/subnet" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" logicalswitchmanager "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/logical_switch_manager" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -312,6 +313,12 @@ func (bnc *BaseNetworkController) findPodWithIPAddresses(needleIPs []net.IP) (*k // canReleasePodIPs checks if the podIPs can be released or not. 
func (bnc *BaseNetworkController) canReleasePodIPs(podIfAddrs []*net.IPNet) (bool, error) { + // in certain configurations IP allocation is handled by cluster manager so + // we can locally release the IPs without checking + if !bnc.handlesPodIPAllocation() { + return true, nil + } + var needleIPs []net.IP for _, podIPNet := range podIfAddrs { needleIPs = append(needleIPs, podIPNet.IP) @@ -851,6 +858,18 @@ func (bnc *BaseNetworkController) allocatePodAnnotationForSecondaryNetwork(pod * return nil, false, err } + // In certain configurations, pod IP allocation is handled from cluster + // manager so wait for it to allocate the IPs + if !bnc.handlesPodIPAllocation() { + podAnnotation, _ := util.UnmarshalPodAnnotation(pod.Annotations, nadName) + if !util.IsValidPodAnnotation(podAnnotation) { + return nil, false, fmt.Errorf("failed to get PodAnnotation for %s/%s/%s, cluster manager might have not allocated it yet", + nadName, pod.Namespace, pod.Name) + } + + return podAnnotation, false, nil + } + if network == nil { network = &nadapi.NetworkSelectionElement{} } @@ -900,3 +919,13 @@ func (bnc *BaseNetworkController) allocatePodAnnotationForSecondaryNetwork(pod * return podAnnotation, false, nil } + +func (bnc *BaseNetworkController) handlesPodIPAllocation() bool { + // the controller is in charge of pod IP allocation except L2 topologies + // with IPAM on interconnect + switch bnc.NetInfo.TopologyType() { + case ovntypes.Layer2Topology, ovntypes.LocalnetTopology: + return !config.OVNKubernetesFeature.EnableInterconnect || !bnc.doesNetworkRequireIPAM() + } + return true +} diff --git a/go-controller/pkg/ovn/base_network_controller_secondary.go b/go-controller/pkg/ovn/base_network_controller_secondary.go index 3239eb2f72..6e0f2a91a4 100644 --- a/go-controller/pkg/ovn/base_network_controller_secondary.go +++ b/go-controller/pkg/ovn/base_network_controller_secondary.go @@ -379,6 +379,11 @@ func (bsnc *BaseSecondaryNetworkController) removeLocalZonePodForSecondaryNetwor return err } + // do not release IP address if this controller does not handle IP allocation + if !bsnc.handlesPodIPAllocation() { + continue + } + // do not release IP address unless we have validated no other pod is using it if pInfo == nil { continue @@ -426,7 +431,7 @@ func (bsnc *BaseSecondaryNetworkController) syncPodsForSecondaryNetwork(pods []i } continue } - if bsnc.doesNetworkRequireIPAM() { + if bsnc.doesNetworkRequireIPAM() && bsnc.handlesPodIPAllocation() { expectedLogicalPortName, err := bsnc.allocatePodIPs(pod, annotations, nadName) if err != nil { return err diff --git a/go-controller/pkg/ovn/secondary_layer2_network_controller.go b/go-controller/pkg/ovn/secondary_layer2_network_controller.go index a9dbd74803..11e4b76e6c 100644 --- a/go-controller/pkg/ovn/secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/secondary_layer2_network_controller.go @@ -49,11 +49,13 @@ func NewSecondaryLayer2NetworkController(cnci *CommonNetworkControllerInfo, netI }, } - podAnnotationAllocator := pod.NewPodAnnotationAllocator( - netInfo, - cnci.watchFactory.PodCoreInformer().Lister(), - cnci.kube) - oc.podAnnotationAllocator = podAnnotationAllocator + if oc.handlesPodIPAllocation() { + podAnnotationAllocator := pod.NewPodAnnotationAllocator( + netInfo, + cnci.watchFactory.PodCoreInformer().Lister(), + cnci.kube) + oc.podAnnotationAllocator = podAnnotationAllocator + } // disable multicast support for secondary networks // TBD: changes needs to be made to support multicast in secondary networks diff --git 
a/go-controller/pkg/ovn/secondary_layer3_network_controller.go b/go-controller/pkg/ovn/secondary_layer3_network_controller.go index f5b801e06e..c3a910ab3b 100644 --- a/go-controller/pkg/ovn/secondary_layer3_network_controller.go +++ b/go-controller/pkg/ovn/secondary_layer3_network_controller.go @@ -289,11 +289,13 @@ func NewSecondaryLayer3NetworkController(cnci *CommonNetworkControllerInfo, netI zoneICHandler: zoneICHandler, } - podAnnotationAllocator := pod.NewPodAnnotationAllocator( - netInfo, - cnci.watchFactory.PodCoreInformer().Lister(), - cnci.kube) - oc.podAnnotationAllocator = podAnnotationAllocator + if oc.handlesPodIPAllocation() { + podAnnotationAllocator := pod.NewPodAnnotationAllocator( + netInfo, + cnci.watchFactory.PodCoreInformer().Lister(), + cnci.kube) + oc.podAnnotationAllocator = podAnnotationAllocator + } // disable multicast support for secondary networks // TBD: changes needs to be made to support multicast in secondary networks diff --git a/go-controller/pkg/ovn/secondary_localnet_network_controller.go b/go-controller/pkg/ovn/secondary_localnet_network_controller.go index a5a259d233..6dd498bb9d 100644 --- a/go-controller/pkg/ovn/secondary_localnet_network_controller.go +++ b/go-controller/pkg/ovn/secondary_localnet_network_controller.go @@ -51,11 +51,13 @@ func NewSecondaryLocalnetNetworkController(cnci *CommonNetworkControllerInfo, ne }, } - podAnnotationAllocator := pod.NewPodAnnotationAllocator( - netInfo, - cnci.watchFactory.PodCoreInformer().Lister(), - cnci.kube) - oc.podAnnotationAllocator = podAnnotationAllocator + if oc.handlesPodIPAllocation() { + podAnnotationAllocator := pod.NewPodAnnotationAllocator( + netInfo, + cnci.watchFactory.PodCoreInformer().Lister(), + cnci.kube) + oc.podAnnotationAllocator = podAnnotationAllocator + } // disable multicast support for secondary networks // TBD: changes needs to be made to support multicast in secondary networks From 7821862abeeb259a94f4844b69aa1ac105acd9b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Fri, 26 May 2023 21:20:05 +0000 Subject: [PATCH 17/31] Enable localnet topology with interconnect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jaime Caamaño Ruiz --- .../secondary_network_cluster_manager.go | 31 +++++++++++++------ .../network_controller_manager.go | 3 -- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go index 9c61df1d80..47c37662fe 100644 --- a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go +++ b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go @@ -8,6 +8,7 @@ import ( "k8s.io/klog/v2" ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" nad "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/network-attach-def-controller" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" @@ -93,19 +94,29 @@ func (sncm *secondaryNetworkClusterManager) Stop() { // interface function. This function is called by the net-attach-def controller when // a layer2 or layer3 secondary network is created. Layer2 type is not handled here. 
func (sncm *secondaryNetworkClusterManager) NewNetworkController(nInfo util.NetInfo) (nad.NetworkController, error) { - topoType := nInfo.TopologyType() - if topoType == ovntypes.Layer3Topology { - networkId, err := sncm.networkIDAllocator.allocateID(nInfo.GetNetworkName()) - if err != nil { - return nil, fmt.Errorf("failed to create NetworkController for secondary layer3 network %s : %w", nInfo.GetNetworkName(), err) - } + if !sncm.isTopologyManaged(nInfo) { + return nil, nad.ErrNetworkControllerTopologyNotManaged + } + + klog.Infof("Creating new network controller for network %s of topology %s", nInfo.GetNetworkName(), nInfo.TopologyType()) - sncc := newNetworkClusterController(networkId, nInfo, sncm.ovnClient, sncm.watchFactory) - return sncc, nil + networkId, err := sncm.networkIDAllocator.allocateID(nInfo.GetNetworkName()) + if err != nil { + return nil, fmt.Errorf("failed to create NetworkController for secondary layer3 network %s : %w", nInfo.GetNetworkName(), err) } - // Secondary network cluster manager doesn't manage other topology types - return nil, nad.ErrNetworkControllerTopologyNotManaged + sncc := newNetworkClusterController(networkId, nInfo, sncm.ovnClient, sncm.watchFactory) + return sncc, nil +} + +func (sncm *secondaryNetworkClusterManager) isTopologyManaged(nInfo util.NetInfo) bool { + switch nInfo.TopologyType() { + case ovntypes.Layer3Topology: + return true + case ovntypes.LocalnetTopology: + return config.OVNKubernetesFeature.EnableInterconnect && len(nInfo.Subnets()) > 0 + } + return false } // CleanupDeletedNetworks implements the networkAttachDefController.NetworkControllerManager diff --git a/go-controller/pkg/network-controller-manager/network_controller_manager.go b/go-controller/pkg/network-controller-manager/network_controller_manager.go index d0133485f8..003949df21 100644 --- a/go-controller/pkg/network-controller-manager/network_controller_manager.go +++ b/go-controller/pkg/network-controller-manager/network_controller_manager.go @@ -73,9 +73,6 @@ func (cm *networkControllerManager) NewNetworkController(nInfo util.NetInfo) (na } return ovn.NewSecondaryLayer2NetworkController(cnci, nInfo), nil case ovntypes.LocalnetTopology: - if config.OVNKubernetesFeature.EnableInterconnect { - return nil, fmt.Errorf("topology type %s not supported when Interconnect feature is enabled", topoType) - } return ovn.NewSecondaryLocalnetNetworkController(cnci, nInfo), nil } return nil, fmt.Errorf("topology type %s not supported", topoType) From ef362a8244fffab06395d1a8e3811bde42597ec4 Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Sat, 17 Jun 2023 21:05:06 +0200 Subject: [PATCH 18/31] podSelectorAddressSet: fix deleting namespaceHandler. namespace and pod handlers need different commands to be cleaned up. 
we used to call RemovePodHandler for both namespace and pod handlers Signed-off-by: Nadia Pinaeva --- .../pkg/ovn/pod_selector_address_set.go | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/go-controller/pkg/ovn/pod_selector_address_set.go b/go-controller/pkg/ovn/pod_selector_address_set.go index 1cffbbf1e3..aecca57d1e 100644 --- a/go-controller/pkg/ovn/pod_selector_address_set.go +++ b/go-controller/pkg/ovn/pod_selector_address_set.go @@ -38,7 +38,8 @@ type PodSelectorAddressSet struct { backRefs map[string]bool // handler is either pod or namespace handler - handler *factory.Handler + nsHandler *factory.Handler + podHandler *factory.Handler podSelector labels.Selector namespaceSelector labels.Selector @@ -177,7 +178,7 @@ func (psas *PodSelectorAddressSet) init(bnc *BaseNetworkController) error { } var err error - if psas.handler == nil { + if psas.nsHandler == nil && psas.podHandler == nil { if psas.namespace != "" { // static namespace if psas.podSelector.Empty() { @@ -216,9 +217,13 @@ func (psas *PodSelectorAddressSet) destroy(bnc *BaseNetworkController) error { return fmt.Errorf("failed to delete handler resources: %w", err) } } - if psas.handler != nil { - bnc.watchFactory.RemovePodHandler(psas.handler) - psas.handler = nil + if psas.podHandler != nil { + bnc.watchFactory.RemovePodHandler(psas.podHandler) + psas.podHandler = nil + } + if psas.nsHandler != nil { + bnc.watchFactory.RemoveNamespaceHandler(psas.nsHandler) + psas.nsHandler = nil } psas.needsCleanup = false return nil @@ -243,7 +248,7 @@ func (bnc *BaseNetworkController) addPodSelectorHandler(psAddrSet *PodSelectorAd klog.Errorf("Failed WatchResource for addPodSelectorHandler: %v", err) return err } - psAddrSet.handler = podHandler + psAddrSet.podHandler = podHandler return nil } @@ -265,7 +270,7 @@ func (bnc *BaseNetworkController) addNamespacedPodSelectorHandler(psAddrSet *Pod return err } - psAddrSet.handler = namespaceHandler + psAddrSet.nsHandler = namespaceHandler return nil } From 5d6b136b074aff19d551243f4cd8f51840a88af3 Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Mon, 26 Jun 2023 21:24:01 +0200 Subject: [PATCH 19/31] netpolEventHandler: fix stopChan. We used to pass oc.stopChan to the retryFramework for network policy handlers, but that means that retry loop for failed objects will not be stopped on network policy delete, therefore leaking goroutines. Create getChildStopChan function to pass stop signal both on oc and network policy delete. 
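The idea is only sketched here (the real helper added in go-controller/pkg/util/sync.go is used as util.GetChildStopChan in the diff below; the names in this sketch are illustrative):

    // childStopChan returns a channel that is closed when the parent stop
    // channel closes; the owner of the child (the network policy) may also
    // close it directly on delete. A production version additionally needs to
    // guard against closing the child twice if both paths can fire.
    func childStopChan(parent <-chan struct{}) chan struct{} {
    	child := make(chan struct{})
    	go func() {
    		select {
    		case <-parent:
    			// the controller is stopping: propagate to the child
    			close(child)
    		case <-child:
    			// the child was closed directly by its owner
    		}
    	}()
    	return child
    }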
Signed-off-by: Nadia Pinaeva --- .../pkg/ovn/base_network_controller_policy.go | 17 +++++++- ...network_controller_policy_event_handler.go | 8 ++-- .../pkg/ovn/pod_selector_address_set.go | 18 +++++++- .../pkg/ovn/pod_selector_address_set_test.go | 31 +++++++++++++- go-controller/pkg/ovn/policy_test.go | 42 +++++++++++++++++++ go-controller/pkg/util/sync.go | 27 ++++++++++++ 6 files changed, 135 insertions(+), 8 deletions(-) create mode 100644 go-controller/pkg/util/sync.go diff --git a/go-controller/pkg/ovn/base_network_controller_policy.go b/go-controller/pkg/ovn/base_network_controller_policy.go index 0459dbab2a..6240fa7188 100644 --- a/go-controller/pkg/ovn/base_network_controller_policy.go +++ b/go-controller/pkg/ovn/base_network_controller_policy.go @@ -15,6 +15,8 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + kapi "k8s.io/api/core/v1" knet "k8s.io/api/networking/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -182,6 +184,8 @@ type networkPolicy struct { // or this value will be set to true and handler can't proceed. // Use networkPolicy.RLock to read this field and hold it for the whole event handling. deleted bool + + stopChan chan struct{} } func NewNetworkPolicy(policy *knet.NetworkPolicy) *networkPolicy { @@ -818,7 +822,8 @@ func (bnc *BaseNetworkController) addLocalPodHandler(policy *knet.NetworkPolicy, syncFunc, &NetworkPolicyExtraParameters{ np: np, - }) + }, + np.stopChan) podHandler, err := retryLocalPods.WatchResourceFiltered(policy.Namespace, sel) if err != nil { @@ -1014,6 +1019,10 @@ func (bnc *BaseNetworkController) createNetworkPolicy(policy *knet.NetworkPolicy np.Unlock() npLocked = false + if np.stopChan == nil { + np.stopChan = util.GetChildStopChan(bnc.stopChan) + } + // 6. 
Start peer handlers to update all allow rules first for _, handler := range policyHandlers { // For each peer namespace selector, we create a watcher that @@ -1440,6 +1449,7 @@ func (bnc *BaseNetworkController) addPeerNamespaceHandler( factory.PeerNamespaceSelectorType, syncFunc, &NetworkPolicyExtraParameters{gp: gress, np: np}, + np.stopChan, ) namespaceHandler, err := retryPeerNamespaces.WatchResourceFiltered("", sel) @@ -1453,6 +1463,11 @@ func (bnc *BaseNetworkController) addPeerNamespaceHandler( } func (bnc *BaseNetworkController) shutdownHandlers(np *networkPolicy) { + if np.stopChan != nil { + close(np.stopChan) + np.stopChan = nil + } + if np.localPodHandler != nil { bnc.watchFactory.RemovePodHandler(np.localPodHandler) np.localPodHandler = nil diff --git a/go-controller/pkg/ovn/network_controller_policy_event_handler.go b/go-controller/pkg/ovn/network_controller_policy_event_handler.go index 754118687a..85e397cabe 100644 --- a/go-controller/pkg/ovn/network_controller_policy_event_handler.go +++ b/go-controller/pkg/ovn/network_controller_policy_event_handler.go @@ -2,11 +2,10 @@ package ovn import ( "fmt" - "reflect" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "reflect" kapi "k8s.io/api/core/v1" "k8s.io/client-go/tools/cache" @@ -27,7 +26,8 @@ import ( func (bnc *BaseNetworkController) newNetpolRetryFramework( objectType reflect.Type, syncFunc func([]interface{}) error, - extraParameters interface{}) *retry.RetryFramework { + extraParameters interface{}, + stopChan <-chan struct{}) *retry.RetryFramework { eventHandler := &networkControllerPolicyEventHandler{ objType: objectType, watchFactory: bnc.watchFactory, @@ -42,7 +42,7 @@ func (bnc *BaseNetworkController) newNetpolRetryFramework( EventHandler: eventHandler, } return retry.NewRetryFramework( - bnc.stopChan, + stopChan, bnc.wg, bnc.watchFactory, resourceHandler, diff --git a/go-controller/pkg/ovn/pod_selector_address_set.go b/go-controller/pkg/ovn/pod_selector_address_set.go index aecca57d1e..805b2b59d6 100644 --- a/go-controller/pkg/ovn/pod_selector_address_set.go +++ b/go-controller/pkg/ovn/pod_selector_address_set.go @@ -52,6 +52,8 @@ type PodSelectorAddressSet struct { // handlerResources holds the data that is used and updated by the handlers. handlerResources *PodSelectorAddrSetHandlerInfo + + stopChan chan struct{} } // EnsurePodSelectorAddressSet returns address set for requested (podSelector, namespaceSelector, namespace). 
@@ -159,6 +161,9 @@ func (bnc *BaseNetworkController) DeletePodSelectorAddressSet(addrSetKey, backRe func (psas *PodSelectorAddressSet) init(bnc *BaseNetworkController) error { // create pod handler resources before starting the handlers + if psas.stopChan == nil { + psas.stopChan = util.GetChildStopChan(bnc.stopChan) + } if psas.handlerResources == nil { as, err := bnc.addressSetFactory.NewAddressSet(psas.addrSetDbIDs, nil) if err != nil { @@ -174,6 +179,7 @@ func (psas *PodSelectorAddressSet) init(bnc *BaseNetworkController) error { netInfo: bnc.NetInfo, ipv4Mode: ipv4Mode, ipv6Mode: ipv6Mode, + stopChan: psas.stopChan, } } @@ -210,6 +216,11 @@ func (psas *PodSelectorAddressSet) init(bnc *BaseNetworkController) error { func (psas *PodSelectorAddressSet) destroy(bnc *BaseNetworkController) error { klog.Infof("Deleting shared address set for pod selector %s", psas.key) + if psas.stopChan != nil { + close(psas.stopChan) + psas.stopChan = nil + } + psas.needsCleanup = true if psas.handlerResources != nil { err := psas.handlerResources.destroy(bnc) @@ -241,7 +252,8 @@ func (bnc *BaseNetworkController) addPodSelectorHandler(psAddrSet *PodSelectorAd retryFramework := bnc.newNetpolRetryFramework( factory.AddressSetPodSelectorType, syncFunc, - podHandlerResources) + podHandlerResources, + psAddrSet.stopChan) podHandler, err := retryFramework.WatchResourceFiltered(namespace, podSelector) if err != nil { @@ -263,6 +275,7 @@ func (bnc *BaseNetworkController) addNamespacedPodSelectorHandler(psAddrSet *Pod factory.AddressSetNamespaceAndPodSelectorType, nil, psAddrSet.handlerResources, + psAddrSet.stopChan, ) namespaceHandler, err := retryFramework.WatchResourceFiltered("", psAddrSet.namespaceSelector) if err != nil { @@ -306,6 +319,8 @@ type PodSelectorAddrSetHandlerInfo struct { netInfo util.NetInfo ipv4Mode bool ipv6Mode bool + + stopChan chan struct{} } // idempotent @@ -540,6 +555,7 @@ func (bnc *BaseNetworkController) handleNamespaceAddUpdate(podHandlerInfo *PodSe factory.AddressSetPodSelectorType, syncFunc, podHandlerInfo, + podHandlerInfo.stopChan, ) // syncFunc and factory.AddressSetPodSelectorType add event handler also take np.RLock, // and will be called form the same thread. The same thread shouldn't take the same rlock twice. 
diff --git a/go-controller/pkg/ovn/pod_selector_address_set_test.go b/go-controller/pkg/ovn/pod_selector_address_set_test.go index 3295a9016c..87724cc683 100644 --- a/go-controller/pkg/ovn/pod_selector_address_set_test.go +++ b/go-controller/pkg/ovn/pod_selector_address_set_test.go @@ -13,6 +13,7 @@ import ( libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "net" + "runtime" "time" v1 "k8s.io/api/core/v1" @@ -141,10 +142,9 @@ var _ = ginkgo.Describe("OVN PodSelectorAddressSet", func() { }) ginkgo.It("creates one address set for multiple users with the same selector", func() { namespace1 := *newNamespace(namespaceName1) - namespace2 := *newNamespace(namespaceName2) networkPolicy1 := getMatchLabelsNetworkPolicy(netPolicyName1, namespace1.Name, "", "label1", true, true) - networkPolicy2 := getMatchLabelsNetworkPolicy(netPolicyName2, namespace2.Name, + networkPolicy2 := getMatchLabelsNetworkPolicy(netPolicyName2, namespace1.Name, "", "label1", true, true) startOvn(initialDB, []v1.Namespace{namespace1}, []knet.NetworkPolicy{*networkPolicy1, *networkPolicy2}, nil, nil) @@ -468,6 +468,33 @@ var _ = ginkgo.Describe("OVN PodSelectorAddressSet", func() { // should not be present in given address set eventuallyExpectEmptyAddressSetsExist(fakeOvn, peer, namespace1.Name) }) + ginkgo.It("cleans up retryFramework resources", func() { + namespace1 := *newNamespace(namespaceName1) + namespace1.Labels = map[string]string{"key": "value"} + startOvn(initialDB, []v1.Namespace{namespace1}, nil, nil, nil) + selector := &metav1.LabelSelector{ + MatchLabels: map[string]string{"key": "value"}, + } + + goroutinesNumInit := runtime.NumGoroutine() + // namespace selector will be run because it is not empty. + // one namespace should match the label and start a pod watchFactory. + // that gives us 2 retryFrameworks, so 2 periodicallyRetryResources goroutines. + // The request itself will create one child stopChannel, that is one more goroutine. 
+ peerASKey, _, _, err := fakeOvn.controller.EnsurePodSelectorAddressSet( + selector, selector, namespaceName1, "backRef") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(func() int { + return runtime.NumGoroutine() + }).Should(gomega.Equal(goroutinesNumInit + 3)) + + err = fakeOvn.controller.DeletePodSelectorAddressSet(peerASKey, "backRef") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // expect goroutines number to get back + gomega.Eventually(func() int { + return runtime.NumGoroutine() + }).Should(gomega.Equal(goroutinesNumInit)) + }) }) var _ = ginkgo.Describe("shortLabelSelectorString function", func() { diff --git a/go-controller/pkg/ovn/policy_test.go b/go-controller/pkg/ovn/policy_test.go index 467df8e662..fb41919ae0 100644 --- a/go-controller/pkg/ovn/policy_test.go +++ b/go-controller/pkg/ovn/policy_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "net" + "runtime" "sort" "strconv" "time" @@ -1853,6 +1854,47 @@ var _ = ginkgo.Describe("OVN NetworkPolicy Operations", func() { } gomega.Expect(app.Run([]string{app.Name})).To(gomega.Succeed()) }) + + ginkgo.It("cleans up retryFramework resources", func() { + app.Action = func(ctx *cli.Context) error { + namespace1 := *newNamespace(namespaceName1) + namespace1.Labels = map[string]string{"name": "label1"} + networkPolicy := newNetworkPolicy(netPolicyName2, namespace1.Name, metav1.LabelSelector{}, + []knet.NetworkPolicyIngressRule{{ + From: []knet.NetworkPolicyPeer{{ + NamespaceSelector: &metav1.LabelSelector{ + MatchLabels: namespace1.Labels, + }}, + }}, + }, nil) + startOvn(initialDB, []v1.Namespace{namespace1}, nil, nil, nil) + + goroutinesNumInit := runtime.NumGoroutine() + fmt.Printf("goroutinesNumInit %v", goroutinesNumInit) + // network policy will create 1 watchFactory for local pods selector, and 1 peer namespace selector + // that gives us 2 retryFrameworks, so 2 periodicallyRetryResources goroutines. + // The networkPolicy itself will create one child stopChannel, that is one more goroutine. + _, err := fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). + Create(context.TODO(), networkPolicy, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(func() int { + return runtime.NumGoroutine() + }).Should(gomega.Equal(goroutinesNumInit + 3)) + + // Delete network policy + err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). + Delete(context.TODO(), networkPolicy.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // expect goroutines number to get back + gomega.Eventually(func() int { + return runtime.NumGoroutine() + }).Should(gomega.Equal(goroutinesNumInit)) + + return nil + } + + gomega.Expect(app.Run([]string{app.Name})).To(gomega.Succeed()) + }) }) ginkgo.Context("ACL logging for network policies", func() { diff --git a/go-controller/pkg/util/sync.go b/go-controller/pkg/util/sync.go new file mode 100644 index 0000000000..3f6c8c8125 --- /dev/null +++ b/go-controller/pkg/util/sync.go @@ -0,0 +1,27 @@ +package util + +// GetChildStopChan returns a new channel that doesn't affect parentStopChan, but will be closed when +// parentStopChan is closed. May be used for child goroutines that may need to be stopped with the main goroutine or +// separately. 
+func GetChildStopChan(parentStopChan <-chan struct{}) chan struct{} { + childStopChan := make(chan struct{}) + + select { + case <-parentStopChan: + // parent is already canceled + close(childStopChan) + return childStopChan + default: + } + + go func() { + select { + case <-parentStopChan: + close(childStopChan) + return + case <-childStopChan: + return + } + }() + return childStopChan +} From 05228cc37b7d3e728529feae6debd23775964f79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Thu, 29 Jun 2023 10:26:54 +0000 Subject: [PATCH 20/31] Fix egressip test not waiting for update processing before delete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This egressip test is updating and then deleting an egressip. The expectations in between do not correctly verify if the update had been processed. Sometimes the update would not be processed at all because we get the delete event before that. Then when we process the delete, we only look at the deleted egress ip status items but not at any other status items the pod would have been previously assigned. In this specific case those other status items are not cleared and the standby egressip does not take over. This fixes the UT to correctly verify that the update has been processed. But there might be another bug to fix in the egressip logic. Signed-off-by: Jaime Caamaño Ruiz --- go-controller/pkg/ovn/egressip_test.go | 116 +++++++++++++++---------- 1 file changed, 69 insertions(+), 47 deletions(-) diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index 761d40acef..bd76f850ae 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -4199,20 +4199,22 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { recordedEvent := <-fakeOvn.fakeRecorder.Events gomega.Expect(recordedEvent).To(gomega.ContainSubstring("EgressIP object egressip-2 will not be configured for pod egressip-namespace_egress-pod since another egressIP object egressip is serving it, this is undefined")) - pas := getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - - assginedEIP := egressIPs1[0] - gomega.Expect(pas.egressIPName).To(gomega.Equal(egressIPName)) - eip1Obj, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(pas.egressStatuses[eip1Obj.Status.Items[0]]).To(gomega.Equal("")) - gomega.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeTrue()) + assignedEIP := egressIPs1[0] + var pas *podAssignmentState + gomega.Eventually(func(g gomega.Gomega) { + pas = getPodAssignmentState(&egressPod1) + g.Expect(pas).NotTo(gomega.BeNil()) + g.Expect(pas.egressIPName).To(gomega.Equal(egressIPName)) + eip1Obj, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(pas.egressStatuses[eip1Obj.Status.Items[0]]).To(gomega.Equal("")) + g.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeTrue()) + }).Should(gomega.Succeed()) podEIPSNAT := &nbdb.NAT{ UUID: "egressip-nat-UUID1", LogicalIP: egressPodIP[0].String(), - ExternalIP: assginedEIP, + ExternalIP: assignedEIP, ExternalIDs: map[string]string{ "name": pas.egressIPName, }, @@ -4386,15 +4388,18 @@
gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) // check the state of the cache for podKey - pas = getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - - gomega.Expect(pas.egressIPName).To(gomega.Equal(egressIPName)) - eip1Obj, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(pas.egressStatuses[eip1Obj.Status.Items[0]]).To(gomega.Equal("")) - gomega.Expect(pas.egressStatuses[eip1Obj.Status.Items[1]]).To(gomega.Equal("")) - gomega.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeTrue()) + var eip1Obj *egressipv1.EgressIP + gomega.Eventually(func(g gomega.Gomega) { + pas = getPodAssignmentState(&egressPod1) + g.Expect(pas).NotTo(gomega.BeNil()) + g.Expect(pas.egressIPName).To(gomega.Equal(egressIPName)) + eip1Obj, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(pas.egressStatuses).To(gomega.HaveLen(2)) + g.Expect(pas.egressStatuses[eip1Obj.Status.Items[0]]).To(gomega.Equal("")) + g.Expect(pas.egressStatuses[eip1Obj.Status.Items[1]]).To(gomega.Equal("")) + g.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeTrue()) + }).Should(gomega.Succeed()) // let's test syncPodAssignmentCache works as expected! Nuke the podAssignment cache first fakeOvn.controller.eIPC.podAssignmentMutex.Lock() @@ -4422,12 +4427,17 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), egressIPName2, metav1.DeleteOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(func() bool { - pas := getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - return pas.standbyEgressIPNames.Has(egressIPName2) - }).Should(gomega.BeFalse()) - gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName)) + gomega.Eventually(func(g gomega.Gomega) { + pas = getPodAssignmentState(&egressPod1) + g.Expect(pas).NotTo(gomega.BeNil()) + g.Expect(pas.egressIPName).To(gomega.Equal(egressIPName)) + eip1Obj, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(pas.egressStatuses).To(gomega.HaveLen(2)) + g.Expect(pas.egressStatuses[eip1Obj.Status.Items[0]]).To(gomega.Equal("")) + g.Expect(pas.egressStatuses[eip1Obj.Status.Items[1]]).To(gomega.Equal("")) + g.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeFalse()) + }).Should(gomega.Succeed()) // add back the standby egressIP object _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP2, metav1.CreateOptions{}) @@ -4445,12 +4455,18 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err = fakeOvn.controller.patchReplaceEgressIPStatus(egressIPName2, status) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(func() bool { - pas := getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - return pas.standbyEgressIPNames.Has(egressIPName2) - }).Should(gomega.BeTrue()) - gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName)) + gomega.Eventually(func(g gomega.Gomega) { + pas = getPodAssignmentState(&egressPod1) + 
g.Expect(pas).NotTo(gomega.BeNil()) + g.Expect(pas.egressIPName).To(gomega.Equal(egressIPName)) + eip1Obj, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(pas.egressStatuses).To(gomega.HaveLen(2)) + g.Expect(pas.egressStatuses[eip1Obj.Status.Items[0]]).To(gomega.Equal("")) + g.Expect(pas.egressStatuses[eip1Obj.Status.Items[1]]).To(gomega.Equal("")) + g.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeTrue()) + }).Should(gomega.Succeed()) + gomega.Eventually(func() string { return <-fakeOvn.fakeRecorder.Events }).Should(gomega.ContainSubstring("EgressIP object egressip-2 will not be configured for pod egressip-namespace_egress-pod since another egressIP object egressip is serving it, this is undefined")) @@ -4464,7 +4480,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { eIPUpdate, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ipOnNode1 := assginedEIP + ipOnNode1 := assignedEIP var ipOnNode2 string if ipOnNode1 == egressIP1 { ipOnNode2 = egressIP2 @@ -4506,24 +4522,32 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) } - gomega.Eventually(func() bool { - pas := getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - return pas.standbyEgressIPNames.Has(egressIPName2) - }).Should(gomega.BeTrue()) - gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName)) + gomega.Eventually(func(g gomega.Gomega) { + pas = getPodAssignmentState(&egressPod1) + g.Expect(pas).NotTo(gomega.BeNil()) + g.Expect(pas.egressIPName).To(gomega.Equal(egressIPName)) + eip1Obj, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(pas.egressStatuses).To(gomega.HaveLen(1)) + g.Expect(pas.egressStatuses[eip1Obj.Status.Items[0]]).To(gomega.Equal("")) + g.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeTrue()) + }).Should(gomega.Succeed()) // delete the first egressIP object and make sure the cache is updated err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), egressIPName, metav1.DeleteOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) // ensure standby takes over and we do the setup for it in OVN DB - gomega.Eventually(func() bool { - pas := getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - return pas.standbyEgressIPNames.Has(egressIPName2) - }).Should(gomega.BeFalse()) - gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName2)) + gomega.Eventually(func(g gomega.Gomega) { + pas = getPodAssignmentState(&egressPod1) + g.Expect(pas).NotTo(gomega.BeNil()) + g.Expect(pas.egressIPName).To(gomega.Equal(egressIPName2)) + eip1Obj, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP2.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(pas.egressStatuses).To(gomega.HaveLen(1)) + g.Expect(pas.egressStatuses[eip1Obj.Status.Items[0]]).To(gomega.Equal("")) + g.Expect(pas.standbyEgressIPNames).To(gomega.BeEmpty()) + }).Should(gomega.Succeed()) finalDatabaseStatewithPod = expectedDatabaseStatewithPod 
finalDatabaseStatewithPod = append(expectedDatabaseStatewithPod, podLSP) @@ -4576,9 +4600,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) // we don't have any egressIPs, so cache is nil - gomega.Eventually(func() bool { - return getPodAssignmentState(&egressPod1) != nil - }).Should(gomega.BeFalse()) + gomega.Expect(getPodAssignmentState(&egressPod1)).To(gomega.BeNil()) return nil } From 37e888fecb2f422fc8e58f1ca75db3a12677a53d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Wed, 7 Jun 2023 16:30:23 +0000 Subject: [PATCH 21/31] Enable multi-homing e2e tests for localnet with IC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jaime Caamaño Ruiz --- test/e2e/multihoming.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/e2e/multihoming.go b/test/e2e/multihoming.go index 92f4f97b9d..5ec7f6a3bb 100644 --- a/test/e2e/multihoming.go +++ b/test/e2e/multihoming.go @@ -60,7 +60,7 @@ var _ = Describe("Multi Homing", func() { table.DescribeTable("is able to get to the Running phase", func(netConfigParams networkAttachmentConfigParams, podConfig podConfiguration) { netConfig := newNetworkAttachmentConfig(netConfigParams) - if netConfig.topology != "layer3" { + if netConfig.topology == "layer2" { if isInterconnectEnabled() { e2eskipper.Skipf( "Secondary network with topology %s is not yet supported with multiple zones interconnect deployment", netConfig.topology, @@ -275,8 +275,8 @@ var _ = Describe("Multi Homing", func() { func(netConfigParams networkAttachmentConfigParams, clientPodConfig podConfiguration, serverPodConfig podConfiguration) { netConfig := newNetworkAttachmentConfig(netConfigParams) - // Skip the test if the netConfig topology is not layer3 and the deployment is multi zone - if netConfig.topology != "layer3" { + // Skip the test if the netConfig topology is layer2 and the deployment is multi zone + if netConfig.topology == "layer2" { if isInterconnectEnabled() { e2eskipper.Skipf( "Secondary network with topology %s is not yet supported with multiple zones interconnect deployment", netConfig.topology, @@ -694,8 +694,8 @@ var _ = Describe("Multi Homing", func() { func(netConfigParams networkAttachmentConfigParams, allowedClientPodConfig podConfiguration, blockedClientPodConfig podConfiguration, serverPodConfig podConfiguration, policy *mnpapi.MultiNetworkPolicy) { netConfig := newNetworkAttachmentConfig(netConfigParams) - // Skip the test if the netConfig topology is not layer3 and the deployment is multi zone - if netConfig.topology != "layer3" { + // Skip the test if the netConfig topology is layer2 and the deployment is multi zone + if netConfig.topology == "layer2" { if isInterconnectEnabled() { e2eskipper.Skipf( "Secondary network with topology %s is not yet supported with multiple zones interconnect deployment", netConfig.topology, From 9ad047947b229e2d516f8d3b8c05c69e4020fed9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Tue, 13 Jun 2023 09:39:20 +0000 Subject: [PATCH 22/31] Add ID Allocator interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ID allocator will be used from different packages. Let's add an interface to export its functionality and for easy mocking.
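For illustration only, a sketch of the mocking this enables; stubIDAllocator is a hypothetical test double, not something added by this patch:

    // a trivial test double satisfying the exported Allocator interface
    type stubIDAllocator struct{ next int }

    func (a *stubIDAllocator) AllocateID(name string) (int, error) { return a.next, nil }
    func (a *stubIDAllocator) ReserveID(name string, id int) error { return nil }
    func (a *stubIDAllocator) ReleaseID(name string)               {}

Callers that currently hold the concrete *idAllocator can depend on the Allocator interface instead, which the follow-up patches in this series build on.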
Signed-off-by: Jaime Caamaño Ruiz --- .../pkg/clustermanager/id_allocator.go | 19 +++++++++++++------ .../secondary_network_cluster_manager.go | 6 +++--- .../clustermanager/zone_cluster_controller.go | 12 ++++++------ 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/go-controller/pkg/clustermanager/id_allocator.go b/go-controller/pkg/clustermanager/id_allocator.go index 62be425f03..980b802eee 100644 --- a/go-controller/pkg/clustermanager/id_allocator.go +++ b/go-controller/pkg/clustermanager/id_allocator.go @@ -11,6 +11,13 @@ const ( invalidID = -1 ) +// Allocator of IDs for a set of resources identified by name +type Allocator interface { + AllocateID(name string) (int, error) + ReserveID(name string, id int) error + ReleaseID(name string) +} + // idAllocator is used to allocate id for a resource and store the resource - id in a map type idAllocator struct { nameIdMap sync.Map @@ -27,9 +34,9 @@ func NewIDAllocator(name string, maxIds int) (*idAllocator, error) { }, nil } -// allocateID allocates an id for the resource 'name' and returns the id. +// AllocateID allocates an id for the resource 'name' and returns the id. // If the id for the resource is already allocated, it returns the cached id. -func (idAllocator *idAllocator) allocateID(name string) (int, error) { +func (idAllocator *idAllocator) AllocateID(name string) (int, error) { // Check the idMap and return the id if its already allocated v, ok := idAllocator.nameIdMap.Load(name) if ok { @@ -46,11 +53,11 @@ func (idAllocator *idAllocator) allocateID(name string) (int, error) { return id, nil } -// reserveID reserves the id 'id' for the resource 'name'. It returns an +// ReserveID reserves the id 'id' for the resource 'name'. It returns an // error if the 'id' is already reserved by a resource other than 'name'. // It also returns an error if the resource 'name' has a different 'id' // already reserved. -func (idAllocator *idAllocator) reserveID(name string, id int) error { +func (idAllocator *idAllocator) ReserveID(name string, id int) error { v, ok := idAllocator.nameIdMap.Load(name) if ok { if v.(int) == id { @@ -69,8 +76,8 @@ func (idAllocator *idAllocator) reserveID(name string, id int) error { return nil } -// releaseID releases the id allocated for the resource 'name' -func (idAllocator *idAllocator) releaseID(name string) { +// ReleaseID releases the id allocated for the resource 'name' +func (idAllocator *idAllocator) ReleaseID(name string) { v, ok := idAllocator.nameIdMap.Load(name) if ok { idAllocator.idBitmap.Release(v.(int)) diff --git a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go index 47c37662fe..538a31acfd 100644 --- a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go +++ b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go @@ -41,7 +41,7 @@ func newSecondaryNetworkClusterManager(ovnClient *util.OVNClusterManagerClientse } // Reserve the id 0 for the default network. - if err := networkIDAllocator.reserveID("default", defaultNetworkID); err != nil { + if err := networkIDAllocator.ReserveID("default", defaultNetworkID); err != nil { return nil, fmt.Errorf("idAllocator failed to reserve defaultNetworkID %d", defaultNetworkID) } sncm := &secondaryNetworkClusterManager{ @@ -78,7 +78,7 @@ func (sncm *secondaryNetworkClusterManager) Start() error { // two networks have the same id. We will resync the node // annotations correctly when the network controller // is created. 
- _ = sncm.networkIDAllocator.reserveID(networkName, id) + _ = sncm.networkIDAllocator.ReserveID(networkName, id) } } } @@ -100,7 +100,7 @@ func (sncm *secondaryNetworkClusterManager) NewNetworkController(nInfo util.NetI klog.Infof("Creating new network controller for network %s of topology %s", nInfo.GetNetworkName(), nInfo.TopologyType()) - networkId, err := sncm.networkIDAllocator.allocateID(nInfo.GetNetworkName()) + networkId, err := sncm.networkIDAllocator.AllocateID(nInfo.GetNetworkName()) if err != nil { return nil, fmt.Errorf("failed to create NetworkController for secondary layer3 network %s : %w", nInfo.GetNetworkName(), err) } diff --git a/go-controller/pkg/clustermanager/zone_cluster_controller.go b/go-controller/pkg/clustermanager/zone_cluster_controller.go index 0b6fa09650..f9ef085ecf 100644 --- a/go-controller/pkg/clustermanager/zone_cluster_controller.go +++ b/go-controller/pkg/clustermanager/zone_cluster_controller.go @@ -56,10 +56,10 @@ func newZoneClusterController(ovnClient *util.OVNClusterManagerClientset, wf *fa } // Reserve the id 0. We don't want to assign this id to any of the nodes. - if err := nodeIDAllocator.reserveID("zero", 0); err != nil { + if err := nodeIDAllocator.ReserveID("zero", 0); err != nil { return nil, fmt.Errorf("idAllocator failed to reserve id 0") } - if err := nodeIDAllocator.reserveID("one", 1); err != nil { + if err := nodeIDAllocator.ReserveID("one", 1); err != nil { return nil, fmt.Errorf("idAllocator failed to reserve id 1") } @@ -157,7 +157,7 @@ func (zcc *zoneClusterController) Stop() { // handleAddUpdateNodeEvent handles the add or update node event func (zcc *zoneClusterController) handleAddUpdateNodeEvent(node *corev1.Node) error { - allocatedNodeID, err := zcc.nodeIDAllocator.allocateID(node.Name) + allocatedNodeID, err := zcc.nodeIDAllocator.AllocateID(node.Name) if err != nil { return fmt.Errorf("failed to allocate an id to the node %s : err - %w", node.Name, err) } @@ -217,7 +217,7 @@ func (zcc *zoneClusterController) handleAddUpdateNodeEvent(node *corev1.Node) er // handleAddUpdateNodeEvent handles the delete node event func (zcc *zoneClusterController) handleDeleteNode(node *corev1.Node) error { - zcc.nodeIDAllocator.releaseID(node.Name) + zcc.nodeIDAllocator.ReleaseID(node.Name) return nil } @@ -237,7 +237,7 @@ func (zcc *zoneClusterController) syncNodeIDs(nodes []interface{}) error { nodeID := util.GetNodeID(node) if nodeID != util.InvalidNodeID { klog.Infof("Node %s has the id %d set", node.Name, nodeID) - if err := zcc.nodeIDAllocator.reserveID(node.Name, nodeID); err != nil { + if err := zcc.nodeIDAllocator.ReserveID(node.Name, nodeID); err != nil { // The id set on this node is duplicate. 
klog.Infof("Node %s has a duplicate id %d set", node.Name, nodeID) duplicateIdNodes = append(duplicateIdNodes, node.Name) @@ -246,7 +246,7 @@ func (zcc *zoneClusterController) syncNodeIDs(nodes []interface{}) error { } for i := range duplicateIdNodes { - newNodeID, err := zcc.nodeIDAllocator.allocateID(duplicateIdNodes[i]) + newNodeID, err := zcc.nodeIDAllocator.AllocateID(duplicateIdNodes[i]) if err != nil { return fmt.Errorf("failed to allocate id for node %s : err - %w", duplicateIdNodes[i], err) } else { From cc23a1c722cf230195a594ed14e4da0db54bcb9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Tue, 13 Jun 2023 09:45:58 +0000 Subject: [PATCH 23/31] Move ID allocator to allocator package MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So that it can be used to allocate tunnel IDs for pods from the clustermanager/pod package. Signed-off-by: Jaime Caamaño Ruiz --- .../id_allocator.go => allocator/id/allocator.go} | 4 ++-- .../pkg/clustermanager/secondary_network_cluster_manager.go | 5 +++-- go-controller/pkg/clustermanager/zone_cluster_controller.go | 5 +++-- 3 files changed, 8 insertions(+), 6 deletions(-) rename go-controller/pkg/{clustermanager/id_allocator.go => allocator/id/allocator.go} (96%) diff --git a/go-controller/pkg/clustermanager/id_allocator.go b/go-controller/pkg/allocator/id/allocator.go similarity index 96% rename from go-controller/pkg/clustermanager/id_allocator.go rename to go-controller/pkg/allocator/id/allocator.go index 980b802eee..c649b4efe1 100644 --- a/go-controller/pkg/clustermanager/id_allocator.go +++ b/go-controller/pkg/allocator/id/allocator.go @@ -1,4 +1,4 @@ -package clustermanager +package id import ( "fmt" @@ -25,7 +25,7 @@ type idAllocator struct { } // NewIDAllocator returns an IDAllocator -func NewIDAllocator(name string, maxIds int) (*idAllocator, error) { +func NewIDAllocator(name string, maxIds int) (Allocator, error) { idBitmap := bitmapallocator.NewContiguousAllocationMap(maxIds, name) return &idAllocator{ diff --git a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go index 538a31acfd..4ea1d92054 100644 --- a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go +++ b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go @@ -7,6 +7,7 @@ import ( "k8s.io/client-go/tools/record" "k8s.io/klog/v2" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" @@ -29,13 +30,13 @@ type secondaryNetworkClusterManager struct { ovnClient *util.OVNClusterManagerClientset watchFactory *factory.WatchFactory // networkIDAllocator is used to allocate a unique ID for each secondary layer3 network - networkIDAllocator *idAllocator + networkIDAllocator id.Allocator } func newSecondaryNetworkClusterManager(ovnClient *util.OVNClusterManagerClientset, wf *factory.WatchFactory, recorder record.EventRecorder) (*secondaryNetworkClusterManager, error) { klog.Infof("Creating secondary network cluster manager") - networkIDAllocator, err := NewIDAllocator("NetworkIDs", maxSecondaryNetworkIDs) + networkIDAllocator, err := id.NewIDAllocator("NetworkIDs", maxSecondaryNetworkIDs) if err != nil { return nil, fmt.Errorf("failed to create an IdAllocator for the secondary network ids, 
err: %v", err) } diff --git a/go-controller/pkg/clustermanager/zone_cluster_controller.go b/go-controller/pkg/clustermanager/zone_cluster_controller.go index f9ef085ecf..88281adf1e 100644 --- a/go-controller/pkg/clustermanager/zone_cluster_controller.go +++ b/go-controller/pkg/clustermanager/zone_cluster_controller.go @@ -11,6 +11,7 @@ import ( cache "k8s.io/client-go/tools/cache" "k8s.io/klog/v2" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" @@ -37,7 +38,7 @@ type zoneClusterController struct { retryNodes *objretry.RetryFramework // ID allocator for the nodes - nodeIDAllocator *idAllocator + nodeIDAllocator id.Allocator // node gateway router port IP generators (connecting to the join switch) nodeGWRouterLRPIPv4Generator *ipGenerator @@ -50,7 +51,7 @@ type zoneClusterController struct { func newZoneClusterController(ovnClient *util.OVNClusterManagerClientset, wf *factory.WatchFactory) (*zoneClusterController, error) { // Since we don't assign 0 to any node, create IDAllocator with one extra element in maxIds. - nodeIDAllocator, err := NewIDAllocator("NodeIDs", maxNodeIDs+1) + nodeIDAllocator, err := id.NewIDAllocator("NodeIDs", maxNodeIDs+1) if err != nil { return nil, fmt.Errorf("failed to create an IdAllocator for the nodes, err: %w", err) } From e7763f1ee9c1daf6b01c62a7dad8fa901b0fdb40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Tue, 13 Jun 2023 09:52:23 +0000 Subject: [PATCH 24/31] Add Named ID allocator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tunnel IDs will be allocated for a specific network from a context that should not affect allocation of other networks. Let's protect against this with an interface bounded to a specific network. 
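For illustration, a minimal usage sketch of the bounded interface added below; the allocator name, size and key ("TunnelIDs", 1000, "tenantblue") are hypothetical and error handling is elided:

    allocator, _ := id.NewIDAllocator("TunnelIDs", 1000)
    named := allocator.ForName("tenantblue") // NamedAllocator bound to a single key
    tunnelID, _ := named.AllocateID()        // same as allocator.AllocateID("tenantblue")
    // ... use tunnelID ...
    named.ReleaseID()                        // releases only this key's ID; other names are untouched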
Signed-off-by: Jaime Caamaño Ruiz --- go-controller/pkg/allocator/id/allocator.go | 32 +++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/go-controller/pkg/allocator/id/allocator.go b/go-controller/pkg/allocator/id/allocator.go index c649b4efe1..271973eb14 100644 --- a/go-controller/pkg/allocator/id/allocator.go +++ b/go-controller/pkg/allocator/id/allocator.go @@ -16,6 +16,14 @@ type Allocator interface { AllocateID(name string) (int, error) ReserveID(name string, id int) error ReleaseID(name string) + ForName(name string) NamedAllocator +} + +// NamedAllocator of IDs for a specific resource +type NamedAllocator interface { + AllocateID() (int, error) + ReserveID(int) error + ReleaseID() } // idAllocator is used to allocate id for a resource and store the resource - id in a map @@ -84,3 +92,27 @@ func (idAllocator *idAllocator) ReleaseID(name string) { idAllocator.nameIdMap.Delete(name) } } + +func (idAllocator *idAllocator) ForName(name string) NamedAllocator { + return &namedAllocator{ + name: name, + allocator: idAllocator, + } +} + +type namedAllocator struct { + name string + allocator *idAllocator +} + +func (allocator *namedAllocator) AllocateID() (int, error) { + return allocator.allocator.AllocateID(allocator.name) +} + +func (allocator *namedAllocator) ReserveID(id int) error { + return allocator.allocator.ReserveID(allocator.name, id) +} + +func (allocator *namedAllocator) ReleaseID() { + allocator.allocator.ReleaseID(allocator.name) +} From 8b0bcb1bcd6ef08a1533ed202cb5c12f9990e7b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Tue, 13 Jun 2023 10:18:12 +0000 Subject: [PATCH 25/31] Allocate a tunnel ID with the PodAnnotation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pods attached to secondary networks with layer2 topologies on interconnect require a tunnel ID to be allocated and configured as the tunnel key on their logical switch ports on the transit switch. Allocate this tunnel ID in the PodAnnotation as it shares the same lifecycle as the other data stored there.
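For illustration, a sketch of the annotation round-trip this enables; the MAC, tunnel ID value and NAD name used here are hypothetical:

    mac, _ := net.ParseMAC("0a:58:0a:64:c8:05")
    ann := &util.PodAnnotation{MAC: mac, TunnelID: 7}
    annotations, _ := util.MarshalPodAnnotation(map[string]string{}, ann, "default/tenantblue")
    // the "default/tenantblue" entry of the pod-networks annotation now carries
    // "tunnel_id": 7, and unmarshalling restores it:
    restored, _ := util.UnmarshalPodAnnotation(annotations, "default/tenantblue")
    // restored.TunnelID == 7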
Signed-off-by: Jaime Caamaño Ruiz --- .../pkg/allocator/pod/pod_annotation.go | 152 ++++++++++++--- .../pkg/allocator/pod/pod_annotation_test.go | 177 +++++++++++++++--- go-controller/pkg/util/multi_network.go | 5 + go-controller/pkg/util/pod_annotation.go | 12 +- 4 files changed, 294 insertions(+), 52 deletions(-) diff --git a/go-controller/pkg/allocator/pod/pod_annotation.go b/go-controller/pkg/allocator/pod/pod_annotation.go index e61c42bef7..01a58efb14 100644 --- a/go-controller/pkg/allocator/pod/pod_annotation.go +++ b/go-controller/pkg/allocator/pod/pod_annotation.go @@ -10,6 +10,7 @@ import ( nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip/subnet" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" @@ -75,10 +76,84 @@ func allocatePodAnnotation( podAnnotation *util.PodAnnotation, err error) { + // no id allocation + var idAllocator id.NamedAllocator + + allocateToPodWithRollback := func(pod *v1.Pod) (*v1.Pod, func(), error) { + var rollback func() + pod, podAnnotation, rollback, err = allocatePodAnnotationWithRollback( + ipAllocator, + idAllocator, + netInfo, + pod, + network, + reallocateIP) + return pod, rollback, err + } + + err = util.UpdatePodWithRetryOrRollback( + podLister, + kube, + pod, + allocateToPodWithRollback, + ) + + if err != nil { + return nil, nil, err + } + + return pod, podAnnotation, nil +} + +// AllocatePodAnnotationWithTunnelID allocates the PodAnnotation which includes +// IPs, a mac address, routes, gateways and a tunnel ID. Returns the allocated +// pod annotation and the updated pod. Returns a nil pod and the existing +// PodAnnotation if no updates are warranted to the pod. +// +// The allocation can be requested through the network selection element or +// derived from the allocator provided IPs. If the requested IPs cannot be +// honored, a new set of IPs will be allocated unless reallocateIP is set to +// false. +func (allocator *PodAnnotationAllocator) AllocatePodAnnotationWithTunnelID( + ipAllocator subnet.NamedAllocator, + idAllocator id.NamedAllocator, + pod *v1.Pod, + network *nadapi.NetworkSelectionElement, + reallocateIP bool) ( + *v1.Pod, + *util.PodAnnotation, + error) { + + return allocatePodAnnotationWithTunnelID( + allocator.podLister, + allocator.kube, + ipAllocator, + idAllocator, + allocator.netInfo, + pod, + network, + reallocateIP, + ) +} + +func allocatePodAnnotationWithTunnelID( + podLister listers.PodLister, + kube kube.Interface, + ipAllocator subnet.NamedAllocator, + idAllocator id.NamedAllocator, + netInfo util.NetInfo, + pod *v1.Pod, + network *nadapi.NetworkSelectionElement, + reallocateIP bool) ( + updatedPod *v1.Pod, + podAnnotation *util.PodAnnotation, + err error) { + allocateToPodWithRollback := func(pod *v1.Pod) (*v1.Pod, func(), error) { var rollback func() pod, podAnnotation, rollback, err = allocatePodAnnotationWithRollback( ipAllocator, + idAllocator, netInfo, pod, network, @@ -101,14 +176,15 @@ func allocatePodAnnotation( } // allocatePodAnnotationWithRollback allocates the PodAnnotation which includes -// IPs, a mac address, routes and gateways. Returns the allocated pod annotation -// and a pod with that annotation set. Returns a nil pod and the existing +// IPs, a mac address, routes, gateways and an ID. 
Returns the allocated pod +// annotation and a pod with that annotation set. Returns a nil pod and the existing // PodAnnotation if no updates are warranted to the pod. -// The allocation can be requested through the network selection element or -// derived from the allocator provided IPs. If no IP allocation is required, set -// allocateIP to false. If the requested IPs cannot be honored, a new set of IPs -// will be allocated unless reallocateIP is set to false. +// The allocation of network information can be requested through the network +// selection element or derived from the allocator provided IPs. If no IP +// allocation is required, set allocateIP to false. If the requested IPs cannot +// be honored, a new set of IPs will be allocated unless reallocateIP is set to +// false. // A rollback function is returned to rollback the IP allocation if there was // any. @@ -118,6 +194,7 @@ func allocatePodAnnotation( // information from it as a side-effect. func allocatePodAnnotationWithRollback( ipAllocator subnet.NamedAllocator, + idAllocator id.NamedAllocator, netInfo util.NetInfo, pod *v1.Pod, network *nadapi.NetworkSelectionElement, @@ -138,7 +215,13 @@ func allocatePodAnnotationWithRollback( // assigned via the IPAM manager. Note we are using a named return variable // for defer to work correctly. var releaseIPs []*net.IPNet + var releaseID int rollback = func() { + if releaseID != 0 { + idAllocator.ReleaseID() + klog.V(5).Infof("Released ID %d", releaseID) + releaseID = 0 + } if len(releaseIPs) == 0 { return } @@ -157,6 +240,36 @@ func allocatePodAnnotationWithRollback( } }() + podAnnotation, _ = util.UnmarshalPodAnnotation(pod.Annotations, nadName) + if podAnnotation == nil { + podAnnotation = &util.PodAnnotation{} + } + + // work on a tentative pod annotation based on the existing one + tentative := &util.PodAnnotation{ + IPs: podAnnotation.IPs, + MAC: podAnnotation.MAC, + TunnelID: podAnnotation.TunnelID, + } + + hasIDAllocation := util.DoesNetworkRequireTunnelIDs(netInfo) + needsID := tentative.TunnelID == 0 && hasIDAllocation + + if hasIDAllocation { + if needsID { + tentative.TunnelID, err = idAllocator.AllocateID() + } else { + err = idAllocator.ReserveID(tentative.TunnelID) + } + + if err != nil { + err = fmt.Errorf("failed to assign pod id for %s: %w", podDesc, err) + return + } + + releaseID = tentative.TunnelID + } + hasIPAM := util.DoesNetworkRequireIPAM(netInfo) hasIPRequest := network != nil && len(network.IPRequest) > 0 hasStaticIPRequest := hasIPRequest && !reallocateIP @@ -170,20 +283,9 @@ func allocatePodAnnotationWithRollback( return } - podAnnotation, _ = util.UnmarshalPodAnnotation(pod.Annotations, nadName) - if podAnnotation == nil { - podAnnotation = &util.PodAnnotation{} - } - - // work on a tentative pod annotation based on the existing one - tentative := &util.PodAnnotation{ - IPs: podAnnotation.IPs, - MAC: podAnnotation.MAC, - } - // we need to update the annotation if it is missing IPs or MAC - needsAnnotationUpdate := len(tentative.IPs) == 0 && (hasIPAM || hasIPRequest) - needsAnnotationUpdate = needsAnnotationUpdate || len(tentative.MAC) == 0 + needsIPOrMAC := len(tentative.IPs) == 0 && (hasIPAM || hasIPRequest) + needsIPOrMAC = needsIPOrMAC || len(tentative.MAC) == 0 reallocateOnNonStaticIPRequest := len(tentative.IPs) == 0 && hasIPRequest && !hasStaticIPRequest if len(tentative.IPs) == 0 { @@ -198,13 +300,13 @@ func allocatePodAnnotationWithRollback( if hasIPAM { if len(tentative.IPs) > 0 { if err = ipAllocator.AllocateIPs(tentative.IPs); err != nil && 
!ip.IsErrAllocated(err) { - err = fmt.Errorf("failed to ensure requested or annotated IPs %v for pod %s: %w", + err = fmt.Errorf("failed to ensure requested or annotated IPs %v for %s: %w", util.StringSlice(tentative.IPs), podDesc, err) if !reallocateOnNonStaticIPRequest { return } klog.Warning(err.Error()) - needsAnnotationUpdate = true + needsIPOrMAC = true tentative.IPs = nil } @@ -220,7 +322,7 @@ func allocatePodAnnotationWithRollback( if len(tentative.IPs) == 0 { tentative.IPs, err = ipAllocator.AllocateNextIPs() if err != nil { - err = fmt.Errorf("failed to assign pod addresses for pod %s: %w", podDesc, err) + err = fmt.Errorf("failed to assign pod addresses for %s: %w", podDesc, err) return } @@ -229,7 +331,7 @@ func allocatePodAnnotationWithRollback( } } - if needsAnnotationUpdate { + if needsIPOrMAC { // handle mac address if network != nil && network.MacRequest != "" { tentative.MAC, err = net.ParseMAC(network.MacRequest) @@ -247,7 +349,11 @@ func allocatePodAnnotationWithRollback( if err != nil { return } + } + + needsAnnotationUpdate := needsIPOrMAC || needsID + if needsAnnotationUpdate { updatedPod = pod updatedPod.Annotations, err = util.MarshalPodAnnotation(updatedPod.Annotations, tentative, nadName) podAnnotation = tentative diff --git a/go-controller/pkg/allocator/pod/pod_annotation_test.go b/go-controller/pkg/allocator/pod/pod_annotation_test.go index 3b18d6bb9d..8929cd1efd 100644 --- a/go-controller/pkg/allocator/pod/pod_annotation_test.go +++ b/go-controller/pkg/allocator/pod/pod_annotation_test.go @@ -11,9 +11,11 @@ import ( cnitypes "github.com/containernetworking/cni/pkg/types" nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" ipam "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip/subnet" ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -21,29 +23,47 @@ import ( "github.com/onsi/gomega" ) -type allocatorStub struct { +type ipAllocatorStub struct { netxtIPs []*net.IPNet allocateIPsError error releasedIPs []*net.IPNet } -func (a *allocatorStub) AllocateIPs(ips []*net.IPNet) error { +func (a *ipAllocatorStub) AllocateIPs(ips []*net.IPNet) error { return a.allocateIPsError } -func (a *allocatorStub) AllocateNextIPs() ([]*net.IPNet, error) { +func (a *ipAllocatorStub) AllocateNextIPs() ([]*net.IPNet, error) { return a.netxtIPs, nil } -func (a *allocatorStub) ReleaseIPs(ips []*net.IPNet) error { +func (a *ipAllocatorStub) ReleaseIPs(ips []*net.IPNet) error { a.releasedIPs = ips return nil } -func (a *allocatorStub) IsErrAllocated(err error) bool { +func (a *ipAllocatorStub) IsErrAllocated(err error) bool { return errors.Is(err, ipam.ErrAllocated) } +type idAllocatorStub struct { + nextID int + reserveIDError error + releasedID bool +} + +func (a *idAllocatorStub) AllocateID() (int, error) { + return a.nextID, nil +} + +func (a *idAllocatorStub) ReserveID(id int) error { + return a.reserveIDError +} + +func (a *idAllocatorStub) ReleaseID() { + a.releasedID = true +} + func Test_allocatePodAnnotationWithRollback(t *testing.T) { randomMac, err := util.GenerateRandMAC() if err != nil { @@ -58,6 
+78,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { type args struct { ipAllocator subnet.NamedAllocator + idAllocator id.NamedAllocator network *nadapi.NetworkSelectionElement reallocate bool } @@ -65,6 +86,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { name string args args ipam bool + idAllocation bool podAnnotation *util.PodAnnotation invalidNetworkAnnotation bool wantUpdatedPod bool @@ -72,6 +94,8 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { wantPodAnnotation *util.PodAnnotation wantReleasedIPs []*net.IPNet wantReleasedIPsOnRollback []*net.IPNet + wantReleaseID bool + wantRelasedIDOnRollback bool wantErr bool }{ { @@ -100,7 +124,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { network: &nadapi.NetworkSelectionElement{ IPRequest: []string{"192.168.0.4/24"}, }, - ipAllocator: &allocatorStub{ + ipAllocator: &ipAllocatorStub{ netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), }, }, @@ -119,7 +143,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { IPRequest: []string{"192.168.0.4/24"}, GatewayRequest: ovntest.MustParseIPs("192.168.0.1"), }, - ipAllocator: &allocatorStub{ + ipAllocator: &ipAllocatorStub{ netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), }, }, @@ -149,7 +173,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { name: "expect new IP", ipam: true, args: args{ - ipAllocator: &allocatorStub{ + ipAllocator: &ipAllocatorStub{ netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), }, }, @@ -177,7 +201,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.3/24")[0].IP), }, args: args{ - ipAllocator: &allocatorStub{}, + ipAllocator: &ipAllocatorStub{}, }, wantPodAnnotation: &util.PodAnnotation{ IPs: ovntest.MustParseIPNets("192.168.0.3/24"), @@ -195,7 +219,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.3/24")[0].IP), }, args: args{ - ipAllocator: &allocatorStub{ + ipAllocator: &ipAllocatorStub{ allocateIPsError: ipam.ErrAllocated, }, }, @@ -214,7 +238,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.3/24")[0].IP), }, args: args{ - ipAllocator: &allocatorStub{ + ipAllocator: &ipAllocatorStub{ allocateIPsError: errors.New("Allocate IPs failed"), }, }, @@ -230,7 +254,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { network: &nadapi.NetworkSelectionElement{ IPRequest: []string{"192.168.0.4/24"}, }, - ipAllocator: &allocatorStub{ + ipAllocator: &ipAllocatorStub{ netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), }, }, @@ -258,7 +282,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { network: &nadapi.NetworkSelectionElement{ IPRequest: []string{"192.168.0.4/24"}, }, - ipAllocator: &allocatorStub{ + ipAllocator: &ipAllocatorStub{ netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), allocateIPsError: ipam.ErrAllocated, }, @@ -286,7 +310,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { network: &nadapi.NetworkSelectionElement{ IPRequest: []string{"192.168.0.4/24"}, }, - ipAllocator: &allocatorStub{ + ipAllocator: &ipAllocatorStub{ netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), allocateIPsError: errors.New("Allocate IPs failed"), }, @@ -313,7 +337,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { network: &nadapi.NetworkSelectionElement{ IPRequest: []string{"ivalid"}, }, - ipAllocator: &allocatorStub{ + 
ipAllocator: &ipAllocatorStub{ netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), }, }, @@ -327,7 +351,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { network: &nadapi.NetworkSelectionElement{ MacRequest: "ivalid", }, - ipAllocator: &allocatorStub{ + ipAllocator: &ipAllocatorStub{ netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), }, }, @@ -343,7 +367,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { network: &nadapi.NetworkSelectionElement{ MacRequest: requestedMAC, }, - ipAllocator: &allocatorStub{ + ipAllocator: &ipAllocatorStub{ netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), }, }, @@ -368,9 +392,9 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { ipam: true, args: args{ network: &nadapi.NetworkSelectionElement{ - MacRequest: "ivalid", + MacRequest: "invalid", }, - ipAllocator: &allocatorStub{ + ipAllocator: &ipAllocatorStub{ netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), }, }, @@ -378,6 +402,88 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { wantErr: true, wantReleasedIPs: ovntest.MustParseIPNets("192.168.0.3/24"), }, + { + // on networks with ID allocation, expect allocated ID + name: "expect ID allocation", + idAllocation: true, + args: args{ + idAllocator: &idAllocatorStub{ + nextID: 100, + }, + }, + podAnnotation: &util.PodAnnotation{ + MAC: randomMac, + }, + wantPodAnnotation: &util.PodAnnotation{ + MAC: randomMac, + TunnelID: 100, + }, + wantUpdatedPod: true, + wantRelasedIDOnRollback: true, + }, + { + // on networks with ID allocation, already allocated, expect + // allocated ID + name: "expect already allocated ID", + idAllocation: true, + args: args{ + idAllocator: &idAllocatorStub{}, + }, + podAnnotation: &util.PodAnnotation{ + MAC: randomMac, + TunnelID: 200, + }, + wantPodAnnotation: &util.PodAnnotation{ + MAC: randomMac, + TunnelID: 200, + }, + wantRelasedIDOnRollback: true, + }, + { + // ID allocation error + name: "expect ID allocation error", + idAllocation: true, + args: args{ + idAllocator: &idAllocatorStub{ + reserveIDError: errors.New("ID allocation error"), + }, + }, + podAnnotation: &util.PodAnnotation{ + MAC: randomMac, + TunnelID: 200, + }, + wantErr: true, + }, + { + // ID allocation error + name: "expect ID allocation error", + idAllocation: true, + args: args{ + idAllocator: &idAllocatorStub{ + reserveIDError: errors.New("ID allocation error"), + }, + }, + podAnnotation: &util.PodAnnotation{ + MAC: randomMac, + TunnelID: 200, + }, + wantErr: true, + }, + { + // expect ID release on error + name: "expect error, release ID", + idAllocation: true, + args: args{ + network: &nadapi.NetworkSelectionElement{ + MacRequest: "invalid", + }, + idAllocator: &idAllocatorStub{ + nextID: 300, + }, + }, + wantErr: true, + wantReleaseID: true, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -395,10 +501,10 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { var netInfo util.NetInfo netInfo = &util.DefaultNetInfo{} nadName := types.DefaultNetworkName - if !tt.ipam { + if !tt.ipam || tt.idAllocation { nadName = util.GetNADName(network.Namespace, network.Name) netInfo, err = util.NewNetInfo(&ovncnitypes.NetConf{ - Topology: types.LocalnetTopology, + Topology: types.Layer2Topology, NetConf: cnitypes.NetConf{ Name: network.Name, }, @@ -409,6 +515,8 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { } } + config.OVNKubernetesFeature.EnableInterconnect = tt.idAllocation + pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "pod", @@ -430,6 +538,7 @@ func 
Test_allocatePodAnnotationWithRollback(t *testing.T) { pod, podAnnotation, rollback, err := allocatePodAnnotationWithRollback( tt.args.ipAllocator, + tt.args.idAllocator, netInfo, pod, network, @@ -437,13 +546,27 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { ) if tt.args.ipAllocator != nil { - releasedIPs := tt.args.ipAllocator.(*allocatorStub).releasedIPs - g.Expect(releasedIPs).To(gomega.Equal(tt.wantReleasedIPs), "Release on error behaved unexpectedly") - tt.args.ipAllocator.(*allocatorStub).releasedIPs = nil + releasedIPs := tt.args.ipAllocator.(*ipAllocatorStub).releasedIPs + g.Expect(releasedIPs).To(gomega.Equal(tt.wantReleasedIPs), "Release IP on error behaved unexpectedly") + tt.args.ipAllocator.(*ipAllocatorStub).releasedIPs = nil + } + + if tt.args.idAllocator != nil { + releasedID := tt.args.idAllocator.(*idAllocatorStub).releasedID + g.Expect(releasedID).To(gomega.Equal(tt.wantReleaseID), "Release ID on error behaved unexpectedly") + tt.args.idAllocator.(*idAllocatorStub).releasedID = false + } + + rollback() + + if tt.args.ipAllocator != nil { + releasedIPs := tt.args.ipAllocator.(*ipAllocatorStub).releasedIPs + g.Expect(releasedIPs).To(gomega.Equal(tt.wantReleasedIPsOnRollback), "Release IP on rollback behaved unexpectedly") + } - rollback() - releasedIPs = tt.args.ipAllocator.(*allocatorStub).releasedIPs - g.Expect(releasedIPs).To(gomega.Equal(tt.wantReleasedIPsOnRollback), "Release on rollback behaved unexpectedly") + if tt.args.idAllocator != nil { + releasedID := tt.args.idAllocator.(*idAllocatorStub).releasedID + g.Expect(releasedID).To(gomega.Equal(tt.wantRelasedIDOnRollback), "Release ID on rollback behaved unexpectedly") } if tt.wantErr { diff --git a/go-controller/pkg/util/multi_network.go b/go-controller/pkg/util/multi_network.go index 47d8acedd4..d8929b1158 100644 --- a/go-controller/pkg/util/multi_network.go +++ b/go-controller/pkg/util/multi_network.go @@ -483,3 +483,8 @@ func IsMultiNetworkPoliciesSupportEnabled() bool { func DoesNetworkRequireIPAM(netInfo NetInfo) bool { return !((netInfo.TopologyType() == types.Layer2Topology || netInfo.TopologyType() == types.LocalnetTopology) && len(netInfo.Subnets()) == 0) } + +func DoesNetworkRequireTunnelIDs(netInfo NetInfo) bool { + // Layer2Topology with IC require that we allocate tunnel IDs for each pod + return netInfo.TopologyType() == types.Layer2Topology && config.OVNKubernetesFeature.EnableInterconnect +} diff --git a/go-controller/pkg/util/pod_annotation.go b/go-controller/pkg/util/pod_annotation.go index 28046c5464..08ffa30bb6 100644 --- a/go-controller/pkg/util/pod_annotation.go +++ b/go-controller/pkg/util/pod_annotation.go @@ -68,6 +68,9 @@ type PodAnnotation struct { Gateways []net.IP // Routes are additional routes to add to the pod's network namespace Routes []PodRoute + + // TunnelID assigned to each pod for layer2 secondary networks + TunnelID int } // PodRoute describes any routes to be added to the pod's network namespace @@ -91,6 +94,8 @@ type podAnnotation struct { IP string `json:"ip_address,omitempty"` Gateway string `json:"gateway_ip,omitempty"` + + TunnelID int `json:"tunnel_id,omitempty"` } // Internal struct used to marshal PodRoute to the pod annotation @@ -109,7 +114,8 @@ func MarshalPodAnnotation(annotations map[string]string, podInfo *PodAnnotation, return nil, err } pa := podAnnotation{ - MAC: podInfo.MAC.String(), + TunnelID: podInfo.TunnelID, + MAC: podInfo.MAC.String(), } if len(podInfo.IPs) == 1 { @@ -183,7 +189,9 @@ func UnmarshalPodAnnotation(annotations 
map[string]string, nadName string) (*Pod a := &tempA - podAnnotation := &PodAnnotation{} + podAnnotation := &PodAnnotation{ + TunnelID: a.TunnelID, + } podAnnotation.MAC, err = net.ParseMAC(a.MAC) if err != nil { return nil, fmt.Errorf("failed to parse pod MAC %q: %v", a.MAC, err) From cffc13742509f986e3e0c4b22e071678b54076b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Tue, 13 Jun 2023 10:19:14 +0000 Subject: [PATCH 26/31] Rename PodIPAllocator to PodAllocator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since it will now allocate tunnel IDs as well and not just IPs. Signed-off-by: Jaime Caamaño Ruiz --- .../network_cluster_controller.go | 4 +- .../pod/{ip_allocator.go => allocator.go} | 38 +++++++++---------- ...ip_allocator_test.go => allocator_test.go} | 4 +- 3 files changed, 23 insertions(+), 23 deletions(-) rename go-controller/pkg/clustermanager/pod/{ip_allocator.go => allocator.go} (82%) rename go-controller/pkg/clustermanager/pod/{ip_allocator_test.go => allocator_test.go} (98%) diff --git a/go-controller/pkg/clustermanager/network_cluster_controller.go b/go-controller/pkg/clustermanager/network_cluster_controller.go index af404c4040..b41f950e08 100644 --- a/go-controller/pkg/clustermanager/network_cluster_controller.go +++ b/go-controller/pkg/clustermanager/network_cluster_controller.go @@ -42,7 +42,7 @@ type networkClusterController struct { // unique id of the network networkID int - podIPAllocator *pod.PodIPAllocator + podIPAllocator *pod.PodAllocator hostSubnetAllocator *subnetallocator.HostSubnetAllocator util.NetInfo @@ -68,7 +68,7 @@ func newNetworkClusterController(networkID int, netInfo util.NetInfo, ovnClient } if ncc.hasPodIPAllocation() { - ncc.podIPAllocator = pod.NewPodIPAllocator(netInfo, wf.PodCoreInformer().Lister(), kube) + ncc.podIPAllocator = pod.NewPodAllocator(netInfo, wf.PodCoreInformer().Lister(), kube) } ncc.initRetryFramework() diff --git a/go-controller/pkg/clustermanager/pod/ip_allocator.go b/go-controller/pkg/clustermanager/pod/allocator.go similarity index 82% rename from go-controller/pkg/clustermanager/pod/ip_allocator.go rename to go-controller/pkg/clustermanager/pod/allocator.go index aada337ac5..8e6d26e4bb 100644 --- a/go-controller/pkg/clustermanager/pod/ip_allocator.go +++ b/go-controller/pkg/clustermanager/pod/allocator.go @@ -19,10 +19,10 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) -// PodIPAllocator acts on pods events handed off by the cluster network -// controller and allocates or releases IPs for them updating the pod annotation -// as necessary with all the additional information derived from those IPs. -type PodIPAllocator struct { +// PodAllocator acts on pods events handed off by the cluster network controller +// and allocates or releases resources (IPs and tunnel IDs at the time of this +// writing) to pods on behalf of cluster manager. 
+type PodAllocator struct { netInfo util.NetInfo // allocator of IPs within subnets @@ -37,15 +37,15 @@ type PodIPAllocator struct { releasedPodsMutex sync.Mutex } -// NewPodIPAllocator builds a new PodIPAllocator -func NewPodIPAllocator(netInfo util.NetInfo, podLister listers.PodLister, kube kube.Interface) *PodIPAllocator { +// NewPodAllocator builds a new PodAllocator +func NewPodAllocator(netInfo util.NetInfo, podLister listers.PodLister, kube kube.Interface) *PodAllocator { podAnnotationAllocator := pod.NewPodAnnotationAllocator( netInfo, podLister, kube, ) - podIPAllocator := &PodIPAllocator{ + podAllocator := &PodAllocator{ netInfo: netInfo, releasedPods: map[string]sets.Set[string]{}, releasedPodsMutex: sync.Mutex{}, @@ -54,15 +54,15 @@ func NewPodIPAllocator(netInfo util.NetInfo, podLister listers.PodLister, kube k // this network might not have IPAM, we will just allocate MAC addresses if util.DoesNetworkRequireIPAM(netInfo) { - podIPAllocator.allocator = subnet.NewAllocator() + podAllocator.allocator = subnet.NewAllocator() } - return podIPAllocator + return podAllocator } // InitRanges initializes the allocator with the subnets configured for the // network -func (a *PodIPAllocator) InitRanges() error { +func (a *PodAllocator) InitRanges() error { if a.netInfo.TopologyType() != types.LocalnetTopology { return fmt.Errorf("topology %s not supported", a.netInfo.TopologyType()) } @@ -77,13 +77,13 @@ func (a *PodIPAllocator) InitRanges() error { // Reconcile allocates or releases IPs for pods updating the pod annotation // as necessary with all the additional information derived from those IPs -func (a *PodIPAllocator) Reconcile(old, new *corev1.Pod) error { +func (a *PodAllocator) Reconcile(old, new *corev1.Pod) error { releaseIPsFromAllocator := true return a.reconcile(old, new, releaseIPsFromAllocator) } // Sync initializes the allocator with pods that already exist on the cluster -func (a *PodIPAllocator) Sync(objs []interface{}) error { +func (a *PodAllocator) Sync(objs []interface{}) error { // on sync, we don't release IPs from the allocator, we are just trying to // allocate annotated IPs; specifically we don't want to release IPs of // completed pods that might be being used by other pods @@ -104,7 +104,7 @@ func (a *PodIPAllocator) Sync(objs []interface{}) error { return nil } -func (a *PodIPAllocator) reconcile(old, new *corev1.Pod, releaseIPsFromAllocator bool) error { +func (a *PodAllocator) reconcile(old, new *corev1.Pod, releaseIPsFromAllocator bool) error { var pod *corev1.Pod if old != nil { pod = old @@ -144,7 +144,7 @@ func (a *PodIPAllocator) reconcile(old, new *corev1.Pod, releaseIPsFromAllocator return nil } -func (a *PodIPAllocator) reconcileForNAD(old, new *corev1.Pod, nad string, network *nettypes.NetworkSelectionElement, releaseIPsFromAllocator bool) error { +func (a *PodAllocator) reconcileForNAD(old, new *corev1.Pod, nad string, network *nettypes.NetworkSelectionElement, releaseIPsFromAllocator bool) error { var pod *corev1.Pod if old != nil { pod = old @@ -162,7 +162,7 @@ func (a *PodIPAllocator) reconcileForNAD(old, new *corev1.Pod, nad string, netwo return a.allocatePodOnNAD(pod, nad, network) } -func (a *PodIPAllocator) releasePodOnNAD(pod *corev1.Pod, nad string, podDeleted, releaseIPsFromAllocator bool) error { +func (a *PodAllocator) releasePodOnNAD(pod *corev1.Pod, nad string, podDeleted, releaseIPsFromAllocator bool) error { if !util.DoesNetworkRequireIPAM(a.netInfo) { // no need to release if no IPAM return nil @@ -201,7 +201,7 @@ func (a 
*PodIPAllocator) releasePodOnNAD(pod *corev1.Pod, nad string, podDeleted return nil } -func (a *PodIPAllocator) allocatePodOnNAD(pod *corev1.Pod, nad string, network *nettypes.NetworkSelectionElement) error { +func (a *PodAllocator) allocatePodOnNAD(pod *corev1.Pod, nad string, network *nettypes.NetworkSelectionElement) error { var ipAllocator subnet.NamedAllocator if util.DoesNetworkRequireIPAM(a.netInfo) { ipAllocator = a.allocator.ForSubnet(a.netInfo.GetNetworkName()) @@ -234,7 +234,7 @@ func (a *PodIPAllocator) allocatePodOnNAD(pod *corev1.Pod, nad string, network * return err } -func (a *PodIPAllocator) addReleasedPod(nad, uid string) { +func (a *PodAllocator) addReleasedPod(nad, uid string) { a.releasedPodsMutex.Lock() defer a.releasedPodsMutex.Unlock() releasedPods := a.releasedPods[nad] @@ -245,7 +245,7 @@ func (a *PodIPAllocator) addReleasedPod(nad, uid string) { releasedPods.Insert(uid) } -func (a *PodIPAllocator) deleteReleasedPod(nad, uid string) { +func (a *PodAllocator) deleteReleasedPod(nad, uid string) { a.releasedPodsMutex.Lock() defer a.releasedPodsMutex.Unlock() releasedPods := a.releasedPods[nad] @@ -257,7 +257,7 @@ func (a *PodIPAllocator) deleteReleasedPod(nad, uid string) { } } -func (a *PodIPAllocator) isPodReleased(nad, uid string) bool { +func (a *PodAllocator) isPodReleased(nad, uid string) bool { a.releasedPodsMutex.Lock() defer a.releasedPodsMutex.Unlock() releasedPods := a.releasedPods[nad] diff --git a/go-controller/pkg/clustermanager/pod/ip_allocator_test.go b/go-controller/pkg/clustermanager/pod/allocator_test.go similarity index 98% rename from go-controller/pkg/clustermanager/pod/ip_allocator_test.go rename to go-controller/pkg/clustermanager/pod/allocator_test.go index 1f9d28306b..e5752d94c2 100644 --- a/go-controller/pkg/clustermanager/pod/ip_allocator_test.go +++ b/go-controller/pkg/clustermanager/pod/allocator_test.go @@ -106,7 +106,7 @@ func (a *allocatorStub) ForSubnet(name string) subnet.NamedAllocator { return nil } -func TestPodIPAllocator_reconcileForNAD(t *testing.T) { +func TestPodAllocator_reconcileForNAD(t *testing.T) { type args struct { old *testPod new *testPod @@ -305,7 +305,7 @@ func TestPodIPAllocator_reconcileForNAD(t *testing.T) { kubeMock, ) - a := &PodIPAllocator{ + a := &PodAllocator{ netInfo: netInfo, allocator: ipallocator, podAnnotationAllocator: podAnnotationAllocator, From b3012cf96dbe4cab1c3e23d00bc0fc682ffcdde0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Tue, 13 Jun 2023 10:47:12 +0000 Subject: [PATCH 27/31] Add ID allocator to PodAllocator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pods attached to secondary networks with layer2 topologies on interconnect require a tunnel ID allocated to be configured as tunnel keys on its logical switch ports of the transit switch. This add support to allocate such IDs from cluster manager. 
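For readers following along, a minimal sketch of the per-pod tunnel key handling this commit describes, using only the id.Allocator calls introduced in this series (id.NewIDAllocator, ReserveID, AllocateID, ReleaseID). The package and helper names below are illustrative, and the patch itself routes the allocation through podAnnotationAllocator.AllocatePodAnnotationWithTunnelID rather than calling the allocator directly:

package sketch

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"

	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id"
	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
)

// initTunnelIDAllocator builds one allocator per layer2 network, bounded by
// the maximum logical switch port tunnel key, and keeps key 0 off-limits so
// it is never handed to a pod.
func initTunnelIDAllocator(netInfo util.NetInfo) (id.Allocator, error) {
	alloc, err := id.NewIDAllocator(netInfo.GetNetworkName(), types.MaxLogicalPortTunnelKey)
	if err != nil {
		return nil, err
	}
	if err := alloc.ReserveID("zero", 0); err != nil {
		return nil, err
	}
	return alloc, nil
}

// reservePodTunnelID hands out a tunnel key for a pod on a given NAD; the
// same name is passed to alloc.ReleaseID once the pod is deleted.
func reservePodTunnelID(alloc id.Allocator, nad string, pod *corev1.Pod) (int, error) {
	name := fmt.Sprintf("%s/%s", nad, pod.UID)
	return alloc.AllocateID(name)
}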
Signed-off-by: Jaime Caamaño Ruiz --- .../network_cluster_controller.go | 30 +-- .../pkg/clustermanager/pod/allocator.go | 108 +++++++--- .../pkg/clustermanager/pod/allocator_test.go | 195 +++++++++++++++--- go-controller/pkg/types/const.go | 4 + 4 files changed, 258 insertions(+), 79 deletions(-) diff --git a/go-controller/pkg/clustermanager/network_cluster_controller.go b/go-controller/pkg/clustermanager/network_cluster_controller.go index b41f950e08..a510c0b8c9 100644 --- a/go-controller/pkg/clustermanager/network_cluster_controller.go +++ b/go-controller/pkg/clustermanager/network_cluster_controller.go @@ -42,7 +42,7 @@ type networkClusterController struct { // unique id of the network networkID int - podIPAllocator *pod.PodAllocator + podAllocator *pod.PodAllocator hostSubnetAllocator *subnetallocator.HostSubnetAllocator util.NetInfo @@ -67,18 +67,22 @@ func newNetworkClusterController(networkID int, netInfo util.NetInfo, ovnClient ncc.hostSubnetAllocator = subnetallocator.NewHostSubnetAllocator(networkID, netInfo, wf.NodeCoreInformer().Lister(), kube) } - if ncc.hasPodIPAllocation() { - ncc.podIPAllocator = pod.NewPodAllocator(netInfo, wf.PodCoreInformer().Lister(), kube) + if ncc.hasPodAllocation() { + ncc.podAllocator = pod.NewPodAllocator(netInfo, wf.PodCoreInformer().Lister(), kube) } ncc.initRetryFramework() return ncc } -func (ncc *networkClusterController) hasPodIPAllocation() bool { - // we only do pod IP allocation on L2 topologies with IPAM on interconnect +func (ncc *networkClusterController) hasPodAllocation() bool { + // we only do pod allocation on L2 topologies with interconnect switch ncc.TopologyType() { - case types.Layer2Topology, types.LocalnetTopology: + case types.Layer2Topology: + // We need to allocate the PodAnnotation + return config.OVNKubernetesFeature.EnableInterconnect + case types.LocalnetTopology: + // We need to allocate the PodAnnotation if there is IPAM return config.OVNKubernetesFeature.EnableInterconnect && len(ncc.Subnets()) > 0 } return false @@ -94,7 +98,7 @@ func (ncc *networkClusterController) initRetryFramework() { ncc.retryNodes = ncc.newRetryFramework(factory.NodeType, true) } - if ncc.hasPodIPAllocation() { + if ncc.hasPodAllocation() { ncc.retryPods = ncc.newRetryFramework(factory.PodType, true) } } @@ -117,8 +121,8 @@ func (ncc *networkClusterController) Start(ctx context.Context) error { ncc.nodeHandler = nodeHandler } - if ncc.hasPodIPAllocation() { - err := ncc.podIPAllocator.InitRanges() + if ncc.hasPodAllocation() { + err := ncc.podAllocator.Init() if err != nil { return fmt.Errorf("failed to initialize pod ip allocator: %w", err) } @@ -196,7 +200,7 @@ func (h *networkClusterControllerEventHandler) AddResource(obj interface{}, from if !ok { return fmt.Errorf("could not cast %T object to *corev1.Pod", obj) } - err := h.ncc.podIPAllocator.Reconcile(nil, pod) + err := h.ncc.podAllocator.Reconcile(nil, pod) if err != nil { klog.Infof("Pod add failed for %s/%s, will try again later: %v", pod.Namespace, pod.Name, err) @@ -233,7 +237,7 @@ func (h *networkClusterControllerEventHandler) UpdateResource(oldObj, newObj int if !ok { return fmt.Errorf("could not cast %T new object to *corev1.Pod", newObj) } - err := h.ncc.podIPAllocator.Reconcile(old, new) + err := h.ncc.podAllocator.Reconcile(old, new) if err != nil { klog.Infof("Pod update failed for %s/%s, will try again later: %v", new.Namespace, new.Name, err) @@ -263,7 +267,7 @@ func (h *networkClusterControllerEventHandler) DeleteResource(obj, cachedObj int if !ok { return 
fmt.Errorf("could not cast %T object to *corev1.Pod", obj) } - err := h.ncc.podIPAllocator.Reconcile(pod, nil) + err := h.ncc.podAllocator.Reconcile(pod, nil) if err != nil { klog.Infof("Pod delete failed for %s/%s, will try again later: %v", pod.Namespace, pod.Name, err) @@ -287,7 +291,7 @@ func (h *networkClusterControllerEventHandler) SyncFunc(objs []interface{}) erro } else { switch h.objType { case factory.PodType: - syncFunc = h.ncc.podIPAllocator.Sync + syncFunc = h.ncc.podAllocator.Sync case factory.NodeType: syncFunc = h.ncc.hostSubnetAllocator.Sync diff --git a/go-controller/pkg/clustermanager/pod/allocator.go b/go-controller/pkg/clustermanager/pod/allocator.go index 8e6d26e4bb..935150af90 100644 --- a/go-controller/pkg/clustermanager/pod/allocator.go +++ b/go-controller/pkg/clustermanager/pod/allocator.go @@ -12,6 +12,7 @@ import ( nettypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip/subnet" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/pod" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" @@ -25,8 +26,11 @@ import ( type PodAllocator struct { netInfo util.NetInfo - // allocator of IPs within subnets - allocator subnet.Allocator + // ipAllocator of IPs within subnets + ipAllocator subnet.Allocator + + // idAllocator of IDs within the network + idAllocator id.Allocator // An utility to allocate the PodAnnotation to pods podAnnotationAllocator *pod.PodAnnotationAllocator @@ -54,32 +58,45 @@ func NewPodAllocator(netInfo util.NetInfo, podLister listers.PodLister, kube kub // this network might not have IPAM, we will just allocate MAC addresses if util.DoesNetworkRequireIPAM(netInfo) { - podAllocator.allocator = subnet.NewAllocator() + podAllocator.ipAllocator = subnet.NewAllocator() } return podAllocator } -// InitRanges initializes the allocator with the subnets configured for the -// network -func (a *PodAllocator) InitRanges() error { - if a.netInfo.TopologyType() != types.LocalnetTopology { - return fmt.Errorf("topology %s not supported", a.netInfo.TopologyType()) +// Init initializes the allocator with as configured for the network +func (a *PodAllocator) Init() error { + var err error + if util.DoesNetworkRequireTunnelIDs(a.netInfo) { + a.idAllocator, err = id.NewIDAllocator(a.netInfo.GetNetworkName(), types.MaxLogicalPortTunnelKey) + if err != nil { + return err + } + // Reserve the id 0. We don't want to assign this id to any of the pods. + err = a.idAllocator.ReserveID("zero", 0) + if err != nil { + return err + } } - subnets := a.netInfo.Subnets() - ipNets := make([]*net.IPNet, 0, len(subnets)) - for _, subnet := range subnets { - ipNets = append(ipNets, subnet.CIDR) + if util.DoesNetworkRequireIPAM(a.netInfo) { + subnets := a.netInfo.Subnets() + ipNets := make([]*net.IPNet, 0, len(subnets)) + for _, subnet := range subnets { + ipNets = append(ipNets, subnet.CIDR) + } + + return a.ipAllocator.AddOrUpdateSubnet(a.netInfo.GetNetworkName(), ipNets, a.netInfo.ExcludeSubnets()...) } - return a.allocator.AddOrUpdateSubnet(a.netInfo.GetNetworkName(), ipNets, a.netInfo.ExcludeSubnets()...) 
+ + return nil } // Reconcile allocates or releases IPs for pods updating the pod annotation // as necessary with all the additional information derived from those IPs func (a *PodAllocator) Reconcile(old, new *corev1.Pod) error { - releaseIPsFromAllocator := true - return a.reconcile(old, new, releaseIPsFromAllocator) + releaseFromAllocator := true + return a.reconcile(old, new, releaseFromAllocator) } // Sync initializes the allocator with pods that already exist on the cluster @@ -87,7 +104,7 @@ func (a *PodAllocator) Sync(objs []interface{}) error { // on sync, we don't release IPs from the allocator, we are just trying to // allocate annotated IPs; specifically we don't want to release IPs of // completed pods that might be being used by other pods - releaseIPsFromAllocator := false + releaseFromAllocator := false for _, obj := range objs { pod, ok := obj.(*corev1.Pod) @@ -95,7 +112,7 @@ func (a *PodAllocator) Sync(objs []interface{}) error { klog.Errorf("Could not cast %T object to *corev1.Pod", obj) continue } - err := a.reconcile(nil, pod, releaseIPsFromAllocator) + err := a.reconcile(nil, pod, releaseFromAllocator) if err != nil { klog.Errorf("Failed to sync pod %s/%s: %w", pod.Namespace, pod.Name, err) } @@ -104,7 +121,7 @@ func (a *PodAllocator) Sync(objs []interface{}) error { return nil } -func (a *PodAllocator) reconcile(old, new *corev1.Pod, releaseIPsFromAllocator bool) error { +func (a *PodAllocator) reconcile(old, new *corev1.Pod, releaseFromAllocator bool) error { var pod *corev1.Pod if old != nil { pod = old @@ -135,7 +152,7 @@ func (a *PodAllocator) reconcile(old, new *corev1.Pod, releaseIPsFromAllocator b // reconcile for each NAD for nadName, network := range networkMap { - err = a.reconcileForNAD(old, new, nadName, network, releaseIPsFromAllocator) + err = a.reconcileForNAD(old, new, nadName, network, releaseFromAllocator) if err != nil { return err } @@ -162,12 +179,7 @@ func (a *PodAllocator) reconcileForNAD(old, new *corev1.Pod, nad string, network return a.allocatePodOnNAD(pod, nad, network) } -func (a *PodAllocator) releasePodOnNAD(pod *corev1.Pod, nad string, podDeleted, releaseIPsFromAllocator bool) error { - if !util.DoesNetworkRequireIPAM(a.netInfo) { - // no need to release if no IPAM - return nil - } - +func (a *PodAllocator) releasePodOnNAD(pod *corev1.Pod, nad string, podDeleted, releaseFromAllocator bool) error { podAnnotation, _ := util.UnmarshalPodAnnotation(pod.Annotations, nad) if podAnnotation == nil { // track release pods even if they have no annotation in case a user @@ -177,10 +189,29 @@ func (a *PodAllocator) releasePodOnNAD(pod *corev1.Pod, nad string, podDeleted, uid := string(pod.UID) - // do not release IPs from the allocator if not flaged to do so or if they - // werea already previosuly released - if releaseIPsFromAllocator && !a.isPodReleased(nad, uid) { - err := a.allocator.ReleaseIPs(a.netInfo.GetNetworkName(), podAnnotation.IPs) + hasIPAM := util.DoesNetworkRequireIPAM(a.netInfo) + hasIDAllocation := util.DoesNetworkRequireTunnelIDs(a.netInfo) + + if !hasIPAM && !hasIDAllocation { + // we only take care of IP and tunnel ID allocation, if neither were + // allocated we have nothing to do + return nil + } + + // do not release from the allocators if not flaged to do so or if they + // were already previosuly released + doRelease := releaseFromAllocator && !a.isPodReleased(nad, uid) + doReleaseIDs := doRelease && hasIDAllocation + doReleaseIPs := doRelease && hasIPAM + + if doReleaseIDs { + name := podIdAllocationName(nad, uid) + 
a.idAllocator.ReleaseID(name) + klog.V(5).Infof("Released ID %d", podAnnotation.TunnelID) + } + + if doReleaseIPs { + err := a.ipAllocator.ReleaseIPs(a.netInfo.GetNetworkName(), podAnnotation.IPs) if err != nil { return fmt.Errorf("failed to release ips %v for pod %s/%s and nad %s: %w", util.StringSlice(podAnnotation.IPs), @@ -190,6 +221,7 @@ func (a *PodAllocator) releasePodOnNAD(pod *corev1.Pod, nad string, podDeleted, err, ) } + klog.V(5).Infof("Released IPs %v", util.StringSlice(podAnnotation.IPs)) } if podDeleted { @@ -204,14 +236,21 @@ func (a *PodAllocator) releasePodOnNAD(pod *corev1.Pod, nad string, podDeleted, func (a *PodAllocator) allocatePodOnNAD(pod *corev1.Pod, nad string, network *nettypes.NetworkSelectionElement) error { var ipAllocator subnet.NamedAllocator if util.DoesNetworkRequireIPAM(a.netInfo) { - ipAllocator = a.allocator.ForSubnet(a.netInfo.GetNetworkName()) + ipAllocator = a.ipAllocator.ForSubnet(a.netInfo.GetNetworkName()) + } + + var idAllocator id.NamedAllocator + if util.DoesNetworkRequireTunnelIDs(a.netInfo) { + name := podIdAllocationName(nad, string(pod.UID)) + idAllocator = a.idAllocator.ForName(name) } // don't reallocate to new IPs if currently annotated IPs fail to alloccate reallocate := false - updatedPod, podAnnotation, err := a.podAnnotationAllocator.AllocatePodAnnotation( + updatedPod, podAnnotation, err := a.podAnnotationAllocator.AllocatePodAnnotationWithTunnelID( ipAllocator, + idAllocator, pod, network, reallocate, @@ -222,11 +261,12 @@ func (a *PodAllocator) allocatePodOnNAD(pod *corev1.Pod, nad string, network *ne } if updatedPod != nil { - klog.V(5).Infof("Allocated IP addresses %v, mac address %s, gateways %v and routes %s for pod %s/%s on nad %s", + klog.V(5).Infof("Allocated IP addresses %v, mac address %s, gateways %v, routes %s and tunnel id %d for pod %s/%s on nad %s", util.StringSlice(podAnnotation.IPs), podAnnotation.MAC, util.StringSlice(podAnnotation.Gateways), util.StringSlice(podAnnotation.Routes), + podAnnotation.TunnelID, pod.Namespace, pod.Name, nad, ) } @@ -266,3 +306,7 @@ func (a *PodAllocator) isPodReleased(nad, uid string) bool { } return false } + +func podIdAllocationName(nad, uid string) string { + return fmt.Sprintf("%s/%s", nad, uid) +} diff --git a/go-controller/pkg/clustermanager/pod/allocator_test.go b/go-controller/pkg/clustermanager/pod/allocator_test.go index e5752d94c2..f002ee73df 100644 --- a/go-controller/pkg/clustermanager/pod/allocator_test.go +++ b/go-controller/pkg/clustermanager/pod/allocator_test.go @@ -9,9 +9,11 @@ import ( "github.com/stretchr/testify/mock" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip/subnet" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/pod" ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -65,47 +67,67 @@ func (p testPod) getPod(t *testing.T) *corev1.Pod { return pod } -type allocatorStub struct { +type ipAllocatorStub struct { released bool } -func (a *allocatorStub) AddOrUpdateSubnet(name string, subnets []*net.IPNet, excludeSubnets ...*net.IPNet) error { +func (a *ipAllocatorStub) AddOrUpdateSubnet(name string, subnets []*net.IPNet, excludeSubnets ...*net.IPNet) error { panic("not implemented") // TODO: Implement } -func (a allocatorStub) DeleteSubnet(name string) { 
+func (a ipAllocatorStub) DeleteSubnet(name string) { panic("not implemented") // TODO: Implement } -func (a *allocatorStub) GetSubnets(name string) ([]*net.IPNet, error) { +func (a *ipAllocatorStub) GetSubnets(name string) ([]*net.IPNet, error) { panic("not implemented") // TODO: Implement } -func (a *allocatorStub) AllocateUntilFull(name string) error { +func (a *ipAllocatorStub) AllocateUntilFull(name string) error { panic("not implemented") // TODO: Implement } -func (a *allocatorStub) AllocateIPs(name string, ips []*net.IPNet) error { +func (a *ipAllocatorStub) AllocateIPs(name string, ips []*net.IPNet) error { panic("not implemented") // TODO: Implement } -func (a *allocatorStub) AllocateNextIPs(name string) ([]*net.IPNet, error) { +func (a *ipAllocatorStub) AllocateNextIPs(name string) ([]*net.IPNet, error) { panic("not implemented") // TODO: Implement } -func (a *allocatorStub) ReleaseIPs(name string, ips []*net.IPNet) error { +func (a *ipAllocatorStub) ReleaseIPs(name string, ips []*net.IPNet) error { a.released = true return nil } -func (a *allocatorStub) ConditionalIPRelease(name string, ips []*net.IPNet, predicate func() (bool, error)) (bool, error) { +func (a *ipAllocatorStub) ConditionalIPRelease(name string, ips []*net.IPNet, predicate func() (bool, error)) (bool, error) { panic("not implemented") // TODO: Implement } -func (a *allocatorStub) ForSubnet(name string) subnet.NamedAllocator { +func (a *ipAllocatorStub) ForSubnet(name string) subnet.NamedAllocator { return nil } +type idAllocatorStub struct { + released bool +} + +func (a *idAllocatorStub) AllocateID(name string) (int, error) { + panic("not implemented") // TODO: Implement +} + +func (a *idAllocatorStub) ReserveID(name string, id int) error { + panic("not implemented") // TODO: Implement +} + +func (a *idAllocatorStub) ReleaseID(name string) { + a.released = true +} + +func (a *idAllocatorStub) ForName(name string) id.NamedAllocator { + panic("not implemented") // TODO: Implement +} + func TestPodAllocator_reconcileForNAD(t *testing.T) { type args struct { old *testPod @@ -113,13 +135,15 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { release bool } tests := []struct { - name string - args args - ipam bool - tracked bool - expectAllocate bool - expectRelease bool - expectTracked bool + name string + args args + ipam bool + idAllocation bool + tracked bool + expectAllocate bool + expectIPRelease bool + expectIDRelease bool + expectTracked bool }{ { name: "Pod not scheduled", @@ -156,7 +180,7 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { expectAllocate: true, }, { - name: "Pod completed, release inactive", + name: "Pod completed, release inactive, IP allocation", ipam: true, args: args{ new: &testPod{ @@ -170,7 +194,33 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { expectTracked: true, }, { - name: "Pod completed, release active, not previously released", + name: "Pod completed, release inactive, ID allocation", + idAllocation: true, + args: args{ + new: &testPod{ + scheduled: true, + completed: true, + network: &nadapi.NetworkSelectionElement{ + Name: "nad", + }, + }, + }, + expectTracked: true, + }, + { + name: "Pod completed, release inactive, no allocation", + args: args{ + new: &testPod{ + scheduled: true, + completed: true, + network: &nadapi.NetworkSelectionElement{ + Name: "nad", + }, + }, + }, + }, + { + name: "Pod completed, release active, not previously released, IP allocation", ipam: true, args: args{ new: &testPod{ @@ -182,11 +232,27 @@ func 
TestPodAllocator_reconcileForNAD(t *testing.T) { }, release: true, }, - expectRelease: true, - expectTracked: true, + expectIPRelease: true, + expectTracked: true, + }, + { + name: "Pod completed, release active, not previously released, ID allocation", + idAllocation: true, + args: args{ + new: &testPod{ + scheduled: true, + completed: true, + network: &nadapi.NetworkSelectionElement{ + Name: "nad", + }, + }, + release: true, + }, + expectTracked: true, + expectIDRelease: true, }, { - name: "Pod completed, release active, not previously released, no IPAM", + name: "Pod completed, release active, not previously released, no allocation", args: args{ new: &testPod{ scheduled: true, @@ -199,7 +265,7 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { }, }, { - name: "Pod completed, release active, previously released", + name: "Pod completed, release active, previously released, IP allocation", ipam: true, args: args{ new: &testPod{ @@ -214,16 +280,30 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { tracked: true, expectTracked: true, }, + { + name: "Pod completed, release active, previously released, ID allocation", + idAllocation: true, + args: args{ + new: &testPod{ + scheduled: true, + completed: true, + network: &nadapi.NetworkSelectionElement{ + Name: "nad", + }, + }, + release: true, + }, + tracked: true, + expectTracked: true, + }, { name: "Pod deleted, not scheduled", - ipam: true, args: args{ old: &testPod{}, }, }, { name: "Pod deleted, on host network", - ipam: true, args: args{ old: &testPod{ hostNetwork: true, @@ -232,7 +312,6 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { }, { name: "Pod deleted, not on network", - ipam: true, args: args{ old: &testPod{ scheduled: true, @@ -240,7 +319,7 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { }, }, { - name: "Pod deleted, not previously released", + name: "Pod deleted, not previously released, IP allocation", ipam: true, args: args{ old: &testPod{ @@ -251,10 +330,36 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { }, release: true, }, - expectRelease: true, + expectIPRelease: true, + }, + { + name: "Pod deleted, not previously released, ID allocation", + idAllocation: true, + args: args{ + old: &testPod{ + scheduled: true, + network: &nadapi.NetworkSelectionElement{ + Name: "nad", + }, + }, + release: true, + }, + expectIDRelease: true, }, { - name: "Pod deleted, previously released", + name: "Pod deleted, not previously released, no allocation", + args: args{ + old: &testPod{ + scheduled: true, + network: &nadapi.NetworkSelectionElement{ + Name: "nad", + }, + }, + release: true, + }, + }, + { + name: "Pod deleted, previously released, IP allocation", ipam: true, args: args{ old: &testPod{ @@ -267,11 +372,26 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { }, tracked: true, }, + { + name: "Pod deleted, previously released, ID allocation", + idAllocation: true, + args: args{ + old: &testPod{ + scheduled: true, + network: &nadapi.NetworkSelectionElement{ + Name: "nad", + }, + }, + release: true, + }, + tracked: true, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - ipallocator := &allocatorStub{} + ipallocator := &ipAllocatorStub{} + idallocator := &idAllocatorStub{} podListerMock := &v1mocks.PodLister{} kubeMock := &kubemocks.Interface{} @@ -287,12 +407,14 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { ).Return(nil) netConf := &ovncnitypes.NetConf{ - Topology: types.LocalnetTopology, + Topology: types.Layer2Topology, } if tt.ipam { netConf.Subnets = 
"10.1.130.0/24" } + config.OVNKubernetesFeature.EnableInterconnect = tt.idAllocation + netInfo, err := util.NewNetInfo(netConf) if err != nil { t.Fatalf("Invalid netConf") @@ -307,7 +429,8 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { a := &PodAllocator{ netInfo: netInfo, - allocator: ipallocator, + ipAllocator: ipallocator, + idAllocator: idallocator, podAnnotationAllocator: podAnnotationAllocator, releasedPods: map[string]sets.Set[string]{}, releasedPodsMutex: sync.Mutex{}, @@ -335,8 +458,12 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { t.Errorf("expected pod ips allocated to be %v but it was %v", tt.expectAllocate, allocated) } - if tt.expectRelease != ipallocator.released { - t.Errorf("expected pod ips released to be %v but it was %v", tt.expectRelease, ipallocator.released) + if tt.expectIPRelease != ipallocator.released { + t.Errorf("expected pod ips released to be %v but it was %v", tt.expectIPRelease, ipallocator.released) + } + + if tt.expectIDRelease != idallocator.released { + t.Errorf("expected pod ID released to be %v but it was %v", tt.expectIPRelease, ipallocator.released) } if tt.expectTracked != a.releasedPods["namespace/nad"].Has("pod") { diff --git a/go-controller/pkg/types/const.go b/go-controller/pkg/types/const.go index 73c6a19eac..4a69001278 100644 --- a/go-controller/pkg/types/const.go +++ b/go-controller/pkg/types/const.go @@ -194,4 +194,8 @@ const ( EgressServiceNoHost = "" // set on services with no allocated node EgressServiceNoSNATHost = "ALL" // set on services with sourceIPBy=Network + + // MaxLogicalPortTunnelKey is maximum tunnel key that can be requested for a + // Logical Switch or Router Port + MaxLogicalPortTunnelKey = 32767 ) From d9864de20ed6468486274cf1d5c55fec20bf259d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Tue, 20 Jun 2023 08:14:49 +0000 Subject: [PATCH 28/31] Change ID allocator to round robin strategy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tunnel IDs allocated to pods have a more dynamic lifecycle than IDs allocated to networks. Lets change the allocation strategy to round robin to reduce the posibility of potential collisions when a de-allocated tunnel ID is allocated to a pod while still being used by one of the zones that might have not yet process the original pod termination event. Signed-off-by: Jaime Caamaño Ruiz --- go-controller/pkg/allocator/id/allocator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go-controller/pkg/allocator/id/allocator.go b/go-controller/pkg/allocator/id/allocator.go index 271973eb14..c21729d865 100644 --- a/go-controller/pkg/allocator/id/allocator.go +++ b/go-controller/pkg/allocator/id/allocator.go @@ -34,7 +34,7 @@ type idAllocator struct { // NewIDAllocator returns an IDAllocator func NewIDAllocator(name string, maxIds int) (Allocator, error) { - idBitmap := bitmapallocator.NewContiguousAllocationMap(maxIds, name) + idBitmap := bitmapallocator.NewRoundRobinAllocationMap(maxIds, name) return &idAllocator{ nameIdMap: sync.Map{}, From fd07c045b79666f372fe283f28c887e90d604a63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Tue, 13 Jun 2023 11:11:55 +0000 Subject: [PATCH 29/31] Enable cluster manager support for layer2 networks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will enable cluster manager to allocate the PodAnnotation to pods on interconnect instead of the zone controllers. 
Signed-off-by: Jaime Caamaño Ruiz --- .../secondary_network_cluster_manager.go | 11 +++++++++-- .../clustermanager/secondary_network_unit_test.go | 12 ++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go index 4ea1d92054..56d3c52a4f 100644 --- a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go +++ b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go @@ -92,8 +92,8 @@ func (sncm *secondaryNetworkClusterManager) Stop() { } // NewNetworkController implements the networkAttachDefController.NetworkControllerManager -// interface function. This function is called by the net-attach-def controller when -// a layer2 or layer3 secondary network is created. Layer2 type is not handled here. +// interface function. This function is called by the net-attach-def controller when +// a secondary network is created. func (sncm *secondaryNetworkClusterManager) NewNetworkController(nInfo util.NetInfo) (nad.NetworkController, error) { if !sncm.isTopologyManaged(nInfo) { return nil, nad.ErrNetworkControllerTopologyNotManaged @@ -113,8 +113,15 @@ func (sncm *secondaryNetworkClusterManager) NewNetworkController(nInfo util.NetI func (sncm *secondaryNetworkClusterManager) isTopologyManaged(nInfo util.NetInfo) bool { switch nInfo.TopologyType() { case ovntypes.Layer3Topology: + // we need to allocate subnets to each node regardless of configuration return true + case ovntypes.Layer2Topology: + // for IC, pod IPs and tunnel IDs need to be allocated + // in non IC config, this is done from ovnkube-master network controller + return config.OVNKubernetesFeature.EnableInterconnect case ovntypes.LocalnetTopology: + // for IC, pod IPs need to be allocated + // in non IC config, this is done from ovnkube-master network controller return config.OVNKubernetesFeature.EnableInterconnect && len(nInfo.Subnets()) > 0 } return false diff --git a/go-controller/pkg/clustermanager/secondary_network_unit_test.go b/go-controller/pkg/clustermanager/secondary_network_unit_test.go index 7bfda11e47..2b90c401c7 100644 --- a/go-controller/pkg/clustermanager/secondary_network_unit_test.go +++ b/go-controller/pkg/clustermanager/secondary_network_unit_test.go @@ -147,6 +147,8 @@ var _ = ginkgo.Describe("Secondary Layer3 Cluster Controller Manager", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) config.Kubernetes.HostNetworkNamespace = "" + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableInterconnect = true f, err = factory.NewClusterManagerWatchFactory(fakeClient) gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = f.Start() @@ -156,10 +158,20 @@ var _ = ginkgo.Describe("Secondary Layer3 Cluster Controller Manager", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) netInfo, err := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: "blue"}, Topology: ovntypes.Layer2Topology}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + config.OVNKubernetesFeature.EnableInterconnect = false nc, err := sncm.NewNetworkController(netInfo) gomega.Expect(err).To(gomega.Equal(nad.ErrNetworkControllerTopologyNotManaged)) gomega.Expect(nc).To(gomega.BeNil()) + config.OVNKubernetesFeature.EnableInterconnect = true + nc, err = sncm.NewNetworkController(netInfo) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(nc).NotTo(gomega.BeNil()) + + err = nc.Start(ctx.Context) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return nil } From 2cd1adf21a6db70923f60fe575258f5c7a0722a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Tue, 11 Jul 2023 11:55:54 +0000 Subject: [PATCH 30/31] e2e multi-homing: fix uneeded replacement of network name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR#3609 removed the suffix `_br-localnet` from the localnet network name used in the bridge mappings, but also inadvertedly removed the replacement of `-` and `/` characters form the controller code but not from the tests. Let's remove them from tests as well as they don't look troublesome neither as network name in the logical switch port options nor in the OVS bridge mappings. Signed-off-by: Jaime Caamaño Ruiz --- test/e2e/localnet-underlay.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/e2e/localnet-underlay.go b/test/e2e/localnet-underlay.go index 2e54f8d8de..d09600cbd7 100644 --- a/test/e2e/localnet-underlay.go +++ b/test/e2e/localnet-underlay.go @@ -156,8 +156,6 @@ func defaultNetworkBridgeMapping() BridgeMapping { } func bridgeMapping(physnet, ovsBridge string) BridgeMapping { - physnet = strings.ReplaceAll(physnet, "-", ".") - physnet = strings.ReplaceAll(physnet, "/", ".") return BridgeMapping{ physnet: physnet, ovsBridge: ovsBridge, From d30ee3cfcb636affb1c33ccb7ce4a341685d3223 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 11 Jul 2023 14:55:54 -0500 Subject: [PATCH 31/31] subnetallocator: fix and clean up used vs. count confusion The original Usage() returned both used and available subnets for the allocator, which is confusing and error prone. The fixed commit used the "count" (eg used+unused) for both count and usage, leading to OpenShift alerts about subnets exceeded because (usage / count) = 1. Instead, break the API into two separate calls with clear meaning. Usage() returns how many subnets are allocated, and Count() returns how many are possible/available. 
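A minimal sketch of how the split API is meant to be consumed, mirroring the host_subnet_allocator.go hunks below; the function name and the explicit utilization ratio are illustrative, not part of the patch:

package sketch

import (
	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/subnetallocator"
	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics"
)

// recordSubnetMetrics records usage and count separately and returns the v4
// utilization, which is now used/count rather than count/count.
func recordSubnetMetrics(sna subnetallocator.SubnetAllocator) float64 {
	v4used, v6used := sna.Usage()   // allocated subnets
	v4count, v6count := sna.Count() // possible subnets, used or not
	metrics.RecordSubnetUsage(float64(v4used), float64(v6used))
	metrics.RecordSubnetCount(float64(v4count), float64(v6count))
	if v4count == 0 {
		return 0
	}
	return float64(v4used) / float64(v4count)
}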
{ fail [github.com/openshift/origin/test/extended/prometheus/prometheus.go:587]: Unexpected error: : promQL query returned unexpected results: ALERTS{alertname!~"Watchdog|AlertmanagerReceiversNotConfigured|PrometheusRemoteWriteDesiredShards|KubeJobFailed|Watchdog|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|etcdMembersDown|etcdMembersDown|etcdGRPCRequestsSlow|etcdGRPCRequestsSlow|etcdHighNumberOfFailedGRPCRequests|etcdHighNumberOfFailedGRPCRequests|etcdMemberCommunicationSlow|etcdMemberCommunicationSlow|etcdNoLeader|etcdNoLeader|etcdHighFsyncDurations|etcdHighFsyncDurations|etcdHighCommitDurations|etcdHighCommitDurations|etcdInsufficientMembers|etcdInsufficientMembers|etcdHighNumberOfLeaderChanges|etcdHighNumberOfLeaderChanges|KubeAPIErrorBudgetBurn|KubeAPIErrorBudgetBurn|KubeClientErrors|KubeClientErrors|KubePersistentVolumeErrors|KubePersistentVolumeErrors|MCDDrainError|MCDDrainError|KubeMemoryOvercommit|KubeMemoryOvercommit|MCDPivotError|MCDPivotError|PrometheusOperatorWatchErrors|PrometheusOperatorWatchErrors|OVNKubernetesResourceRetryFailure|OVNKubernetesResourceRetryFailure|RedhatOperatorsCatalogError|RedhatOperatorsCatalogError|VSphereOpenshiftNodeHealthFail|VSphereOpenshiftNodeHealthFail|SamplesImagestreamImportFailing|SamplesImagestreamImportFailing",alertstate="firing",severity!="info"} >= 1 [ { "metric": { "__name__": "ALERTS", "alertname": "V4SubnetAllocationThresholdExceeded", "alertstate": "firing", "container": "kube-rbac-proxy", "endpoint": "metrics", "instance": "10.0.0.4:9102", "job": "ovnkube-master", "namespace": "openshift-ovn-kubernetes", "pod": 
"ovnkube-master-hjp2x", "prometheus": "openshift-monitoring/k8s", "service": "ovn-kubernetes-master", "severity": "warning" }, "value": [ 1689091264.978, "1" ] } ] [ <*errors.errorString | 0xc00194d430>{ s: "promQL query returned unexpected results:\nALERTS{alertname!~\"Watchdog|AlertmanagerReceiversNotConfigured|PrometheusRemoteWriteDesiredShards|KubeJobFailed|Watchdog|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|KubePodNotReady|etcdMembersDown|etcdMembersDown|etcdGRPCRequestsSlow|etcdGRPCRequestsSlow|etcdHighNumberOfFailedGRPCRequests|etcdHighNumberOfFailedGRPCRequests|etcdMemberCommunicationSlow|etcdMemberCommunicationSlow|etcdNoLeader|etcdNoLeader|etcdHighFsyncDurations|etcdHighFsyncDurations|etcdHighCommitDurations|etcdHighCommitDurations|etcdInsufficientMembers|etcdInsufficientMembers|etcdHighNumberOfLeaderChanges|etcdHighNumberOfLeaderChanges|KubeAPIErrorBudgetBurn|KubeAPIErrorBudgetBurn|KubeClientErrors|KubeClientErrors|KubePersistentVolumeErrors|KubePersistentVolumeErrors|MCDDrainError|MCDDrainError|KubeMemoryOvercommit|KubeMemoryOvercommit|MCDPivotError|MCDPivotError|PrometheusOperatorWatchErrors|PrometheusOperatorWatchErrors|OVNKubernetesResourceRetryFailure|OVNKubernetesResourceRetryFailure|RedhatOperatorsCatalogError|RedhatOperatorsCatalogError|VSphereOpenshiftNodeHealthFail|VSphereOpenshiftNodeHealthFail|SamplesImagestreamImportFailing|SamplesImagestreamImportFailing\",alertstate=\"firing\",severity!=\"info\"} >= 1\n[\n {\n \"metric\": {\n \"__name__\": \"ALERTS\",\n \"alertname\": \"V4SubnetAllocationThresholdExceeded\",\n \"alertstate\": \"firing\",\n \"container\": \"kube-rbac-proxy\",\n 
\"endpoint\": \"metrics\",\n \"instance\": \"10.0.0.4:9102\",\n \"job\": \"ovnkube-master\",\n \"namespace\": \"openshift-ovn-kubernetes\",\n \"pod\": \"ovnkube-master-hjp2x\",\n \"prometheus\": \"openshift-monitoring/k8s\",\n \"service\": \"ovn-kubernetes-master\",\n \"severity\": \"warning\"\n },\n \"value\": [\n 1689091264.978,\n \"1\"\n ]\n }\n]", }, ] occurred Fixes: d86604bf2074 ("Move subnet handling out of network cluster controller") Signed-off-by: Dan Williams --- .../subnetallocator/allocator.go | 47 ++++++++++++------- .../subnetallocator/allocator_test.go | 30 +++++++----- .../subnetallocator/host_subnet_allocator.go | 6 +-- .../host_subnet_allocator_test.go | 4 +- 4 files changed, 54 insertions(+), 33 deletions(-) diff --git a/go-controller/pkg/clustermanager/subnetallocator/allocator.go b/go-controller/pkg/clustermanager/subnetallocator/allocator.go index f9b24f85ea..e1bbb329fe 100644 --- a/go-controller/pkg/clustermanager/subnetallocator/allocator.go +++ b/go-controller/pkg/clustermanager/subnetallocator/allocator.go @@ -14,9 +14,10 @@ var ErrSubnetAllocatorFull = fmt.Errorf("no subnets available.") type SubnetAllocator interface { AddNetworkRange(network *net.IPNet, hostSubnetLen int) error MarkAllocatedNetworks(string, ...*net.IPNet) error - // Usage returns the number of available and used v4 subnets, and - // the number of available and used v6 subnets - Usage() (uint64, uint64, uint64, uint64) + // Usage returns the number of used/allocated v4 and v6 subnets + Usage() (uint64, uint64) + // Count returns the number available (both used and unused) v4 and v6 subnets + Count() (uint64, uint64) AllocateNetworks(string) ([]*net.IPNet, error) AllocateIPv4Network(string) (*net.IPNet, error) AllocateIPv6Network(string) (*net.IPNet, error) @@ -40,21 +41,28 @@ func NewSubnetAllocator() SubnetAllocator { return &BaseSubnetAllocator{} } -// Usage returns the number of allocated and used IPv4 subnets, and the number -// of allocated and used IPv6 subnets. 
-func (sna *BaseSubnetAllocator) Usage() (uint64, uint64, uint64, uint64) { - var v4count, v4used, v6count, v6used uint64 +// Usage returns the number of used/allocated v4 and v6 subnets +func (sna *BaseSubnetAllocator) Usage() (uint64, uint64) { + var v4used, v6used uint64 for _, snr := range sna.v4ranges { - c, u := snr.usage() - v4count = v4count + c - v4used = v4used + u + v4used = v4used + snr.usage() } for _, snr := range sna.v6ranges { - c, u := snr.usage() - v6count = v6count + c - v6used = v6used + u + v6used = v6used + snr.usage() } - return v4count, v4used, v6count, v6used + return v4used, v6used +} + +// Count returns the number of available (both used and unused) v4 and v6 subnets +func (sna *BaseSubnetAllocator) Count() (uint64, uint64) { + var v4count, v6count uint64 + for _, snr := range sna.v4ranges { + v4count = v4count + snr.count() + } + for _, snr := range sna.v6ranges { + v6count = v6count + snr.count() + } + return v4count, v6count } // AddNetworkRange makes the given range available for allocation and returns @@ -302,10 +310,15 @@ func newSubnetAllocatorRange(network *net.IPNet, hostSubnetLen int) (*subnetAllo return snr, nil } -// usage returns the number of available subnets and the number of allocated subnets -func (snr *subnetAllocatorRange) usage() (uint64, uint64) { +// usage returns the number of used/allocated subnets +func (snr *subnetAllocatorRange) usage() uint64 { + return uint64(snr.used) +} + +// count returns the number of available subnets (both used and unused) +func (snr *subnetAllocatorRange) count() uint64 { var one uint64 = 1 - return one << snr.subnetBits, uint64(snr.used) + return one << snr.subnetBits } type alreadyOwnedError struct { diff --git a/go-controller/pkg/clustermanager/subnetallocator/allocator_test.go b/go-controller/pkg/clustermanager/subnetallocator/allocator_test.go index 20b56922aa..0becb6ec73 100644 --- a/go-controller/pkg/clustermanager/subnetallocator/allocator_test.go +++ b/go-controller/pkg/clustermanager/subnetallocator/allocator_test.go @@ -55,17 +55,25 @@ func allocateExpected(sna SubnetAllocator, n int, expected ...string) error { return nil } -func allocateNotExpected(sna SubnetAllocator, n int) error { +func allocateNotExpected(sna SubnetAllocator, v4n, v6n int) error { if sns, err := sna.AllocateNetworks(testNodeName); err == nil { - return fmt.Errorf("unexpectedly succeeded in allocating %s (sns=%v)", networkID(n), sns) + return fmt.Errorf("unexpectedly succeeded in allocating v4=%s, v6=%s (sns=%v)", networkID(v4n), networkID(v6n), sns) } else if err != ErrSubnetAllocatorFull { return fmt.Errorf("returned error was not ErrSubnetAllocatorFull (%v)", err) } + + v4used, v6used := sna.Usage() + if v4n >= 0 && v4used != uint64(v4n) { + return fmt.Errorf("expected %d available v4 subnets but got %d", v4n, v4used) + } + if v6n >= 0 && v6used != uint64(v6n) { + return fmt.Errorf("expected %d available v6 subnets but got %d", v6n, v6used) + } return nil } func expectNumSubnets(t *testing.T, sna SubnetAllocator, v4expected, v6expected uint64) error { - v4count, _, v6count, _ := sna.Usage() + v4count, v6count := sna.Count() if v4count != v4expected { return fmt.Errorf("expected %d available v4 subnets but got %d", v4expected, v4count) } @@ -90,7 +98,7 @@ func TestAllocateSubnetIPv4(t *testing.T) { t.Fatal(err) } } - if err := allocateNotExpected(sna, 256); err != nil { + if err := allocateNotExpected(sna, 256, 0); err != nil { t.Fatal(err) } } @@ -142,7 +150,7 @@ func TestAllocateSubnetLargeHostBitsIPv4(t *testing.T) { 
t.Fatal(err) } } - if err := allocateNotExpected(sna, 64); err != nil { + if err := allocateNotExpected(sna, 64, 0); err != nil { t.Fatal(err) } } @@ -285,7 +293,7 @@ func TestAllocateSubnetNoSubnetBitsIPv4(t *testing.T) { if err := allocateExpected(sna, 0, "10.1.0.0/16"); err != nil { t.Fatal(err) } - if err := allocateNotExpected(sna, 1); err != nil { + if err := allocateNotExpected(sna, 1, 0); err != nil { t.Fatal(err) } } @@ -300,7 +308,7 @@ func TestAllocateSubnetNoSubnetBitsIPv6(t *testing.T) { if err := allocateExpected(sna, 0, "fd01::/64"); err != nil { t.Fatal(err) } - if err := allocateNotExpected(sna, 1); err != nil { + if err := allocateNotExpected(sna, 0, 1); err != nil { t.Fatal(err) } } @@ -501,7 +509,7 @@ func TestMultipleSubnets(t *testing.T) { } } - if err := allocateNotExpected(sna, 8); err != nil { + if err := allocateNotExpected(sna, 8, 0); err != nil { t.Fatal(err) } @@ -519,7 +527,7 @@ func TestMultipleSubnets(t *testing.T) { t.Fatal(err) } - if err := allocateNotExpected(sna, -1); err != nil { + if err := allocateNotExpected(sna, -1, 0); err != nil { t.Fatal(err) } } @@ -555,7 +563,7 @@ func TestDualStack(t *testing.T) { } } - if err := allocateNotExpected(sna, 8); err != nil { + if err := allocateNotExpected(sna, 8, 8); err != nil { t.Fatal(err) } @@ -577,7 +585,7 @@ func TestDualStack(t *testing.T) { t.Fatal(err) } - if err := allocateNotExpected(sna, -1); err != nil { + if err := allocateNotExpected(sna, -1, -1); err != nil { t.Fatal(err) } } diff --git a/go-controller/pkg/clustermanager/subnetallocator/host_subnet_allocator.go b/go-controller/pkg/clustermanager/subnetallocator/host_subnet_allocator.go index 85368ff56b..d77074b22f 100644 --- a/go-controller/pkg/clustermanager/subnetallocator/host_subnet_allocator.go +++ b/go-controller/pkg/clustermanager/subnetallocator/host_subnet_allocator.go @@ -81,7 +81,7 @@ func (sna *HostSubnetAllocator) hasHybridOverlayAllocation() bool { func (sna *HostSubnetAllocator) recordSubnetCount() { // only for the default network if !sna.netInfo.IsSecondary() { - v4count, _, v6count, _ := sna.clusterSubnetAllocator.Usage() + v4count, v6count := sna.clusterSubnetAllocator.Count() metrics.RecordSubnetCount(float64(v4count), float64(v6count)) } } @@ -89,8 +89,8 @@ func (sna *HostSubnetAllocator) recordSubnetCount() { func (sna *HostSubnetAllocator) recordSubnetUsage() { // only for the default network if !sna.netInfo.IsSecondary() { - v4count, _, v6count, _ := sna.clusterSubnetAllocator.Usage() - metrics.RecordSubnetUsage(float64(v4count), float64(v6count)) + v4used, v6used := sna.clusterSubnetAllocator.Usage() + metrics.RecordSubnetUsage(float64(v4used), float64(v6used)) } } diff --git a/go-controller/pkg/clustermanager/subnetallocator/host_subnet_allocator_test.go b/go-controller/pkg/clustermanager/subnetallocator/host_subnet_allocator_test.go index 744133e133..ce0fb66ce2 100644 --- a/go-controller/pkg/clustermanager/subnetallocator/host_subnet_allocator_test.go +++ b/go-controller/pkg/clustermanager/subnetallocator/host_subnet_allocator_test.go @@ -303,7 +303,7 @@ func TestController_allocateNodeSubnets_ReleaseOnError(t *testing.T) { } // test network allocation works correctly - _, v4usedBefore, _, v6usedBefore := sna.clusterSubnetAllocator.Usage() + v4usedBefore, v6usedBefore := sna.clusterSubnetAllocator.Usage() got, allocated, err := sna.allocateNodeSubnets(sna.clusterSubnetAllocator, "testNode", nil, true, true) if err == nil { t.Fatalf("allocateNodeSubnets() expected error but got success") @@ -315,7 +315,7 @@ func 
TestController_allocateNodeSubnets_ReleaseOnError(t *testing.T) { t.Fatalf("allocateNodeSubnets() expected no allocated subnets, got %v", allocated) } - _, v4usedAfter, _, v6usedAfter := sna.clusterSubnetAllocator.Usage() + v4usedAfter, v6usedAfter := sna.clusterSubnetAllocator.Usage() if v4usedAfter != v4usedBefore { t.Fatalf("Expected %d v4 allocated subnets, but got %d", v4usedBefore, v4usedAfter) }