nb: Add support for Load_Balancer_Groups.
For deployments where a large number of load balancers are associated
with multiple logical switches/routers, introduce syntactic sugar in
the OVN_Northbound database (Load_Balancer_Groups) to simplify
configuration.

Instead of associating N Load_Balancer records with M Logical_Switches
(M x N references in the NB database), we can create a single
Load_Balancer_Group record, associate all N Load_Balancer records with
it, and associate it with all M Logical_Switches (M + N references in
the NB database in total).
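
As an illustration (not part of this commit), a CMS linked against the
OVN NB IDL could wire this up roughly as sketched below.  This is a
minimal sketch: attach_lbs_via_group() and the "cluster-lbs" name are
made up, and the nbrec_* accessors are assumed to be the standard
setters the OVSDB IDL generates for the new Load_Balancer_Group table
and columns.

    #include "lib/ovn-nb-idl.h"

    static void
    attach_lbs_via_group(struct ovsdb_idl_txn *txn,
                         struct nbrec_load_balancer **lbs, size_t n_lbs,
                         struct nbrec_logical_switch **ls, size_t n_ls)
    {
        /* One group record holds all N load balancer references. */
        struct nbrec_load_balancer_group *lbg =
            nbrec_load_balancer_group_insert(txn);
        nbrec_load_balancer_group_set_name(lbg, "cluster-lbs");
        nbrec_load_balancer_group_set_load_balancer(lbg, lbs, n_lbs);

        /* Each switch then holds a single reference to the group:
         * M + N references in total instead of M x N. */
        for (size_t i = 0; i < n_ls; i++) {
            nbrec_logical_switch_set_load_balancer_group(ls[i], &lbg, 1);
        }
    }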

This makes it easier for the CMS to configure load balancers (e.g., in
the ovn-kubernetes use case, cluster load balancers are applied to all
node logical switches and node logical gateway routers), and it also
drastically improves performance on the NB ovsdb-server side, because
ovsdb-server now has to track M times fewer references.

With a micro-benchmark that creates 120 logical switches and
associates 1000 load balancers with them (using ovn-nbctl in daemon
mode) we measure:

             CPU Time NB DB          CPU Time ovn-nbctl
  -----------------------------------------------------
  Plain LB:             30s                         35s
  LB Groups:             1s                          2s

Reported-at: https://bugzilla.redhat.com/2001528
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Numan Siddique <numans@ovn.org>
dceara authored and numansiddique committed Oct 6, 2021
1 parent c1e3896 commit f6aba21
Showing 6 changed files with 424 additions and 130 deletions.
2 changes: 2 additions & 0 deletions NEWS
@@ -4,6 +4,8 @@ Post v21.09.0
installed together with other flows when a logical switch port is created,
without having to wait for the port to be UP. CMS should set it to false
if not desired.
- Added Load_Balancer_Group support, which simplifies large-scale
configurations of load balancers.

OVN v21.09.0 - xx xxx xxxx
--------------------------
242 changes: 157 additions & 85 deletions northd/northd.c
@@ -827,17 +827,74 @@ static void destroy_router_enternal_ips(struct ovn_datapath *od)
sset_destroy(&od->external_ips);
}

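/* Returns true if the load balancer has at least one VIP configured. */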
static bool
lb_has_vip(const struct nbrec_load_balancer *lb)
{
return !smap_is_empty(&lb->vips);
}

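/* Returns true if any load balancer in the group has a VIP configured. */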
static bool
lb_group_has_vip(const struct nbrec_load_balancer_group *lb_group)
{
for (size_t i = 0; i < lb_group->n_load_balancer; i++) {
if (lb_has_vip(lb_group->load_balancer[i])) {
return true;
}
}
return false;
}

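/* Returns true if the logical switch has at least one load balancer VIP,
* configured either directly or through a Load_Balancer_Group. */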
static bool
ls_has_lb_vip(struct ovn_datapath *od)
{
for (size_t i = 0; i < od->nbs->n_load_balancer; i++) {
if (lb_has_vip(od->nbs->load_balancer[i])) {
return true;
}
}

for (size_t i = 0; i < od->nbs->n_load_balancer_group; i++) {
if (lb_group_has_vip(od->nbs->load_balancer_group[i])) {
return true;
}
}
return false;
}

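/* Same as ls_has_lb_vip() but for logical routers. */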
static bool
lr_has_lb_vip(struct ovn_datapath *od)
{
for (size_t i = 0; i < od->nbr->n_load_balancer; i++) {
if (lb_has_vip(od->nbr->load_balancer[i])) {
return true;
}
}

for (size_t i = 0; i < od->nbr->n_load_balancer_group; i++) {
if (lb_group_has_vip(od->nbr->load_balancer_group[i])) {
return true;
}
}
return false;
}

static void
init_lb_ips(struct ovn_datapath *od)
init_lb_for_datapath(struct ovn_datapath *od)
{
sset_init(&od->lb_ips_v4);
sset_init(&od->lb_ips_v4_routable);
sset_init(&od->lb_ips_v6);
sset_init(&od->lb_ips_v6_routable);

if (od->nbs) {
od->has_lb_vip = ls_has_lb_vip(od);
} else {
od->has_lb_vip = lr_has_lb_vip(od);
}
}

static void
destroy_lb_ips(struct ovn_datapath *od)
destroy_lb_for_datapath(struct ovn_datapath *od)
{
if (!od->nbs && !od->nbr) {
return;
@@ -895,7 +952,7 @@ ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
free(od->router_ports);
destroy_nat_entries(od);
destroy_router_enternal_ips(od);
destroy_lb_ips(od);
destroy_lb_for_datapath(od);
free(od->nat_entries);
free(od->localnet_ports);
free(od->l3dgw_ports);
@@ -1219,7 +1276,7 @@ join_datapaths(struct northd_context *ctx, struct hmap *datapaths,

init_ipam_info_for_datapath(od);
init_mcast_info_for_datapath(od);
init_lb_ips(od);
init_lb_for_datapath(od);
}

const struct nbrec_logical_router *nbr;
@@ -1252,7 +1309,7 @@ join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
init_mcast_info_for_datapath(od);
init_nat_entries(od);
init_router_external_ips(od);
init_lb_ips(od);
init_lb_for_datapath(od);
if (smap_get(&od->nbr->options, "chassis")) {
od->is_gw_router = true;
}
@@ -2585,7 +2642,7 @@ get_nat_addresses(const struct ovn_port *op, size_t *n, bool routable_only)
size_t n_nats = 0;
struct eth_addr mac;
if (!op || !op->nbrp || !op->od || !op->od->nbr
|| (!op->od->nbr->n_nat && !op->od->nbr->n_load_balancer)
|| (!op->od->nbr->n_nat && !op->od->has_lb_vip)
|| !eth_addr_from_string(op->nbrp->mac, &mac)
|| op->od->n_l3dgw_ports > 1) {
*n = n_nats;
@@ -3555,7 +3612,7 @@ build_ovn_lr_lbs(struct hmap *datapaths, struct hmap *lbs)
}
if (!smap_get(&od->nbr->options, "chassis")
&& od->n_l3dgw_ports != 1) {
if (od->n_l3dgw_ports > 1 && od->nbr->n_load_balancer) {
if (od->n_l3dgw_ports > 1 && od->has_lb_vip) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
VLOG_WARN_RL(&rl, "Load-balancers are configured on logical "
"router %s, which has %"PRIuSIZE" distributed "
@@ -3573,6 +3630,17 @@ build_ovn_lr_lbs(struct hmap *datapaths, struct hmap *lbs)
lb = ovn_northd_lb_find(lbs, lb_uuid);
ovn_northd_lb_add_lr(lb, od);
}

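/* Also expand the router's load balancer groups: every LB they
* reference is registered as if it were attached to the router
* directly. */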
for (size_t i = 0; i < od->nbr->n_load_balancer_group; i++) {
const struct nbrec_load_balancer_group *lbg =
od->nbr->load_balancer_group[i];
for (size_t j = 0; j < lbg->n_load_balancer; j++) {
const struct uuid *lb_uuid =
&lbg->load_balancer[j]->header_.uuid;
lb = ovn_northd_lb_find(lbs, lb_uuid);
ovn_northd_lb_add_lr(lb, od);
}
}
}
}

@@ -3603,6 +3671,17 @@ build_ovn_lbs(struct northd_context *ctx, struct hmap *datapaths,
lb = ovn_northd_lb_find(lbs, lb_uuid);
ovn_northd_lb_add_ls(lb, od);
}

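/* Likewise, expand the load balancer groups attached to this
* logical switch. */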
for (size_t i = 0; i < od->nbs->n_load_balancer_group; i++) {
const struct nbrec_load_balancer_group *lbg =
od->nbs->load_balancer_group[i];
for (size_t j = 0; j < lbg->n_load_balancer; j++) {
const struct uuid *lb_uuid =
&lbg->load_balancer[j]->header_.uuid;
lb = ovn_northd_lb_find(lbs, lb_uuid);
ovn_northd_lb_add_ls(lb, od);
}
}
}

/* Delete any stale SB load balancer rows. */
@@ -3711,6 +3790,26 @@ build_ovn_lb_svcs(struct northd_context *ctx, struct hmap *ports,
hmap_destroy(&monitor_map);
}

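/* Adds the load balancer's VIPs to the router datapath's IP sets;
* VIPs of load balancers with options:add_route set are also tracked
* as routable. */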
static void
build_lrouter_lb_ips(struct ovn_datapath *od, const struct ovn_northd_lb *lb)
{
bool is_routable = smap_get_bool(&lb->nlb->options, "add_route", false);
const char *ip_address;

SSET_FOR_EACH (ip_address, &lb->ips_v4) {
sset_add(&od->lb_ips_v4, ip_address);
if (is_routable) {
sset_add(&od->lb_ips_v4_routable, ip_address);
}
}
SSET_FOR_EACH (ip_address, &lb->ips_v6) {
sset_add(&od->lb_ips_v6, ip_address);
if (is_routable) {
sset_add(&od->lb_ips_v6_routable, ip_address);
}
}
}

static void
build_lrouter_lbs(struct hmap *datapaths, struct hmap *lbs)
{
@@ -3725,20 +3824,17 @@ build_lrouter_lbs(struct hmap *datapaths, struct hmap *lbs)
struct ovn_northd_lb *lb =
ovn_northd_lb_find(lbs,
&od->nbr->load_balancer[i]->header_.uuid);
const char *ip_address;
bool is_routable = smap_get_bool(&lb->nlb->options, "add_route",
false);
SSET_FOR_EACH (ip_address, &lb->ips_v4) {
sset_add(&od->lb_ips_v4, ip_address);
if (is_routable) {
sset_add(&od->lb_ips_v4_routable, ip_address);
}
}
SSET_FOR_EACH (ip_address, &lb->ips_v6) {
sset_add(&od->lb_ips_v6, ip_address);
if (is_routable) {
sset_add(&od->lb_ips_v6_routable, ip_address);
}
build_lrouter_lb_ips(od, lb);
}

for (size_t i = 0; i < od->nbr->n_load_balancer_group; i++) {
const struct nbrec_load_balancer_group *lbg =
od->nbr->load_balancer_group[i];
for (size_t j = 0; j < lbg->n_load_balancer; j++) {
struct ovn_northd_lb *lb =
ovn_northd_lb_find(lbs,
&lbg->load_balancer[j]->header_.uuid);
build_lrouter_lb_ips(od, lb);
}
}
}
@@ -5520,22 +5616,8 @@ build_empty_lb_event_flow(struct ovn_lb_vip *lb_vip,
return true;
}

static bool
ls_has_lb_vip(struct ovn_datapath *od)
{
for (int i = 0; i < od->nbs->n_load_balancer; i++) {
struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i];
if (!smap_is_empty(&nb_lb->vips)) {
return true;
}
}

return false;
}

static void
build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
struct hmap *lbs)
build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
{
/* Do not send ND packets to conntrack */
ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110,
@@ -5570,49 +5652,41 @@
110, lflows);
}

for (int i = 0; i < od->nbs->n_load_balancer; i++) {
struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i];
struct ovn_northd_lb *lb =
ovn_northd_lb_find(lbs, &nb_lb->header_.uuid);
ovs_assert(lb);

/* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send
* packet to conntrack for defragmentation and possibly for unNATting.
*
* Send all the packets to conntrack in the ingress pipeline if the
* logical switch has a load balancer with VIP configured. Earlier
* we used to set the REGBIT_CONNTRACK_DEFRAG flag in the ingress
* pipeline if the IP destination matches the VIP. But this causes
* a few issues when a logical switch has no ACLs configured with
* allow-related.
* To understand the issue, let's take a TCP load balancer -
* 10.0.0.10:80=10.0.0.3:80.
* If a logical port - p1 with IP - 10.0.0.5 opens a TCP connection
* with the VIP - 10.0.0.10, then the packet in the ingress pipeline
* of 'p1' is sent to p1's conntrack zone id and the packet is
* load balanced to the backend - 10.0.0.3. For the reply packet from
* the backend lport, it is not sent to the conntrack of backend
* lport's zone id. This is fine as long as the packet is valid.
* Suppose the backend lport sends an invalid TCP packet (like an
* incorrect sequence number), the packet gets delivered to the
* lport 'p1' without unDNATing the packet to the VIP - 10.0.0.10.
* And this causes the connection to be reset by the lport p1's VIF.
*
* We can't fix this issue by adding a logical flow to drop ct.inv
* packets in the egress pipeline since it will drop all other
* connections not destined to the load balancers.
*
* To fix this issue, we send all the packets to the conntrack in the
* ingress pipeline if a load balancer is configured. We can now
* add a lflow to drop ct.inv packets.
*/
if (lb->n_vips) {
ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
100, "ip", REGBIT_CONNTRACK_NAT" = 1; next;");
ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
100, "ip", REGBIT_CONNTRACK_NAT" = 1; next;");
break;
}
/* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send
* packet to conntrack for defragmentation and possibly for unNATting.
*
* Send all the packets to conntrack in the ingress pipeline if the
* logical switch has a load balancer with VIP configured. Earlier
* we used to set the REGBIT_CONNTRACK_DEFRAG flag in the ingress
* pipeline if the IP destination matches the VIP. But this causes
* a few issues when a logical switch has no ACLs configured with
* allow-related.
* To understand the issue, let's take a TCP load balancer -
* 10.0.0.10:80=10.0.0.3:80.
* If a logical port - p1 with IP - 10.0.0.5 opens a TCP connection
* with the VIP - 10.0.0.10, then the packet in the ingress pipeline
* of 'p1' is sent to p1's conntrack zone id and the packet is
* load balanced to the backend - 10.0.0.3. For the reply packet from
* the backend lport, it is not sent to the conntrack of backend
* lport's zone id. This is fine as long as the packet is valid.
* Suppose the backend lport sends an invalid TCP packet (like an
* incorrect sequence number), the packet gets delivered to the
* lport 'p1' without unDNATing the packet to the VIP - 10.0.0.10.
* And this causes the connection to be reset by the lport p1's VIF.
*
* We can't fix this issue by adding a logical flow to drop ct.inv
* packets in the egress pipeline since it will drop all other
* connections not destined to the load balancers.
*
* To fix this issue, we send all the packets to the conntrack in the
* ingress pipeline if a load balancer is configured. We can now
* add a lflow to drop ct.inv packets.
*/
if (od->has_lb_vip) {
ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
100, "ip", REGBIT_CONNTRACK_NAT" = 1; next;");
ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
100, "ip", REGBIT_CONNTRACK_NAT" = 1; next;");
}
}

@@ -7294,15 +7368,13 @@ static void
build_lswitch_lflows_pre_acl_and_acl(struct ovn_datapath *od,
struct hmap *port_groups,
struct hmap *lflows,
struct shash *meter_groups,
struct hmap *lbs)
struct shash *meter_groups)
{
if (od->nbs) {
od->has_lb_vip = ls_has_lb_vip(od);
ls_get_acl_flags(od);

build_pre_acls(od, port_groups, lflows);
build_pre_lb(od, lflows, lbs);
build_pre_lb(od, lflows);
build_pre_stateful(od, lflows);
build_acl_hints(od, lflows);
build_acls(od, lflows, port_groups, meter_groups);
@@ -12552,7 +12624,7 @@ build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, struct hmap *lflows,
* flag set. Some NICs are unable to offload these flows.
*/
if ((od->is_gw_router || od->n_l3dgw_ports) &&
(od->nbr->n_nat || od->nbr->n_load_balancer)) {
(od->nbr->n_nat || od->has_lb_vip)) {
ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 50,
"ip", "flags.loopback = 1; ct_dnat;");
ovn_lflow_add(lflows, od, S_ROUTER_OUT_POST_UNDNAT, 50,
@@ -12771,7 +12843,7 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od,
{
/* Build Logical Switch Flows. */
build_lswitch_lflows_pre_acl_and_acl(od, lsi->port_groups, lsi->lflows,
lsi->meter_groups, lsi->lbs);
lsi->meter_groups);

build_fwd_group_lflows(od, lsi->lflows);
build_lswitch_lflows_admission_control(od, lsi->lflows);
