From 35b00c7e79900787e510bb203c5cddfb895f6c4f Mon Sep 17 00:00:00 2001 From: Numan Siddique Date: Wed, 2 Dec 2020 23:57:56 +0530 Subject: [PATCH] northd: Add ECMP support to router policies. A user can add a policy now like: ovn-nbctl lr-policy-add 100 "ip4.src == 10.0.0.4" reroute 172.0.0.5,172.0.0.6 We do have ECMP support for logical router static routes, but since policies are applied after the routing stage, ECMP support for policies is desired by ovn-kubernetes. A new column 'nexthops' is added to the Logical_Router_Policy table instead of modifying the existing column 'nexthop' to preserve backward compatibility and avoid any upgrade issues. Requested-by: Alexander Constantinescu Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1881826 Acked-by: Mark Michelson Signed-off-by: Numan Siddique --- NEWS | 2 +- northd/ovn-northd.8.xml | 80 +++++++++++++++++++-- northd/ovn-northd.c | 148 ++++++++++++++++++++++++++++++++++---- ovn-nb.ovsschema | 6 +- ovn-nb.xml | 18 ++++- tests/ovn-northd.at | 124 ++++++++++++++++++++++++++++++++ tests/ovn.at | 16 ++--- utilities/ovn-nbctl.8.xml | 12 ++-- utilities/ovn-nbctl.c | 73 +++++++++++++++---- 9 files changed, 430 insertions(+), 49 deletions(-) diff --git a/NEWS b/NEWS index ebf1897ccc..f0ac94b8ed 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,6 @@ Post-v20.12.0 ------------------------- - + - Support ECMP multiple nexthops for reroute router policies. OVN v20.12.0 - xx xxx xxxx -------------------------- diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index d86f36ea63..1f0f71f34f 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -3041,14 +3041,36 @@ outport = P;
  • - If the policy action is reroute, then the logical - flow is added with the following actions: + If the policy action is reroute with 2 or more nexthops + defined, then the logical flow is added with the following actions: +

    + +
    +reg8[0..15] = GID;
    +reg8[16..31] = select(1,..n);
    +        
    + +

    + where GID is the ECMP group id generated by + ovn-northd for this policy and n + is the number of nexthops. select action + selects one of the nexthop member id, stores it in the register + reg8[16..31] and advances the packet to the + next stage. +

    +
  • + +
  • +

    + If the policy action is reroute with just one nexhop, + then the logical flow is added with the following actions:

     [xx]reg0 = H;
     eth.src = E;
     outport = P;
    +reg8[0..15] = 0;
     flags.loopback = 1;
     next;
             
    @@ -3072,7 +3094,51 @@ next;
  • -

    Ingress Table 13: ARP/ND Resolution

    +

    Ingress Table 13: ECMP handling for router policies

    +

    + This table handles the ECMP for the router policies configured + with multiple nexthops. +

    + +
      +
    • +

      + A priority-150 flow is added to advance the packet to the next stage + if the ECMP group id register reg8[0..15] is 0. +

      +
    • + +
    • +

      + For each ECMP reroute router policy with multiple nexthops, + a priority-100 flow is added for each nexthop H + with the match reg8[0..15] == GID && + reg8[16..31] == M where GID + is the router policy group id generated by ovn-northd + and M is the member id of the nexthop H + generated by ovn-northd. The following actions are added + to the flow: +

      + +
      +[xx]reg0 = H;
      +eth.src = E;
      +outport = P
      +"flags.loopback = 1; "
      +"next;"
      +        
      + +

      + where H is the nexthop defined in the + router policy, E is the ethernet address of the + logical router port from which the nexthop is + reachable and P is the logical router port from + which the nexthop is reachable. +

      +
    • +
    + +

    Ingress Table 14: ARP/ND Resolution

    Any packet that reaches this table is an IP packet whose next-hop @@ -3258,7 +3324,7 @@ next; -

    Ingress Table 14: Check packet length

    +

    Ingress Table 15: Check packet length

    For distributed logical routers with distributed gateway port configured @@ -3288,7 +3354,7 @@ REGBIT_PKT_LARGER = check_pkt_larger(L); next; and advances to the next table.

    -

    Ingress Table 15: Handle larger packets

    +

    Ingress Table 16: Handle larger packets

    For distributed logical routers with distributed gateway port configured @@ -3349,7 +3415,7 @@ icmp6 { and advances to the next table.

    -

    Ingress Table 16: Gateway Redirect

    +

    Ingress Table 17: Gateway Redirect

    For distributed logical routers where one of the logical router @@ -3389,7 +3455,7 @@ icmp6 { -

    Ingress Table 17: ARP Request

    +

    Ingress Table 18: ARP Request

    In the common case where the Ethernet destination has been resolved, this diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index 478f1a3390..dfd7d69d0e 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -188,11 +188,12 @@ enum ovn_stage { PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 10, "lr_in_ip_routing") \ PIPELINE_STAGE(ROUTER, IN, IP_ROUTING_ECMP, 11, "lr_in_ip_routing_ecmp") \ PIPELINE_STAGE(ROUTER, IN, POLICY, 12, "lr_in_policy") \ - PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 13, "lr_in_arp_resolve") \ - PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN , 14, "lr_in_chk_pkt_len") \ - PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 15,"lr_in_larger_pkts") \ - PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 16, "lr_in_gw_redirect") \ - PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 17, "lr_in_arp_request") \ + PIPELINE_STAGE(ROUTER, IN, POLICY_ECMP, 13, "lr_in_policy_ecmp") \ + PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 14, "lr_in_arp_resolve") \ + PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN , 15, "lr_in_chk_pkt_len") \ + PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 16, "lr_in_larger_pkts") \ + PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 17, "lr_in_gw_redirect") \ + PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 18, "lr_in_arp_request") \ \ /* Logical router egress stages. */ \ PIPELINE_STAGE(ROUTER, OUT, UNDNAT, 0, "lr_out_undnat") \ @@ -7562,33 +7563,39 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od, struct ds actions = DS_EMPTY_INITIALIZER; if (!strcmp(rule->action, "reroute")) { + ovs_assert(rule->n_nexthops <= 1); + + char *nexthop = + (rule->n_nexthops == 1 ? rule->nexthops[0] : rule->nexthop); struct ovn_port *out_port = get_outport_for_routing_policy_nexthop( - od, ports, rule->priority, rule->nexthop); + od, ports, rule->priority, nexthop); if (!out_port) { return; } - const char *lrp_addr_s = find_lrp_member_ip(out_port, rule->nexthop); + const char *lrp_addr_s = find_lrp_member_ip(out_port, nexthop); if (!lrp_addr_s) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); VLOG_WARN_RL(&rl, "lrp_addr not found for routing policy " " priority %"PRId64" nexthop %s", - rule->priority, rule->nexthop); + rule->priority, nexthop); return; } uint32_t pkt_mark = ovn_smap_get_uint(&rule->options, "pkt_mark", 0); if (pkt_mark) { ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark); } - bool is_ipv4 = strchr(rule->nexthop, '.') ? true : false; + + bool is_ipv4 = strchr(nexthop, '.') ? true : false; ds_put_format(&actions, "%s = %s; " "%s = %s; " "eth.src = %s; " "outport = %s; " "flags.loopback = 1; " + REG_ECMP_GROUP_ID" = 0; " "next;", is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6, - rule->nexthop, + nexthop, is_ipv4 ? REG_SRC_IPV4 : REG_SRC_IPV6, lrp_addr_s, out_port->lrp_networks.ea_s, @@ -7601,7 +7608,7 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od, if (pkt_mark) { ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark); } - ds_put_cstr(&actions, "next;"); + ds_put_cstr(&actions, REG_ECMP_GROUP_ID" = 0; next;"); } ds_put_format(&match, "%s", rule->match); @@ -7611,6 +7618,107 @@ build_routing_policy_flow(struct hmap *lflows, struct ovn_datapath *od, ds_destroy(&actions); } +static void +build_ecmp_routing_policy_flows(struct hmap *lflows, struct ovn_datapath *od, + struct hmap *ports, + const struct nbrec_logical_router_policy *rule, + uint16_t ecmp_group_id) +{ + ovs_assert(rule->n_nexthops > 1); + + bool nexthops_is_ipv4 = true; + + /* Check that all the nexthops belong to the same addr family before + * adding logical flows. */ + for (uint16_t i = 0; i < rule->n_nexthops; i++) { + bool is_ipv4 = strchr(rule->nexthops[i], '.') ? true : false; + + if (i == 0) { + nexthops_is_ipv4 = is_ipv4; + } + + if (is_ipv4 != nexthops_is_ipv4) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "nexthop [%s] of the router policy with " + "the match [%s] do not belong to the same address " + "family as other next hops", + rule->nexthops[i], rule->match); + return; + } + } + + struct ds match = DS_EMPTY_INITIALIZER; + struct ds actions = DS_EMPTY_INITIALIZER; + + for (uint16_t i = 0; i < rule->n_nexthops; i++) { + struct ovn_port *out_port = get_outport_for_routing_policy_nexthop( + od, ports, rule->priority, rule->nexthops[i]); + if (!out_port) { + goto cleanup; + } + + const char *lrp_addr_s = + find_lrp_member_ip(out_port, rule->nexthops[i]); + if (!lrp_addr_s) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "lrp_addr not found for routing policy " + " priority %"PRId64" nexthop %s", + rule->priority, rule->nexthops[i]); + goto cleanup; + } + + ds_clear(&actions); + uint32_t pkt_mark = ovn_smap_get_uint(&rule->options, "pkt_mark", 0); + if (pkt_mark) { + ds_put_format(&actions, "pkt.mark = %u; ", pkt_mark); + } + + bool is_ipv4 = strchr(rule->nexthops[i], '.') ? true : false; + + ds_put_format(&actions, "%s = %s; " + "%s = %s; " + "eth.src = %s; " + "outport = %s; " + "flags.loopback = 1; " + "next;", + is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6, + rule->nexthops[i], + is_ipv4 ? REG_SRC_IPV4 : REG_SRC_IPV6, + lrp_addr_s, + out_port->lrp_networks.ea_s, + out_port->json_key); + + ds_clear(&match); + ds_put_format(&match, REG_ECMP_GROUP_ID" == %"PRIu16" && " + REG_ECMP_MEMBER_ID" == %"PRIu16, + ecmp_group_id, i + 1); + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY_ECMP, + 100, ds_cstr(&match), + ds_cstr(&actions), &rule->header_); + } + + ds_clear(&actions); + ds_put_format(&actions, "%s = %"PRIu16 + "; %s = select(", REG_ECMP_GROUP_ID, ecmp_group_id, + REG_ECMP_MEMBER_ID); + + for (uint16_t i = 0; i < rule->n_nexthops; i++) { + if (i > 0) { + ds_put_cstr(&actions, ", "); + } + + ds_put_format(&actions, "%"PRIu16, i + 1); + } + ds_put_cstr(&actions, ");"); + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_POLICY, + rule->priority, rule->match, + ds_cstr(&actions), &rule->header_); + +cleanup: + ds_destroy(&match); + ds_destroy(&actions); +} + struct parsed_route { struct ovs_list list_node; struct in6_addr prefix; @@ -10300,13 +10408,27 @@ build_ingress_policy_flows_for_lrouter( if (od->nbr) { /* This is a catch-all rule. It has the lowest priority (0) * does a match-all("1") and pass-through (next) */ - ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1", "next;"); + ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1", + REG_ECMP_GROUP_ID" = 0; next;"); + ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY_ECMP, 150, + REG_ECMP_GROUP_ID" == 0", "next;"); /* Convert routing policies to flows. */ + uint16_t ecmp_group_id = 1; for (int i = 0; i < od->nbr->n_policies; i++) { const struct nbrec_logical_router_policy *rule = od->nbr->policies[i]; - build_routing_policy_flow(lflows, od, ports, rule, &rule->header_); + bool is_ecmp_reroute = + (!strcmp(rule->action, "reroute") && rule->n_nexthops > 1); + + if (is_ecmp_reroute) { + build_ecmp_routing_policy_flows(lflows, od, ports, rule, + ecmp_group_id); + ecmp_group_id++; + } else { + build_routing_policy_flow(lflows, od, ports, rule, + &rule->header_); + } } } } diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema index af77dd1387..b77a2308cf 100644 --- a/ovn-nb.ovsschema +++ b/ovn-nb.ovsschema @@ -1,7 +1,7 @@ { "name": "OVN_Northbound", - "version": "5.29.0", - "cksum": "328602112 27064", + "version": "5.30.0", + "cksum": "3273824429 27172", "tables": { "NB_Global": { "columns": { @@ -391,6 +391,8 @@ "key": {"type": "string", "enum": ["set", ["allow", "drop", "reroute"]]}}}, "nexthop": {"type": {"key": "string", "min": 0, "max": 1}}, + "nexthops": {"type": { + "key": "string", "min": 0, "max": "unlimited"}}, "options": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, diff --git a/ovn-nb.xml b/ovn-nb.xml index e7a8d6833f..0cf043790e 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml @@ -2723,18 +2723,34 @@

  • - reroute: Reroute packet to . + reroute: Reroute packet to or + .
  • +

    + Note: This column is deprecated in favor of . +

    Next-hop IP address for this route, which should be the IP address of a connected router port or the IP address of a logical port.

    + +

    + Next-hop ECMP IP addresses for this route. Each IP in the list should + be the IP address of a connected router port or the IP address of a + logical port. +

    + +

    + One IP from the list is selected as next hop. +

    +
    +

    Marks the packet with the value specified when the router policy diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index 50a4cae76a..ce6c44db40 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -2198,3 +2198,127 @@ dnl Number of common flows should be the same. check_row_count Logical_Flow ${n_flows_common} logical_dp_group=${dp_group_uuid} AT_CLEANUP + +AT_SETUP([ovn -- Router policies - ECMP reroute]) +AT_KEYWORDS([router policies ecmp reroute]) +ovn_start + +check ovn-nbctl ls-add sw0 +check ovn-nbctl lsp-add sw0 sw0-port1 +check ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3" + +check ovn-nbctl ls-add sw1 +check ovn-nbctl lsp-add sw1 sw1-port1 +check ovn-nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3" + +# Create a logical router and attach both logical switches +check ovn-nbctl lr-add lr0 +check ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64 +check ovn-nbctl lsp-add sw0 sw0-lr0 +check ovn-nbctl lsp-set-type sw0-lr0 router +check ovn-nbctl lsp-set-addresses sw0-lr0 00:00:00:00:ff:01 +check ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 + +check ovn-nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64 +check ovn-nbctl lsp-add sw1 sw1-lr0 +check ovn-nbctl lsp-set-type sw1-lr0 router +check ovn-nbctl lsp-set-addresses sw1-lr0 00:00:00:00:ff:02 +check ovn-nbctl lsp-set-options sw1-lr0 router-port=lr-sw1 + +check ovn-nbctl ls-add public +check ovn-nbctl lrp-add lr0 lr0-public 00:00:20:20:12:13 172.168.0.100/24 +check ovn-nbctl lsp-add public public-lr0 +check ovn-nbctl lsp-set-type public-lr0 router +check ovn-nbctl lsp-set-addresses public-lr0 router +check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public + +check ovn-nbctl --wait=sb lr-policy-add lr0 10 "ip4.src == 10.0.0.3" reroute 172.168.0.101,172.168.0.102 + +ovn-sbctl dump-flows lr0 > lr0flows3 +AT_CAPTURE_FILE([lr0flows3]) + +AT_CHECK([grep "lr_in_policy" lr0flows3 | sort], [0], [dnl + table=12(lr_in_policy ), priority=0 , match=(1), action=(reg8[[0..15]] = 0; next;) + table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.3), action=(reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) + table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=13(lr_in_policy_ecmp ), priority=150 , match=(reg8[[0..15]] == 0), action=(next;) +]) + +check ovn-nbctl --wait=sb lr-policy-add lr0 10 "ip4.src == 10.0.0.4" reroute 172.168.0.101,172.168.0.102,172.168.0.103 +ovn-sbctl dump-flows lr0 > lr0flows3 +AT_CAPTURE_FILE([lr0flows3]) + +AT_CHECK([grep "lr_in_policy" lr0flows3 | \ +sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = /' | \ +sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == /' | sort], [0], [dnl + table=12(lr_in_policy ), priority=0 , match=(1), action=(reg8[[0..15]] = ; next;) + table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.3), action=(reg8[[0..15]] = ; reg8[[16..31]] = select(1, 2);) + table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.4), action=(reg8[[0..15]] = ; reg8[[16..31]] = select(1, 2, 3);) + table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 3), action=(reg0 = 172.168.0.103; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=13(lr_in_policy_ecmp ), priority=150 , match=(reg8[[0..15]] == ), action=(next;) +]) + +check ovn-nbctl --wait=sb lr-policy-add lr0 10 "ip4.src == 10.0.0.5" reroute 172.168.0.110 +ovn-sbctl dump-flows lr0 > lr0flows3 +AT_CAPTURE_FILE([lr0flows3]) + +AT_CHECK([grep "lr_in_policy" lr0flows3 | \ +sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = /' | \ +sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == /' | sort], [0], [dnl + table=12(lr_in_policy ), priority=0 , match=(1), action=(reg8[[0..15]] = ; next;) + table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.3), action=(reg8[[0..15]] = ; reg8[[16..31]] = select(1, 2);) + table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.4), action=(reg8[[0..15]] = ; reg8[[16..31]] = select(1, 2, 3);) + table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.5), action=(reg0 = 172.168.0.110; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg8[[0..15]] = ; next;) + table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 3), action=(reg0 = 172.168.0.103; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=13(lr_in_policy_ecmp ), priority=150 , match=(reg8[[0..15]] == ), action=(next;) +]) + +check ovn-nbctl --wait=sb lr-policy-del lr0 10 "ip4.src == 10.0.0.3" +ovn-sbctl dump-flows lr0 > lr0flows3 +AT_CAPTURE_FILE([lr0flows3]) + +AT_CHECK([grep "lr_in_policy" lr0flows3 | \ +sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = /' | \ +sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == /' | sort], [0], [dnl + table=12(lr_in_policy ), priority=0 , match=(1), action=(reg8[[0..15]] = ; next;) + table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.4), action=(reg8[[0..15]] = ; reg8[[16..31]] = select(1, 2, 3);) + table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.5), action=(reg0 = 172.168.0.110; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg8[[0..15]] = ; next;) + table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 1), action=(reg0 = 172.168.0.101; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 2), action=(reg0 = 172.168.0.102; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=13(lr_in_policy_ecmp ), priority=100 , match=(reg8[[0..15]] == && reg8[[16..31]] == 3), action=(reg0 = 172.168.0.103; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=13(lr_in_policy_ecmp ), priority=150 , match=(reg8[[0..15]] == ), action=(next;) +]) + +check ovn-nbctl --wait=sb lr-policy-del lr0 10 "ip4.src == 10.0.0.4" +ovn-sbctl dump-flows lr0 > lr0flows3 +AT_CAPTURE_FILE([lr0flows3]) + +AT_CHECK([grep "lr_in_policy" lr0flows3 | \ +sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = /' | \ +sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == /' | sort], [0], [dnl + table=12(lr_in_policy ), priority=0 , match=(1), action=(reg8[[0..15]] = ; next;) + table=12(lr_in_policy ), priority=10 , match=(ip4.src == 10.0.0.5), action=(reg0 = 172.168.0.110; reg1 = 172.168.0.100; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; reg8[[0..15]] = ; next;) + table=13(lr_in_policy_ecmp ), priority=150 , match=(reg8[[0..15]] == ), action=(next;) +]) + +check ovn-nbctl --wait=sb add logical_router_policy . nexthops "2000\:\:b" +ovn-sbctl dump-flows lr0 > lr0flows3 +AT_CAPTURE_FILE([lr0flows3]) + +AT_CHECK([grep "lr_in_policy" lr0flows3 | \ +sed 's/reg8\[[0..15\]] = [[0-9]]*/reg8\[[0..15\]] = /' | \ +sed 's/reg8\[[0..15\]] == [[0-9]]*/reg8\[[0..15\]] == /' | sort], [0], [dnl + table=12(lr_in_policy ), priority=0 , match=(1), action=(reg8[[0..15]] = ; next;) + table=13(lr_in_policy_ecmp ), priority=150 , match=(reg8[[0..15]] == ), action=(next;) +]) + +AT_CLEANUP diff --git a/tests/ovn.at b/tests/ovn.at index 310eff9caf..80bc099c7b 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -21156,31 +21156,31 @@ AT_CHECK([ AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl table=12(lr_in_policy ), priority=1001 , dnl -match=(ip6), action=(pkt.mark = 4294967295; next;) +match=(ip6), action=(pkt.mark = 4294967295; reg8[[0..15]] = 0; next;) ]) ovn-nbctl --wait=hv set logical_router_policy $pol5 options:pkt_mark=-1 AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl table=12(lr_in_policy ), priority=1001 , dnl -match=(ip6), action=(next;) +match=(ip6), action=(reg8[[0..15]] = 0; next;) ]) ovn-nbctl --wait=hv set logical_router_policy $pol5 options:pkt_mark=2147483648 AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl table=12(lr_in_policy ), priority=1001 , dnl -match=(ip6), action=(pkt.mark = 2147483648; next;) +match=(ip6), action=(pkt.mark = 2147483648; reg8[[0..15]] = 0; next;) ]) ovn-nbctl --wait=hv set logical_router_policy $pol5 options:pkt_mark=foo AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl table=12(lr_in_policy ), priority=1001 , dnl -match=(ip6), action=(next;) +match=(ip6), action=(reg8[[0..15]] = 0; next;) ]) ovn-nbctl --wait=hv set logical_router_policy $pol5 options:pkt_mark=4294967296 AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_policy.*priority=1001" | sort], [0], [dnl table=12(lr_in_policy ), priority=1001 , dnl -match=(ip6), action=(next;) +match=(ip6), action=(reg8[[0..15]] = 0; next;) ]) OVN_CLEANUP([hv1]) @@ -21759,7 +21759,7 @@ AT_CHECK([ grep "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))" -c) ]) AT_CHECK([ - test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=21 | \ + test 1 -eq $(as hv1 ovs-ofctl dump-flows br-int table=22 | \ grep "priority=200" | \ grep "actions=move:NXM_NX_CT_LABEL\\[[32..79\\]]->NXM_OF_ETH_DST\\[[\\]]" -c) ]) @@ -21770,7 +21770,7 @@ AT_CHECK([ grep "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))" -c) ]) AT_CHECK([ - test 0 -eq $(as hv2 ovs-ofctl dump-flows br-int table=21 | \ + test 0 -eq $(as hv2 ovs-ofctl dump-flows br-int table=22 | \ grep "priority=200" | \ grep "actions=move:NXM_NX_CT_LABEL\\[[32..79\\]]->NXM_OF_ETH_DST\\[[\\]]" -c) ]) @@ -22136,7 +22136,7 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int | grep "actions=controller" | grep ]) # The packet should've been dropped in the lr_in_arp_resolve stage. -AT_CHECK([as hv1 ovs-ofctl dump-flows br-int | grep -E "table=21, n_packets=1,.* priority=1,ip,metadata=0x${sw_key},nw_dst=10.0.1.1 actions=drop" -c], [0], [dnl +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int | grep -E "table=22, n_packets=1,.* priority=1,ip,metadata=0x${sw_key},nw_dst=10.0.1.1 actions=drop" -c], [0], [dnl 1 ]) diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml index e5a35f3072..e6fec99802 100644 --- a/utilities/ovn-nbctl.8.xml +++ b/utilities/ovn-nbctl.8.xml @@ -739,7 +739,7 @@

    [--may-exist]lr-policy-add router priority match - action [nexthop] + action [nexthop[,nexthop,...]] [options key=value]]

    @@ -748,10 +748,12 @@ are similar to OVN ACLs, but exist on the logical-router. Reroute policies are needed for service-insertion and service-chaining. nexthop is an optional parameter. It needs to be provided - only when action is reroute. A policy is - uniquely identified by priority and match. - Multiple policies can have the same priority. - options sets the router policy options as key-value pair. + only when action is reroute. Multiple + nexthops can be specified for ECMP routing. + A policy is uniquely identified by priority and + match. Multiple policies can have the same + priority. options sets the router policy + options as key-value pair. The supported option is : pkt_mark.

    diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c index 835161f255..94e7eedeb5 100644 --- a/utilities/ovn-nbctl.c +++ b/utilities/ovn-nbctl.c @@ -766,7 +766,7 @@ Route commands:\n\ lr-route-list ROUTER print routes for ROUTER\n\ \n\ Policy commands:\n\ - lr-policy-add ROUTER PRIORITY MATCH ACTION [NEXTHOP] \ + lr-policy-add ROUTER PRIORITY MATCH ACTION [NEXTHOP,[NEXTHOP,...]] \ [OPTIONS KEY=VALUE ...] \n\ add a policy to router\n\ lr-policy-del ROUTER [{PRIORITY | UUID} [MATCH]]\n\ @@ -3634,7 +3634,8 @@ nbctl_lr_policy_add(struct ctl_context *ctx) return; } const char *action = ctx->argv[4]; - char *next_hop = NULL; + size_t n_nexthops = 0; + char **nexthops = NULL; bool reroute = false; /* Validate action. */ @@ -3654,7 +3655,8 @@ nbctl_lr_policy_add(struct ctl_context *ctx) /* Check if same routing policy already exists. * A policy is uniquely identified by priority and match */ bool may_exist = !!shash_find(&ctx->options, "--may-exist"); - for (int i = 0; i < lr->n_policies; i++) { + size_t i; + for (i = 0; i < lr->n_policies; i++) { const struct nbrec_logical_router_policy *policy = lr->policies[i]; if (policy->priority == priority && !strcmp(policy->match, ctx->argv[3])) { @@ -3665,12 +3667,53 @@ nbctl_lr_policy_add(struct ctl_context *ctx) return; } } + if (reroute) { - next_hop = normalize_prefix_str(ctx->argv[5]); - if (!next_hop) { - ctl_error(ctx, "bad next hop argument: %s", ctx->argv[5]); - return; + char *nexthops_arg = xstrdup(ctx->argv[5]); + char *save_ptr, *next_hop, *token; + + n_nexthops = 0; + size_t n_allocs = 0; + + bool nexthops_is_ipv4 = true; + for (token = strtok_r(nexthops_arg, ",", &save_ptr); + token != NULL; token = strtok_r(NULL, ",", &save_ptr)) { + next_hop = normalize_addr_str(token); + + if (!next_hop) { + ctl_error(ctx, "bad next hop argument: %s", ctx->argv[5]); + free(nexthops_arg); + for (i = 0; i < n_nexthops; i++) { + free(nexthops[i]); + } + free(nexthops); + return; + } + if (n_nexthops == n_allocs) { + nexthops = x2nrealloc(nexthops, &n_allocs, sizeof *nexthops); + } + + bool is_ipv4 = strchr(next_hop, '.') ? true : false; + if (n_nexthops == 0) { + nexthops_is_ipv4 = is_ipv4; + } + + if (is_ipv4 != nexthops_is_ipv4) { + ctl_error(ctx, "bad next hops argument, not in the same " + "addr family : %s", ctx->argv[5]); + free(nexthops_arg); + free(next_hop); + for (i = 0; i < n_nexthops; i++) { + free(nexthops[i]); + } + free(nexthops); + return; + } + nexthops[n_nexthops] = next_hop; + n_nexthops++; } + + free(nexthops_arg); } struct nbrec_logical_router_policy *policy; @@ -3679,12 +3722,13 @@ nbctl_lr_policy_add(struct ctl_context *ctx) nbrec_logical_router_policy_set_match(policy, ctx->argv[3]); nbrec_logical_router_policy_set_action(policy, action); if (reroute) { - nbrec_logical_router_policy_set_nexthop(policy, next_hop); + nbrec_logical_router_policy_set_nexthops( + policy, (const char **)nexthops, n_nexthops); } /* Parse the options. */ struct smap options = SMAP_INITIALIZER(&options); - for (size_t i = reroute ? 6 : 5; i < ctx->argc; i++) { + for (i = reroute ? 6 : 5; i < ctx->argc; i++) { char *key, *value; value = xstrdup(ctx->argv[i]); key = strsep(&value, "="); @@ -3694,7 +3738,10 @@ nbctl_lr_policy_add(struct ctl_context *ctx) ctl_error(ctx, "No value specified for the option : %s", key); smap_destroy(&options); free(key); - free(next_hop); + for (i = 0; i < n_nexthops; i++) { + free(nexthops[i]); + } + free(nexthops); return; } free(key); @@ -3703,9 +3750,11 @@ nbctl_lr_policy_add(struct ctl_context *ctx) smap_destroy(&options); nbrec_logical_router_update_policies_addvalue(lr, policy); - if (next_hop != NULL) { - free(next_hop); + + for (i = 0; i < n_nexthops; i++) { + free(nexthops[i]); } + free(nexthops); } static void