Skip to content

Commit

Permalink
northd: make connected routes have higher priority than static
Browse files Browse the repository at this point in the history
With this patch routes to connected networks have higher
priority than static routes with same ip_prefix.

This brings commonly-used behaviour for routes lookup order:
1: longest prefix match
2: metric

The metric has next lookup order:
1: connected routes
2: static routes

Earlier static and connected routes with same ip_prefix had
the same priority, so it was impossible to predict which one
is used for routing decision.

Each route's prefix length has its own 'slot' in lflow prios.
Now prefix length space is calculated using next information:
to calculate route's priority prefixlen multiplied by 3
+ route origin offset (0 - source-based route; 1 - static route;
2 - directly-connected route2).

Also, enlarge prio for generic records in lr_in_ip_routing stage
by 10000.

Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
Signed-off-by: Numan Siddique <numans@ovn.org>
  • Loading branch information
odivlad authored and numansiddique committed Nov 22, 2021
1 parent 9ce8151 commit b0d9f46
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 31 deletions.
50 changes: 35 additions & 15 deletions northd/northd.c
Expand Up @@ -309,6 +309,15 @@ enum ovn_stage {
*
*/

/*
* Route offsets implement logic to prioritize traffic for routes with
* same ip_prefix values:
* - connected route overrides static one;
* - static route overrides connected route. */
#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
#define ROUTE_PRIO_OFFSET_STATIC 1
#define ROUTE_PRIO_OFFSET_CONNECTED 2

/* Returns an "enum ovn_stage" built from the arguments. */
static enum ovn_stage
ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
Expand Down Expand Up @@ -8830,6 +8839,7 @@ struct ecmp_groups_node {
struct in6_addr prefix;
unsigned int plen;
bool is_src_route;
const char *origin;
uint16_t route_count;
struct ovs_list route_list; /* Contains ecmp_route_list_node */
};
Expand Down Expand Up @@ -8867,6 +8877,7 @@ ecmp_groups_add(struct hmap *ecmp_groups,
eg->prefix = route->prefix;
eg->plen = route->plen;
eg->is_src_route = route->is_src_route;
eg->origin = smap_get_def(&route->route->options, "origin", "");
ovs_list_init(&eg->route_list);
ecmp_groups_add_route(eg, route);

Expand Down Expand Up @@ -8967,19 +8978,20 @@ build_route_prefix_s(const struct in6_addr *prefix, unsigned int plen)
static void
build_route_match(const struct ovn_port *op_inport, const char *network_s,
int plen, bool is_src_route, bool is_ipv4, struct ds *match,
uint16_t *priority)
uint16_t *priority, int ofs)
{
const char *dir;
/* The priority here is calculated to implement longest-prefix-match
* routing. */
if (is_src_route) {
dir = "src";
*priority = plen * 2;
ofs = 0;
} else {
dir = "dst";
*priority = (plen * 2) + 1;
}

*priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs;

if (op_inport) {
ds_put_format(match, "inport == %s && ", op_inport->json_key);
}
Expand Down Expand Up @@ -9121,7 +9133,7 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
out_port->lrp_networks.ea_s,
IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
port_ip, out_port->json_key);
ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 10300,
ds_cstr(&match), ds_cstr(&actions),
&st_route->header_);

Expand Down Expand Up @@ -9151,8 +9163,10 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od,
struct ds route_match = DS_EMPTY_INITIALIZER;

char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen);
int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ?
ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC;
build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route, is_ipv4,
&route_match, &priority);
&route_match, &priority, ofs);
free(prefix_s);

struct ds actions = DS_EMPTY_INITIALIZER;
Expand Down Expand Up @@ -9228,7 +9242,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
const struct ovn_port *op, const char *lrp_addr_s,
const char *network_s, int plen, const char *gateway,
bool is_src_route, const struct ovsdb_idl_row *stage_hint,
bool is_discard_route)
bool is_discard_route, int ofs)
{
bool is_ipv4 = strchr(network_s, '.') ? true : false;
struct ds match = DS_EMPTY_INITIALIZER;
Expand All @@ -9244,7 +9258,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
}
}
build_route_match(op_inport, network_s, plen, is_src_route, is_ipv4,
&match, &priority);
&match, &priority, ofs);

struct ds common_actions = DS_EMPTY_INITIALIZER;
struct ds actions = DS_EMPTY_INITIALIZER;
Expand Down Expand Up @@ -9304,10 +9318,15 @@ build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
}
}

int ofs = !strcmp(smap_get_def(&route->options, "origin", ""),
ROUTE_ORIGIN_CONNECTED) ? ROUTE_PRIO_OFFSET_CONNECTED
: ROUTE_PRIO_OFFSET_STATIC;

char *prefix_s = build_route_prefix_s(&route_->prefix, route_->plen);
add_route(lflows, route_->is_discard_route ? od : out_port->od, out_port,
lrp_addr_s, prefix_s, route_->plen, route->nexthop,
route_->is_src_route, &route->header_, route_->is_discard_route);
route_->is_src_route, &route->header_, route_->is_discard_route,
ofs);

free(prefix_s);
}
Expand Down Expand Up @@ -10721,14 +10740,14 @@ build_ip_routing_flows_for_lrouter_port(
add_route(lflows, op->od, op, op->lrp_networks.ipv4_addrs[i].addr_s,
op->lrp_networks.ipv4_addrs[i].network_s,
op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
&op->nbrp->header_, false);
&op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
}

for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
add_route(lflows, op->od, op, op->lrp_networks.ipv6_addrs[i].addr_s,
op->lrp_networks.ipv6_addrs[i].network_s,
op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
&op->nbrp->header_, false);
&op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
}
} else if (lsp_is_router(op->nbsp)) {
struct ovn_port *peer = ovn_port_get_peer(ports, op);
Expand All @@ -10751,7 +10770,8 @@ build_ip_routing_flows_for_lrouter_port(
peer->lrp_networks.ipv4_addrs[0].addr_s,
laddrs->ipv4_addrs[k].network_s,
laddrs->ipv4_addrs[k].plen, NULL, false,
&peer->nbrp->header_, false);
&peer->nbrp->header_, false,
ROUTE_PRIO_OFFSET_CONNECTED);
}
}
}
Expand Down Expand Up @@ -10822,7 +10842,7 @@ build_mcast_lookup_flows_for_lrouter(
/* Drop IPv6 multicast traffic that shouldn't be forwarded,
* i.e., router solicitation and router advertisement.
*/
ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10550,
"nd_rs || nd_ra", "drop;");
if (!od->mcast_info.rtr.relay) {
return;
Expand Down Expand Up @@ -10850,23 +10870,23 @@ build_mcast_lookup_flows_for_lrouter(
}
ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
igmp_group->mcgroup.name);
ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10500,
ds_cstr(match), ds_cstr(actions));
}

/* If needed, flood unregistered multicast on statically configured
* ports. Otherwise drop any multicast traffic.
*/
if (od->mcast_info.rtr.flood_static) {
ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
"ip4.mcast || ip6.mcast",
"clone { "
"outport = \""MC_STATIC"\"; "
"ip.ttl--; "
"next; "
"};");
} else {
ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
"ip4.mcast || ip6.mcast", "drop;");
}
}
Expand Down
26 changes: 14 additions & 12 deletions northd/ovn-northd.8.xml
Expand Up @@ -3355,9 +3355,9 @@ output;
table. This table, instead, is responsible for determine the ECMP
group id and select a member id within the group based on 5-tuple
hashing. It stores group id in <code>reg8[0..15]</code> and member id in
<code>reg8[16..31]</code>. This step is skipped with a priority-300 rule
if the traffic going out the ECMP route is reply traffic, and the ECMP
route was configured to use symmetric replies. Instead, the stored
<code>reg8[16..31]</code>. This step is skipped with a priority-10300
rule if the traffic going out the ECMP route is reply traffic, and the
ECMP route was configured to use symmetric replies. Instead, the stored
<code>ct_label</code> value is used to choose the destination. The least
significant 48 bits of the <code>ct_label</code> tell the destination MAC
address to which the packet should be sent. The next 16 bits tell the
Expand All @@ -3373,14 +3373,14 @@ output;
<ul>
<li>
<p>
Priority-550 flow that drops IPv6 Router Solicitation/Advertisement
Priority-10550 flow that drops IPv6 Router Solicitation/Advertisement
packets that were not processed in previous tables.
</p>
</li>

<li>
<p>
Priority-500 flows that match IP multicast traffic destined to
Priority-10500 flows that match IP multicast traffic destined to
groups registered on any of the attached switches and sets
<code>outport</code> to the associated multicast group that will
eventually flood the traffic to all interested attached logical
Expand All @@ -3390,7 +3390,7 @@ output;

<li>
<p>
Priority-450 flow that matches unregistered IP multicast traffic
Priority-10450 flow that matches unregistered IP multicast traffic
and sets <code>outport</code> to the <code>MC_STATIC</code>
multicast group, which <code>ovn-northd</code> populates with the
logical ports that have
Expand All @@ -3404,10 +3404,11 @@ output;
<p>
IPv4 routing table. For each route to IPv4 network <var>N</var> with
netmask <var>M</var>, on router port <var>P</var> with IP address
<var>A</var> and Ethernet
address <var>E</var>, a logical flow with match <code>ip4.dst ==
<var>N</var>/<var>M</var></code>, whose priority is the number of
1-bits in <var>M</var>, has the following actions:
<var>A</var> and Ethernet address <var>E</var>, a logical flow with
match <code>ip4.dst == <var>N</var>/<var>M</var></code>, whose
priority is the number of 1-bits in <var>M</var> multiplied by 3 +
route offset: 0 for src-ip route policy, 1 for dst-ip static routes.
This flow has the following actions:
</p>

<pre>
Expand Down Expand Up @@ -3440,8 +3441,9 @@ next;
<var>P</var> with IP address <var>A</var> and Ethernet address
<var>E</var>, a logical flow with match in CIDR notation
<code>ip6.dst == <var>N</var>/<var>M</var></code>,
whose priority is the integer value of <var>M</var>, has the
following actions:
whose priority is the integer value of <var>M</var> multiplied by 3 +
route offset: 0 for src-ip route policy, 1 for dst-ip static routes.
This flow has the following actions:
</p>

<pre>
Expand Down
8 changes: 4 additions & 4 deletions tests/ovn-northd.at
Expand Up @@ -5479,7 +5479,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16

ovn-sbctl dump-flows lr0 > lr0flows
AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
table=??(lr_in_ip_routing ), priority=65 , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
table=??(lr_in_ip_routing ), priority=97 , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
])
AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
table=??(lr_in_ip_routing_ecmp), priority=100 , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
Expand All @@ -5492,7 +5492,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16

ovn-sbctl dump-flows lr0 > lr0flows
AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
table=??(lr_in_ip_routing ), priority=65 , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
table=??(lr_in_ip_routing ), priority=97 , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
])
AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
table=??(lr_in_ip_routing_ecmp), priority=100 , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
Expand All @@ -5507,14 +5507,14 @@ check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
ovn-sbctl dump-flows lr0 > lr0flows

AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
table=??(lr_in_ip_routing ), priority=49 , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
table=??(lr_in_ip_routing ), priority=73 , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
])

check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public

ovn-sbctl dump-flows lr0 > lr0flows
AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
table=??(lr_in_ip_routing ), priority=49 , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
table=??(lr_in_ip_routing ), priority=73 , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
])

AT_CLEANUP
Expand Down

0 comments on commit b0d9f46

Please sign in to comment.