Skip to content

Commit

Permalink
northd: Remove the protocol match from ECMP symmetric reply flows.
Browse files Browse the repository at this point in the history
There are 3 flows for matching ECMP symmetric reply that are different
in a single match part that is the protocol (udp/tcp/sctp) for ct.new
traffic and additional 3 flows (udp/tcp/sctp) for ct.est && !ct.rpl.

Remove the protocol requirement from those flows and merge the
remaining two onto single flow. This also reduces the logical flows
per ECMP reply from 6 to 1.

Fixes: 506f7d4 ("northd: rely on new actions for ecmp-symmetric routing")
Reported-at: https://issues.redhat.com/browse/FDP-358
Suggested-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ales Musil <amusil@redhat.com>
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
(cherry picked from commit 62d71aa)
  • Loading branch information
almusil authored and dceara committed Feb 8, 2024
1 parent f2e8130 commit 152e9d9
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 88 deletions.
79 changes: 3 additions & 76 deletions northd/northd.c
Original file line number Diff line number Diff line change
Expand Up @@ -10518,7 +10518,6 @@ add_ecmp_symmetric_reply_flows(struct lflow_table *lflows,
struct lflow_ref *lflow_ref)
{
const struct nbrec_logical_router_static_route *st_route = route->route;
struct ds base_match = DS_EMPTY_INITIALIZER;
struct ds match = DS_EMPTY_INITIALIZER;
struct ds actions = DS_EMPTY_INITIALIZER;
struct ds ecmp_reply = DS_EMPTY_INITIALIZER;
Expand All @@ -10530,14 +10529,14 @@ add_ecmp_symmetric_reply_flows(struct lflow_table *lflows,
/* If symmetric ECMP replies are enabled, then packets that arrive over
* an ECMP route need to go through conntrack.
*/
ds_put_format(&base_match, "inport == %s && ip%s.%s == %s",
ds_put_format(&match, "inport == %s && ip%s.%s == %s",
out_port->json_key,
IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "4" : "6",
route->is_src_route ? "dst" : "src",
cidr);
free(cidr);
ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, 100,
ds_cstr(&base_match), "ct_next;",
ds_cstr(&match), "ct_next;",
&st_route->header_, lflow_ref);

/* And packets that go out over an ECMP route need conntrack */
Expand All @@ -10551,78 +10550,7 @@ add_ecmp_symmetric_reply_flows(struct lflow_table *lflows,
* NOTE: we purposely are not clearing match before this
* ds_put_cstr() call. The previous contents are needed.
*/
ds_put_format(&match, "%s && (ct.new && !ct.est) && tcp",
ds_cstr(&base_match));
ds_put_format(&actions,
"ct_commit { ct_label.ecmp_reply_eth = eth.src; "
" %s = %" PRId64 ";}; "
"next;",
ct_ecmp_reply_port_match, out_port->sb->tunnel_key);
ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 100,
ds_cstr(&match), ds_cstr(&actions),
&st_route->header_,
lflow_ref);
ds_clear(&match);
ds_put_format(&match, "%s && (ct.new && !ct.est) && udp",
ds_cstr(&base_match));
ds_clear(&actions);
ds_put_format(&actions,
"ct_commit { ct_label.ecmp_reply_eth = eth.src; "
" %s = %" PRId64 ";}; "
"next;",
ct_ecmp_reply_port_match, out_port->sb->tunnel_key);
ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 100,
ds_cstr(&match), ds_cstr(&actions),
&st_route->header_,
lflow_ref);
ds_clear(&match);
ds_put_format(&match, "%s && (ct.new && !ct.est) && sctp",
ds_cstr(&base_match));
ds_clear(&actions);
ds_put_format(&actions,
"ct_commit { ct_label.ecmp_reply_eth = eth.src; "
" %s = %" PRId64 ";}; "
"next;",
ct_ecmp_reply_port_match, out_port->sb->tunnel_key);
ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 100,
ds_cstr(&match), ds_cstr(&actions),
&st_route->header_,
lflow_ref);

ds_clear(&match);
ds_put_format(&match,
"%s && (!ct.rpl && ct.est) && tcp",
ds_cstr(&base_match));
ds_clear(&actions);
ds_put_format(&actions,
"ct_commit { ct_label.ecmp_reply_eth = eth.src; "
" %s = %" PRId64 ";}; "
"next;",
ct_ecmp_reply_port_match, out_port->sb->tunnel_key);
ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 100,
ds_cstr(&match), ds_cstr(&actions),
&st_route->header_,
lflow_ref);

ds_clear(&match);
ds_put_format(&match,
"%s && (!ct.rpl && ct.est) && udp",
ds_cstr(&base_match));
ds_clear(&actions);
ds_put_format(&actions,
"ct_commit { ct_label.ecmp_reply_eth = eth.src; "
" %s = %" PRId64 ";}; "
"next;",
ct_ecmp_reply_port_match, out_port->sb->tunnel_key);
ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 100,
ds_cstr(&match), ds_cstr(&actions),
&st_route->header_,
lflow_ref);
ds_clear(&match);
ds_put_format(&match,
"%s && (!ct.rpl && ct.est) && sctp",
ds_cstr(&base_match));
ds_clear(&actions);
ds_put_cstr(&match, " && !ct.rpl && (ct.new || ct.est)");
ds_put_format(&actions,
"ct_commit { ct_label.ecmp_reply_eth = eth.src; "
" %s = %" PRId64 ";}; "
Expand Down Expand Up @@ -10676,7 +10604,6 @@ add_ecmp_symmetric_reply_flows(struct lflow_table *lflows,
action, &st_route->header_,
lflow_ref);

ds_destroy(&base_match);
ds_destroy(&match);
ds_destroy(&actions);
ds_destroy(&ecmp_reply);
Expand Down
4 changes: 2 additions & 2 deletions tests/ovn.at
Original file line number Diff line number Diff line change
Expand Up @@ -29008,7 +29008,7 @@ AT_CHECK([
grep "priority=200" | \
grep -c "move:NXM_NX_CT_LABEL\\[[\\]]->NXM_NX_XXREG1\\[[\\]],move:NXM_NX_XXREG1\\[[32..79\\]]->NXM_OF_ETH_DST"
done; :], [0], [dnl
6
2
1
0
0
Expand Down Expand Up @@ -29133,7 +29133,7 @@ AT_CHECK([
grep "priority=200" | \
grep -c "move:NXM_NX_CT_LABEL\\[[\\]]->NXM_NX_XXREG1\\[[\\]],move:NXM_NX_XXREG1\\[[32..79\\]]->NXM_OF_ETH_DST"
done; :], [0], [dnl
6
2
1
0
0
Expand Down
41 changes: 31 additions & 10 deletions tests/system-ovn.at
Original file line number Diff line number Diff line change
Expand Up @@ -6111,24 +6111,30 @@ on_exit 'ovs-ofctl dump-flows br-int'

NETNS_DAEMONIZE([alice1], [nc -l -k 80], [alice1.pid])
NS_CHECK_EXEC([bob1], [nc -z 10.0.0.2 80], [0])
NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.2 | FORMAT_PING], \
[0], [dnl
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])

# Ensure conntrack entry is present. We should not try to predict
# the tunnel key for the output port, so we strip it from the labels
# and just ensure that the known ethernet address is present.
AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.0.1) | \
sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl
icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x401020400000000
tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x401020400000000,protoinfo=(state=<cleared>)
])

# Ensure datapaths show conntrack states as expected
# Like with conntrack entries, we shouldn't try to predict
# port binding tunnel keys. So omit them from expected labels.
ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x401020400000000/.*)'
AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x401020400000000/.*)' -c], [0], [dnl
1
2
])
AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x401020400000000)' -c], [0], [dnl
1
2
])

# Flush conntrack entries for easier output parsing of next test.
Expand All @@ -6142,16 +6148,21 @@ ovn-nbctl set Logical_Switch_Port r2-ext \
ovn-nbctl --wait=hv sync

NS_CHECK_EXEC([bob1], [nc -z 10.0.0.2 80], [0])
NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.2 | FORMAT_PING], \
[0], [dnl
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x1001020400000000/.*)' -c], [0], [dnl
1
2
])
AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x1001020400000000)' -c], [0], [dnl
1
2
])

AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 0x1001020400000000 | FORMAT_CT(172.16.0.1) | \
sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl
sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000
tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000,protoinfo=(state=<cleared>)
])
# Check entries in table 76 and 77 expires w/o traffic
Expand Down Expand Up @@ -6303,24 +6314,29 @@ on_exit 'ovs-ofctl dump-flows br-int'

NETNS_DAEMONIZE([alice1], [nc -6 -l -k 80], [alice1.pid])
NS_CHECK_EXEC([bob1], [nc -6 -z fd01::2 80], [0])
NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 fd01::2 | FORMAT_PING], \
[0], [dnl
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])

# Ensure datapaths show conntrack states as expected
# Like with conntrack entries, we shouldn't try to predict
# port binding tunnel keys. So omit them from expected labels.
AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x401020400000000/.*)' -c], [0], [dnl
1
2
])

AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x401020400000000)' -c], [0], [dnl
1
2
])

# Ensure conntrack entry is present. We should not try to predict
# the tunnel key for the output port, so we strip it from the labels
# and just ensure that the known ethernet address is present.
AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd01::2) | \
sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl
sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x401020400000000
tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x401020400000000,protoinfo=(state=<cleared>)
])

Expand All @@ -6333,17 +6349,22 @@ ovn-nbctl --wait=hv set Logical_Switch_Port r2-ext \
type=router options:router-port=R2_ext addresses='"00:00:10:01:02:04"'

NS_CHECK_EXEC([bob1], [nc -6 -z fd01::2 80], [0])
NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 fd01::2 | FORMAT_PING], \
[0], [dnl
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])

AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x1001020400000000/.*)' -c], [0], [dnl
1
2
])
AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x1001020400000000)' -c], [0], [dnl
1
2
])

AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 0x1001020400000000 | FORMAT_CT(fd01::2) | \
sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl
icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000
tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000,protoinfo=(state=<cleared>)
])

Expand Down

0 comments on commit 152e9d9

Please sign in to comment.