Skip to content

Commit

Permalink
northd: prevents sending packet to conntrack for router ports
Browse files Browse the repository at this point in the history
As commented in northd.c, we should not use ct() for router ports.
When there are no stateful_acl, this patch prevents sending packet to conntrack
for router ports.
The patch does this by issuing ct_clear in ls_out_pre_lb stage so that hints
are not set in ls_out_acl_hint and ls_out_acl stages.

Note that ct_clear is not added for ingress for router ports as already done
for patch ports (no change by this patch on this aspect).

Also, this patch does not change the behavior for ACLs such as allow-related:
packets are still sent to conntrack, even for router ports. While this does
not work if router ports are distributed, allow-related ACLs work today on
router ports when those ports are handled on the same chassis for ingress and
egress traffic. This patch does not change that behavior.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2062431
Signed-off-by: Xavier Simonart <xsimonar@redhat.com>
Acked-by: Ales Musil <amusil@redhat.com>
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
  • Loading branch information
simonartxavier authored and dceara committed Mar 8, 2023
1 parent 07da3e3 commit d17ece7
Show file tree
Hide file tree
Showing 4 changed files with 195 additions and 15 deletions.
24 changes: 14 additions & 10 deletions northd/northd.c
Original file line number Diff line number Diff line change
Expand Up @@ -5779,20 +5779,24 @@ skip_port_from_conntrack(struct ovn_datapath *od, struct ovn_port *op,
* know about the connection, as the icmp request went through the logical
* router on hostA, not hostB. This would only work with distributed
* conntrack state across all chassis. */
struct ds match_in = DS_EMPTY_INITIALIZER;
struct ds match_out = DS_EMPTY_INITIALIZER;

ds_put_format(&match_in, "ip && inport == %s", op->json_key);
ds_put_format(&match_out, "ip && outport == %s", op->json_key);
const char *ingress_action = "next;";
const char *egress_action = od->has_stateful_acl
? "next;"
: "ct_clear; next;";

char *ingress_match = xasprintf("ip && inport == %s", op->json_key);
char *egress_match = xasprintf("ip && outport == %s", op->json_key);

ovn_lflow_add_with_lport_and_hint(lflows, od, in_stage, priority,
ds_cstr(&match_in), "next;", op->key,
&op->nbsp->header_);
ingress_match, ingress_action,
op->key, &op->nbsp->header_);
ovn_lflow_add_with_lport_and_hint(lflows, od, out_stage, priority,
ds_cstr(&match_out), "next;", op->key,
&op->nbsp->header_);
egress_match, egress_action,
op->key, &op->nbsp->header_);

ds_destroy(&match_in);
ds_destroy(&match_out);
free(ingress_match);
free(egress_match);
}

static void
Expand Down
10 changes: 10 additions & 0 deletions northd/ovn-northd.8.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2056,6 +2056,16 @@ output;
db="OVN_Northbound"/> table.
</p>

<p>
This table also has a priority-110 flow with the match
<code>outport == <var>I</var></code> for all logical switch
datapaths to move traffic to the next table, and, if there are no
stateful_acl, clear the ct_state. Where <var>I</var>
is the peer of a logical router port. This flow is added to
skip the connection tracking of packets which will be entering
logical router datapath from logical switch datapath for routing.
</p>

<h3>Egress Table 2: Pre-stateful</h3>

<p>
Expand Down
11 changes: 6 additions & 5 deletions tests/ovn-northd.at
Original file line number Diff line number Diff line change
Expand Up @@ -4111,6 +4111,7 @@ check ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
check ovn-nbctl --wait=sb sync

check_stateful_flows() {
action=$1
ovn-sbctl dump-flows sw0 > sw0flows
AT_CAPTURE_FILE([sw0flows])

Expand Down Expand Up @@ -4144,12 +4145,12 @@ check_stateful_flows() {
table=??(ls_in_stateful ), priority=100 , match=(reg0[[1]] == 1 && reg0[[13]] == 1), action=(ct_commit { ct_mark.blocked = 0; ct_label.label = reg3; }; next;)
])

AT_CHECK([grep "ls_out_pre_lb" sw0flows | sort], [0], [dnl
AT_CHECK_UNQUOTED([grep "ls_out_pre_lb" sw0flows | sort], [0], [dnl
table=1 (ls_out_pre_lb ), priority=0 , match=(1), action=(next;)
table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;)
table=1 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), action=(next;)
table=1 (ls_out_pre_lb ), priority=110 , match=(eth.src == $svc_monitor_mac), action=(next;)
table=1 (ls_out_pre_lb ), priority=110 , match=(ip && outport == "sw0-lr0"), action=(next;)
table=1 (ls_out_pre_lb ), priority=110 , match=(eth.src == \$svc_monitor_mac), action=(next;)
table=1 (ls_out_pre_lb ), priority=110 , match=(ip && outport == "sw0-lr0"), action=($action)
table=1 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;)
table=1 (ls_out_pre_lb ), priority=110 , match=(reg0[[16]] == 1), action=(next;)
])
Expand All @@ -4169,13 +4170,13 @@ check_stateful_flows() {
])
}

check_stateful_flows
check_stateful_flows "ct_clear; next;"

# Add few ACLs
check ovn-nbctl --wait=sb acl-add sw0 from-lport 1002 "ip4 && tcp && tcp.dst == 80" allow-related
check ovn-nbctl --wait=sb acl-add sw0 to-lport 1002 "ip4 && tcp && tcp.src == 80" drop

check_stateful_flows
check_stateful_flows "next;"

# Remove load balancers from sw0
check ovn-nbctl ls-lb-del sw0 lb0
Expand Down
165 changes: 165 additions & 0 deletions tests/system-ovn.at
Original file line number Diff line number Diff line change
Expand Up @@ -10851,3 +10851,168 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
/connection dropped.*/d"])
AT_CLEANUP
])

OVN_FOR_EACH_NORTHD([
AT_SETUP([ACL and committing to conntrack])
AT_KEYWORDS([acl])

CHECK_CONNTRACK()
CHECK_CONNTRACK_NAT()
ovn_start
OVS_TRAFFIC_VSWITCHD_START()
ADD_BR([br-int])
# Set external-ids in br-int needed for ovn-controller
ovs-vsctl \
-- set Open_vSwitch . external-ids:system-id=hv1 \
-- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
-- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
-- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
-- set bridge br-int fail-mode=secure other-config:disable-in-band=true

start_daemon ovn-controller

check ovn-nbctl lr-add r1
check ovn-nbctl lrp-add r1 r1_s1 00:de:ad:fe:00:01 173.0.1.1/24
check ovn-nbctl lrp-add r1 r1_s2 00:de:ad:fe:00:02 173.0.2.1/24

check ovn-nbctl ls-add s1
check ovn-nbctl lsp-add s1 s1_r1
check ovn-nbctl lsp-set-type s1_r1 router
check ovn-nbctl lsp-set-addresses s1_r1 router
check ovn-nbctl lsp-set-options s1_r1 router-port=r1_s1

check ovn-nbctl ls-add s2
check ovn-nbctl lsp-add s2 s2_r1
check ovn-nbctl lsp-set-type s2_r1 router
check ovn-nbctl lsp-set-addresses s2_r1 router
check ovn-nbctl lsp-set-options s2_r1 router-port=r1_s2

check ovn-nbctl lsp-add s1 vm1
check ovn-nbctl lsp-set-addresses vm1 "00:de:ad:01:00:01 173.0.1.2"

check ovn-nbctl lsp-add s2 vm2
check ovn-nbctl lsp-set-addresses vm2 "00:de:ad:01:00:02 173.0.2.2"

check ovn-nbctl lsp-add s2 vm3
check ovn-nbctl lsp-set-addresses vm3 "00:de:ad:01:00:03 173.0.2.3"

check ovn-nbctl lb-add lb1 30.0.0.1:80 173.0.2.2:80 udp
check ovn-nbctl lb-add lb2 20.0.0.1:80 173.0.1.2:80 udp
check ovn-nbctl lb-add lb1 30.0.0.1 173.0.2.2
check ovn-nbctl lb-add lb2 173.0.2.250 173.0.1.3
check ovn-nbctl ls-lb-add s1 lb1
check ovn-nbctl ls-lb-add s2 lb2

ADD_NAMESPACES(vm1)
ADD_VETH(vm1, vm1, br-int, "173.0.1.2/24", "00:de:ad:01:00:01", \
"173.0.1.1")
ADD_NAMESPACES(vm2)
ADD_VETH(vm2, vm2, br-int, "173.0.2.2/24", "00:de:ad:01:00:02", \
"173.0.2.1")
ADD_NAMESPACES(vm3)
ADD_VETH(vm3, vm3, br-int, "173.0.2.250/24", "00:de:ad:01:00:03", \
"173.0.2.1")

check ovn-nbctl acl-add s1 from-lport 1001 "ip" allow
check ovn-nbctl acl-add s1 to-lport 1002 "ip" allow
check ovn-nbctl acl-add s2 from-lport 1003 "ip" allow
check ovn-nbctl acl-add s2 to-lport 1004 "ip" allow
check ovn-nbctl --wait=hv sync
AS_BOX([initial ping])
# Send ping in background. Same ping, same flow throughout the test
on_exit 'kill $(pidof ping)'
NS_EXEC([vm1], [ping -c 10000 -i 0.1 30.0.0.1 > icmp.txt &])

# Check for conntrack entries
OVS_WAIT_FOR_OUTPUT([
ovs-appctl dpctl/dump-conntrack | FORMAT_CT(173.0.1.2) | \
sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
icmp,orig=(src=173.0.1.2,dst=173.0.2.2,id=<cleared>,type=8,code=0),reply=(src=173.0.2.2,dst=173.0.1.2,id=<cleared>,type=0,code=0),zone=<cleared>
icmp,orig=(src=173.0.1.2,dst=30.0.0.1,id=<cleared>,type=8,code=0),reply=(src=173.0.2.2,dst=173.0.1.2,id=<cleared>,type=0,code=0),zone=<cleared>,mark=2
])

# Now check for multiple ct_commits
ovs-appctl dpctl/dump-flows > dp_flows
zone_id=$(ovn-appctl -t ovn-controller ct-zone-list | grep vm1 | cut -d ' ' -f2)
AT_CHECK([test 1 = `cat dp_flows | grep "commit,zone=$zone_id" | wc -l`])

check ovn-nbctl acl-del s1 from-lport 1001 "ip"
check ovn-nbctl acl-del s1 to-lport 1002 "ip"
check ovn-nbctl acl-del s2 from-lport 1003 "ip"
check ovn-nbctl acl-del s2 to-lport 1004 "ip"

AS_BOX([acl drop echo request])
check ovn-nbctl --log --severity=alert --name=drop-flow-s1 acl-add s1 to-lport 2001 icmp4 drop
# acl-drop to-lport s1 apply to traffic from s1 to vm1 and s1 to r1.
check ovn-nbctl --wait=hv sync

# Check that traffic is blocked
# Wait for some packets to hit the rule to avoid potential race conditions. Then count packets.
OVS_WAIT_UNTIL([test `cat ovn-controller.log | grep acl_log | grep -c drop-flow-s1` -gt "0"])
total_icmp_pkts=$(cat icmp.txt | grep ttl | wc -l)

# Wait some time and check whether packets went through. In the worse race condition, the sleep is too short
# and this test will still succeed.
sleep 1
OVS_WAIT_UNTIL([
total_icmp1_pkts=$(cat icmp.txt | grep ttl | wc -l)
test "${total_icmp1_pkts}" -eq "${total_icmp_pkts}"
])

AS_BOX([acl allow-related echo request])
check ovn-nbctl acl-add s1 to-lport 2002 "icmp4 && ip4.src == 173.0.1.2" allow-related
# This rule has higher priority than to-lport 2001 icmp4 drop.
# So traffic from s1 (w/ src=173.0.1.2) to r1 should be accepted
# (return) traffic from s1 to vm1 should be accepted as return traffic
check ovn-nbctl --wait=hv sync
OVS_WAIT_UNTIL([
total_icmp1_pkts=$(cat icmp.txt | grep ttl | wc -l)
test "${total_icmp1_pkts}" -gt "${total_icmp_pkts}"
])

# Check we did not break handling acl-drop for existing flows
AS_BOX([acl drop echo request in s2])
check ovn-nbctl acl-del s1 to-lport 2001 icmp4
check ovn-nbctl --log --severity=alert --name=drop-flow-s2 acl-add s2 to-lport 2001 icmp4 drop
check ovn-nbctl --wait=hv sync

OVS_WAIT_UNTIL([test `cat ovn-controller.log | grep acl_log | grep -c drop-flow-s2` -gt "0"])

OVS_WAIT_FOR_OUTPUT([
ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.1) | \
sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | \
sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl
icmp,orig=(src=173.0.1.2,dst=30.0.0.1,id=<cleared>,type=8,code=0),reply=(src=173.0.2.2,dst=173.0.1.2,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>
])
total_icmp_pkts=$(cat icmp.txt | grep ttl | wc -l)

# Allow ping again
AS_BOX([acl allow echo request in s2])
check ovn-nbctl acl-add s2 to-lport 2005 icmp4 allow
check ovn-nbctl --wait=hv sync
OVS_WAIT_FOR_OUTPUT([
ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.1) | \
sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
icmp,orig=(src=173.0.1.2,dst=30.0.0.1,id=<cleared>,type=8,code=0),reply=(src=173.0.2.2,dst=173.0.1.2,id=<cleared>,type=0,code=0),zone=<cleared>,mark=2
])
OVS_WAIT_UNTIL([
total_icmp1_pkts=$(cat icmp.txt | grep ttl | wc -l)
test "${total_icmp1_pkts}" -gt "${total_icmp_pkts}"
])

OVS_APP_EXIT_AND_WAIT([ovn-controller])

as ovn-sb
OVS_APP_EXIT_AND_WAIT([ovsdb-server])

as ovn-nb
OVS_APP_EXIT_AND_WAIT([ovsdb-server])

as northd
OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE])

as
OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
/connection dropped.*/d"])
AT_CLEANUP
])

0 comments on commit d17ece7

Please sign in to comment.