Skip to content

Commit

Permalink
ovn: add geneve PMTUD support
Browse files Browse the repository at this point in the history
Introduce specific flows for E/W ICMPv{4,6} need frag packets
generated by the tunnel module to be delivered back to the
source port, if packets to be tunnelled do not fit path MTU.
This patch enables PMTUD for East/West Geneve traffic.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2241711
Tested-by: Jaime Ruiz <jcaamano@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Numan Siddique <numans@ovn.org>
  • Loading branch information
LorenzoBianconi authored and numansiddique committed Dec 4, 2023
1 parent 56f62ef commit 450e41e
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 3 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/ovn-fake-multinode-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ jobs:
fail-fast: false
matrix:
cfg:
- { branch: "main" }
- { branch: "branch-22.03" }
- { branch: "main", testsuiteflags: ""}
- { branch: "branch-22.03", testsuiteflags: "-k 'basic test'" }
name: multinode tests ${{ join(matrix.cfg.*, ' ') }}
env:
RUNC_CMD: podman
Expand Down Expand Up @@ -176,7 +176,7 @@ jobs:

- name: Run fake-multinode system tests
run: |
if ! sudo make check-multinode; then
if ! sudo make check-multinode TESTSUITEFLAGS="${{ matrix.cfg.testsuiteflags }}"; then
sudo podman exec -it ovn-central ovn-nbctl show || :
sudo podman exec -it ovn-central ovn-sbctl show || :
sudo podman exec -it ovn-chassis-1 ovs-vsctl show || :
Expand Down
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Post v23.09.0
connection method and doesn't require additional probing.
external_ids:ovn-openflow-probe-interval configuration option for
ovn-controller no longer matters and is ignored.
- Enable PMTU discovery on geneve tunnels for E/W traffic.

OVN v23.09.0 - 15 Sep 2023
--------------------------
Expand Down
45 changes: 45 additions & 0 deletions controller/physical.c
Original file line number Diff line number Diff line change
Expand Up @@ -2446,6 +2446,51 @@ physical_run(struct physical_ctx *p_ctx,
&ofpacts, hc_uuid);
}

/* Add specif flows for E/W ICMPv{4,6} packets if tunnelled packets do not
* fit path MTU.
*/
HMAP_FOR_EACH (tun, hmap_node, p_ctx->chassis_tunnels) {
ofpbuf_clear(&ofpacts);
if (tun->type == GENEVE) {
put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts);
put_move(p_ctx->mff_ovn_geneve, 16, MFF_LOG_OUTPORT, 0, 16,
&ofpacts);
put_move(p_ctx->mff_ovn_geneve, 0, MFF_LOG_INPORT, 0, 15,
&ofpacts);
} else if (tun->type == STT) {
put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts);
put_move(MFF_TUN_ID, 40, MFF_LOG_OUTPORT, 0, 16, &ofpacts);
put_move(MFF_TUN_ID, 24, MFF_LOG_INPORT, 0, 15, &ofpacts);
} else if (tun->type == VXLAN) {
put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 12, &ofpacts);
put_move(MFF_TUN_ID, 12, MFF_LOG_INPORT, 0, 12, &ofpacts);
} else {
OVS_NOT_REACHED();
}
put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts);

/* IPv4 */
struct match match = MATCH_CATCHALL_INITIALIZER;
match_set_in_port(&match, tun->ofport);
match_set_dl_type(&match, htons(ETH_TYPE_IP));
match_set_nw_proto(&match, IPPROTO_ICMP);
match_set_icmp_type(&match, 3);
match_set_icmp_code(&match, 4);

ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120, 0, &match,
&ofpacts, hc_uuid);
/* IPv6 */
match_init_catchall(&match);
match_set_in_port(&match, tun->ofport);
match_set_dl_type(&match, htons(ETH_TYPE_IPV6));
match_set_nw_proto(&match, IPPROTO_ICMPV6);
match_set_icmp_type(&match, 2);
match_set_icmp_code(&match, 0);

ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120, 0, &match,
&ofpacts, hc_uuid);
}

/* Add VXLAN specific rules to transform port keys
* from 12 bits to 16 bits used elsewhere. */
HMAP_FOR_EACH (tun, hmap_node, p_ctx->chassis_tunnels) {
Expand Down
24 changes: 24 additions & 0 deletions northd/northd.c
Original file line number Diff line number Diff line change
Expand Up @@ -12765,6 +12765,28 @@ build_lrouter_force_snat_flows(struct hmap *lflows, struct ovn_datapath *od,
ds_destroy(&actions);
}

/* Following flows are used to manage traffic redirected by the kernel
* (e.g. ICMP errors packets) that enter the cluster from the geneve ports
*/
static void
build_lrouter_redirected_traffic_flows(
struct ovn_port *op, struct hmap *lflows,
struct ds *match, struct ds *actions)
{
ovs_assert(op->nbrp);
if (!is_l3dgw_port(op)) {
return;
}

ds_clear(match);
ds_put_format(match, "inport == %s && !is_chassis_resident(%s)",
op->cr_port->json_key, op->cr_port->json_key);
ds_clear(actions);
ds_put_format(actions, "inport = %s; next;", op->json_key);
ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 120,
ds_cstr(match), ds_cstr(actions));
}

static void
build_lrouter_force_snat_flows_op(struct ovn_port *op,
struct hmap *lflows,
Expand Down Expand Up @@ -16168,6 +16190,8 @@ build_lswitch_and_lrouter_iterate_by_lrp(struct ovn_port *op,
&lsi->match, &lsi->actions, lsi->meter_groups);
build_lrouter_force_snat_flows_op(op, lsi->lflows, &lsi->match,
&lsi->actions);
build_lrouter_redirected_traffic_flows(op, lsi->lflows, &lsi->match,
&lsi->actions);
}

static void *
Expand Down
8 changes: 8 additions & 0 deletions northd/ovn-northd.8.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2463,6 +2463,14 @@ output;
(LBs, NAT).
</p>

<p>
For each gateway port <var>GW</var> on a distributed logical router
a priority-120 flow that matches <code>inport == <var>cr-GW</var>
&amp;&amp; !is_chassis_resident(<var>cr-GW</var>)</code> where
<var>cr-GW</var> is the chassis resident port of <var>GW</var>,
stores <var>GW</var> as inport and advances to the next table.
</p>

<p>
For a distributed logical router or for gateway router where
the port is configured with <code>options:gateway_mtu</code>
Expand Down
69 changes: 69 additions & 0 deletions tests/multinode.at
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,72 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | F
])

AT_CLEANUP

AT_SETUP([ovn multinode geneve pmtu])

# Check that ovn-fake-multinode setup is up and running
check_fake_multinode_setup

# Delete the multinode NB and OVS resources before starting the test.
cleanup_multinode_resources

# Test East-West switching
check multinode_nbctl ls-add sw0
check multinode_nbctl lsp-add sw0 sw0-port1
check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
check multinode_nbctl lsp-add sw0 sw0-port2
check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"

m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a

m_wait_for_ports_up

M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
[0], [dnl
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])

# Create the second logical switch with one port
check multinode_nbctl ls-add sw1
check multinode_nbctl lsp-add sw1 sw1-port1
check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"

# Create a logical router and attach both logical switches
check multinode_nbctl lr-add lr0
check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
check multinode_nbctl lsp-add sw0 sw0-lr0
check multinode_nbctl lsp-set-type sw0-lr0 router
check multinode_nbctl lsp-set-addresses sw0-lr0 router
check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0

check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
check multinode_nbctl lsp-add sw1 sw1-lr0
check multinode_nbctl lsp-set-type sw1-lr0 router
check multinode_nbctl lsp-set-addresses sw1-lr0 router
check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1

m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::4/64 1000::a

m_wait_for_ports_up sw1-port1

M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
[0], [dnl
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])

# Change ptmu for the geneve tunnel
m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1142"])

check multinode_nbctl lrp-set-gateway-chassis lr0-sw0 ovn-chassis-1 10
check multinode_nbctl lrp-set-gateway-chassis lr0-sw1 ovn-chassis-2 10

M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])

m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1142"])

AT_CLEANUP
6 changes: 6 additions & 0 deletions tests/ovn-northd.at
Original file line number Diff line number Diff line change
Expand Up @@ -6480,6 +6480,9 @@ AT_CAPTURE_FILE([lrflows])

# Check the flows in lr_in_admission stage
AT_CHECK([grep lr_in_admission lrflows | grep cr-DR | sort], [0], [dnl
table=0 (lr_in_admission ), priority=120 , match=(inport == "cr-DR-S1" && !is_chassis_resident("cr-DR-S1")), action=(inport = "DR-S1"; next;)
table=0 (lr_in_admission ), priority=120 , match=(inport == "cr-DR-S2" && !is_chassis_resident("cr-DR-S2")), action=(inport = "DR-S2"; next;)
table=0 (lr_in_admission ), priority=120 , match=(inport == "cr-DR-S3" && !is_chassis_resident("cr-DR-S3")), action=(inport = "DR-S3"; next;)
table=0 (lr_in_admission ), priority=50 , match=(eth.dst == 02:ac:10:01:00:01 && inport == "DR-S1" && is_chassis_resident("cr-DR-S1")), action=(xreg0[[0..47]] = 02:ac:10:01:00:01; next;)
table=0 (lr_in_admission ), priority=50 , match=(eth.dst == 03:ac:10:01:00:01 && inport == "DR-S2" && is_chassis_resident("cr-DR-S2")), action=(xreg0[[0..47]] = 03:ac:10:01:00:01; next;)
table=0 (lr_in_admission ), priority=50 , match=(eth.dst == 04:ac:10:01:00:01 && inport == "DR-S3" && is_chassis_resident("cr-DR-S3")), action=(xreg0[[0..47]] = 04:ac:10:01:00:01; next;)
Expand Down Expand Up @@ -6539,6 +6542,7 @@ AT_CAPTURE_FILE([lrflows])

# Check the flows in lr_in_admission stage
AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 's/table=../table=??/' | sort], [0], [dnl
table=??(lr_in_admission ), priority=120 , match=(inport == "cr-lrp1" && !is_chassis_resident("cr-lrp1")), action=(inport = "lrp1"; next;)
table=??(lr_in_admission ), priority=50 , match=(eth.dst == 00:00:00:00:00:01 && inport == "lrp1" && is_chassis_resident("cr-lrp1")), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
table=??(lr_in_admission ), priority=50 , match=(eth.mcast && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
])
Expand All @@ -6560,6 +6564,7 @@ AT_CAPTURE_FILE([lrflows])

# Check the flows in lr_in_admission stage
AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 's/table=../table=??/' | sort], [0], [dnl
table=??(lr_in_admission ), priority=120 , match=(inport == "cr-lrp1" && !is_chassis_resident("cr-lrp1")), action=(inport = "lrp1"; next;)
table=??(lr_in_admission ), priority=50 , match=(eth.dst == 00:00:00:00:00:01 && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
table=??(lr_in_admission ), priority=50 , match=(eth.mcast && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
])
Expand All @@ -6578,6 +6583,7 @@ AT_CAPTURE_FILE([lrflows])

# Check the flows in lr_in_admission stage
AT_CHECK([grep lr_in_admission lrflows | grep lrp1 | sed 's/table=../table=??/' | sort], [0], [dnl
table=??(lr_in_admission ), priority=120 , match=(inport == "cr-lrp1" && !is_chassis_resident("cr-lrp1")), action=(inport = "lrp1"; next;)
table=??(lr_in_admission ), priority=50 , match=(eth.dst == 00:00:00:00:00:01 && inport == "lrp1" && is_chassis_resident("cr-lrp1")), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
table=??(lr_in_admission ), priority=50 , match=(eth.mcast && inport == "lrp1"), action=(xreg0[[0..47]] = 00:00:00:00:00:01; next;)
])
Expand Down

0 comments on commit 450e41e

Please sign in to comment.