Skip to content

Commit

Permalink
ovn: Add geneve PMTUD support.
Browse files Browse the repository at this point in the history
Introduce specif flows for E/W ICMPv{4,6} packets if tunnelled packets
do not fit path MTU. This patch enable PMTUD for East/West Geneve traffic.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2241711
Tested-at: https://github.com/LorenzoBianconi/ovn/actions/runs/7627345562/job/20777962659
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
  • Loading branch information
LorenzoBianconi authored and numansiddique committed Jan 23, 2024
1 parent 425f699 commit fa35acc
Show file tree
Hide file tree
Showing 9 changed files with 985 additions and 6 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/ovn-fake-multinode-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ jobs:
fail-fast: false
matrix:
cfg:
- { branch: "main" }
- { branch: "branch-22.03" }
- { branch: "main", testsuiteflags: ""}
- { branch: "branch-22.03", testsuiteflags: "-k 'basic test'" }
name: multinode tests ${{ join(matrix.cfg.*, ' ') }}
env:
RUNC_CMD: podman
Expand Down Expand Up @@ -176,7 +176,7 @@ jobs:

- name: Run fake-multinode system tests
run: |
if ! sudo make check-multinode; then
if ! sudo make check-multinode TESTSUITEFLAGS="${{ matrix.cfg.testsuiteflags }}"; then
sudo podman exec -it ovn-central ovn-nbctl show || :
sudo podman exec -it ovn-central ovn-sbctl show || :
sudo podman exec -it ovn-chassis-1 ovs-vsctl show || :
Expand Down
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Post v23.09.0
- ovn-northd-ddlog has been removed.
- A new LSP option "enable_router_port_acl" has been added to enable
conntrack for the router port whose peer is l3dgw_port if set it true.
- Enable PMTU discovery on geneve tunnels for E/W traffic.

OVN v23.09.0 - 15 Sep 2023
--------------------------
Expand Down
30 changes: 29 additions & 1 deletion controller/physical.c
Original file line number Diff line number Diff line change
Expand Up @@ -2452,9 +2452,37 @@ physical_run(struct physical_ctx *p_ctx,
}

put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts);

ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, 0, &match,
&ofpacts, hc_uuid);

/* Set allow rx from tunnel bit. */
put_load(1, MFF_LOG_FLAGS, MLF_RX_FROM_TUNNEL_BIT, 1, &ofpacts);

/* Add specif flows for E/W ICMPv{4,6} packets if tunnelled packets
* do not fit path MTU.
*/
put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts);

/* IPv4 */
match_init_catchall(&match);
match_set_in_port(&match, tun->ofport);
match_set_dl_type(&match, htons(ETH_TYPE_IP));
match_set_nw_proto(&match, IPPROTO_ICMP);
match_set_icmp_type(&match, 3);
match_set_icmp_code(&match, 4);

ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120, 0, &match,
&ofpacts, hc_uuid);
/* IPv6 */
match_init_catchall(&match);
match_set_in_port(&match, tun->ofport);
match_set_dl_type(&match, htons(ETH_TYPE_IPV6));
match_set_nw_proto(&match, IPPROTO_ICMPV6);
match_set_icmp_type(&match, 2);
match_set_icmp_code(&match, 0);

ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120, 0, &match,
&ofpacts, hc_uuid);
}

/* Add VXLAN specific rules to transform port keys
Expand Down
3 changes: 3 additions & 0 deletions include/ovn/logical-fields.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ enum mff_log_flags_bits {
MLF_LOOKUP_COMMIT_ECMP_NH_BIT = 13,
MLF_USE_LB_AFF_SESSION_BIT = 14,
MLF_LOCALNET_BIT = 15,
MLF_RX_FROM_TUNNEL_BIT = 16,
};

/* MFF_LOG_FLAGS_REG flag assignments */
Expand Down Expand Up @@ -129,6 +130,8 @@ enum mff_log_flags {
/* Indicate that the port is localnet. */
MLF_LOCALNET = (1 << MLF_LOCALNET_BIT),

/* Indicate the packet has been received from the tunnel. */
MLF_RX_FROM_TUNNEL = (1 << MLF_RX_FROM_TUNNEL_BIT),
};

/* OVN logical fields
Expand Down
2 changes: 2 additions & 0 deletions lib/logical-fields.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ ovn_init_symtab(struct shash *symtab)
MLF_LOCALNET_BIT);
expr_symtab_add_subfield(symtab, "flags.localnet", NULL,
flags_str);
snprintf(flags_str, sizeof flags_str, "flags[%d]", MLF_RX_FROM_TUNNEL_BIT);
expr_symtab_add_subfield(symtab, "flags.tunnel_rx", NULL, flags_str);

/* Connection tracking state. */
expr_symtab_add_field_scoped(symtab, "ct_mark", MFF_CT_MARK, NULL, false,
Expand Down
83 changes: 83 additions & 0 deletions northd/northd.c
Original file line number Diff line number Diff line change
Expand Up @@ -9775,6 +9775,13 @@ build_lswitch_lflows_admission_control(struct ovn_datapath *od,
struct hmap *lflows)
{
ovs_assert(od->nbs);

/* Default action for recirculated ICMP error 'packet too big'. */
ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 110,
"((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
" (ip6 && icmp6.type == 2 && icmp6.code == 0)) &&"
" flags.tunnel_rx == 1", debug_drop_action());

/* Logical VLANs not supported. */
if (!is_vlan_transparent(od)) {
/* Block logical VLANs. */
Expand Down Expand Up @@ -12814,6 +12821,72 @@ build_lrouter_force_snat_flows(struct hmap *lflows, struct ovn_datapath *od,
ds_destroy(&actions);
}

/* Following flows are used to manage traffic redirected by the kernel
* (e.g. ICMP errors packets) that enter the cluster from the geneve ports
*/
static void
build_lrouter_icmp_packet_toobig_admin_flows(
struct ovn_port *op, struct hmap *lflows,
struct ds *match, struct ds *actions)
{
ovs_assert(op->nbrp);

if (!is_l3dgw_port(op)) {
return;
}

ds_clear(match);
ds_put_format(match,
"((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
" (ip6 && icmp6.type == 2 && icmp6.code == 0)) &&"
" eth.dst == %s && !is_chassis_resident(%s) &&"
" flags.tunnel_rx == 1",
op->nbrp->mac, op->cr_port->json_key);
ds_clear(actions);
ds_put_format(actions, "outport <-> inport; inport = %s; next;",
op->json_key);
ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 120,
ds_cstr(match), ds_cstr(actions));
}

static void
build_lswitch_icmp_packet_toobig_admin_flows(
struct ovn_port *op, struct hmap *lflows,
struct ds *match, struct ds *actions)
{
ovs_assert(op->nbsp);

if (!lsp_is_router(op->nbsp)) {
return;
}

struct ovn_port *peer = op->peer;
if (!peer) {
return;
}

ds_clear(match);
if (peer->od->is_gw_router) {
ds_put_format(match,
"((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
" (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
"eth.src == %s && outport == %s && flags.tunnel_rx == 1",
peer->nbrp->mac, op->json_key);
} else {
ds_put_format(match,
"((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
" (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
"eth.dst == %s && flags.tunnel_rx == 1",
peer->nbrp->mac);
}
ds_clear(actions);
ds_put_format(actions,
"outport <-> inport; next(pipeline=ingress,table=%d);",
ovn_stage_get_table(S_SWITCH_IN_L2_LKUP));
ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120,
ds_cstr(match), ds_cstr(actions));
}

static void
build_lrouter_force_snat_flows_op(struct ovn_port *op,
struct hmap *lflows,
Expand Down Expand Up @@ -12946,6 +13019,13 @@ build_adm_ctrl_flows_for_lrouter(
struct ovn_datapath *od, struct hmap *lflows)
{
ovs_assert(od->nbr);

/* Default action for recirculated ICMP error 'packet too big'. */
ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 110,
"((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
" (ip6 && icmp6.type == 2 && icmp6.code == 0)) &&"
" flags.tunnel_rx == 1", debug_drop_action());

/* Logical VLANs not supported.
* Broadcast/multicast source address is invalid. */
ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
Expand Down Expand Up @@ -16181,6 +16261,7 @@ build_lswitch_and_lrouter_iterate_by_lsp(struct ovn_port *op,
build_lswitch_dhcp_options_and_response(op, lflows, meter_groups);
build_lswitch_external_port(op, lflows);
build_lswitch_ip_unicast_lookup(op, lflows, actions, match);
build_lswitch_icmp_packet_toobig_admin_flows(op, lflows, match, actions);

/* Build Logical Router Flows. */
build_ip_routing_flows_for_router_type_lsp(op, lr_ports, lflows);
Expand Down Expand Up @@ -16217,6 +16298,8 @@ build_lswitch_and_lrouter_iterate_by_lrp(struct ovn_port *op,
&lsi->match, &lsi->actions, lsi->meter_groups);
build_lrouter_force_snat_flows_op(op, lsi->lflows, &lsi->match,
&lsi->actions);
build_lrouter_icmp_packet_toobig_admin_flows(op, lsi->lflows, &lsi->match,
&lsi->actions);
}

static void *
Expand Down
41 changes: 40 additions & 1 deletion northd/ovn-northd.8.xml
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@
<li>
For each (enabled) vtep logical port, a priority 70 flow is added which
matches on all packets and applies the action
<code>next(pipeline=ingress, table=S_SWITCH_IN_L2_LKUP) = 1;</code>
<code>next(pipeline=ingress, table=S_SWITCH_IN_L3_LKUP) = 1;</code>
to skip most stages of ingress pipeline and go directly to ingress L2
lookup table to determine the output port. Packets from VTEP (RAMP)
switch should not be subjected to any ACL checks. Egress pipeline will
Expand Down Expand Up @@ -318,6 +318,30 @@

<h3>Ingress Table 1: Ingress Port Security - Apply</h3>

<p>
For each logical switch port <var>P</var> of type router connected to a
gw router a priority-120 flow that matches 'recirculated' icmp{4,6} error
'packet too big' and <code>eth.src == <var>D</var> &amp;&amp;
outport == <var>P</var> &amp;&amp; flags.tunnel_rx == 1</code> where
<var>D</var> is the peer logical router port <var>RP</var> mac address,
swaps inport and outport and applies the action <code>
next(pipeline=S_SWITCH_IN_L2_LKUP)</code>.
</p>

<p>
For each logical switch port <var>P</var> of type router connected to a
distributed router a priority-120 flow that matches 'recirculated'
icmp{4,6} error 'packet too big' and <code>eth.dst == <var>D</var>
&amp;&amp; flags.tunnel_rx == 1</code> where <var>D</var> is the peer
logical router port <var>RP</var> mac address, swaps inport and outport
and applies the action <code> next(pipeline=S_SWITCH_IN_L2_LKUP)</code>.
</p>

<p>
This table adds a priority-110 flow that matches 'recirculated' icmp{4,6}
error 'packet too big' to drop the packet.
</p>

<p>
This table drops the packets if the port security check failed
in the previous stage i.e the register bit
Expand Down Expand Up @@ -2414,6 +2438,21 @@ output;
(LBs, NAT).
</p>

<p>
For each gateway port <var>GW</var> on a distributed logical router
a priority-120 flow that matches 'recirculated' icmp{4,6} error
'packet too big' and <code>eth.dst == <var>D</var> &amp;&amp;
!is_chassis_resident(<var> cr-GW</var>)</code> where <var>D</var>
is the gateway port mac address and <var>cr-GW</var> is the chassis
resident port of <var>GW</var>, swap inport and outport and stores
<var>GW</var> as inport.
</p>

<p>
This table adds a priority-110 flow that matches 'recirculated'
icmp{4,6} error 'packet too big' to drop the packet.
</p>

<p>
For a distributed logical router or for gateway router where
the port is configured with <code>options:gateway_mtu</code>
Expand Down
Loading

0 comments on commit fa35acc

Please sign in to comment.