Skip to content

Commit

Permalink
northd: add check_pkt_larger lflows for ingress traffic
Browse files Browse the repository at this point in the history
Introduce check_pkt_larger action for ingress traffic
entering the cluster from a distributed gw router port
or from a gw router. This patch enables pMTU discovery
for ingress traffic.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Acked-by: Mark D. Gray <mark.d.gray@redhat.com>
Signed-off-by: Numan Siddique <numans@ovn.org>
  • Loading branch information
LorenzoBianconi authored and numansiddique committed Jul 29, 2021
1 parent 947e8d4 commit 1c9e46a
Show file tree
Hide file tree
Showing 5 changed files with 502 additions and 123 deletions.
60 changes: 55 additions & 5 deletions northd/ovn-northd.8.xml
Expand Up @@ -1940,6 +1940,15 @@ output;
<code>eth.dst == <var>E</var></code> is only programmed on
the gateway port instance on the gateway chassis.
</p>

<p>
For a distributed logical router or for a gateway router where
the port is configured with <code>options:gateway_mtu</code>,
the action of the above flow is modified by adding
<code>check_pkt_larger</code>, which marks the packet by
setting <code>REGBIT_PKT_LARGER</code> if its size is greater
than the MTU.
</p>
</li>

<li>
Expand Down Expand Up @@ -2164,6 +2173,46 @@ next;
</p>

<ul>
<li>
<p>
For distributed logical routers or gateway routers with gateway port
configured with <code>options:gateway_mtu</code> to a valid integer
value, a priority-150 flow with the match <code>inport ==
<var>LRP</var> &amp;&amp; REGBIT_PKT_LARGER &amp;&amp;
REGBIT_EGRESS_LOOPBACK == 0</code>, where <var>LRP</var> is the
logical router port, which applies the following action for IPv4
and IPv6 respectively:
</p>

<pre>
icmp4 {
icmp4.type = 3; /* Destination Unreachable. */
icmp4.code = 4; /* Frag Needed and DF was Set. */
icmp4.frag_mtu = <var>M</var>;
eth.dst = <var>E</var>;
ip4.dst = ip4.src;
ip4.src = <var>I</var>;
ip.ttl = 255;
REGBIT_EGRESS_LOOPBACK = 1;
REGBIT_PKT_LARGER = 0;
next(pipeline=ingress, table=0);
};

icmp6 {
icmp6.type = 2;
icmp6.code = 0;
icmp6.frag_mtu = <var>M</var>;
eth.dst = <var>E</var>;
ip6.dst = ip6.src;
ip6.src = <var>I</var>;
ip.ttl = 255;
REGBIT_EGRESS_LOOPBACK = 1;
REGBIT_PKT_LARGER = 0;
next(pipeline=ingress, table=0);
};
</pre>
</li>

<li>
<p>
For each NAT entry of a distributed logical router (with
Expand Down Expand Up @@ -3705,12 +3754,11 @@ REGBIT_PKT_LARGER = check_pkt_larger(<var>L</var>); next;
<p>
For distributed logical routers or gateway routers with gateway port
configured with <code>options:gateway_mtu</code> to a valid integer
value, this table adds the following priority-50 logical flow for each
value, this table adds the following priority-150 logical flow for each
logical router port with the match <code>inport == <var>LRP</var>
&amp;&amp; outport == <var>GW_PORT</var> &amp;&amp;
REGBIT_PKT_LARGER</code>, where <var>LRP</var> is the logical
router port and <var>GW_PORT</var> is the gateway router port and applies
the following action for ipv4 and ipv6 respectively:
&amp;&amp; REGBIT_PKT_LARGER &amp;&amp; !REGBIT_EGRESS_LOOPBACK</code>,
where <var>LRP</var> is the logical router port and applies the following
action for ipv4 and ipv6 respectively:
</p>

<pre>
Expand All @@ -3723,6 +3771,7 @@ icmp4 {
ip4.src = <var>I</var>;
ip.ttl = 255;
REGBIT_EGRESS_LOOPBACK = 1;
REGBIT_PKT_LARGER = 0;
next(pipeline=ingress, table=0);
};

Expand All @@ -3735,6 +3784,7 @@ icmp6 {
ip6.src = <var>I</var>;
ip.ttl = 255;
REGBIT_EGRESS_LOOPBACK = 1;
REGBIT_PKT_LARGER = 0;
next(pipeline=ingress, table=0);
};
</pre>
Expand Down
180 changes: 103 additions & 77 deletions northd/ovn-northd.c
Expand Up @@ -9952,6 +9952,9 @@ build_adm_ctrl_flows_for_lrouter(
}
}

/* Forward declaration: the definition appears further down in this file, but
 * the L2 admission control flow builders below already call it to prepend the
 * check_pkt_larger action to their action string. */
static int
build_check_pkt_len_action_string(struct ovn_port *op, struct ds *actions);

/* Logical router ingress Table 0: L2 Admission Control
* This table drops packets that the router shouldn’t see at all based
* on their Ethernet headers.
Expand Down Expand Up @@ -9979,6 +9982,8 @@ build_adm_ctrl_flows_for_lrouter_port(
* the pipeline.
*/
ds_clear(actions);

build_check_pkt_len_action_string(op, actions);
ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
op->lrp_networks.ea_s);

Expand Down Expand Up @@ -10919,107 +10924,126 @@ build_arp_resolve_flows_for_lrouter_port(

}

/* Adds to 'lflows', in 'stage' of the datapath of 'op', the priority-150
 * logical flows that answer a packet flagged with REGBIT_PKT_LARGER and
 * received on 'op' with an ICMP error advertising 'mtu'.
 *
 * One flow is added for IPv4 if 'op' has IPv4 addresses and one for IPv6 if
 * it has IPv6 addresses.  The first configured address of each family is
 * used as the source of the generated error.  The generated packet gets
 * REGBIT_EGRESS_LOOPBACK set and REGBIT_PKT_LARGER cleared before it is
 * resubmitted to the ingress admission table; the match requires
 * REGBIT_EGRESS_LOOPBACK == 0, so the resubmitted packet does not hit the
 * same flow again.
 *
 * 'match' and 'actions' are caller-provided scratch buffers; their previous
 * contents are discarded. */
static void
build_icmperr_pkt_big_flows(struct ovn_port *op, int mtu, struct hmap *lflows,
                            struct shash *meter_groups, struct ds *match,
                            struct ds *actions, enum ovn_stage stage)
{
    const char *lrp_mac = op->lrp_networks.ea_s;

    /* IPv4: Destination Unreachable / Fragmentation Needed. */
    if (op->lrp_networks.ipv4_addrs) {
        const char *lrp_ip4 = op->lrp_networks.ipv4_addrs[0].addr_s;

        ds_clear(match);
        ds_put_format(match,
                      "inport == %s && ip4 && "REGBIT_PKT_LARGER
                      " && "REGBIT_EGRESS_LOOPBACK" == 0", op->json_key);

        ds_clear(actions);
        /* icmp4.frag_mtu carries the gateway MTU back to the sender. */
        ds_put_format(actions,
                      "icmp4_error {"
                      REGBIT_EGRESS_LOOPBACK" = 1; "
                      REGBIT_PKT_LARGER" = 0; "
                      "eth.dst = %s; "
                      "ip4.dst = ip4.src; "
                      "ip4.src = %s; "
                      "ip.ttl = 255; "
                      "icmp4.type = 3; /* Destination Unreachable. */ "
                      "icmp4.code = 4; /* Frag Needed and DF was Set. */ "
                      "icmp4.frag_mtu = %d; "
                      "next(pipeline=ingress, table=%d); };",
                      lrp_mac, lrp_ip4, mtu,
                      ovn_stage_get_table(S_ROUTER_IN_ADMISSION));

        ovn_lflow_add_with_hint__(lflows, op->od, stage, 150,
                                  ds_cstr(match), ds_cstr(actions),
                                  NULL,
                                  copp_meter_get(COPP_ICMP4_ERR,
                                                 op->od->nbr->copp,
                                                 meter_groups),
                                  &op->nbrp->header_);
    }

    /* IPv6: Packet Too Big. */
    if (op->lrp_networks.ipv6_addrs) {
        const char *lrp_ip6 = op->lrp_networks.ipv6_addrs[0].addr_s;

        ds_clear(match);
        ds_put_format(match, "inport == %s && ip6 && "REGBIT_PKT_LARGER
                      " && "REGBIT_EGRESS_LOOPBACK" == 0", op->json_key);

        ds_clear(actions);
        /* icmp6.frag_mtu carries the gateway MTU back to the sender. */
        ds_put_format(actions,
                      "icmp6_error {"
                      REGBIT_EGRESS_LOOPBACK" = 1; "
                      REGBIT_PKT_LARGER" = 0; "
                      "eth.dst = %s; "
                      "ip6.dst = ip6.src; "
                      "ip6.src = %s; "
                      "ip.ttl = 255; "
                      "icmp6.type = 2; /* Packet Too Big. */ "
                      "icmp6.code = 0; "
                      "icmp6.frag_mtu = %d; "
                      "next(pipeline=ingress, table=%d); };",
                      lrp_mac, lrp_ip6, mtu,
                      ovn_stage_get_table(S_ROUTER_IN_ADMISSION));

        ovn_lflow_add_with_hint__(lflows, op->od, stage, 150,
                                  ds_cstr(match), ds_cstr(actions),
                                  NULL,
                                  copp_meter_get(COPP_ICMP6_ERR,
                                                 op->od->nbr->copp,
                                                 meter_groups),
                                  &op->nbrp->header_);
    }
}

/* Appends "REGBIT_PKT_LARGER = check_pkt_larger(<len>); " to 'actions' when
 * the logical router port behind 'op' has options:gateway_mtu set to a
 * positive integer.  <len> is that MTU plus VLAN_ETH_HEADER_LEN.
 *
 * 'actions' is only appended to, never cleared, so the caller can keep
 * building the rest of the action string after this call.
 *
 * Returns the value read from options:gateway_mtu (0 is passed to
 * smap_get_int() as the fallback), or 0 if 'op' has no northbound logical
 * router port.  Callers treat a result <= 0 as "gateway_mtu not
 * configured". */
static int
build_check_pkt_len_action_string(struct ovn_port *op, struct ds *actions)
{
    /* Ports without a northbound router port record have no options to
     * read; report "not configured" instead of dereferencing NULL. */
    if (!op->nbrp) {
        return 0;
    }

    int gw_mtu = smap_get_int(&op->nbrp->options, "gateway_mtu", 0);

    if (gw_mtu > 0) {
        /* Add the action only if gateway_mtu is configured. */
        ds_put_format(actions,
                      REGBIT_PKT_LARGER" = check_pkt_larger(%d); ",
                      gw_mtu + VLAN_ETH_HEADER_LEN);
    }
    return gw_mtu;
}

/* Builds the logical flows that enforce options:gateway_mtu for the logical
 * router port 'op'.  Nothing is added when gateway_mtu is not a positive
 * integer.
 *
 * Flows visible in this span:
 *   - S_ROUTER_IN_CHK_PKT_LEN, priority 50, match "outport == <op>": runs
 *     check_pkt_larger and stores the result in REGBIT_PKT_LARGER.
 *   - ICMP "packet too big" flows for traffic received on 'op' itself
 *     ("ingress traffic"), added in S_ROUTER_IN_IP_INPUT through
 *     build_icmperr_pkt_big_flows().
 *   - ICMP "packet too big" flows for every other port 'rp' of the same
 *     router ("egress traffic"), added in S_ROUTER_IN_LARGER_PKTS.
 *
 * 'match' and 'actions' are scratch buffers that are cleared and reused.
 *
 * NOTE(review): this span looks like a rendered diff in which removed and
 * added lines are interleaved without +/- markers: 'gw_mtu' is declared
 * twice, 'actions' is filled and then cleared and rebuilt, and the inline
 * icmp4_error/icmp6_error flows (priority 50) sit next to the
 * build_icmperr_pkt_big_flows() call that covers the same ports.  Confirm the
 * statement order against the real file before relying on it. */
static void
build_check_pkt_len_flows_for_lrp(struct ovn_port *op,
struct hmap *lflows, struct hmap *ports,
struct shash *meter_groups, struct ds *match,
struct ds *actions)
{
int gw_mtu = 0;

if (op->nbrp) {
gw_mtu = smap_get_int(&op->nbrp->options, "gateway_mtu", 0);
}
/* Add the flows only if gateway_mtu is configured. */
ds_clear(actions);
int gw_mtu = build_check_pkt_len_action_string(op, actions);
if (gw_mtu <= 0) {
return;
}

ds_put_format(actions, "next;");

ds_clear(match);
ds_put_format(match, "outport == %s", op->json_key);

ds_clear(actions);
ds_put_format(actions,
REGBIT_PKT_LARGER" = check_pkt_larger(%d);"
" next;", gw_mtu + VLAN_ETH_HEADER_LEN);
ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_CHK_PKT_LEN, 50,
ds_cstr(match), ds_cstr(actions),
&op->nbrp->header_);

/* ingress traffic */
build_icmperr_pkt_big_flows(op, gw_mtu, lflows, meter_groups,
match, actions, S_ROUTER_IN_IP_INPUT);

/* Walk every port configured on the same logical router; ports that cannot
 * be found in 'ports' and 'op' itself are skipped. */
for (size_t i = 0; i < op->od->nbr->n_ports; i++) {
struct ovn_port *rp = ovn_port_find(ports,
op->od->nbr->ports[i]->name);
if (!rp || rp == op) {
continue;
}

if (rp->lrp_networks.ipv4_addrs) {
ds_clear(match);
ds_put_format(match, "inport == %s && outport == %s"
" && ip4 && "REGBIT_PKT_LARGER,
rp->json_key, op->json_key);

ds_clear(actions);
/* Set icmp4.frag_mtu to gw_mtu */
ds_put_format(actions,
"icmp4_error {"
REGBIT_EGRESS_LOOPBACK" = 1; "
"eth.dst = %s; "
"ip4.dst = ip4.src; "
"ip4.src = %s; "
"ip.ttl = 255; "
"icmp4.type = 3; /* Destination Unreachable. */ "
"icmp4.code = 4; /* Frag Needed and DF was Set. */ "
"icmp4.frag_mtu = %d; "
"next(pipeline=ingress, table=%d); };",
rp->lrp_networks.ea_s,
rp->lrp_networks.ipv4_addrs[0].addr_s,
gw_mtu,
ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
ovn_lflow_add_with_hint__(lflows, op->od,
S_ROUTER_IN_LARGER_PKTS, 50,
ds_cstr(match), ds_cstr(actions),
NULL,
copp_meter_get(
COPP_ICMP4_ERR,
rp->od->nbr->copp,
meter_groups),
&rp->nbrp->header_);
}

if (rp->lrp_networks.ipv6_addrs) {
ds_clear(match);
ds_put_format(match, "inport == %s && outport == %s"
" && ip6 && "REGBIT_PKT_LARGER,
rp->json_key, op->json_key);

ds_clear(actions);
/* Set icmp6.frag_mtu to gw_mtu */
ds_put_format(actions,
"icmp6_error {"
REGBIT_EGRESS_LOOPBACK" = 1; "
"eth.dst = %s; "
"ip6.dst = ip6.src; "
"ip6.src = %s; "
"ip.ttl = 255; "
"icmp6.type = 2; /* Packet Too Big. */ "
"icmp6.code = 0; "
"icmp6.frag_mtu = %d; "
"next(pipeline=ingress, table=%d); };",
rp->lrp_networks.ea_s,
rp->lrp_networks.ipv6_addrs[0].addr_s,
gw_mtu,
ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
ovn_lflow_add_with_hint__(lflows, op->od,
S_ROUTER_IN_LARGER_PKTS, 50,
ds_cstr(match), ds_cstr(actions),
NULL,
copp_meter_get(
COPP_ICMP6_ERR,
rp->od->nbr->copp,
meter_groups),
&rp->nbrp->header_);
}
/* egress traffic */
build_icmperr_pkt_big_flows(rp, gw_mtu, lflows, meter_groups,
match, actions, S_ROUTER_IN_LARGER_PKTS);
}
}

Expand Down Expand Up @@ -12126,6 +12150,8 @@ build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od,
* down in the pipeline.
*/
ds_clear(actions);

build_check_pkt_len_action_string(od->l3dgw_port, actions);
ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
od->l3dgw_port->lrp_networks.ea_s);

Expand Down

0 comments on commit 1c9e46a

Please sign in to comment.