Skip to content

Commit

Permalink
northd: Explicitly handle SNAT for ICMP need frag.
Browse files Browse the repository at this point in the history
Considering following topology:
client - sw0 - lrp0 - lr - lrp1 - sw1 - server
sw0 in subnet 192.168.0.0/24
sw1 in subnet 172.168.0.0/24
SNAT configured for client
gateway_mtu=1400 configured for lrp0

If we send UDP traffic from client to server
and server responds with packet bigger than 1400
the following sequence will happen:

1) Packet is coming into lr via lrp1
2) unSNAT
3) Routing, the outport will be set to lrp0
4) Check for packet larger will fail
5) We will generate ICMP need frag

However, the last step is wrong from the server
perspective. The ICMP message will have IP source
address = lrp1 IP address. Which means that SNAT won't
happen because the source is not within the sw0 subnet,
but the inner packet has sw0 subnet address, because it
was unSNATted. This results in server ignoring the ICMP
message because server never sent any packet to the
sw0 subnet.

In order to prevent this issue perform SNAT for the
ICMP packet. Because the packet is related to already
existing connection we just need to perform
ct_commit_nat(snat) action.

This is achieved with addition of the following flow for
"lr_in_larger_pkts" stage (the flow for IPv6 is the in
regard to the addition):

match=(inport == "INPORT" && outport == "OUTPORT" && ip4 && REGBIT_PKT_LARGER && REGBIT_EGRESS_LOOPBACK == 0 && ct.trk && ct.rpl && ct.dnat), action=(icmp4_error {flags.icmp_snat = 1; REGBIT_EGRESS_LOOPBACK = 1; REGBIT_PKT_LARGER = 0; eth.dst = ETH_DST; ip4.dst = ip4.src; ip4.src = IP_SRC; ip.ttl = 255; icmp4.type = 3; /* Destination Unreachable. */ icmp4.code = 4; /* Frag Needed and DF was Set. */ icmp4.frag_mtu = 1500; next(pipeline=ingress, table=0); };)

Also, add flow to "lr_out_post_snat" stage:

match=(icmp && flags.icmp_snat == 1), action=(ct_commit_nat(snat);)

Partially revert 0e49f49 ("northd: Allow need frag to be SNATed")
which attempted to fix the same issue in a wrong way.

Also add feature flag for the updated ct_commit_nat action.
In case there is an update of northd to newer version before all
controllers are updated.

Fixes: 0e49f49 ("northd: Allow need frag to be SNATed")
Reported-at: https://issues.redhat.com/browse/FDP-134
Reported-at: https://issues.redhat.com/browse/FDP-159
Signed-off-by: Ales Musil <amusil@redhat.com>
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
(cherry picked from commit 43f741c)
  • Loading branch information
almusil authored and dceara committed Feb 8, 2024
1 parent c76cb23 commit f2e8130
Show file tree
Hide file tree
Showing 10 changed files with 218 additions and 125 deletions.
8 changes: 8 additions & 0 deletions controller/chassis.c
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ chassis_build_other_config(const struct ovs_chassis_cfg *ovs_cfg,
smap_replace(config, OVN_FEATURE_CT_LB_RELATED, "true");
smap_replace(config, OVN_FEATURE_FDB_TIMESTAMP, "true");
smap_replace(config, OVN_FEATURE_LS_DPG_COLUMN, "true");
smap_replace(config, OVN_FEATURE_CT_COMMIT_NAT_V2, "true");
}

/*
Expand Down Expand Up @@ -509,6 +510,12 @@ chassis_other_config_changed(const struct ovs_chassis_cfg *ovs_cfg,
return true;
}

if (!smap_get_bool(&chassis_rec->other_config,
OVN_FEATURE_CT_COMMIT_NAT_V2,
false)) {
return true;
}

return false;
}

Expand Down Expand Up @@ -640,6 +647,7 @@ update_supported_sset(struct sset *supported)
sset_add(supported, OVN_FEATURE_CT_LB_RELATED);
sset_add(supported, OVN_FEATURE_FDB_TIMESTAMP);
sset_add(supported, OVN_FEATURE_LS_DPG_COLUMN);
sset_add(supported, OVN_FEATURE_CT_COMMIT_NAT_V2);
}

static void
Expand Down
1 change: 1 addition & 0 deletions include/ovn/features.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#define OVN_FEATURE_CT_LB_RELATED "ovn-ct-lb-related"
#define OVN_FEATURE_FDB_TIMESTAMP "fdb-timestamp"
#define OVN_FEATURE_LS_DPG_COLUMN "ls-dpg-column"
#define OVN_FEATURE_CT_COMMIT_NAT_V2 "ct-commit-nat-v2"

/* OVS datapath supported features. Based on availability OVN might generate
* different types of openflows.
Expand Down
3 changes: 3 additions & 0 deletions include/ovn/logical-fields.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ enum mff_log_flags_bits {
MLF_USE_LB_AFF_SESSION_BIT = 14,
MLF_LOCALNET_BIT = 15,
MLF_RX_FROM_TUNNEL_BIT = 16,
MLF_ICMP_SNAT_BIT = 17,
};

/* MFF_LOG_FLAGS_REG flag assignments */
Expand Down Expand Up @@ -134,6 +135,8 @@ enum mff_log_flags {

/* Indicate the packet has been received from the tunnel. */
MLF_RX_FROM_TUNNEL = (1 << MLF_RX_FROM_TUNNEL_BIT),

MLF_ICMP_SNAT = (1 << MLF_ICMP_SNAT_BIT),
};

/* OVN logical fields
Expand Down
4 changes: 4 additions & 0 deletions lib/logical-fields.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ ovn_init_symtab(struct shash *symtab)
snprintf(flags_str, sizeof flags_str, "flags[%d]",
MLF_LOCALNET_BIT);
expr_symtab_add_subfield(symtab, "flags.localnet", NULL,
flags_str);
snprintf(flags_str, sizeof flags_str, "flags[%d]",
MLF_ICMP_SNAT_BIT);
expr_symtab_add_subfield(symtab, "flags.icmp_snat", NULL,
flags_str);
snprintf(flags_str, sizeof flags_str, "flags[%d]", MLF_RX_FROM_TUNNEL_BIT);
expr_symtab_add_subfield(symtab, "flags.tunnel_rx", NULL, flags_str);
Expand Down
10 changes: 10 additions & 0 deletions northd/en-global-config.c
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ northd_enable_all_features(struct ed_type_global_config *data)
.ct_lb_related = true,
.fdb_timestamp = true,
.ls_dpg_column = true,
.ct_commit_nat_v2 = true,
};
}

Expand Down Expand Up @@ -429,6 +430,15 @@ build_chassis_features(const struct sbrec_chassis_table *sbrec_chassis_table,
chassis_features->ls_dpg_column) {
chassis_features->ls_dpg_column = false;
}

bool ct_commit_nat_v2 =
smap_get_bool(&chassis->other_config,
OVN_FEATURE_CT_COMMIT_NAT_V2,
false);
if (!ct_commit_nat_v2 &&
chassis_features->ct_commit_nat_v2) {
chassis_features->ct_commit_nat_v2 = false;
}
}
}

Expand Down
1 change: 1 addition & 0 deletions northd/en-global-config.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ struct chassis_features {
bool ct_lb_related;
bool fdb_timestamp;
bool ls_dpg_column;
bool ct_commit_nat_v2;
};

struct global_config_tracked_data {
Expand Down
189 changes: 102 additions & 87 deletions northd/northd.c
Original file line number Diff line number Diff line change
Expand Up @@ -13111,6 +13111,73 @@ build_arp_resolve_flows_for_lsp(
}
}

#define ICMP4_NEED_FRAG_FORMAT \
"icmp4_error {" \
"%s" \
REGBIT_EGRESS_LOOPBACK" = 1; " \
REGBIT_PKT_LARGER" = 0; " \
"eth.dst = %s; " \
"ip4.dst = ip4.src; " \
"ip4.src = %s; " \
"ip.ttl = 255; " \
"icmp4.type = 3; /* Destination Unreachable. */ " \
"icmp4.code = 4; /* Frag Needed and DF was Set. */ " \
"icmp4.frag_mtu = %d; " \
"next(pipeline=ingress, table=%d); };" \

#define ICMP6_NEED_FRAG_FORMAT \
"icmp6_error {" \
"%s" \
REGBIT_EGRESS_LOOPBACK" = 1; " \
REGBIT_PKT_LARGER" = 0; " \
"eth.dst = %s; " \
"ip6.dst = ip6.src; " \
"ip6.src = %s; " \
"ip.ttl = 255; " \
"icmp6.type = 2; /* Packet Too Big. */ " \
"icmp6.code = 0; " \
"icmp6.frag_mtu = %d; " \
"next(pipeline=ingress, table=%d); };"

static void
create_icmp_need_frag_lflow(const struct ovn_port *op, int mtu,
struct ds *actions, struct ds *match,
const char *meter, struct lflow_table *lflows,
struct lflow_ref *lflow_ref,
enum ovn_stage stage, uint16_t priority,
bool is_ipv6, const char *extra_match,
const char *extra_action)
{
if ((is_ipv6 && !op->lrp_networks.ipv6_addrs) ||
(!is_ipv6 && !op->lrp_networks.ipv4_addrs)) {
return;
}

const char *ip = is_ipv6
? op->lrp_networks.ipv6_addrs[0].addr_s
: op->lrp_networks.ipv4_addrs[0].addr_s;
size_t match_len = match->length;

ds_put_format(match, " && ip%c && "REGBIT_PKT_LARGER
" && "REGBIT_EGRESS_LOOPBACK" == 0", is_ipv6 ? '6' : '4');

if (*extra_match) {
ds_put_format(match, " && %s", extra_match);
}

ds_clear(actions);
ds_put_format(actions,
is_ipv6 ? ICMP6_NEED_FRAG_FORMAT : ICMP4_NEED_FRAG_FORMAT,
extra_action, op->lrp_networks.ea_s, ip,
mtu, ovn_stage_get_table(S_ROUTER_IN_ADMISSION));

ovn_lflow_add_with_hint__(lflows, op->od, stage, priority,
ds_cstr(match), ds_cstr(actions),
NULL, meter, &op->nbrp->header_, lflow_ref);

ds_truncate(match, match_len);
}

static void
build_icmperr_pkt_big_flows(struct ovn_port *op, int mtu,
struct lflow_table *lflows,
Expand All @@ -13119,102 +13186,41 @@ build_icmperr_pkt_big_flows(struct ovn_port *op, int mtu,
struct ovn_port *outport,
struct lflow_ref *lflow_ref)
{
char *outport_match = outport ? xasprintf("outport == %s && ",
outport->json_key)
: NULL;

char *ip4_src = NULL;
const char *ipv4_meter = copp_meter_get(COPP_ICMP4_ERR, op->od->nbr->copp,
meter_groups);
const char *ipv6_meter = copp_meter_get(COPP_ICMP6_ERR, op->od->nbr->copp,
meter_groups);

if (outport && outport->lrp_networks.ipv4_addrs) {
ip4_src = outport->lrp_networks.ipv4_addrs[0].addr_s;
} else if (op->lrp_networks.ipv4_addrs) {
ip4_src = op->lrp_networks.ipv4_addrs[0].addr_s;
}

if (ip4_src) {
ds_clear(match);
ds_put_format(match, "inport == %s && %sip4 && "REGBIT_PKT_LARGER
" && "REGBIT_EGRESS_LOOPBACK" == 0", op->json_key,
outport ? outport_match : "");

ds_clear(actions);
/* Set icmp4.frag_mtu to gw_mtu */
ds_put_format(actions,
"icmp4_error {"
REGBIT_EGRESS_LOOPBACK" = 1; "
REGBIT_PKT_LARGER" = 0; "
"eth.dst = %s; "
"ip4.dst = ip4.src; "
"ip4.src = %s; "
"ip.ttl = 255; "
"icmp4.type = 3; /* Destination Unreachable. */ "
"icmp4.code = 4; /* Frag Needed and DF was Set. */ "
"icmp4.frag_mtu = %d; "
"next(pipeline=ingress, table=%d); };",
op->lrp_networks.ea_s, ip4_src, mtu,
ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
ovn_lflow_add_with_hint__(lflows, op->od, stage, 150,
ds_cstr(match), ds_cstr(actions),
NULL,
copp_meter_get(
COPP_ICMP4_ERR,
op->od->nbr->copp,
meter_groups),
&op->nbrp->header_,
lflow_ref);
}
ds_clear(match);
ds_put_format(match, "inport == %s", op->json_key);

char *ip6_src = NULL;
if (outport) {
ds_put_format(match, " && outport == %s", outport->json_key);

if (outport && outport->lrp_networks.ipv6_addrs) {
ip6_src = outport->lrp_networks.ipv6_addrs[0].addr_s;
} else if (op->lrp_networks.ipv6_addrs) {
ip6_src = op->lrp_networks.ipv6_addrs[0].addr_s;
create_icmp_need_frag_lflow(op, mtu, actions, match, ipv4_meter,
lflows, lflow_ref, stage, 160, false,
"ct.trk && ct.rpl && ct.dnat",
"flags.icmp_snat = 1; ");
create_icmp_need_frag_lflow(op, mtu, actions, match, ipv6_meter,
lflows, lflow_ref, stage, 160, true,
"ct.trk && ct.rpl && ct.dnat",
"flags.icmp_snat = 1; ");
}

if (ip6_src) {
ds_clear(match);
ds_put_format(match, "inport == %s && %sip6 && "REGBIT_PKT_LARGER
" && "REGBIT_EGRESS_LOOPBACK" == 0", op->json_key,
outport ? outport_match : "");

ds_clear(actions);
/* Set icmp6.frag_mtu to gw_mtu */
ds_put_format(actions,
"icmp6_error {"
REGBIT_EGRESS_LOOPBACK" = 1; "
REGBIT_PKT_LARGER" = 0; "
"eth.dst = %s; "
"ip6.dst = ip6.src; "
"ip6.src = %s; "
"ip.ttl = 255; "
"icmp6.type = 2; /* Packet Too Big. */ "
"icmp6.code = 0; "
"icmp6.frag_mtu = %d; "
"next(pipeline=ingress, table=%d); };",
op->lrp_networks.ea_s, ip6_src, mtu,
ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
ovn_lflow_add_with_hint__(lflows, op->od, stage, 150,
ds_cstr(match), ds_cstr(actions),
NULL,
copp_meter_get(
COPP_ICMP6_ERR,
op->od->nbr->copp,
meter_groups),
&op->nbrp->header_,
lflow_ref);
}
free(outport_match);
create_icmp_need_frag_lflow(op, mtu, actions, match, ipv4_meter, lflows,
lflow_ref, stage, 150, false, "", "");
create_icmp_need_frag_lflow(op, mtu, actions, match, ipv6_meter, lflows,
lflow_ref, stage, 150, true, "", "");
}

static void
build_check_pkt_len_flows_for_lrp(struct ovn_port *op,
struct lflow_table *lflows,
const struct hmap *lr_ports,
const struct shash *meter_groups,
struct ds *match,
struct ds *actions,
struct lflow_ref *lflow_ref)
struct ds *match, struct ds *actions,
struct lflow_ref *lflow_ref,
const struct chassis_features *features)
{
int gw_mtu = smap_get_int(&op->nbrp->options, "gateway_mtu", 0);
if (gw_mtu <= 0) {
Expand Down Expand Up @@ -13244,6 +13250,13 @@ build_check_pkt_len_flows_for_lrp(struct ovn_port *op,
match, actions, S_ROUTER_IN_LARGER_PKTS,
op, lflow_ref);
}

if (features->ct_commit_nat_v2) {
ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_OUT_POST_SNAT, 100,
"icmp && flags.icmp_snat == 1",
"ct_commit_nat(snat);", &op->nbrp->header_,
lflow_ref);
}
}

/* Local router ingress table CHK_PKT_LEN: Check packet length.
Expand All @@ -13265,7 +13278,8 @@ build_check_pkt_len_flows_for_lrouter(
const struct hmap *lr_ports,
struct ds *match, struct ds *actions,
const struct shash *meter_groups,
struct lflow_ref *lflow_ref)
struct lflow_ref *lflow_ref,
const struct chassis_features *features)
{
ovs_assert(od->nbr);

Expand All @@ -13282,7 +13296,7 @@ build_check_pkt_len_flows_for_lrouter(
continue;
}
build_check_pkt_len_flows_for_lrp(rp, lflows, lr_ports, meter_groups,
match, actions, lflow_ref);
match, actions, lflow_ref, features);
}
}

Expand Down Expand Up @@ -15639,7 +15653,8 @@ build_lswitch_and_lrouter_iterate_by_lr(struct ovn_datapath *od,
build_arp_resolve_flows_for_lrouter(od, lsi->lflows, NULL);
build_check_pkt_len_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports,
&lsi->match, &lsi->actions,
lsi->meter_groups, NULL);
lsi->meter_groups, NULL,
lsi->features);
build_gateway_redirect_flows_for_lrouter(od, lsi->lflows, &lsi->match,
&lsi->actions, NULL);
build_arp_request_flows_for_lrouter(od, lsi->lflows, &lsi->match,
Expand Down

0 comments on commit f2e8130

Please sign in to comment.