bpf: nat: let caller determine whether SNATed connection needs CT
Right now the decision whether a SNATed connection needs a CT entry is
made at the lowest level of the SNAT path. But the callers actually know
much better. In particular this avoids one case where we bake EgressGW
knowledge deep into the SNAT code.

Signed-off-by: Julian Wiedmann <jwi@isovalent.com>
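
For illustration, a minimal, hypothetical C sketch of the pattern this change moves to (the struct, field names and helpers below are simplified stand-ins, not the actual Cilium definitions): the caller makes the CT decision while selecting the NAT target, and the SNAT core merely consumes the flag.

/* Hypothetical sketch -- simplified stand-ins for the Cilium types. */
#include <stdbool.h>
#include <stdint.h>

struct nat_target {
	uint32_t addr;		/* SNAT address to use */
	bool egress_gateway;
	bool needs_ct;		/* new: CT decision, made by the caller */
};

/* Caller side (think snat_v4_needs_masquerade()): the policy context for
 * the CT decision is available here.
 */
static bool select_target(struct nat_target *target, uint32_t saddr,
			  uint32_t masq_addr, bool egress_gw, bool local_ep)
{
	if (saddr == masq_addr) {
		/* Host-local connection. */
		target->addr = masq_addr;
		target->needs_ct = true;
		return true;
	}

	if (egress_gw) {
		/* target->addr would come from the egress GW policy lookup. */
		target->egress_gateway = true;
		/* A local endpoint's connection is already tracked. */
		if (!local_ep)
			target->needs_ct = true;
		return true;
	}

	return false;
}

/* SNAT core (think snat_v4_nat_handle_mapping()): no EgressGW knowledge
 * needed here any more, it just honours the caller's decision.
 */
static void handle_mapping(const struct nat_target *target)
{
	if (target->needs_ct) {
		/* ... create/refresh the CT entry before installing the
		 * SNAT mapping ...
		 */
	}
}

With the previous layout, the equivalent of handle_mapping() had to derive needs_ct itself, including egress-gateway specifics; with this change that knowledge stays with the caller.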
julianwiedmann committed Jul 27, 2023
1 parent fc8da72 commit 93c8884
Showing 3 changed files with 27 additions and 57 deletions.
52 changes: 11 additions & 41 deletions bpf/lib/nat.h
@@ -99,6 +99,7 @@ struct ipv4_nat_target {
 	bool from_local_endpoint;
 	bool egress_gateway; /* NAT is needed because of an egress gateway policy */
 	__u32 cluster_id;
+	bool needs_ct;
 };
 
 #if defined(ENABLE_IPV4) && defined(ENABLE_NODEPORT)
@@ -255,31 +256,6 @@ static __always_inline int snat_v4_new_mapping(struct __ctx_buff *ctx,
 	return ret;
 }
 
-static __always_inline bool
-snat_v4_needs_ct(const struct ipv4_ct_tuple *tuple,
-		 const struct ipv4_nat_target *target)
-{
-	if (tuple->saddr == target->addr) {
-		/* Host-local connection. */
-		return true;
-	}
-
-#if defined(ENABLE_EGRESS_GATEWAY) && defined(IS_BPF_HOST)
-	/* Track egress gateway connections, but only if they are related to a
-	 * remote endpoint (if the endpoint is local then the connection is
-	 * already tracked).
-	 */
-	if (target->egress_gateway && !target->from_local_endpoint) {
-		/* Track established egress gateway connections to extend the
-		 * CT entry expiration timeout.
-		 */
-		return true;
-	}
-#endif
-
-	return false;
-}
-
 static __always_inline int
 snat_v4_nat_handle_mapping(struct __ctx_buff *ctx,
 			   struct ipv4_ct_tuple *tuple,
@@ -291,15 +267,14 @@ snat_v4_nat_handle_mapping(struct __ctx_buff *ctx,
 			   const struct ipv4_nat_target *target,
 			   __s8 *ext_err)
 {
-	bool needs_ct;
+	bool needs_ct = target->needs_ct;
 	void *map;
 
 	map = get_cluster_snat_map_v4(target->cluster_id);
 	if (!map)
 		return DROP_SNAT_NO_MAP_FOUND;
 
 	*state = __snat_lookup(map, tuple);
-	needs_ct = *state ? (*state)->common.needs_ct : snat_v4_needs_ct(tuple, target);
 
 	if (needs_ct) {
 		struct ipv4_ct_tuple tuple_snat;
@@ -732,6 +707,8 @@ snat_v4_needs_masquerade(struct __ctx_buff *ctx __maybe_unused,
 #if defined(ENABLE_MASQUERADE_IPV4) && defined(IS_BPF_HOST)
 	if (tuple->saddr == IPV4_MASQUERADE) {
 		target->addr = IPV4_MASQUERADE;
+		target->needs_ct = true;
+
 		return NAT_NEEDED;
 	}
 
@@ -784,6 +761,9 @@ snat_v4_needs_masquerade(struct __ctx_buff *ctx __maybe_unused,
 
 	if (egress_gw_snat_needed(tuple->saddr, tuple->daddr, &target->addr)) {
 		target->egress_gateway = true;
+		/* If the endpoint is local, then the connection is already tracked. */
+		if (!local_ep)
+			target->needs_ct = true;
 
 		return NAT_NEEDED;
 	}
@@ -1185,6 +1165,7 @@ struct ipv6_nat_target {
 	const __u16 min_port; /* host endianness */
 	const __u16 max_port; /* host endianness */
 	bool from_local_endpoint;
+	bool needs_ct;
 };
 
 #if defined(ENABLE_IPV6) && defined(ENABLE_NODEPORT)
@@ -1317,18 +1298,6 @@ static __always_inline int snat_v6_new_mapping(struct __ctx_buff *ctx,
 	return ret;
 }
 
-static __always_inline bool
-snat_v6_needs_ct(struct ipv6_ct_tuple *tuple,
-		 const struct ipv6_nat_target *target)
-{
-	if (ipv6_addr_equals(&tuple->saddr, &target->addr)) {
-		/* Host-local connection. */
-		return true;
-	}
-
-	return false;
-}
-
 static __always_inline int
 snat_v6_nat_handle_mapping(struct __ctx_buff *ctx,
 			   struct ipv6_ct_tuple *tuple,
@@ -1339,10 +1308,9 @@ snat_v6_nat_handle_mapping(struct __ctx_buff *ctx,
 			   const struct ipv6_nat_target *target,
 			   __s8 *ext_err)
 {
-	bool needs_ct;
+	bool needs_ct = target->needs_ct;
 
 	*state = snat_v6_lookup(tuple);
-	needs_ct = *state ? (*state)->common.needs_ct : snat_v6_needs_ct(tuple, target);
 
 	if (needs_ct) {
 		struct ipv6_ct_tuple tuple_snat;
@@ -1654,6 +1622,8 @@ snat_v6_needs_masquerade(struct __ctx_buff *ctx __maybe_unused,
 	BPF_V6(masq_addr, IPV6_MASQUERADE);
 	if (ipv6_addr_equals(&tuple->saddr, &masq_addr)) {
 		ipv6_addr_copy(&target->addr, &masq_addr);
+		target->needs_ct = true;
+
 		return NAT_NEEDED;
 	}
 
8 changes: 8 additions & 0 deletions bpf/lib/nodeport.h
@@ -140,6 +140,8 @@ nodeport_has_nat_conflict_ipv6(const struct ipv6hdr *ip6 __maybe_unused,
 	BPF_V6(router_ip, ROUTER_IP);
 	if (ipv6_addr_equals((union v6addr *)&ip6->saddr, &router_ip)) {
 		ipv6_addr_copy(&target->addr, &router_ip);
+		target->needs_ct = true;
+
 		return true;
 	}
 #endif /* TUNNEL_MODE && IS_BPF_OVERLAY */
@@ -152,6 +154,8 @@ nodeport_has_nat_conflict_ipv6(const struct ipv6hdr *ip6 __maybe_unused,
 	if (dr_ifindex == NATIVE_DEV_IFINDEX &&
 	    ipv6_addr_equals((union v6addr *)&ip6->saddr, &dr_addr)) {
 		ipv6_addr_copy(&target->addr, &dr_addr);
+		target->needs_ct = true;
+
 		return true;
 	}
 #endif /* IS_BPF_HOST */
@@ -1551,6 +1555,8 @@ nodeport_has_nat_conflict_ipv4(const struct iphdr *ip4 __maybe_unused,
 #if defined(TUNNEL_MODE) && defined(IS_BPF_OVERLAY)
 	if (ip4->saddr == IPV4_GATEWAY) {
 		target->addr = IPV4_GATEWAY;
+		target->needs_ct = true;
+
 		return true;
 	}
 #endif /* TUNNEL_MODE && IS_BPF_OVERLAY */
@@ -1565,6 +1571,8 @@ nodeport_has_nat_conflict_ipv4(const struct iphdr *ip4 __maybe_unused,
 	if (dr_ifindex == NATIVE_DEV_IFINDEX &&
 	    ip4->saddr == IPV4_DIRECT_ROUTING) {
 		target->addr = IPV4_DIRECT_ROUTING;
+		target->needs_ct = true;
+
 		return true;
 	}
 #endif /* IS_BPF_HOST */
24 changes: 8 additions & 16 deletions bpf/tests/bpf_nat_tests.c
@@ -189,8 +189,7 @@ int test_nat4_icmp_error_tcp(__maybe_unused struct __ctx_buff *ctx)
 	struct ipv4_nat_entry state;
 
 	ret = snat_v4_new_mapping(ctx, &tuple, &state, &target,
-				  snat_v4_needs_ct(&tuple, &target),
-				  NULL);
+				  false, NULL);
 	assert(ret == 0);
 
 	/* This is the entry-point of the test, calling
@@ -299,8 +298,7 @@ int test_nat4_icmp_error_udp(__maybe_unused struct __ctx_buff *ctx)
 	struct ipv4_nat_entry state;
 
 	ret = snat_v4_new_mapping(ctx, &tuple, &state, &target,
-				  snat_v4_needs_ct(&tuple, &target),
-				  NULL);
+				  false, NULL);
 	assert(ret == 0);
 
 	/* This is the entry-point of the test, calling
@@ -408,8 +406,7 @@ int test_nat4_icmp_error_icmp(__maybe_unused struct __ctx_buff *ctx)
 	struct ipv4_nat_entry state;
 
 	ret = snat_v4_new_mapping(ctx, &tuple, &state, &target,
-				  snat_v4_needs_ct(&tuple, &target),
-				  NULL);
+				  false, NULL);
 	assert(ret == 0);
 
 	/* This is the entry-point of the test, calling
@@ -506,8 +503,7 @@ int test_nat4_icmp_error_sctp(__maybe_unused struct __ctx_buff *ctx)
 	struct ipv4_nat_entry state;
 
 	ret = snat_v4_new_mapping(ctx, &tuple, &state, &target,
-				  snat_v4_needs_ct(&tuple, &target),
-				  NULL);
+				  false, NULL);
 	assert(ret == 0);
 
 	/* This is the entry-point of the test, calling
@@ -568,8 +564,7 @@ int test_nat4_icmp_error_tcp_egress(__maybe_unused struct __ctx_buff *ctx)
 	struct ipv4_nat_entry state;
 
 	ret = snat_v4_new_mapping(ctx, &tuple, &state, &target,
-				  snat_v4_needs_ct(&tuple, &target),
-				  NULL);
+				  false, NULL);
 	assert(ret == 0);
 
 	struct ipv4_ct_tuple icmp_tuple = {};
@@ -683,8 +678,7 @@ int test_nat4_icmp_error_udp_egress(__maybe_unused struct __ctx_buff *ctx)
 	struct ipv4_nat_entry state;
 
 	ret = snat_v4_new_mapping(ctx, &tuple, &state, &target,
-				  snat_v4_needs_ct(&tuple, &target),
-				  NULL);
+				  false, NULL);
 	assert(ret == 0);
 
 	struct ipv4_ct_tuple icmp_tuple = {};
@@ -797,8 +791,7 @@ int test_nat4_icmp_error_icmp_egress(__maybe_unused struct __ctx_buff *ctx)
 	struct ipv4_nat_entry state;
 
 	ret = snat_v4_new_mapping(ctx, &tuple, &state, &target,
-				  snat_v4_needs_ct(&tuple, &target),
-				  NULL);
+				  false, NULL);
 	assert(ret == 0);
 
 	struct ipv4_ct_tuple icmp_tuple = {};
@@ -900,8 +893,7 @@ int test_nat4_icmp_error_sctp_egress(__maybe_unused struct __ctx_buff *ctx)
 	struct ipv4_nat_entry state;
 
 	ret = snat_v4_new_mapping(ctx, &tuple, &state, &target,
-				  snat_v4_needs_ct(&tuple, &target),
-				  NULL);
+				  false, NULL);
 	assert(ret == 0);
 
 	struct ipv4_ct_tuple icmp_tuple = {};
