Skip to content

Commit

Permalink
bpf_host can handle packets passed from L7 proxy
Browse files Browse the repository at this point in the history
[ upstream commit e78ff16 ]

Previously cilium#25440 removed
bpf_host's logic for host-to-remote-pod packets.

However, we recently realized that such host-to-remote-pod traffic can
also be pod-to-pod traffic passing through the L7 proxy. This commit makes
bpf_host capable of handling these host-to-remote-pod packets as long as
they originate from the L7 proxy.

Fixes: cilium#25440

Suggested-by: Paul Chaignon <paul.chaignon@gmail.com>
Signed-off-by: Zhichuan Liang <gray.liang@isovalent.com>
Signed-off-by: Julian Wiedmann <jwi@isovalent.com>
  • Loading branch information
jschwinger233 authored and julianwiedmann committed Mar 5, 2024
1 parent 4991e9a commit 383b1b1
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 6 deletions.
44 changes: 38 additions & 6 deletions bpf/bpf_host.c
Expand Up @@ -172,6 +172,8 @@ handle_ipv6(struct __ctx_buff *ctx, __u32 secctx, const bool from_host,
int ret, l3_off = ETH_HLEN, hdrlen;
struct endpoint_info *ep;
__u8 nexthdr;
__u8 encrypt_key __maybe_unused = 0;
bool from_ingress_proxy = tc_index_from_ingress_proxy(ctx);

if (!revalidate_data(ctx, &data, &data_end, &ip6))
return DROP_INVALID;
Expand Down Expand Up @@ -283,10 +285,16 @@ handle_ipv6(struct __ctx_buff *ctx, __u32 secctx, const bool from_host,
dst = (union v6addr *) &ip6->daddr;
info = ipcache_lookup6(&IPCACHE_MAP, dst, V6_CACHE_KEY_LEN);

#ifdef ENABLE_IPSEC
/* See IPv4 comment. */
if (from_ingress_proxy && info)
encrypt_key = get_min_encrypt_key(info->key);
#endif

#ifdef TUNNEL_MODE
if (info != NULL && info->tunnel_endpoint != 0) {
return encap_and_redirect_with_nodeid(ctx, info->tunnel_endpoint,
secctx, info->sec_label,
encrypt_key, secctx, info->sec_label,
&trace);
} else {
struct tunnel_key key = {};
Expand All @@ -298,17 +306,25 @@ handle_ipv6(struct __ctx_buff *ctx, __u32 secctx, const bool from_host,
key.ip6.p4 = 0;
key.family = ENDPOINT_KEY_IPV6;

ret = encap_and_redirect_netdev(ctx, &key, secctx, &trace);
ret = encap_and_redirect_netdev(ctx, &key, encrypt_key, secctx, &trace);
if (ret != DROP_NO_TUNNEL_ENDPOINT)
return ret;
}
#endif

if (!info || (!tc_index_from_ingress_proxy(ctx) &&
if (!info || (!from_ingress_proxy &&
info->sec_label == WORLD_ID)) {
/* See IPv4 comment. */
return DROP_UNROUTABLE;
}

#if defined(ENABLE_IPSEC) && !defined(TUNNEL_MODE)
/* See IPv4 comment. */
if (from_ingress_proxy && info->tunnel_endpoint && encrypt_key)
return set_ipsec_encrypt(ctx, encrypt_key, info->tunnel_endpoint,
info->sec_label);
#endif

return CTX_ACT_OK;
}

Expand Down Expand Up @@ -432,6 +448,8 @@ handle_ipv4(struct __ctx_buff *ctx, __u32 secctx,
void *data, *data_end;
struct iphdr *ip4;
int ret;
__u8 encrypt_key __maybe_unused = 0;
bool from_ingress_proxy = tc_index_from_ingress_proxy(ctx);

if (!revalidate_data(ctx, &data, &data_end, &ip4))
return DROP_INVALID;
Expand Down Expand Up @@ -560,10 +578,16 @@ handle_ipv4(struct __ctx_buff *ctx, __u32 secctx,

info = ipcache_lookup4(&IPCACHE_MAP, ip4->daddr, V4_CACHE_KEY_LEN);

#ifdef ENABLE_IPSEC
/* We encrypt host to remote pod packets only if they are from ingress proxy. */
if (from_ingress_proxy && info)
encrypt_key = get_min_encrypt_key(info->key);
#endif

#ifdef TUNNEL_MODE
if (info != NULL && info->tunnel_endpoint != 0) {
return encap_and_redirect_with_nodeid(ctx, info->tunnel_endpoint,
secctx, info->sec_label,
encrypt_key, secctx, info->sec_label,
&trace);
} else {
/* IPv4 lookup key: daddr & IPV4_MASK */
Expand All @@ -573,13 +597,13 @@ handle_ipv4(struct __ctx_buff *ctx, __u32 secctx,
key.family = ENDPOINT_KEY_IPV4;

cilium_dbg(ctx, DBG_NETDEV_ENCAP4, key.ip4, secctx);
ret = encap_and_redirect_netdev(ctx, &key, secctx, &trace);
ret = encap_and_redirect_netdev(ctx, &key, encrypt_key, secctx, &trace);
if (ret != DROP_NO_TUNNEL_ENDPOINT)
return ret;
}
#endif

if (!info || (!tc_index_from_ingress_proxy(ctx) &&
if (!info || (!from_ingress_proxy &&
info->sec_label == WORLD_ID)) {
/* We have received a packet for which no ipcache entry exists,
* we do not know what to do with this packet, drop it.
Expand All @@ -596,6 +620,14 @@ handle_ipv4(struct __ctx_buff *ctx, __u32 secctx,
*/
return DROP_UNROUTABLE;
}

#if defined(ENABLE_IPSEC) && !defined(TUNNEL_MODE)
/* We encrypt host to remote pod packets only if they are from ingress proxy. */
if (from_ingress_proxy && info->tunnel_endpoint && encrypt_key)
return set_ipsec_encrypt(ctx, encrypt_key, info->tunnel_endpoint,
info->sec_label);
#endif

return CTX_ACT_OK;
}

Expand Down
14 changes: 14 additions & 0 deletions bpf/lib/encap.h
Expand Up @@ -123,9 +123,16 @@ __encap_and_redirect_with_nodeid(struct __ctx_buff *ctx, __u32 tunnel_endpoint,
*/
static __always_inline int
encap_and_redirect_with_nodeid(struct __ctx_buff *ctx, __u32 tunnel_endpoint,
			       __u8 encrypt_key __maybe_unused,
			       __u32 seclabel, __u32 dstid,
			       const struct trace_ctx *trace)
{
#ifdef ENABLE_IPSEC
	/* A non-zero key means the caller asked for encryption: hand the
	 * packet to set_ipsec_encrypt() and return its result instead of
	 * encapsulating here.
	 */
	if (encrypt_key != 0) {
		return set_ipsec_encrypt(ctx, encrypt_key, tunnel_endpoint,
					 seclabel);
	}
#endif

	/* Key is zero, or IPsec support is compiled out (in which case
	 * encrypt_key is unused): encapsulate towards tunnel_endpoint,
	 * marking the destination as NOT_VTEP_DST.
	 */
	return __encap_and_redirect_with_nodeid(ctx, tunnel_endpoint, seclabel,
						dstid, NOT_VTEP_DST, trace);
}
Expand Down Expand Up @@ -209,6 +216,7 @@ encap_and_redirect_lxc(struct __ctx_buff *ctx, __u32 tunnel_endpoint,

static __always_inline int
encap_and_redirect_netdev(struct __ctx_buff *ctx, struct tunnel_key *k,
__u8 encrypt_key __maybe_unused,
__u32 seclabel, const struct trace_ctx *trace)
{
struct tunnel_value *tunnel;
Expand All @@ -217,6 +225,12 @@ encap_and_redirect_netdev(struct __ctx_buff *ctx, struct tunnel_key *k,
if (!tunnel)
return DROP_NO_TUNNEL_ENDPOINT;

#ifdef ENABLE_IPSEC
if (encrypt_key)
return set_ipsec_encrypt(ctx, encrypt_key, tunnel->ip4,
seclabel);
#endif

return __encap_and_redirect_with_nodeid(ctx, tunnel->ip4, seclabel,
0, NOT_VTEP_DST, trace);
}
Expand Down

0 comments on commit 383b1b1

Please sign in to comment.