Skip to content

Commit

Permalink
bpf_host can handle packets passed from L7 proxy
Browse files Browse the repository at this point in the history
Previously cilium#25440 removed
bpf_host's logic for host-to-remote-pod packets.

However, we recently realized that such host-to-remote-pod traffic can also
be pod-to-pod traffic passing through the L7 proxy. This commit makes
bpf_host capable of handling these host-to-remote-pod packets, as long as
they originate from the L7 proxy.

Fixes: cilium#25440

Suggested-by: Paul Chaignon <paul.chaignon@gmail.com>
Signed-off-by: Zhichuan Liang <gray.liang@isovalent.com>
  • Loading branch information
jschwinger233 committed Dec 1, 2023
1 parent dd4233d commit 22515fe
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 10 deletions.
46 changes: 39 additions & 7 deletions bpf/bpf_host.c
Expand Up @@ -239,6 +239,8 @@ handle_ipv6_cont(struct __ctx_buff *ctx, __u32 secctx, const bool from_host,
struct remote_endpoint_info *info = NULL;
struct endpoint_info *ep;
int ret;
__u8 encrypt_key __maybe_unused = 0;
bool from_ingress_proxy = tc_index_from_ingress_proxy(ctx);

if (!revalidate_data(ctx, &data, &data_end, &ip6))
return DROP_INVALID;
Expand Down Expand Up @@ -345,10 +347,16 @@ handle_ipv6_cont(struct __ctx_buff *ctx, __u32 secctx, const bool from_host,
dst = (union v6addr *) &ip6->daddr;
info = lookup_ip6_remote_endpoint(dst, 0);

#ifdef ENABLE_IPSEC
/* See IPv4 comment. */
if (from_ingress_proxy && info)
encrypt_key = get_min_encrypt_key(info->key);
#endif

#ifdef TUNNEL_MODE
if (info != NULL && info->tunnel_endpoint != 0) {
return encap_and_redirect_with_nodeid(ctx, info->tunnel_endpoint,
secctx, info->sec_identity,
encrypt_key, secctx, info->sec_identity,
&trace);
} else {
struct tunnel_key key = {};
Expand All @@ -360,16 +368,24 @@ handle_ipv6_cont(struct __ctx_buff *ctx, __u32 secctx, const bool from_host,
key.ip6.p4 = 0;
key.family = ENDPOINT_KEY_IPV6;

ret = encap_and_redirect_netdev(ctx, &key, secctx, &trace);
ret = encap_and_redirect_netdev(ctx, &key, encrypt_key, secctx, &trace);
if (ret != DROP_NO_TUNNEL_ENDPOINT)
return ret;
}
#endif

if (!info || (!tc_index_from_ingress_proxy(ctx) && identity_is_world_ipv6(info->sec_identity))) {
if (!info || (!from_ingress_proxy && identity_is_world_ipv6(info->sec_identity))) {
/* See IPv4 comment. */
return DROP_UNROUTABLE;
}

#if defined(ENABLE_IPSEC) && !defined(TUNNEL_MODE)
/* See IPv4 comment. */
if (from_ingress_proxy && info->tunnel_endpoint && encrypt_key)
return set_ipsec_encrypt(ctx, encrypt_key, info->tunnel_endpoint,
info->sec_identity);
#endif

return CTX_ACT_OK;
}

Expand Down Expand Up @@ -638,6 +654,8 @@ handle_ipv4_cont(struct __ctx_buff *ctx, __u32 secctx, const bool from_host,
struct remote_endpoint_info *info;
struct endpoint_info *ep;
int ret;
__u8 encrypt_key __maybe_unused = 0;
bool from_ingress_proxy = tc_index_from_ingress_proxy(ctx);

if (!revalidate_data(ctx, &data, &data_end, &ip4))
return DROP_INVALID;
Expand Down Expand Up @@ -762,10 +780,16 @@ handle_ipv4_cont(struct __ctx_buff *ctx, __u32 secctx, const bool from_host,

info = lookup_ip4_remote_endpoint(ip4->daddr, 0);

#ifdef ENABLE_IPSEC
/* We encrypt host to remote pod packets only if they are from ingress proxy. */
if (from_ingress_proxy && info)
encrypt_key = get_min_encrypt_key(info->key);
#endif

#ifdef TUNNEL_MODE
if (info != NULL && info->tunnel_endpoint != 0) {
return encap_and_redirect_with_nodeid(ctx, info->tunnel_endpoint,
secctx, info->sec_identity,
encrypt_key, secctx, info->sec_identity,
&trace);
} else {
/* IPv4 lookup key: daddr & IPV4_MASK */
Expand All @@ -775,13 +799,13 @@ handle_ipv4_cont(struct __ctx_buff *ctx, __u32 secctx, const bool from_host,
key.family = ENDPOINT_KEY_IPV4;

cilium_dbg(ctx, DBG_NETDEV_ENCAP4, key.ip4, secctx);
ret = encap_and_redirect_netdev(ctx, &key, secctx, &trace);
ret = encap_and_redirect_netdev(ctx, &key, encrypt_key, secctx, &trace);
if (ret != DROP_NO_TUNNEL_ENDPOINT)
return ret;
}
#endif

if (!info || (!tc_index_from_ingress_proxy(ctx) && identity_is_world_ipv4(info->sec_identity))) {
if (!info || (!from_ingress_proxy && identity_is_world_ipv4(info->sec_identity))) {
/* We have received a packet for which no ipcache entry exists,
* we do not know what to do with this packet, drop it.
*
Expand All @@ -797,6 +821,14 @@ handle_ipv4_cont(struct __ctx_buff *ctx, __u32 secctx, const bool from_host,
*/
return DROP_UNROUTABLE;
}

#if defined(ENABLE_IPSEC) && !defined(TUNNEL_MODE)
/* We encrypt host to remote pod packets only if they are from ingress proxy. */
if (from_ingress_proxy && info->tunnel_endpoint && encrypt_key)
return set_ipsec_encrypt(ctx, encrypt_key, info->tunnel_endpoint,
info->sec_identity);
#endif

return CTX_ACT_OK;
}

Expand Down Expand Up @@ -940,7 +972,7 @@ static __always_inline int do_netdev_encrypt_encap(struct __ctx_buff *ctx, __u32

ctx->mark = 0;
bpf_clear_meta(ctx);
return encap_and_redirect_with_nodeid(ctx, ep->tunnel_endpoint,
return encap_and_redirect_with_nodeid(ctx, ep->tunnel_endpoint, 0,
src_id, 0, &trace);
}
#endif /* ENABLE_IPSEC && TUNNEL_MODE */
Expand Down
20 changes: 17 additions & 3 deletions bpf/lib/encap.h
Expand Up @@ -68,9 +68,16 @@ __encap_and_redirect_with_nodeid(struct __ctx_buff *ctx, __u32 src_ip __maybe_un
*/
static __always_inline int
encap_and_redirect_with_nodeid(struct __ctx_buff *ctx, __be32 tunnel_endpoint,
__u8 encrypt_key __maybe_unused,
__u32 seclabel, __u32 dstid,
const struct trace_ctx *trace)
{
#ifdef ENABLE_IPSEC
if (encrypt_key)
return set_ipsec_encrypt(ctx, encrypt_key, tunnel_endpoint,
seclabel);
#endif

return __encap_and_redirect_with_nodeid(ctx, 0, tunnel_endpoint,
seclabel, dstid, NOT_VTEP_DST,
trace);
Expand Down Expand Up @@ -110,7 +117,7 @@ __encap_and_redirect_lxc(struct __ctx_buff *ctx, __be32 tunnel_endpoint,
/* tell caller that this packet needs to go through the stack: */
return CTX_ACT_OK;
#else
return encap_and_redirect_with_nodeid(ctx, tunnel_endpoint, seclabel,
return encap_and_redirect_with_nodeid(ctx, tunnel_endpoint, 0, seclabel,
dstid, trace);
#endif /* !ENABLE_NODEPORT && ENABLE_HOST_FIREWALL */
}
Expand Down Expand Up @@ -161,13 +168,14 @@ encap_and_redirect_lxc(struct __ctx_buff *ctx,
seclabel);
}
# endif
return encap_and_redirect_with_nodeid(ctx, tunnel->ip4, seclabel, dstid,
return encap_and_redirect_with_nodeid(ctx, tunnel->ip4, 0, seclabel, dstid,
trace);
#endif /* ENABLE_HIGH_SCALE_IPCACHE */
}

static __always_inline int
encap_and_redirect_netdev(struct __ctx_buff *ctx, struct tunnel_key *k,
__u8 encrypt_key __maybe_unused,
__u32 seclabel, const struct trace_ctx *trace)
{
struct tunnel_value *tunnel;
Expand All @@ -176,7 +184,13 @@ encap_and_redirect_netdev(struct __ctx_buff *ctx, struct tunnel_key *k,
if (!tunnel)
return DROP_NO_TUNNEL_ENDPOINT;

return encap_and_redirect_with_nodeid(ctx, tunnel->ip4, seclabel, 0,
#ifdef ENABLE_IPSEC
if (encrypt_key)
return set_ipsec_encrypt(ctx, encrypt_key, tunnel->ip4,
seclabel);
#endif

return encap_and_redirect_with_nodeid(ctx, tunnel->ip4, 0, seclabel, 0,
trace);
}
#endif /* TUNNEL_MODE || ENABLE_HIGH_SCALE_IPCACHE */
Expand Down

0 comments on commit 22515fe

Please sign in to comment.