From d9235f9df74dd93f7ba8bc178202b8d1479a9f32 Mon Sep 17 00:00:00 2001 From: Gray Liang Date: Sat, 21 Oct 2023 00:00:40 +0800 Subject: [PATCH] bpf_host can handle packets passed from L7 proxy Previously https://github.com/cilium/cilium/pull/25440 removed bpf_host's logic for host-to-remote-pod packets. However, we recently realized that such host-to-remote-pod traffic can also be pod-to-pod traffic passing through the L7 proxy. This commit makes bpf_host capable of handling these host-to-remote-pod packets as long as they originate from the L7 proxy. Fixes: cilium/cilium#25440 Suggested-by: Paul Chaignon Signed-off-by: Zhichuan Liang --- bpf/bpf_host.c | 46 +++++++++++++++++++++++++++++++++++++++------- bpf/lib/encap.h | 20 +++++++++++++++++--- 2 files changed, 56 insertions(+), 10 deletions(-) diff --git a/bpf/bpf_host.c b/bpf/bpf_host.c index fa15a7dd22b8..5b9dbc3d37f8 100644 --- a/bpf/bpf_host.c +++ b/bpf/bpf_host.c @@ -239,6 +239,8 @@ handle_ipv6_cont(struct __ctx_buff *ctx, __u32 secctx, const bool from_host, struct remote_endpoint_info *info = NULL; struct endpoint_info *ep; int ret; + __u8 encrypt_key __maybe_unused = 0; + bool from_ingress_proxy = tc_index_from_ingress_proxy(ctx); if (!revalidate_data(ctx, &data, &data_end, &ip6)) return DROP_INVALID; @@ -345,10 +347,16 @@ handle_ipv6_cont(struct __ctx_buff *ctx, __u32 secctx, const bool from_host, dst = (union v6addr *) &ip6->daddr; info = lookup_ip6_remote_endpoint(dst, 0); +#ifdef ENABLE_IPSEC + /* See IPv4 comment. 
*/ + if (from_ingress_proxy && info) + encrypt_key = get_min_encrypt_key(info->key); +#endif + #ifdef TUNNEL_MODE if (info != NULL && info->tunnel_endpoint != 0) { return encap_and_redirect_with_nodeid(ctx, info->tunnel_endpoint, - secctx, info->sec_identity, + encrypt_key, secctx, info->sec_identity, &trace); } else { struct tunnel_key key = {}; @@ -360,17 +368,25 @@ handle_ipv6_cont(struct __ctx_buff *ctx, __u32 secctx, const bool from_host, key.ip6.p4 = 0; key.family = ENDPOINT_KEY_IPV6; - ret = encap_and_redirect_netdev(ctx, &key, secctx, &trace); + ret = encap_and_redirect_netdev(ctx, &key, encrypt_key, secctx, &trace); if (ret != DROP_NO_TUNNEL_ENDPOINT) return ret; } #endif - if (!info || (!tc_index_from_ingress_proxy(ctx) && + if (!info || (!from_ingress_proxy && identity_is_world_ipv6(info->sec_identity))) { /* See IPv4 comment. */ return DROP_UNROUTABLE; } + +#if defined(ENABLE_IPSEC) && !defined(TUNNEL_MODE) + /* See IPv4 comment. */ + if (from_ingress_proxy && info->tunnel_endpoint && encrypt_key) + return set_ipsec_encrypt(ctx, encrypt_key, info->tunnel_endpoint, + info->sec_identity); +#endif + return CTX_ACT_OK; } @@ -639,6 +655,8 @@ handle_ipv4_cont(struct __ctx_buff *ctx, __u32 secctx, const bool from_host, struct remote_endpoint_info *info; struct endpoint_info *ep; int ret; + __u8 encrypt_key __maybe_unused = 0; + bool from_ingress_proxy = tc_index_from_ingress_proxy(ctx); if (!revalidate_data(ctx, &data, &data_end, &ip4)) return DROP_INVALID; @@ -763,10 +781,16 @@ handle_ipv4_cont(struct __ctx_buff *ctx, __u32 secctx, const bool from_host, info = lookup_ip4_remote_endpoint(ip4->daddr, 0); +#ifdef ENABLE_IPSEC + /* We encrypt host to remote pod packets only if they are from ingress proxy. 
*/ + if (from_ingress_proxy && info) + encrypt_key = get_min_encrypt_key(info->key); +#endif + #ifdef TUNNEL_MODE if (info != NULL && info->tunnel_endpoint != 0) { return encap_and_redirect_with_nodeid(ctx, info->tunnel_endpoint, - secctx, info->sec_identity, + encrypt_key, secctx, info->sec_identity, &trace); } else { /* IPv4 lookup key: daddr & IPV4_MASK */ @@ -776,13 +800,13 @@ handle_ipv4_cont(struct __ctx_buff *ctx, __u32 secctx, const bool from_host, key.family = ENDPOINT_KEY_IPV4; cilium_dbg(ctx, DBG_NETDEV_ENCAP4, key.ip4, secctx); - ret = encap_and_redirect_netdev(ctx, &key, secctx, &trace); + ret = encap_and_redirect_netdev(ctx, &key, encrypt_key, secctx, &trace); if (ret != DROP_NO_TUNNEL_ENDPOINT) return ret; } #endif - if (!info || (!tc_index_from_ingress_proxy(ctx) && + if (!info || (!from_ingress_proxy && identity_is_world_ipv4(info->sec_identity))) { /* We have received a packet for which no ipcache entry exists, * we do not know what to do with this packet, drop it. @@ -799,6 +823,14 @@ handle_ipv4_cont(struct __ctx_buff *ctx, __u32 secctx, const bool from_host, */ return DROP_UNROUTABLE; } + +#if defined(ENABLE_IPSEC) && !defined(TUNNEL_MODE) + /* We encrypt host to remote pod packets only if they are from ingress proxy. 
*/ + if (from_ingress_proxy && info->tunnel_endpoint && encrypt_key) + return set_ipsec_encrypt(ctx, encrypt_key, info->tunnel_endpoint, + info->sec_identity); +#endif + return CTX_ACT_OK; } @@ -942,7 +974,7 @@ static __always_inline int do_netdev_encrypt_encap(struct __ctx_buff *ctx, __u32 ctx->mark = 0; bpf_clear_meta(ctx); - return encap_and_redirect_with_nodeid(ctx, ep->tunnel_endpoint, + return encap_and_redirect_with_nodeid(ctx, ep->tunnel_endpoint, 0, src_id, 0, &trace); } #endif /* ENABLE_IPSEC && TUNNEL_MODE */ diff --git a/bpf/lib/encap.h b/bpf/lib/encap.h index eba2c67481da..0712ee8e8779 100644 --- a/bpf/lib/encap.h +++ b/bpf/lib/encap.h @@ -68,9 +68,16 @@ __encap_and_redirect_with_nodeid(struct __ctx_buff *ctx, __u32 src_ip __maybe_un */ static __always_inline int encap_and_redirect_with_nodeid(struct __ctx_buff *ctx, __be32 tunnel_endpoint, + __u8 encrypt_key __maybe_unused, __u32 seclabel, __u32 dstid, const struct trace_ctx *trace) { +#ifdef ENABLE_IPSEC + if (encrypt_key) + return set_ipsec_encrypt(ctx, encrypt_key, tunnel_endpoint, + seclabel); +#endif + return __encap_and_redirect_with_nodeid(ctx, 0, tunnel_endpoint, seclabel, dstid, NOT_VTEP_DST, trace); @@ -110,7 +117,7 @@ __encap_and_redirect_lxc(struct __ctx_buff *ctx, __be32 tunnel_endpoint, /* tell caller that this packet needs to go through the stack: */ return CTX_ACT_OK; #else - return encap_and_redirect_with_nodeid(ctx, tunnel_endpoint, seclabel, + return encap_and_redirect_with_nodeid(ctx, tunnel_endpoint, 0, seclabel, dstid, trace); #endif /* !ENABLE_NODEPORT && ENABLE_HOST_FIREWALL */ } @@ -161,13 +168,14 @@ encap_and_redirect_lxc(struct __ctx_buff *ctx, seclabel); } # endif - return encap_and_redirect_with_nodeid(ctx, tunnel->ip4, seclabel, dstid, + return encap_and_redirect_with_nodeid(ctx, tunnel->ip4, 0, seclabel, dstid, trace); #endif /* ENABLE_HIGH_SCALE_IPCACHE */ } static __always_inline int encap_and_redirect_netdev(struct __ctx_buff *ctx, struct tunnel_key *k, + __u8 
encrypt_key __maybe_unused, __u32 seclabel, const struct trace_ctx *trace) { struct tunnel_value *tunnel; @@ -176,7 +184,13 @@ encap_and_redirect_netdev(struct __ctx_buff *ctx, struct tunnel_key *k, if (!tunnel) return DROP_NO_TUNNEL_ENDPOINT; - return encap_and_redirect_with_nodeid(ctx, tunnel->ip4, seclabel, 0, +#ifdef ENABLE_IPSEC + if (encrypt_key) + return set_ipsec_encrypt(ctx, encrypt_key, tunnel->ip4, + seclabel); +#endif + + return encap_and_redirect_with_nodeid(ctx, tunnel->ip4, 0, seclabel, 0, trace); } #endif /* TUNNEL_MODE || ENABLE_HIGH_SCALE_IPCACHE */