From 45ef9527a1c071a1861cc4f9f32344e2e7b0d9c5 Mon Sep 17 00:00:00 2001 From: Gray Liang Date: Sat, 21 Oct 2023 00:00:40 +0800 Subject: [PATCH] bpf_host can handle packets passed from L7 proxy [ upstream commit e78ff1690e4ab862057a6aefe5f0729340694254 ] Previously, https://github.com/cilium/cilium/pull/25440 removed bpf_host's logic for host-to-remote-pod packets. However, we recently realized that such host-to-remote-pod traffic can also be pod-to-pod traffic passing through the L7 proxy. This commit makes bpf_host capable of handling these host-to-remote-pod packets as long as they originate from the L7 proxy. Fixes: cilium/cilium#25440 Suggested-by: Paul Chaignon Signed-off-by: Zhichuan Liang Signed-off-by: Julian Wiedmann --- bpf/bpf_host.c | 44 ++++++++++++++++++++++++++++++++++++++------ bpf/lib/encap.h | 14 ++++++++++++++ 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/bpf/bpf_host.c b/bpf/bpf_host.c index 9387b29d4952c..c7f80fbc9a968 100644 --- a/bpf/bpf_host.c +++ b/bpf/bpf_host.c @@ -172,6 +172,8 @@ handle_ipv6(struct __ctx_buff *ctx, __u32 secctx, const bool from_host, int ret, l3_off = ETH_HLEN, hdrlen; struct endpoint_info *ep; __u8 nexthdr; + __u8 encrypt_key __maybe_unused = 0; + bool from_ingress_proxy = tc_index_from_ingress_proxy(ctx); if (!revalidate_data(ctx, &data, &data_end, &ip6)) return DROP_INVALID; @@ -283,10 +285,16 @@ handle_ipv6(struct __ctx_buff *ctx, __u32 secctx, const bool from_host, dst = (union v6addr *) &ip6->daddr; info = ipcache_lookup6(&IPCACHE_MAP, dst, V6_CACHE_KEY_LEN); +#ifdef ENABLE_IPSEC + /* See IPv4 comment. 
*/ + if (from_ingress_proxy && info) + encrypt_key = get_min_encrypt_key(info->key); +#endif + #ifdef TUNNEL_MODE if (info != NULL && info->tunnel_endpoint != 0) { return encap_and_redirect_with_nodeid(ctx, info->tunnel_endpoint, - secctx, info->sec_label, + encrypt_key, secctx, info->sec_label, &trace); } else { struct tunnel_key key = {}; @@ -298,17 +306,25 @@ handle_ipv6(struct __ctx_buff *ctx, __u32 secctx, const bool from_host, key.ip6.p4 = 0; key.family = ENDPOINT_KEY_IPV6; - ret = encap_and_redirect_netdev(ctx, &key, secctx, &trace); + ret = encap_and_redirect_netdev(ctx, &key, encrypt_key, secctx, &trace); if (ret != DROP_NO_TUNNEL_ENDPOINT) return ret; } #endif - if (!info || (!tc_index_from_ingress_proxy(ctx) && + if (!info || (!from_ingress_proxy && info->sec_label == WORLD_ID)) { /* See IPv4 comment. */ return DROP_UNROUTABLE; } + +#if defined(ENABLE_IPSEC) && !defined(TUNNEL_MODE) + /* See IPv4 comment. */ + if (from_ingress_proxy && info->tunnel_endpoint && encrypt_key) + return set_ipsec_encrypt(ctx, encrypt_key, info->tunnel_endpoint, + info->sec_label); +#endif + return CTX_ACT_OK; } @@ -432,6 +448,8 @@ handle_ipv4(struct __ctx_buff *ctx, __u32 secctx, void *data, *data_end; struct iphdr *ip4; int ret; + __u8 encrypt_key __maybe_unused = 0; + bool from_ingress_proxy = tc_index_from_ingress_proxy(ctx); if (!revalidate_data(ctx, &data, &data_end, &ip4)) return DROP_INVALID; @@ -560,10 +578,16 @@ handle_ipv4(struct __ctx_buff *ctx, __u32 secctx, info = ipcache_lookup4(&IPCACHE_MAP, ip4->daddr, V4_CACHE_KEY_LEN); +#ifdef ENABLE_IPSEC + /* We encrypt host to remote pod packets only if they are from ingress proxy. 
*/ + if (from_ingress_proxy && info) + encrypt_key = get_min_encrypt_key(info->key); +#endif + #ifdef TUNNEL_MODE if (info != NULL && info->tunnel_endpoint != 0) { return encap_and_redirect_with_nodeid(ctx, info->tunnel_endpoint, - secctx, info->sec_label, + encrypt_key, secctx, info->sec_label, &trace); } else { /* IPv4 lookup key: daddr & IPV4_MASK */ @@ -573,13 +597,13 @@ handle_ipv4(struct __ctx_buff *ctx, __u32 secctx, key.family = ENDPOINT_KEY_IPV4; cilium_dbg(ctx, DBG_NETDEV_ENCAP4, key.ip4, secctx); - ret = encap_and_redirect_netdev(ctx, &key, secctx, &trace); + ret = encap_and_redirect_netdev(ctx, &key, encrypt_key, secctx, &trace); if (ret != DROP_NO_TUNNEL_ENDPOINT) return ret; } #endif - if (!info || (!tc_index_from_ingress_proxy(ctx) && + if (!info || (!from_ingress_proxy && info->sec_label == WORLD_ID)) { /* We have received a packet for which no ipcache entry exists, * we do not know what to do with this packet, drop it. @@ -596,6 +620,14 @@ handle_ipv4(struct __ctx_buff *ctx, __u32 secctx, */ return DROP_UNROUTABLE; } + +#if defined(ENABLE_IPSEC) && !defined(TUNNEL_MODE) + /* We encrypt host to remote pod packets only if they are from ingress proxy. 
*/ + if (from_ingress_proxy && info->tunnel_endpoint && encrypt_key) + return set_ipsec_encrypt(ctx, encrypt_key, info->tunnel_endpoint, + info->sec_label); +#endif + return CTX_ACT_OK; } diff --git a/bpf/lib/encap.h b/bpf/lib/encap.h index 982128d3d124c..2619cc1614282 100644 --- a/bpf/lib/encap.h +++ b/bpf/lib/encap.h @@ -123,9 +123,16 @@ __encap_and_redirect_with_nodeid(struct __ctx_buff *ctx, __u32 tunnel_endpoint, */ static __always_inline int encap_and_redirect_with_nodeid(struct __ctx_buff *ctx, __u32 tunnel_endpoint, + __u8 encrypt_key __maybe_unused, __u32 seclabel, __u32 dstid, const struct trace_ctx *trace) { +#ifdef ENABLE_IPSEC + if (encrypt_key) + return set_ipsec_encrypt(ctx, encrypt_key, tunnel_endpoint, + seclabel); +#endif + return __encap_and_redirect_with_nodeid(ctx, tunnel_endpoint, seclabel, dstid, NOT_VTEP_DST, trace); } @@ -209,6 +216,7 @@ encap_and_redirect_lxc(struct __ctx_buff *ctx, __u32 tunnel_endpoint, static __always_inline int encap_and_redirect_netdev(struct __ctx_buff *ctx, struct tunnel_key *k, + __u8 encrypt_key __maybe_unused, __u32 seclabel, const struct trace_ctx *trace) { struct tunnel_value *tunnel; @@ -217,6 +225,12 @@ encap_and_redirect_netdev(struct __ctx_buff *ctx, struct tunnel_key *k, if (!tunnel) return DROP_NO_TUNNEL_ENDPOINT; +#ifdef ENABLE_IPSEC + if (encrypt_key) + return set_ipsec_encrypt(ctx, encrypt_key, tunnel->ip4, + seclabel); +#endif + return __encap_and_redirect_with_nodeid(ctx, tunnel->ip4, seclabel, 0, NOT_VTEP_DST, trace); }