From 1e63ac06ee8beb8bb483f73bbfd2923ed16e8df5 Mon Sep 17 00:00:00 2001 From: Gray Liang Date: Fri, 20 Oct 2023 15:57:02 +0800 Subject: [PATCH] Allow proxy replies to WORLD_ID [ upstream commit ac6385637a7bc39ec636e3808d3a5e9c13cb3c0e ] This is an alternative approach to fix cilium/cilium#21954, so that we can re-introduce the 2005 from-proxy routing rule in following patches to fix L7 proxy issues. This commit simply allows packets to WORLD as long as they are from ingress proxy. This was one of the solution suggested by Martynas, as recorded in commit message cilium/cilium@c534bb7: One fix was to extend the troublesome check https://github.com/cilium/cilium/blob/v1.12.3/bpf/bpf_host.c#L626 by allowing proxy replies to `WORLD_ID`. To tell if an skb is originated from ingress proxy, the commit extends the semantic of existing flags `TC_INDEX_F_SKIP_{INGRESS,EGRESS}_PROXY`, renames flags to clarify the changed meaning. Fixes: cilium/cilium#21954 (Reply from pod to outside is dropped when L7 ingress policy is used) Signed-off-by: Zhichuan Liang Signed-off-by: Julian Wiedmann --- bpf/bpf_host.c | 10 ++++++++-- bpf/bpf_lxc.c | 8 ++++---- bpf/lib/common.h | 4 ++-- bpf/lib/identity.h | 4 ++-- bpf/lib/proxy.h | 16 ++++++++-------- 5 files changed, 24 insertions(+), 18 deletions(-) diff --git a/bpf/bpf_host.c b/bpf/bpf_host.c index 07fdf4fd9c701..9387b29d4952c 100644 --- a/bpf/bpf_host.c +++ b/bpf/bpf_host.c @@ -304,7 +304,8 @@ handle_ipv6(struct __ctx_buff *ctx, __u32 secctx, const bool from_host, } #endif - if (info == NULL || info->sec_label == WORLD_ID) { + if (!info || (!tc_index_from_ingress_proxy(ctx) && + info->sec_label == WORLD_ID)) { /* See IPv4 comment. */ return DROP_UNROUTABLE; } @@ -578,7 +579,8 @@ handle_ipv4(struct __ctx_buff *ctx, __u32 secctx, } #endif - if (info == NULL || info->sec_label == WORLD_ID) { + if (!info || (!tc_index_from_ingress_proxy(ctx) && + info->sec_label == WORLD_ID)) { /* We have received a packet for which no ipcache entry exists, * we do not know what to do with this packet, drop it. * @@ -587,6 +589,10 @@ handle_ipv4(struct __ctx_buff *ctx, __u32 secctx, * entry. Therefore we need to test for WORLD_ID. It is clearly * wrong to route a ctx to cilium_host for which we don't know * anything about it as otherwise we'll run into a routing loop. + * + * Note that we do not drop packets from ingress proxy even if + * they are going to WORLD_ID. This is to avoid + * https://github.com/cilium/cilium/issues/21954. */ return DROP_UNROUTABLE; } diff --git a/bpf/bpf_lxc.c b/bpf/bpf_lxc.c index 4f4332bc55951..060c410d0f1a6 100644 --- a/bpf/bpf_lxc.c +++ b/bpf/bpf_lxc.c @@ -1432,7 +1432,7 @@ ipv6_policy(struct __ctx_buff *ctx, int ifindex, __u32 src_label, /* If packet is coming from the ingress proxy we have to skip * redirection to the ingress proxy as we would loop forever. */ - skip_ingress_proxy = tc_index_skip_ingress_proxy(ctx); + skip_ingress_proxy = tc_index_from_ingress_proxy(ctx); ct_buffer = map_lookup_elem(&CT_TAIL_CALL_BUFFER6, &zero); if (!ct_buffer) @@ -1461,7 +1461,7 @@ ipv6_policy(struct __ctx_buff *ctx, int ifindex, __u32 src_label, * Always redirect connections that originated from L7 LB. */ if (ct_state_is_from_l7lb(ct_state) || - (ct_state->proxy_redirect && !tc_index_skip_egress_proxy(ctx))) { + (ct_state->proxy_redirect && !tc_index_from_egress_proxy(ctx))) { /* This is a reply, the proxy port does not need to be embedded * into ctx->mark and *proxy_port can be left unset. */ @@ -1737,7 +1737,7 @@ ipv4_policy(struct __ctx_buff *ctx, int ifindex, __u32 src_label, enum ct_status /* If packet is coming from the ingress proxy we have to skip * redirection to the ingress proxy as we would loop forever. */ - skip_ingress_proxy = tc_index_skip_ingress_proxy(ctx); + skip_ingress_proxy = tc_index_from_ingress_proxy(ctx); orig_sip = ip4->saddr; @@ -1776,7 +1776,7 @@ ipv4_policy(struct __ctx_buff *ctx, int ifindex, __u32 src_label, enum ct_status /* Skip policy enforcement for return traffic. */ if (ret == CT_REPLY || ret == CT_RELATED) { if (ct_state_is_from_l7lb(ct_state) || - (ct_state->proxy_redirect && !tc_index_skip_egress_proxy(ctx))) { + (ct_state->proxy_redirect && !tc_index_from_egress_proxy(ctx))) { /* This is a reply, the proxy port does not need to be embedded * into ctx->mark and *proxy_port can be left unset. */ diff --git a/bpf/lib/common.h b/bpf/lib/common.h index 5ebc8807ad8e7..b2b88608a04f8 100644 --- a/bpf/lib/common.h +++ b/bpf/lib/common.h @@ -664,8 +664,8 @@ static __always_inline __u32 or_encrypt_key(__u8 key) * cilium_host @egress * bpf_host -> bpf_lxc */ -#define TC_INDEX_F_SKIP_INGRESS_PROXY 1 -#define TC_INDEX_F_SKIP_EGRESS_PROXY 2 +#define TC_INDEX_F_FROM_INGRESS_PROXY 1 +#define TC_INDEX_F_FROM_EGRESS_PROXY 2 #define TC_INDEX_F_SKIP_NODEPORT 4 #define TC_INDEX_F_SKIP_RECIRCULATION 8 #define TC_INDEX_F_SKIP_HOST_FIREWALL 16 diff --git a/bpf/lib/identity.h b/bpf/lib/identity.h index 59fea1b8dcfa2..e5da8f4b2cac8 100644 --- a/bpf/lib/identity.h +++ b/bpf/lib/identity.h @@ -100,14 +100,14 @@ static __always_inline __u32 inherit_identity_from_host(struct __ctx_buff *ctx, */ if (magic == MARK_MAGIC_PROXY_INGRESS) { *identity = get_identity(ctx); - ctx->tc_index |= TC_INDEX_F_SKIP_INGRESS_PROXY; + ctx->tc_index |= TC_INDEX_F_FROM_INGRESS_PROXY; /* (Return) packets from the egress proxy must skip the redirection to * the proxy, as the packet would loop and/or the connection be reset * otherwise. */ } else if (magic == MARK_MAGIC_PROXY_EGRESS) { *identity = get_identity(ctx); - ctx->tc_index |= TC_INDEX_F_SKIP_EGRESS_PROXY; + ctx->tc_index |= TC_INDEX_F_FROM_EGRESS_PROXY; } else if (magic == MARK_MAGIC_IDENTITY) { *identity = get_identity(ctx); } else if (magic == MARK_MAGIC_HOST) { diff --git a/bpf/lib/proxy.h b/bpf/lib/proxy.h index 7be46bfa808d8..e88ec55778ec5 100644 --- a/bpf/lib/proxy.h +++ b/bpf/lib/proxy.h @@ -337,30 +337,30 @@ out: __maybe_unused } /** - * tc_index_skip_ingress_proxy - returns true if packet originates from ingress proxy + * tc_index_from_ingress_proxy - returns true if packet originates from ingress proxy */ -static __always_inline bool tc_index_skip_ingress_proxy(struct __ctx_buff *ctx) +static __always_inline bool tc_index_from_ingress_proxy(struct __ctx_buff *ctx) { volatile __u32 tc_index = ctx->tc_index; #ifdef DEBUG - if (tc_index & TC_INDEX_F_SKIP_INGRESS_PROXY) + if (tc_index & TC_INDEX_F_FROM_INGRESS_PROXY) cilium_dbg(ctx, DBG_SKIP_PROXY, tc_index, 0); #endif - return tc_index & TC_INDEX_F_SKIP_INGRESS_PROXY; + return tc_index & TC_INDEX_F_FROM_INGRESS_PROXY; } /** - * tc_index_skip_egress_proxy - returns true if packet originates from egress proxy + * tc_index_from_egress_proxy - returns true if packet originates from egress proxy */ -static __always_inline bool tc_index_skip_egress_proxy(struct __ctx_buff *ctx) +static __always_inline bool tc_index_from_egress_proxy(struct __ctx_buff *ctx) { volatile __u32 tc_index = ctx->tc_index; #ifdef DEBUG - if (tc_index & TC_INDEX_F_SKIP_EGRESS_PROXY) + if (tc_index & TC_INDEX_F_FROM_EGRESS_PROXY) cilium_dbg(ctx, DBG_SKIP_PROXY, tc_index, 0); #endif - return tc_index & TC_INDEX_F_SKIP_EGRESS_PROXY; + return tc_index & TC_INDEX_F_FROM_EGRESS_PROXY; } #endif /* __LIB_PROXY_H_ */