Skip to content

Commit

Permalink
cilium: Fix 16bit ifindex limitation
Browse files Browse the repository at this point in the history
The limitation exists mainly on old kernels where the fib lookup helper
does not populate the outgoing ifindex. Only for this case we rely on
the CT lookup stored ifindex which back then was added as a 16bit field
due to limited padding space available. Nowadays this can be lifted
after the big rework in cilium#23884. We've seen users with high netdevice
churn run into this limitation where the agent bails out.

Apart from fixing the bleed, this can be further refined by not relying
on the asm.FnRedirectPeer helper presence but by actually doing a runtime
BPF program probe so that stable kernels can even be covered.

Fixes: cilium#16260
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
  • Loading branch information
borkmann committed Aug 22, 2023
1 parent 6f676a3 commit bd8b4d0
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 13 deletions.
2 changes: 2 additions & 0 deletions bpf/lib/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -1142,7 +1142,9 @@ struct ct_state {
__be32 svc_addr;
#endif
__u32 src_sec_id;
#ifndef HAVE_FIB_IFINDEX
__u16 ifindex;
#endif
__u32 backend_id; /* Backend ID in lb4_backends */
};

Expand Down
8 changes: 6 additions & 2 deletions bpf/lib/conntrack.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,9 @@ __ct_lookup(const void *map, struct __ctx_buff *ctx, const void *tuple,
ct_state->proxy_redirect = entry->proxy_redirect;
ct_state->from_l7lb = entry->from_l7lb;
ct_state->from_tunnel = entry->from_tunnel;
#ifndef HAVE_FIB_IFINDEX
ct_state->ifindex = entry->ifindex;
#endif
}
#ifdef CONNTRACK_ACCOUNTING
/* FIXME: This is slow, per-cpu counters? */
Expand Down Expand Up @@ -868,8 +870,9 @@ static __always_inline int ct_create6(const void *map_main, const void *map_rela
} else if (dir == CT_INGRESS || dir == CT_EGRESS) {
entry.node_port = ct_state->node_port;
entry.dsr = ct_state->dsr;
#ifndef HAVE_FIB_IFINDEX
entry.ifindex = ct_state->ifindex;

#endif
/* Note if this is a proxy connection so that replies can be redirected
* back to the proxy.
*/
Expand Down Expand Up @@ -944,8 +947,9 @@ static __always_inline int ct_create4(const void *map_main,
entry.node_port = ct_state->node_port;
entry.dsr = ct_state->dsr;
entry.from_tunnel = ct_state->from_tunnel;
#ifndef HAVE_FIB_IFINDEX
entry.ifindex = ct_state->ifindex;

#endif
/* Note if this is a proxy connection so that replies can be redirected
* back to the proxy.
*/
Expand Down
8 changes: 8 additions & 0 deletions bpf/lib/nodeport.h
Original file line number Diff line number Diff line change
Expand Up @@ -963,7 +963,9 @@ nodeport_rev_dnat_ingress_ipv6(struct __ctx_buff *ctx, struct trace_ctx *trace,
if (!revalidate_data(ctx, &data, &data_end, &ip6))
return DROP_INVALID;
ctx_snat_done_set(ctx);
#ifndef HAVE_FIB_IFINDEX
ifindex = ct_state.ifindex;
#endif
#ifdef TUNNEL_MODE
{
union v6addr *dst = (union v6addr *)&ip6->daddr;
Expand Down Expand Up @@ -1393,7 +1395,9 @@ static __always_inline int nodeport_lb6(struct __ctx_buff *ctx,
redo:
ct_state_new.src_sec_id = WORLD_IPV6_ID;
ct_state_new.node_port = 1;
#ifndef HAVE_FIB_IFINDEX
ct_state_new.ifindex = (__u16)NATIVE_DEV_IFINDEX;
#endif
ret = ct_create6(get_ct_map6(&tuple), NULL, &tuple, ctx,
CT_EGRESS, &ct_state_new, false, false, ext_err);
if (IS_ERR(ret))
Expand Down Expand Up @@ -2420,7 +2424,9 @@ nodeport_rev_dnat_ingress_ipv4(struct __ctx_buff *ctx, struct trace_ctx *trace,
if (!revalidate_data(ctx, &data, &data_end, &ip4))
return DROP_INVALID;
ctx_snat_done_set(ctx);
#ifndef HAVE_FIB_IFINDEX
ifindex = ct_state.ifindex;
#endif
#if defined(TUNNEL_MODE)
{
struct remote_endpoint_info *info;
Expand Down Expand Up @@ -2887,7 +2893,9 @@ static __always_inline int nodeport_lb4(struct __ctx_buff *ctx,
redo:
ct_state_new.src_sec_id = WORLD_IPV4_ID;
ct_state_new.node_port = 1;
#ifndef HAVE_FIB_IFINDEX
ct_state_new.ifindex = (__u16)NATIVE_DEV_IFINDEX;
#endif
ret = ct_create4(get_ct_map4(&tuple), NULL, &tuple, ctx,
CT_EGRESS, &ct_state_new, false, false, ext_err);
if (IS_ERR(ret))
Expand Down
19 changes: 12 additions & 7 deletions daemon/cmd/kube_proxy_replacement.go
Original file line number Diff line number Diff line change
Expand Up @@ -457,13 +457,18 @@ func finishKubeProxyReplacementInit() error {
option.Config.NodePortMode == option.NodePortModeSNAT &&
probes.HaveLargeInstructionLimit() == nil

for _, iface := range option.Config.GetDevices() {
link, err := netlink.LinkByName(iface)
if err != nil {
return fmt.Errorf("Cannot retrieve %s link: %w", iface, err)
}
if idx := link.Attrs().Index; idx > math.MaxUint16 {
return fmt.Errorf("%s link ifindex %d exceeds max(uint16)", iface, idx)
// In the case where the fib lookup does not return the outgoing ifindex
// the datapath needs to store it in our CT map, and the map's field is
// limited to 16 bit.
if probes.HaveFibIfindex() != nil {
for _, iface := range option.Config.GetDevices() {
link, err := netlink.LinkByName(iface)
if err != nil {
return fmt.Errorf("Cannot retrieve %s link: %w", iface, err)
}
if idx := link.Attrs().Index; idx > math.MaxUint16 {
return fmt.Errorf("%s link ifindex %d exceeds max(uint16)", iface, idx)
}
}
}

Expand Down
14 changes: 10 additions & 4 deletions pkg/datapath/linux/probes/probes.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ type ProgramHelper struct {

type miscFeatures struct {
HaveLargeInsnLimit bool
HaveFibIfindex bool
}

type FeatureProbes struct {
Expand Down Expand Up @@ -381,6 +382,13 @@ func HaveBoundedLoops() error {
return nil
}

// HaveFibIfindex checks if kernel has d1c362e1dd68 ("bpf: Always return target
// ifindex in bpf_fib_lookup") which is 5.10+. This got merged in the same kernel
// as the new redirect helpers.
func HaveFibIfindex() error {
return features.HaveProgramHelper(ebpf.SchedCLS, asm.FnRedirectPeer)
}

// HaveV2ISA is a wrapper around features.HaveV2ISA() to check if the kernel
// supports the V2 ISA.
// On unexpected probe results this function will terminate with log.Fatal().
Expand Down Expand Up @@ -562,6 +570,7 @@ func ExecuteHeaderProbes() *FeatureProbes {
}

probes.Misc.HaveLargeInsnLimit = (HaveLargeInstructionLimit() == nil)
probes.Misc.HaveFibIfindex = (HaveFibIfindex() == nil)

return &probes
}
Expand All @@ -582,10 +591,7 @@ func writeCommonHeader(writer io.Writer, probes *FeatureProbes) error {
"HAVE_LARGE_INSN_LIMIT": probes.Misc.HaveLargeInsnLimit,
"HAVE_SET_RETVAL": probes.ProgramHelpers[ProgramHelper{ebpf.CGroupSock, asm.FnSetRetval}],
"HAVE_FIB_NEIGH": probes.ProgramHelpers[ProgramHelper{ebpf.SchedCLS, asm.FnRedirectNeigh}],
// Check if kernel has d1c362e1dd68 ("bpf: Always return target ifindex
// in bpf_fib_lookup") which is 5.10+. This got merged in the same kernel
// as the new redirect helpers.
"HAVE_FIB_IFINDEX": probes.ProgramHelpers[ProgramHelper{ebpf.SchedCLS, asm.FnRedirectPeer}],
"HAVE_FIB_IFINDEX": probes.Misc.HaveFibIfindex,
}

return writeFeatureHeader(writer, features, true)
Expand Down

0 comments on commit bd8b4d0

Please sign in to comment.