From 7adefddbdd34dcae57a74516b8f978ca4551ac43 Mon Sep 17 00:00:00 2001
From: Dave Tucker
Date: Fri, 3 May 2024 17:22:33 +0100
Subject: [PATCH] chore(pkg/bpf): Replace libbpfgo with cilium/ebpf

cilium/ebpf is a pure Go eBPF package used in a number of popular
cloud-native projects. The benefits to Kepler are:

1. Bytecode is built using bpf2go, and the C and Go structs are kept in
   sync automatically.
2. Cgo is no longer required, so libbpf and libelf do not need to be
   installed at compile time or dynamically linked at runtime.
3. Packaging is simplified because the correct bytecode is embedded in
   the kepler binary.

Overall I'm happy with this change; only one thing bugs me: we have to
check in the bytecode object files (e.g. kepler.bpfel.o) or the Go
tooling (go lint/go vet) complains about the missing files. I couldn't
reliably get `go generate ./...` to compile these files in CI. This
should be relatively easy to fix in the Makefile/CI environment before
we cut a release.

A short, illustrative usage sketch of the new package is appended after
the diff.

Signed-off-by: Dave Tucker
---
 .github/workflows/c.yml | 12 +-
 .github/workflows/integration_test.yml | 2 +-
 .github/workflows/unit_test.yml | 14 +-
 .gitignore | 3 -
 Makefile | 16 +-
 bpfassets/libbpf/Makefile | 27 -
 build/Dockerfile | 2 -
 go.mod | 17 +-
 go.sum | 38 +-
 hack/libbpf-headers.sh | 2 +-
 packaging/rpm/kepler.spec | 5 -
 pkg/bpf/bpf_suite_test.go | 19 +-
 pkg/bpf/exporter.go | 482 ++++------
 pkg/bpf/exporter_stub.go | 2 +-
 pkg/bpf/gen.go | 3 +
 .../bpf}/include/bpf/LICENSE.BSD-2-Clause | 0
 .../bpf}/include/bpf/bpf_helper_defs.h | 0
 .../bpf}/include/bpf/bpf_helpers.h | 0
 pkg/bpf/kepler_bpfeb.go | 168 ++++
 pkg/bpf/kepler_bpfeb.o | Bin 0 -> 15776 bytes
 pkg/bpf/kepler_bpfel.go | 168 ++++
 pkg/bpf/kepler_bpfel.o | Bin 0 -> 15776 bytes
 .../libbpf => pkg/bpf}/src/kepler.bpf.c | 0
 .../libbpf => pkg/bpf}/src/kepler.bpf.h | 2 +-
 pkg/bpf/test_utils.go | 19 +-
 pkg/bpf/types.go | 18 +-
 .../bpf/process_bpf_collector.go | 40 +-
 .../aquasecurity/libbpfgo/.clang-format | 130 ---
 .../aquasecurity/libbpfgo/.clang-tidy | 163 ----
 .../aquasecurity/libbpfgo/.gitignore | 9 -
 .../aquasecurity/libbpfgo/.gitmodules | 3 -
 .../aquasecurity/libbpfgo/.revive.toml | 154 ----
 .../github.com/aquasecurity/libbpfgo/LICENSE | 202 -----
 .../github.com/aquasecurity/libbpfgo/Makefile | 252 ------
 .../aquasecurity/libbpfgo/Readme.md | 139 ---
 .../github.com/aquasecurity/libbpfgo/btf.go | 26 -
 .../aquasecurity/libbpfgo/buf-common.go | 16 -
 .../aquasecurity/libbpfgo/buf-perf.go | 112 ---
 .../aquasecurity/libbpfgo/buf-ring.go | 112 ---
 .../github.com/aquasecurity/libbpfgo/elf.go | 62 --
 .../github.com/aquasecurity/libbpfgo/go.work | 6 -
 .../aquasecurity/libbpfgo/libbpf_cb.go | 35 -
 .../aquasecurity/libbpfgo/libbpfgo.c | 469 ----------
 .../aquasecurity/libbpfgo/libbpfgo.go | 110 ---
 .../aquasecurity/libbpfgo/libbpfgo.h | 104 ---
 .../aquasecurity/libbpfgo/link-reader.go | 27 -
 .../github.com/aquasecurity/libbpfgo/link.go | 124 ---
 .../aquasecurity/libbpfgo/logger_cb.go | 71 --
 .../aquasecurity/libbpfgo/map-common.go | 207 -----
 .../aquasecurity/libbpfgo/map-iterator.go | 66 --
 .../aquasecurity/libbpfgo/map-low.go | 476 ----------
 .../github.com/aquasecurity/libbpfgo/map.go | 576 ------------
 .../github.com/aquasecurity/libbpfgo/misc.go | 16 -
 .../aquasecurity/libbpfgo/module-iterator.go | 81 --
 .../aquasecurity/libbpfgo/module.go | 393 ---------
 .../aquasecurity/libbpfgo/prog-common.go | 239 -----
 .../github.com/aquasecurity/libbpfgo/prog.go | 559 ------------
.../aquasecurity/libbpfgo/rwArray.go | 75 -- .../aquasecurity/libbpfgo/tchook-common.go | 30 - .../aquasecurity/libbpfgo/tchook.go | 161 ---- vendor/github.com/cilium/ebpf/.clang-format | 8 + vendor/github.com/cilium/ebpf/.gitattributes | 1 + vendor/github.com/cilium/ebpf/.golangci.yaml | 17 +- vendor/github.com/cilium/ebpf/ARCHITECTURE.md | 86 -- vendor/github.com/cilium/ebpf/CODEOWNERS | 9 + vendor/github.com/cilium/ebpf/CONTRIBUTING.md | 41 +- vendor/github.com/cilium/ebpf/MAINTAINERS.md | 7 +- vendor/github.com/cilium/ebpf/Makefile | 64 +- vendor/github.com/cilium/ebpf/README.md | 49 +- vendor/github.com/cilium/ebpf/asm/alu.go | 95 +- .../github.com/cilium/ebpf/asm/alu_string.go | 48 +- vendor/github.com/cilium/ebpf/asm/func.go | 28 +- .../github.com/cilium/ebpf/asm/func_string.go | 14 +- .../github.com/cilium/ebpf/asm/instruction.go | 113 ++- vendor/github.com/cilium/ebpf/asm/jump.go | 22 +- .../github.com/cilium/ebpf/asm/load_store.go | 39 +- .../cilium/ebpf/asm/load_store_string.go | 12 +- vendor/github.com/cilium/ebpf/asm/opcode.go | 70 +- vendor/github.com/cilium/ebpf/asm/register.go | 9 +- .../cilium/ebpf/attachtype_string.go | 18 +- vendor/github.com/cilium/ebpf/btf/btf.go | 830 +++++++----------- .../github.com/cilium/ebpf/btf/btf_types.go | 406 ++++++--- .../cilium/ebpf/btf/btf_types_string.go | 38 +- vendor/github.com/cilium/ebpf/btf/core.go | 575 +++++++++--- vendor/github.com/cilium/ebpf/btf/ext_info.go | 294 +++++-- vendor/github.com/cilium/ebpf/btf/feature.go | 123 +++ vendor/github.com/cilium/ebpf/btf/format.go | 139 +-- vendor/github.com/cilium/ebpf/btf/handle.go | 204 ++++- vendor/github.com/cilium/ebpf/btf/kernel.go | 159 ++++ vendor/github.com/cilium/ebpf/btf/marshal.go | 611 +++++++++++++ vendor/github.com/cilium/ebpf/btf/strings.go | 136 ++- .../github.com/cilium/ebpf/btf/traversal.go | 123 +++ vendor/github.com/cilium/ebpf/btf/types.go | 759 +++++++++------- .../github.com/cilium/ebpf/btf/workarounds.go | 26 + vendor/github.com/cilium/ebpf/collection.go | 313 +++++-- .../cilium/ebpf/{internal => }/cpu.go | 28 +- vendor/github.com/cilium/ebpf/elf_reader.go | 441 ++++++---- vendor/github.com/cilium/ebpf/elf_sections.go | 109 +++ vendor/github.com/cilium/ebpf/info.go | 144 ++- .../github.com/cilium/ebpf/internal/align.go | 6 - .../github.com/cilium/ebpf/internal/auxv.go | 60 ++ .../github.com/cilium/ebpf/internal/buffer.go | 31 + .../github.com/cilium/ebpf/internal/deque.go | 91 ++ .../cilium/ebpf/internal/endian_be.go | 6 +- .../cilium/ebpf/internal/endian_le.go | 8 +- .../github.com/cilium/ebpf/internal/errors.go | 44 +- .../cilium/ebpf/internal/feature.go | 180 +++- vendor/github.com/cilium/ebpf/internal/io.go | 66 ++ .../cilium/ebpf/internal/kallsyms/kallsyms.go | 74 ++ .../cilium/ebpf/internal/kconfig/kconfig.go | 293 +++++++ .../github.com/cilium/ebpf/internal/math.go | 13 + .../github.com/cilium/ebpf/internal/output.go | 13 + .../cilium/ebpf/internal/pinning.go | 20 +- .../cilium/ebpf/internal/platform.go | 43 + .../github.com/cilium/ebpf/internal/prog.go | 11 + .../github.com/cilium/ebpf/internal/statfs.go | 23 + .../github.com/cilium/ebpf/internal/sys/fd.go | 53 +- .../cilium/ebpf/internal/sys/fd_trace.go | 93 ++ .../ebpf/internal/sys/mapflags_string.go | 53 ++ .../cilium/ebpf/internal/sys/ptr.go | 16 +- .../cilium/ebpf/internal/sys/ptr_32_be.go | 1 - .../cilium/ebpf/internal/sys/ptr_32_le.go | 1 - .../cilium/ebpf/internal/sys/ptr_64.go | 1 - .../cilium/ebpf/internal/sys/signals.go | 83 ++ .../cilium/ebpf/internal/sys/syscall.go | 111 ++- 
.../cilium/ebpf/internal/sys/types.go | 508 +++++++++-- .../cilium/ebpf/internal/sysenc/buffer.go | 83 ++ .../cilium/ebpf/internal/sysenc/doc.go | 3 + .../cilium/ebpf/internal/sysenc/layout.go | 41 + .../cilium/ebpf/internal/sysenc/marshal.go | 177 ++++ .../cilium/ebpf/internal/tracefs/kprobe.go | 360 ++++++++ .../ebpf/internal/tracefs/probetype_string.go | 24 + .../cilium/ebpf/internal/tracefs/uprobe.go | 16 + .../cilium/ebpf/internal/unix/doc.go | 11 + .../cilium/ebpf/internal/unix/types_linux.go | 202 ++--- .../cilium/ebpf/internal/unix/types_other.go | 213 +++-- .../github.com/cilium/ebpf/internal/vdso.go | 31 +- .../cilium/ebpf/internal/version.go | 21 +- vendor/github.com/cilium/ebpf/link/anchor.go | 137 +++ vendor/github.com/cilium/ebpf/link/cgroup.go | 60 +- vendor/github.com/cilium/ebpf/link/iter.go | 7 +- vendor/github.com/cilium/ebpf/link/kprobe.go | 443 +++------- .../cilium/ebpf/link/kprobe_multi.go | 180 ++++ vendor/github.com/cilium/ebpf/link/link.go | 374 +++++++- .../github.com/cilium/ebpf/link/netfilter.go | 70 ++ vendor/github.com/cilium/ebpf/link/netkit.go | 71 ++ .../github.com/cilium/ebpf/link/perf_event.go | 190 +--- .../github.com/cilium/ebpf/link/platform.go | 25 - vendor/github.com/cilium/ebpf/link/program.go | 85 +- vendor/github.com/cilium/ebpf/link/query.go | 111 +++ .../cilium/ebpf/link/socket_filter.go | 4 +- .../github.com/cilium/ebpf/link/syscalls.go | 111 ++- vendor/github.com/cilium/ebpf/link/tcx.go | 71 ++ .../github.com/cilium/ebpf/link/tracepoint.go | 19 +- vendor/github.com/cilium/ebpf/link/tracing.go | 78 +- vendor/github.com/cilium/ebpf/link/uprobe.go | 186 ++-- .../cilium/ebpf/link/uprobe_multi.go | 224 +++++ vendor/github.com/cilium/ebpf/linker.go | 293 ++++++- vendor/github.com/cilium/ebpf/map.go | 763 ++++++++++------ vendor/github.com/cilium/ebpf/marshalers.go | 295 +++---- vendor/github.com/cilium/ebpf/netlify.toml | 4 + vendor/github.com/cilium/ebpf/prog.go | 504 ++++++++--- .../github.com/cilium/ebpf/rlimit/rlimit.go | 123 +++ vendor/github.com/cilium/ebpf/run-tests.sh | 107 ++- vendor/github.com/cilium/ebpf/syscalls.go | 111 ++- vendor/github.com/cilium/ebpf/types.go | 219 ++--- vendor/github.com/cilium/ebpf/types_string.go | 13 +- vendor/github.com/godbus/dbus/v5/auth.go | 10 +- vendor/github.com/godbus/dbus/v5/conn.go | 27 +- .../github.com/godbus/dbus/v5/conn_other.go | 9 +- vendor/github.com/godbus/dbus/v5/dbus.go | 14 +- vendor/github.com/godbus/dbus/v5/doc.go | 10 +- vendor/github.com/godbus/dbus/v5/escape.go | 84 ++ vendor/github.com/godbus/dbus/v5/export.go | 19 +- vendor/github.com/godbus/dbus/v5/homedir.go | 29 +- .../godbus/dbus/v5/homedir_dynamic.go | 15 - .../godbus/dbus/v5/homedir_static.go | 45 - vendor/github.com/godbus/dbus/v5/message.go | 16 +- .../godbus/dbus/v5/server_interfaces.go | 2 +- vendor/github.com/godbus/dbus/v5/sig.go | 2 +- .../godbus/dbus/v5/transport_unix.go | 12 +- .../godbus/dbus/v5/transport_zos.go | 6 + vendor/github.com/godbus/dbus/v5/variant.go | 2 +- .../moby/sys/mountinfo/mounted_linux.go | 58 +- .../moby/sys/mountinfo/mounted_unix.go | 11 +- .../moby/sys/mountinfo/mountinfo.go | 6 +- .../moby/sys/mountinfo/mountinfo_bsd.go | 44 +- .../sys/mountinfo/mountinfo_freebsdlike.go | 14 + .../moby/sys/mountinfo/mountinfo_linux.go | 50 +- .../moby/sys/mountinfo/mountinfo_openbsd.go | 11 + .../sys/mountinfo/mountinfo_unsupported.go | 4 +- vendor/github.com/opencontainers/runc/NOTICE | 4 +- .../runc/libcontainer/cgroups/cgroups.go | 15 + .../cgroups/devices/devices_emulator.go | 386 -------- 
.../cgroups/ebpf/devicefilter/devicefilter.go | 208 ----- .../libcontainer/cgroups/ebpf/ebpf_linux.go | 253 ------ .../runc/libcontainer/cgroups/file.go | 20 +- .../runc/libcontainer/cgroups/fs2/cpu.go | 33 +- .../libcontainer/cgroups/fs2/defaultpath.go | 3 + .../runc/libcontainer/cgroups/fs2/devices.go | 75 -- .../runc/libcontainer/cgroups/fs2/fs2.go | 99 ++- .../runc/libcontainer/cgroups/fs2/memory.go | 5 + .../runc/libcontainer/cgroups/fs2/misc.go | 52 ++ .../runc/libcontainer/cgroups/fs2/psi.go | 89 ++ .../runc/libcontainer/cgroups/stats.go | 27 +- .../runc/libcontainer/cgroups/utils.go | 93 +- .../runc/libcontainer/cgroups/v1_utils.go | 31 +- .../runc/libcontainer/configs/blkio_device.go | 8 +- .../runc/libcontainer/configs/cgroup_linux.go | 11 + .../runc/libcontainer/configs/config.go | 122 ++- .../runc/libcontainer/configs/config_linux.go | 31 +- .../runc/libcontainer/configs/mount.go | 43 +- .../runc/libcontainer/configs/mount_linux.go | 66 ++ .../libcontainer/configs/mount_unsupported.go | 10 + .../libcontainer/configs/namespaces_linux.go | 7 + .../configs/namespaces_syscall.go | 13 + .../runc/libcontainer/user/lookup_unix.go | 157 ---- .../runc/libcontainer/user/user.go | 605 ------------- .../runc/libcontainer/user/user_fuzzer.go | 43 - .../runc/libcontainer/userns/userns.go | 1 - .../runc/libcontainer/userns/userns_fuzzer.go | 11 +- .../runc/libcontainer/userns/userns_linux.go | 44 +- .../libcontainer/userns/userns_unsupported.go | 4 +- .../libcontainer/userns/usernsfd_linux.go | 156 ++++ .../runc/libcontainer/utils/cmsg.go | 85 +- .../runc/libcontainer/utils/utils.go | 58 +- .../runc/libcontainer/utils/utils_unix.go | 154 +++- .../runtime-spec/specs-go/config.go | 10 + .../runtime-spec/specs-go/version.go | 2 +- vendor/github.com/sirupsen/logrus/README.md | 8 +- vendor/github.com/sirupsen/logrus/writer.go | 34 +- vendor/modules.txt | 28 +- 232 files changed, 12295 insertions(+), 11743 deletions(-) delete mode 100644 bpfassets/libbpf/Makefile create mode 100644 pkg/bpf/gen.go rename {bpfassets/libbpf => pkg/bpf}/include/bpf/LICENSE.BSD-2-Clause (100%) rename {bpfassets/libbpf => pkg/bpf}/include/bpf/bpf_helper_defs.h (100%) rename {bpfassets/libbpf => pkg/bpf}/include/bpf/bpf_helpers.h (100%) create mode 100644 pkg/bpf/kepler_bpfeb.go create mode 100644 pkg/bpf/kepler_bpfeb.o create mode 100644 pkg/bpf/kepler_bpfel.go create mode 100644 pkg/bpf/kepler_bpfel.o rename {bpfassets/libbpf => pkg/bpf}/src/kepler.bpf.c (100%) rename {bpfassets/libbpf => pkg/bpf}/src/kepler.bpf.h (99%) delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/.clang-format delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/.clang-tidy delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/.gitignore delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/.gitmodules delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/.revive.toml delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/LICENSE delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/Makefile delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/Readme.md delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/btf.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/buf-common.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/buf-perf.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/buf-ring.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/elf.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/go.work delete mode 100644 
vendor/github.com/aquasecurity/libbpfgo/libbpf_cb.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/libbpfgo.c delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/libbpfgo.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/libbpfgo.h delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/link-reader.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/link.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/logger_cb.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/map-common.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/map-iterator.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/map-low.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/map.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/misc.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/module-iterator.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/module.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/prog-common.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/prog.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/rwArray.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/tchook-common.go delete mode 100644 vendor/github.com/aquasecurity/libbpfgo/tchook.go create mode 100644 vendor/github.com/cilium/ebpf/.gitattributes delete mode 100644 vendor/github.com/cilium/ebpf/ARCHITECTURE.md create mode 100644 vendor/github.com/cilium/ebpf/CODEOWNERS create mode 100644 vendor/github.com/cilium/ebpf/btf/feature.go create mode 100644 vendor/github.com/cilium/ebpf/btf/kernel.go create mode 100644 vendor/github.com/cilium/ebpf/btf/marshal.go create mode 100644 vendor/github.com/cilium/ebpf/btf/traversal.go create mode 100644 vendor/github.com/cilium/ebpf/btf/workarounds.go rename vendor/github.com/cilium/ebpf/{internal => }/cpu.go (69%) create mode 100644 vendor/github.com/cilium/ebpf/elf_sections.go delete mode 100644 vendor/github.com/cilium/ebpf/internal/align.go create mode 100644 vendor/github.com/cilium/ebpf/internal/auxv.go create mode 100644 vendor/github.com/cilium/ebpf/internal/buffer.go create mode 100644 vendor/github.com/cilium/ebpf/internal/deque.go create mode 100644 vendor/github.com/cilium/ebpf/internal/kallsyms/kallsyms.go create mode 100644 vendor/github.com/cilium/ebpf/internal/kconfig/kconfig.go create mode 100644 vendor/github.com/cilium/ebpf/internal/math.go create mode 100644 vendor/github.com/cilium/ebpf/internal/platform.go create mode 100644 vendor/github.com/cilium/ebpf/internal/prog.go create mode 100644 vendor/github.com/cilium/ebpf/internal/statfs.go create mode 100644 vendor/github.com/cilium/ebpf/internal/sys/fd_trace.go create mode 100644 vendor/github.com/cilium/ebpf/internal/sys/mapflags_string.go create mode 100644 vendor/github.com/cilium/ebpf/internal/sys/signals.go create mode 100644 vendor/github.com/cilium/ebpf/internal/sysenc/buffer.go create mode 100644 vendor/github.com/cilium/ebpf/internal/sysenc/doc.go create mode 100644 vendor/github.com/cilium/ebpf/internal/sysenc/layout.go create mode 100644 vendor/github.com/cilium/ebpf/internal/sysenc/marshal.go create mode 100644 vendor/github.com/cilium/ebpf/internal/tracefs/kprobe.go create mode 100644 vendor/github.com/cilium/ebpf/internal/tracefs/probetype_string.go create mode 100644 vendor/github.com/cilium/ebpf/internal/tracefs/uprobe.go create mode 100644 vendor/github.com/cilium/ebpf/internal/unix/doc.go create mode 100644 
vendor/github.com/cilium/ebpf/link/anchor.go create mode 100644 vendor/github.com/cilium/ebpf/link/kprobe_multi.go create mode 100644 vendor/github.com/cilium/ebpf/link/netfilter.go create mode 100644 vendor/github.com/cilium/ebpf/link/netkit.go delete mode 100644 vendor/github.com/cilium/ebpf/link/platform.go create mode 100644 vendor/github.com/cilium/ebpf/link/query.go create mode 100644 vendor/github.com/cilium/ebpf/link/tcx.go create mode 100644 vendor/github.com/cilium/ebpf/link/uprobe_multi.go create mode 100644 vendor/github.com/cilium/ebpf/netlify.toml create mode 100644 vendor/github.com/cilium/ebpf/rlimit/rlimit.go create mode 100644 vendor/github.com/godbus/dbus/v5/escape.go delete mode 100644 vendor/github.com/godbus/dbus/v5/homedir_dynamic.go delete mode 100644 vendor/github.com/godbus/dbus/v5/homedir_static.go create mode 100644 vendor/github.com/godbus/dbus/v5/transport_zos.go create mode 100644 vendor/github.com/moby/sys/mountinfo/mountinfo_freebsdlike.go create mode 100644 vendor/github.com/moby/sys/mountinfo/mountinfo_openbsd.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/misc.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/psi.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/mount_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/mount_unsupported.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/user/user.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/userns/usernsfd_linux.go diff --git a/.github/workflows/c.yml b/.github/workflows/c.yml index 5d03a022ff..f4d194d2ab 100644 --- a/.github/workflows/c.yml +++ b/.github/workflows/c.yml @@ -14,14 +14,4 @@ jobs: - uses: jidicula/clang-format-action@v4.11.0 with: clang-format-version: "17" - check-path: "bpfassets/libbpf/src" - - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4.1.1 - - name: Install libbpf - run: | - sudo apt-get update && sudo apt-get install -y libelf-dev libbpf-dev - - name: Build - run: make -C bpfassets/libbpf + check-path: "pkg/bpf/src" diff --git a/.github/workflows/integration_test.yml b/.github/workflows/integration_test.yml index fa89285e11..23b0ade2ed 100644 --- a/.github/workflows/integration_test.yml +++ b/.github/workflows/integration_test.yml @@ -47,7 +47,7 @@ jobs: strategy: fail-fast: false matrix: - cluster_provider: [kind,microshift] + cluster_provider: [kind, microshift] steps: - name: checkout source uses: actions/checkout@v4.1.1 diff --git a/.github/workflows/unit_test.yml b/.github/workflows/unit_test.yml index 4052741c54..b7669cf12c 100644 --- a/.github/workflows/unit_test.yml +++ b/.github/workflows/unit_test.yml @@ -22,6 +22,7 @@ jobs: - uses: actions/setup-go@v5 with: go-version-file: go.mod + - run: sudo apt-get install clang llvm - name: Get ginkgo run: make ginkgo-set env: @@ -32,21 +33,8 @@ 
jobs: sudo apt-get install -y cpuid clang cd doc/ && sudo ./dev/prepare_dev_env.sh && cd - git config --global --add safe.directory /kepler - - name: install libbpf - uses: sustainable-computing-io/kepler-action@v0.0.6 - with: - ebpfprovider: libbpf - name: Run run: | - sudo apt remove libbpf-dev - mkdir temp-libbpf - cd temp-libbpf - git clone https://github.com/libbpf/libbpf - cd libbpf/src - sudo make install_headers - sudo make install_uapi_headers - sudo prefix=/usr BUILD_STATIC_ONLY=y make install - cd ../../../ make VERBOSE=1 test go tool cover -func=coverage.out -o=coverage.out - name: Go Coverage Badge # Pass the `coverage.out` output to this action diff --git a/.gitignore b/.gitignore index 2b3815b637..fbcc082d64 100644 --- a/.gitignore +++ b/.gitignore @@ -41,8 +41,5 @@ local-dev-cluster /tmp .env -# object files -*.o - # test coverage coverage.out diff --git a/Makefile b/Makefile index 2c932e2337..fa7e23690f 100644 --- a/Makefile +++ b/Makefile @@ -68,9 +68,6 @@ GOOS := $(shell go env GOOS) GOARCH := $(shell go env GOARCH) GOENV := GOOS=$(GOOS) GOARCH=$(GOARCH) -LIBBPF_HEADERS := /usr/include/bpf -GOENV = GO111MODULE="" GOOS=$(GOOS) GOARCH=$(GOARCH) CGO_ENABLED=1 CC=clang CGO_CFLAGS="-I $(LIBBPF_HEADERS) -I/usr/include/" CGO_LDFLAGS="-lelf -lz -lbpf" - DOCKERFILE := $(SRC_ROOT)/build/Dockerfile IMAGE_BUILD_TAG := $(GIT_VERSION)-linux-$(GOARCH) GO_BUILD_TAGS := $(GENERAL_TAGS)$(GOOS)$(GPU_TAGS) @@ -172,10 +169,11 @@ clean-cross-build: build: clean_build_local _build_local copy_build_local ## Build binary and copy to $(OUTPUT_DIR)/bin .PHONY: build -_build_ebpf_local: - @make -C bpfassets/libbpf +.PHONY: generate +generate: ## Generate BPF code locally. + +@$(GOENV) go generate ./pkg/bpf -_build_local: _build_ebpf_local ## Build Kepler binary locally. +_build_local: generate ## Build Kepler binary locally. @echo TAGS=$(GO_BUILD_TAGS) @mkdir -p "$(CROSS_BUILD_BINDIR)/$(GOOS)_$(GOARCH)" +@$(GOENV) go build \ @@ -280,7 +278,7 @@ SUDO_TEST_PKGS := $(shell go list ./... | grep pkg/bpf) test: unit-test bpf-test bench ## Run all tests. .PHONY: unit-test -unit-test: ginkgo-set tidy-vendor ## Run unit tests. +unit-test: generate ginkgo-set tidy-vendor ## Run unit tests. @echo TAGS=$(GO_TEST_TAGS) $(if $(VERBOSE),@echo GOENV=$(GOENV)) @$(GOENV) go test -tags $(GO_TEST_TAGS) \ @@ -298,7 +296,7 @@ bench: ## Run benchmarks. -bench=. --count=1 $(TEST_PKGS) .PHONY: bpf-test -bpf-test: _build_ebpf_local ## Run BPF tests. +bpf-test: generate ## Run BPF tests. for pkg in $(SUDO_TEST_PKGS); do \ $(GOENV) go test -c $$pkg -tags $(GO_TEST_TAGS) -cover \ -covermode=atomic -coverprofile=coverage.bpf.out \ @@ -325,7 +323,7 @@ format: c-format: @echo "Checking c format" - @git ls-files -- '*.c' '*.h' ':!:vendor' ':!:bpfassets/libbpf/include/' | xargs clang-format --dry-run --Werror + @git ls-files -- '*.c' '*.h' ':!:vendor' ':!:pkg/bpf/include/' | xargs clang-format --dry-run --Werror golint: @mkdir -p $(base_dir)/.cache/golangci-lint diff --git a/bpfassets/libbpf/Makefile b/bpfassets/libbpf/Makefile deleted file mode 100644 index 1e06aa2684..0000000000 --- a/bpfassets/libbpf/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -ARCH=$(shell uname -m) -GOARCH=$(shell go env GOARCH) - -TARGET := kepler - -BPF_SRC := $(wildcard src/*.bpf.c src/*.bpf.h) -# in libbpf if $(ARCH) is x86_64, then set TARGET_ARCH to x86. 
if arch is aarch64, then set TARGET_ARCH to arm -TARGET_ARCH := $(shell echo $(ARCH) | sed -e 's/x86_64/x86/' -e 's/aarch64/arm64/' -e 's/s390x/s390/') - -all: bpf.o/kepler.bpfeb.o bpf.o/kepler.bpfel.o - -bpf.o: - mkdir -p bpf.o - -bpf.o/kepler.bpfeb.o: $(BPF_SRC) | bpf.o - clang \ - -I./include \ - -target bpfeb \ - -O2 -g -c \ - -o $@ $< - -bpf.o/kepler.bpfel.o: $(BPF_SRC) | bpf.o - clang \ - -I./include \ - -target bpfel \ - -O2 -g -c \ - -o $@ $< diff --git a/build/Dockerfile b/build/Dockerfile index aa1458fe42..f4192aca7f 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -56,9 +56,7 @@ COPY --from=builder /libbpf-source/linux-5.14.0-424.el9/tools/bpf/bpftool/bpftoo COPY --from=builder /libbpf-source/linux-5.14.0-424.el9/tools/bpf/bpftool/bpftool /usr/bin/bpftool RUN mkdir -p /var/lib/kepler/data -RUN mkdir -p /var/lib/kepler/bpfassets COPY --from=builder /workspace/data/cpus.yaml /var/lib/kepler/data/cpus.yaml -COPY --from=builder /workspace/bpfassets/libbpf/bpf.o /var/lib/kepler/bpfassets # copy model weight COPY --from=builder /workspace/data/model_weight/acpi_AbsPowerModel.json /var/lib/kepler/data/acpi_AbsPowerModel.json diff --git a/go.mod b/go.mod index 125f798398..cf3aee7f75 100644 --- a/go.mod +++ b/go.mod @@ -1,12 +1,14 @@ module github.com/sustainable-computing-io/kepler -go 1.21 +go 1.21.0 + +toolchain go1.21.9 require ( github.com/HabanaAI/gohlml v1.15.0 github.com/NVIDIA/go-dcgm v0.0.0-20240118201113-3385e277e49f github.com/NVIDIA/go-nvml v0.12.0-1 - github.com/aquasecurity/libbpfgo v0.6.0-libbpf-1.3 + github.com/cilium/ebpf v0.15.0 github.com/containerd/cgroups v1.1.0 github.com/containerd/cgroups/v3 v3.0.2 github.com/jaypipes/ghw v0.12.0 @@ -15,12 +17,12 @@ require ( github.com/klauspost/cpuid/v2 v2.2.6 github.com/onsi/ginkgo/v2 v2.15.0 github.com/onsi/gomega v1.31.1 - github.com/opencontainers/runc v1.1.12 - github.com/opencontainers/runtime-spec v1.1.0 + github.com/opencontainers/runc v1.2.0-rc.1 + github.com/opencontainers/runtime-spec v1.2.0 github.com/prometheus/client_golang v1.19.0 github.com/prometheus/common v0.48.0 github.com/prometheus/prometheus v0.48.1 - github.com/sirupsen/logrus v1.9.0 + github.com/sirupsen/logrus v1.9.3 golang.org/x/sys v0.20.0 gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.28.2 @@ -35,7 +37,6 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/bits-and-blooms/bitset v1.13.0 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect - github.com/cilium/ebpf v0.9.1 // indirect github.com/coreos/go-systemd/v22 v22.5.0 // indirect github.com/cyphar/filepath-securejoin v0.2.4 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect @@ -48,7 +49,7 @@ require ( github.com/go-openapi/jsonreference v0.20.2 // indirect github.com/go-openapi/swag v0.22.4 // indirect github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect - github.com/godbus/dbus/v5 v5.0.6 // indirect + github.com/godbus/dbus/v5 v5.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/google/gnostic-models v0.6.8 // indirect @@ -64,7 +65,7 @@ require ( github.com/mailru/easyjson v0.7.7 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/moby/spdystream v0.2.0 // indirect - github.com/moby/sys/mountinfo v0.5.0 // indirect + github.com/moby/sys/mountinfo v0.7.1 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg 
v0.0.0-20191010083416-a7dc8b61c822 // indirect diff --git a/go.sum b/go.sum index 5df3c2a89d..db6a139298 100644 --- a/go.sum +++ b/go.sum @@ -8,8 +8,6 @@ github.com/NVIDIA/go-nvml v0.12.0-1 h1:6mdjtlFo+17dWL7VFPfuRMtf0061TF4DKls9pkSw6 github.com/NVIDIA/go-nvml v0.12.0-1/go.mod h1:hy7HYeQy335x6nEss0Ne3PYqleRa6Ct+VKD9RQ4nyFs= github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA= github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8= -github.com/aquasecurity/libbpfgo v0.6.0-libbpf-1.3 h1:mhDe1mAZR80LjnsCnteS+R2/EeBFi9qFjKslsIJwVSo= -github.com/aquasecurity/libbpfgo v0.6.0-libbpf-1.3/go.mod h1:0rEApF1YBHGuZ4C8OYI9q5oDBVpgqtRqYATePl9mCDk= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -18,8 +16,8 @@ github.com/bits-and-blooms/bitset v1.13.0 h1:bAQ9OPNFYbGHV6Nez0tmNI0RiEu7/hxlYJR github.com/bits-and-blooms/bitset v1.13.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cilium/ebpf v0.9.1 h1:64sn2K3UKw8NbP/blsixRpF3nXuyhz/VjRlRzvlBRu4= -github.com/cilium/ebpf v0.9.1/go.mod h1:+OhNOIXx/Fnu1IE8bJz2dzOA+VSfyTfdNUVdlQnxUFY= +github.com/cilium/ebpf v0.15.0 h1:7NxJhNiBT3NG8pZJ3c+yfrVdHY8ScgKD27sScgjLMMk= +github.com/cilium/ebpf v0.15.0/go.mod h1:DHp1WyrLeiBh19Cf/tfiSMhqheEiK8fXFZ4No0P1Hso= github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM= github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw= github.com/containerd/cgroups/v3 v3.0.2 h1:f5WFqIVSgo5IZmtTT3qVBo6TzI1ON6sycSBKkymb9L0= @@ -37,8 +35,6 @@ github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4 github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/emicklei/go-restful/v3 v3.10.2 h1:hIovbnmBTLjHXkqEBUz3HGpXZdM7ZrE9fJIZIqlJLqE= github.com/emicklei/go-restful/v3 v3.10.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= -github.com/frankban/quicktest v1.14.0 h1:+cqqvzZV87b4adx/5ayVOaYZ2CrvM4ejQvUdBzPPUss= -github.com/frankban/quicktest v1.14.0/go.mod h1:NeW+ay9A/U67EYXNFA1nPE8e/tnQv/09mUdL/ijj8og= github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= @@ -54,11 +50,13 @@ github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU= github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-quicktest/qt v1.101.0 h1:O1K29Txy5P2OK0dGo59b7b0LR6wKfIhttaAhHUyn7eI= +github.com/go-quicktest/qt v1.101.0/go.mod h1:14Bz/f7NwaXPtdYEgzsx46kqSxVwTbzVZsDC26tQJow= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= 
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/godbus/dbus/v5 v5.0.6 h1:mkgN1ofwASrYnJ5W6U/BxG15eXXXjirgZc7CLqkcaro= -github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= +github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -115,8 +113,8 @@ github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/moby/spdystream v0.2.0 h1:cjW1zVyyoiM0T7b6UoySUFqzXMoqRckQtXwGPiBhOM8= github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0GqbN2Wy8c= -github.com/moby/sys/mountinfo v0.5.0 h1:2Ks8/r6lopsxWi9m58nlwjaeSzUX9iiL1vj5qB/9ObI= -github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU= +github.com/moby/sys/mountinfo v0.7.1 h1:/tTvQaSJRr2FshkhXiIpux6fQ2Zvc4j7tAhMTStAG2g= +github.com/moby/sys/mountinfo v0.7.1/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -130,10 +128,10 @@ github.com/onsi/ginkgo/v2 v2.15.0 h1:79HwNRBAZHOEwrczrgSOPy+eFTTlIGELKy5as+ClttY github.com/onsi/ginkgo/v2 v2.15.0/go.mod h1:HlxMHtYF57y6Dpf+mc5529KKmSq9h2FpCF+/ZkwUxKM= github.com/onsi/gomega v1.31.1 h1:KYppCUK+bUgAZwHOu7EXVBKyQA6ILvOESHkn/tgoqvo= github.com/onsi/gomega v1.31.1/go.mod h1:y40C95dwAD1Nz36SsEnxvfFe8FFfNxzI5eJ0EYGyAy0= -github.com/opencontainers/runc v1.1.12 h1:BOIssBaW1La0/qbNZHXOOa71dZfZEQOzW7dqQf3phss= -github.com/opencontainers/runc v1.1.12/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8= -github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bll4AjJ9odEGpg= -github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/runc v1.2.0-rc.1 h1:SMjop2pxxYRTfKdsigna/8xRoaoCfIQfD2cVuOb64/o= +github.com/opencontainers/runc v1.2.0-rc.1/go.mod h1:m9JwxfHzXz5YTTXBQr7EY9KTuazFAGPyMQx2nRR3vTw= +github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk= +github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -149,12 +147,10 @@ github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= github.com/prometheus/prometheus v0.48.1 h1:CTszphSNTXkuCG6O0IfpKdHcJkvvnAAE1GbELKS+NFk= github.com/prometheus/prometheus v0.48.1/go.mod h1:SRw624aMAxTfryAcP8rOjg4S/sHHaetx2lyJJ2nM83g= 
-github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= -github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646 h1:RpforrEYXWkmGwJHIGnLZ3tTWStkjVVstwzNGqxX2Ds= -github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= -github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= -github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= +github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -193,7 +189,7 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= diff --git a/hack/libbpf-headers.sh b/hack/libbpf-headers.sh index 9370c17ada..fbcf5eba5b 100755 --- a/hack/libbpf-headers.sh +++ b/hack/libbpf-headers.sh @@ -15,4 +15,4 @@ headers=( # Fetch libbpf release and extract the desired headers curl -sL "https://github.com/libbpf/libbpf/archive/refs/tags/v${LIBBPF_VERSION}.tar.gz" | \ - tar -C ./bpfassets/libbpf/include/bpf -xz --xform='s#.*/##' "${headers[@]}" + tar -C ./pkg/bpf/include/bpf -xz --xform='s#.*/##' "${headers[@]}" diff --git a/packaging/rpm/kepler.spec b/packaging/rpm/kepler.spec index e647e872ff..71f9b69d19 100644 --- a/packaging/rpm/kepler.spec +++ b/packaging/rpm/kepler.spec @@ -54,13 +54,10 @@ install -d %{buildroot}%{_bindir} install -d %{buildroot}%{_sysconfdir}/kepler/ install -d %{buildroot}/var/lib/kepler/data -install -d %{buildroot}/var/lib/kepler/bpfassets install -d %{buildroot}/etc/kepler/kepler.config install -p -m755 ./_output/kepler %{buildroot}%{_bindir}/kepler install -p -m644 ./packaging/rpm/kepler.service %{buildroot}%{_unitdir}/kepler.service -install -p -m644 ./bpfassets/libbpf/bpf.o/kepler.bpfel.o %{buildroot}/var/lib/kepler/bpfassets/kepler.bpfel.o -install -p -m644 ./bpfassets/libbpf/bpf.o/kepler.bpfeb.o %{buildroot}/var/lib/kepler/bpfassets/kepler.bpfeb.o install -p -m644 ./_output/ENABLE_PROCESS_METRICS %{buildroot}/etc/kepler/kepler.config/ENABLE_PROCESS_METRICS install -p -m644 ./data/cpus.yaml 
%{buildroot}/var/lib/kepler/data/cpus.yaml install -p -m644 ./data/model_weight/acpi_AbsPowerModel.json %{buildroot}/var/lib/kepler/data/acpi_AbsPowerModel.json @@ -76,8 +73,6 @@ install -p -m644 ./data/model_weight/intel_rapl_DynPowerModel.json %{buildroot}/ %license LICENSE %{_bindir}/kepler %{_unitdir}/kepler.service -/var/lib/kepler/bpfassets/kepler.bpfel.o -/var/lib/kepler/bpfassets/kepler.bpfeb.o /var/lib/kepler/data/cpus.yaml /var/lib/kepler/data/acpi_AbsPowerModel.json /var/lib/kepler/data/acpi_DynPowerModel.json diff --git a/pkg/bpf/bpf_suite_test.go b/pkg/bpf/bpf_suite_test.go index 368ff82283..b8a5b0e108 100644 --- a/pkg/bpf/bpf_suite_test.go +++ b/pkg/bpf/bpf_suite_test.go @@ -13,18 +13,19 @@ import ( func TestBpf(t *testing.T) { RegisterFailHandler(Fail) - RunSpecs(t, "Attacher Suite") + RunSpecs(t, "Bpf Suite") } -func checkDataCollected(processesData []ProcessBPFMetrics) { - // len > 0 +func checkDataCollected(processesData []ProcessMetrics) { Expect(len(processesData)).To(BeNumerically(">", 0)) - Expect(processesData[0].PID).To(BeNumerically(">=", uint64(0))) - Expect(processesData[0].Command).NotTo(BeEmpty()) - Expect(processesData[0].CPUCycles).To(BeNumerically(">=", uint64(0))) - Expect(processesData[0].CPUInstr).To(BeNumerically(">=", uint64(0))) - Expect(processesData[0].CacheMisses).To(BeNumerically(">=", uint64(0))) - Expect(processesData[0].CGroupID).To(BeNumerically(">", uint64(0))) + for _, p := range processesData { + Expect(p.Pid).To(BeNumerically(">=", 0)) + Expect(p.Comm).NotTo(BeEmpty()) + Expect(p.CpuCycles).To(BeNumerically(">=", uint64(0))) + Expect(p.CpuInstr).To(BeNumerically(">=", uint64(0))) + Expect(p.CacheMiss).To(BeNumerically(">=", uint64(0))) + Expect(p.CgroupId).To(BeNumerically(">=", uint64(0))) + } } var _ = Describe("BPF Exporter test", func() { diff --git a/pkg/bpf/exporter.go b/pkg/bpf/exporter.go index e875015b66..6b53a69648 100644 --- a/pkg/bpf/exporter.go +++ b/pkg/bpf/exporter.go @@ -20,59 +20,45 @@ limitations under the License. 
package bpf import ( - "bytes" - "encoding/binary" + "errors" "fmt" - "os" - "path/filepath" "runtime" "time" "unsafe" - bpf "github.com/aquasecurity/libbpfgo" + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/link" + "github.com/cilium/ebpf/rlimit" "github.com/jaypipes/ghw" "github.com/sustainable-computing-io/kepler/pkg/config" - "github.com/sustainable-computing-io/kepler/pkg/utils" "golang.org/x/sys/unix" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" ) -const ( - objectFilename = "kepler.%s.o" - bpfAssestsLocation = "/var/lib/kepler/bpfassets" - cpuOnline = "/sys/devices/system/cpu/online" - bpfPerfArraySuffix = "_event_reader" - TableProcessName = "processes" - TableCPUFreqName = "cpu_freq_array" - CPUNumSize = 128 -) - type exporter struct { - module *bpf.Module - counters map[string]perfCounter - ebpfBatchGet bool - ebpfBatchGetAndDelete bool - cpuCores int - // due to performance reason we keep an empty struct to verify if a new read is also empty - emptyct ProcessBPFMetrics - byteOrder binary.ByteOrder - perfEventFds []int + bpfObjects keplerObjects + + schedSwitchLink link.Link + irqLink link.Link + pageWriteLink link.Link + pageReadLink link.Link + + cpuCyclesPerfEvents []int + cpuInstructionsPerfEvents []int + cacheMissPerfEvents []int + enabledHardwareCounters sets.Set[string] enabledSoftwareCounters sets.Set[string] } func NewExporter() (Exporter, error) { e := &exporter{ - module: nil, - ebpfBatchGet: true, - ebpfBatchGetAndDelete: true, - cpuCores: getCPUCores(), - emptyct: ProcessBPFMetrics{}, - byteOrder: utils.DetermineHostByteOrder(), - perfEventFds: []int{}, - enabledHardwareCounters: sets.New[string](), - enabledSoftwareCounters: sets.New[string](), + cpuCyclesPerfEvents: []int{}, + cpuInstructionsPerfEvents: []int{}, + cacheMissPerfEvents: []int{}, + enabledHardwareCounters: sets.New[string](), + enabledSoftwareCounters: sets.New[string](), } err := e.attach() if err != nil { @@ -81,11 +67,6 @@ func NewExporter() (Exporter, error) { return e, err } -type perfCounter struct { - EvType int - EvConfig int -} - func (e *exporter) SupportedMetrics() SupportedMetrics { return SupportedMetrics{ HardwareCounters: e.enabledHardwareCounters.Clone(), @@ -93,253 +74,199 @@ func (e *exporter) SupportedMetrics() SupportedMetrics { } } -func getLibbpfObjectFilePath(byteOrder binary.ByteOrder) (string, error) { - var endianness string - if byteOrder == binary.LittleEndian { - endianness = "bpfel" - } else if byteOrder == binary.BigEndian { - endianness = "bpfeb" - } - filename := fmt.Sprintf(objectFilename, endianness) - bpfassetsPath := fmt.Sprintf("%s/%s", bpfAssestsLocation, filename) - _, err := os.Stat(bpfassetsPath) - if err != nil { - // attempt to find the bpf assets in the same directory as the binary - // this is useful for running locally - var matches []string - err = filepath.Walk(".", func(path string, info os.FileInfo, err error) error { - if info.Name() == filename { - matches = append(matches, path) - return filepath.SkipAll - } - return nil - }) - if err != nil { - return "", fmt.Errorf("failed to find bpf object file: %w", err) - } - if len(matches) < 1 { - return "", fmt.Errorf("failed to find bpf object file: no matches found") - } - klog.Infof("found bpf object file: %s", matches[0]) - return matches[0], nil - } - return bpfassetsPath, nil -} - func (e *exporter) attach() error { - libbpfObjectFilePath, err := getLibbpfObjectFilePath(e.byteOrder) - if err != nil { - return fmt.Errorf("failed to load module: %w", err) + // Remove resource limits for 
kernels <5.11. + if err := rlimit.RemoveMemlock(); err != nil { + return fmt.Errorf("error removing memlock: %v", err) } - e.module, err = bpf.NewModuleFromFile(libbpfObjectFilePath) + // Load eBPF Specs + specs, err := loadKepler() if err != nil { - return fmt.Errorf("failed to load eBPF module from libbpf object: %w", err) + return fmt.Errorf("error loading eBPF specs: %v", err) } - // resize array entries - klog.Infof("%d CPU cores detected. Resizing eBPF Perf Event Arrays", e.cpuCores) - toResize := []string{ - "cpu_cycles_event_reader", "cpu_instructions_event_reader", "cache_miss_event_reader", - "cpu_cycles", "cpu_instructions", "cache_miss", "cpu_freq_array", - } - for _, arrayName := range toResize { - if err = resizeArrayEntries(e.module, arrayName, e.cpuCores); err != nil { - return fmt.Errorf("failed to resize array %s: %w", arrayName, err) + // Adjust map sizes to the number of available CPUs + numCPU := getCPUCores() + klog.Infof("Number of CPUs: %d", numCPU) + for _, m := range specs.Maps { + // Only resize maps that have a MaxEntries of NUM_CPUS constant + if m.MaxEntries == 128 { + m.MaxEntries = uint32(numCPU) } } - // set the sample rate, this must be done before loading the object - sampleRate := config.BPFSampleRate - if err := e.module.InitGlobalVariable("SAMPLE_RATE", int32(sampleRate)); err != nil { - return fmt.Errorf("failed to set sample rate: %w", err) + // Set program global variables + err = specs.RewriteConstants(map[string]interface{}{ + "SAMPLE_RATE": int32(config.BPFSampleRate), + }) + if err != nil { + return fmt.Errorf("error rewriting program constants: %v", err) } - if err := e.module.BPFLoadObject(); err != nil { - return fmt.Errorf("failed to load eBPF object: %w", err) + // Load the eBPF program(s) + if err := specs.LoadAndAssign(&e.bpfObjects, nil); err != nil { + return fmt.Errorf("error loading eBPF objects: %v", err) } - // attach to kprobe__finish_task_switch kprobe function - prog, err := e.module.GetProgram("kepler_sched_switch_trace") + // Attach the eBPF program(s) + e.schedSwitchLink, err = link.Tracepoint("sched", "sched_switch", e.bpfObjects.KeplerSchedSwitchTrace, nil) if err != nil { - return fmt.Errorf("failed to get kepler_sched_switch_trace: %w", err) - } - - if _, err = prog.AttachGeneric(); err != nil { - klog.Infof("failed to attach tracepoint/sched/sched_switch: %v", err) - } else { - e.enabledSoftwareCounters[config.CPUTime] = struct{}{} + return fmt.Errorf("error attaching sched_switch tracepoint: %v", err) } + e.enabledSoftwareCounters[config.CPUTime] = struct{}{} if config.ExposeIRQCounterMetrics { - err := func() error { - // attach softirq_entry tracepoint to kepler_irq_trace function - irq_prog, err := e.module.GetProgram("kepler_irq_trace") - if err != nil { - return fmt.Errorf("could not get kepler_irq_trace: %w", err) - } - if _, err := irq_prog.AttachGeneric(); err != nil { - return fmt.Errorf("could not attach irq/softirq_entry: %w", err) - } - e.enabledSoftwareCounters[config.IRQNetTXLabel] = struct{}{} - e.enabledSoftwareCounters[config.IRQNetRXLabel] = struct{}{} - e.enabledSoftwareCounters[config.IRQBlockLabel] = struct{}{} - return nil - }() + e.irqLink, err = link.Tracepoint("irq", "softirq_entry", e.bpfObjects.KeplerIrqTrace, nil) if err != nil { - klog.Warningf("IRQ tracing disabled: %v", err) + return fmt.Errorf("could not attach irq/softirq_entry: %w", err) } + e.enabledSoftwareCounters[config.IRQNetTXLabel] = struct{}{} + e.enabledSoftwareCounters[config.IRQNetRXLabel] = struct{}{} + 
e.enabledSoftwareCounters[config.IRQBlockLabel] = struct{}{} } - // attach function - page_write, err := e.module.GetProgram("kepler_write_page_trace") + e.pageWriteLink, err = link.Tracepoint("writeback", "writeback_dirty_folio", e.bpfObjects.KeplerWritePageTrace, nil) if err != nil { - return fmt.Errorf("failed to get kepler_write_page_trace: %w", err) + klog.Warningf("failed to attach tp/writeback/writeback_dirty_folio: %v. Kepler will not collect page cache write events. This will affect the DRAM power model estimation on VMs.", err) } else { - _, err = page_write.AttachTracepoint("writeback", "writeback_dirty_folio") - if err != nil { - klog.Warningf("failed to attach tp/writeback/writeback_dirty_folio: %v. Kepler will not collect page cache write events. This will affect the DRAM power model estimation on VMs.", err) - } else { - e.enabledSoftwareCounters[config.PageCacheHit] = struct{}{} - } + e.enabledSoftwareCounters[config.PageCacheHit] = struct{}{} } - // attach function - page_read, err := e.module.GetProgram("kepler_read_page_trace") + e.pageReadLink, err = link.AttachTracing(link.TracingOptions{ + Program: e.bpfObjects.KeplerReadPageTrace, + AttachType: ebpf.AttachTraceFEntry, + }) if err != nil { - return fmt.Errorf("failed to get kepler_read_page_trace: %v", err) - } else { - if _, err = page_read.AttachGeneric(); err != nil { - klog.Warningf("failed to attach fentry/mark_page_accessed: %v. Kepler will not collect page cache read events. This will affect the DRAM power model estimation on VMs.", err) - } else { - e.enabledSoftwareCounters[config.PageCacheHit] = struct{}{} - } + klog.Warningf("failed to attach fentry/mark_page_accessed: %v. Kepler will not collect page cache read events. This will affect the DRAM power model estimation on VMs.", err) + } else if !e.enabledSoftwareCounters.Has(config.PageCacheHit) { + e.enabledSoftwareCounters[config.PageCacheHit] = struct{}{} } + // Return early if hardware counters are not enabled if !config.ExposeHardwareCounterMetrics { klog.Infof("Hardware counter metrics are disabled") return nil } - // attach performance counter fd to BPF_PERF_EVENT_ARRAY - hardwareCounters := map[string]perfCounter{ - config.CPUCycle: {unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_CPU_CYCLES}, - // CPURefCycles aren't populated from the eBPF programs - // If this is a bug, we should fix that and bring this map back - // config.CPURefCycle: {unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_REF_CPU_CYCLES, true}, - config.CPUInstruction: {unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_INSTRUCTIONS}, - config.CacheMiss: {unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_CACHE_MISSES}, - } - cleanup := func() error { - unixClosePerfEvents(e.perfEventFds) - e.perfEventFds = []int{} + unixClosePerfEvents(e.cpuCyclesPerfEvents) + e.cpuCyclesPerfEvents = nil + unixClosePerfEvents(e.cpuInstructionsPerfEvents) + e.cpuInstructionsPerfEvents = nil + unixClosePerfEvents(e.cacheMissPerfEvents) + e.cacheMissPerfEvents = nil e.enabledHardwareCounters.Clear() return nil } - for arrayName, counter := range hardwareCounters { - bpfPerfArrayName := arrayName + bpfPerfArraySuffix - bpfMap, perfErr := e.module.GetMap(bpfPerfArrayName) - if perfErr != nil { - klog.Warningf("could not get ebpf map for perf event %s: %v\n", bpfPerfArrayName, perfErr) - cleanup() + // Create perf events and update each eBPF map + e.cpuCyclesPerfEvents, err = unixOpenPerfEvent(unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_CPU_CYCLES, numCPU) + if err != nil { + klog.Warning("Failed to open perf event for CPU 
cycles: ", err) + return cleanup() + } + e.enabledHardwareCounters[config.CPUCycle] = struct{}{} + + e.cpuInstructionsPerfEvents, err = unixOpenPerfEvent(unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_INSTRUCTIONS, numCPU) + if err != nil { + klog.Warning("Failed to open perf event for CPU instructions: ", err) + return cleanup() + } + e.enabledHardwareCounters[config.CPUInstruction] = struct{}{} + + e.cacheMissPerfEvents, err = unixOpenPerfEvent(unix.PERF_TYPE_HW_CACHE, unix.PERF_COUNT_HW_CACHE_MISSES, numCPU) + if err != nil { + klog.Warning("Failed to open perf event for cache misses: ", err) + return cleanup() + } + e.enabledHardwareCounters[config.CacheMiss] = struct{}{} + + for i, fd := range e.cpuCyclesPerfEvents { + if err := e.bpfObjects.CpuCyclesEventReader.Update(uint32(i), uint32(fd), ebpf.UpdateAny); err != nil { + klog.Warningf("Failed to update cpu_cycles_event_reader map: %v", err) + return cleanup() } - fds, perfErr := unixOpenPerfEvent(counter.EvType, counter.EvConfig, e.cpuCores) - if perfErr != nil { - klog.Warningf("could not attach perf event %s: %v. Are you using a VM?\n", bpfPerfArrayName, perfErr) - cleanup() + } + for i, fd := range e.cpuInstructionsPerfEvents { + if err := e.bpfObjects.CpuInstructionsEventReader.Update(uint32(i), uint32(fd), ebpf.UpdateAny); err != nil { + klog.Warningf("Failed to update cpu_instructions_event_reader map: %v", err) + return cleanup() } - for i, fd := range fds { - err = bpfMap.Update(unsafe.Pointer(&i), unsafe.Pointer(&fd)) - if err != nil { - klog.Warningf("failed to update bpf map: %v", err) - cleanup() - } + } + for i, fd := range e.cacheMissPerfEvents { + if err := e.bpfObjects.CacheMissEventReader.Update(uint32(i), uint32(fd), ebpf.UpdateAny); err != nil { + klog.Warningf("Failed to update cache_miss_event_reader map: %v", err) + return cleanup() } - e.perfEventFds = append(e.perfEventFds, fds...) 
- e.enabledHardwareCounters[arrayName] = struct{}{} } - klog.Infof("Successfully load eBPF module from libbpf object") return nil } func (e *exporter) Detach() { - unixClosePerfEvents(e.perfEventFds) - e.perfEventFds = []int{} - if e.module != nil { - e.module.Close() - e.module = nil + // Links + if e.schedSwitchLink != nil { + e.schedSwitchLink.Close() + e.schedSwitchLink = nil } -} -func (e *exporter) CollectProcesses() (processesData []ProcessBPFMetrics, err error) { - processesData = []ProcessBPFMetrics{} - if e.module == nil { - // nil error should be threw at attachment point, return empty data - return + if e.irqLink != nil { + e.irqLink.Close() + e.irqLink = nil } - var processes *bpf.BPFMap - processes, err = e.module.GetMap(TableProcessName) - if err != nil { - return - } - if e.ebpfBatchGetAndDelete { - processesData, err = e.libbpfCollectProcessBatchSingleHash(processes) - } else { - processesData, err = e.libbpfCollectProcessSingleHash(processes) + + if e.pageWriteLink != nil { + e.pageWriteLink.Close() + e.pageWriteLink = nil } - if err == nil { - return - } else { - e.ebpfBatchGetAndDelete = false - processesData, err = e.libbpfCollectProcessSingleHash(processes) + + if e.pageReadLink != nil { + e.pageReadLink.Close() + e.pageReadLink = nil } - return + + // Perf events + unixClosePerfEvents(e.cpuCyclesPerfEvents) + e.cpuCyclesPerfEvents = nil + unixClosePerfEvents(e.cpuInstructionsPerfEvents) + e.cpuInstructionsPerfEvents = nil + unixClosePerfEvents(e.cacheMissPerfEvents) + e.cacheMissPerfEvents = nil + + // Objects + e.bpfObjects.Close() } -func (e *exporter) CollectCPUFreq() (cpuFreqData map[int32]uint64, err error) { - cpuFreqData = make(map[int32]uint64) - var cpuFreq *bpf.BPFMap - cpuFreq, err = e.module.GetMap(TableCPUFreqName) - if err != nil { - return - } - // cpuFreqkeySize := int(unsafe.Sizeof(uint32Key)) - iterator := cpuFreq.Iterator() - var freq uint32 - // keySize := int(unsafe.Sizeof(freq)) - retry := 0 - next := iterator.Next() - for next { - keyBytes := iterator.Key() - cpu := int32(e.byteOrder.Uint32(keyBytes)) - data, getErr := cpuFreq.GetValue(unsafe.Pointer(&cpu)) - if getErr != nil { - retry += 1 - if retry > config.MaxLookupRetry { - klog.V(5).Infof("failed to get data: %v with max retry: %d \n", getErr, config.MaxLookupRetry) - next = iterator.Next() - retry = 0 - } - continue - } - getErr = binary.Read(bytes.NewReader(data), e.byteOrder, &freq) - if getErr != nil { - klog.V(5).Infof("failed to decode received data: %v\n", getErr) - next = iterator.Next() - retry = 0 - continue +func (e *exporter) CollectProcesses() ([]ProcessMetrics, error) { + start := time.Now() + // Get the max number of entries in the map + maxEntries := e.bpfObjects.Processes.MaxEntries() + total := 0 + deleteKeys := make([]uint32, maxEntries) + deleteValues := make([]ProcessMetrics, maxEntries) + var cursor ebpf.MapBatchCursor + for { + count, err := e.bpfObjects.Processes.BatchLookupAndDelete( + &cursor, + deleteKeys, + deleteValues, + &ebpf.BatchOptions{}, + ) + total += count + if errors.Is(err, ebpf.ErrKeyNotExist) { + break } - if retry > 0 { - klog.V(5).Infof("successfully get data with retry=%d \n", retry) + if err != nil { + return nil, fmt.Errorf("failed to batch lookup and delete: %v", err) } - cpuFreqData[cpu] = uint64(freq) - next = iterator.Next() - retry = 0 } - return + klog.V(5).Infof("collected %d process samples in %v", total, time.Since(start)) + return deleteValues[:total], nil +} + +func (e *exporter) CollectCPUFreq() (map[int32]uint64, error) { + return 
make(map[int32]uint64), nil } /////////////////////////////////////////////////////////////////////////// @@ -354,18 +281,18 @@ func unixOpenPerfEvent(typ, conf, cpuCores int) ([]int, error) { fds := []int{} for i := 0; i < cpuCores; i++ { cloexecFlags := unix.PERF_FLAG_FD_CLOEXEC - fd, err := unix.PerfEventOpen(sysAttr, -1, int(i), -1, cloexecFlags) + fd, err := unix.PerfEventOpen(sysAttr, -1, i, -1, cloexecFlags) if fd < 0 { return nil, fmt.Errorf("failed to open bpf perf event on cpu %d: %w", i, err) } - fds = append(fds, int(fd)) + fds = append(fds, fd) } return fds, nil } func unixClosePerfEvents(fds []int) { for _, fd := range fds { - unix.SetNonblock(fd, true) + _ = unix.SetNonblock(fd, true) unix.Close(fd) } } @@ -379,94 +306,3 @@ func getCPUCores() int { } return cores } - -func resizeArrayEntries(module *bpf.Module, name string, size int) error { - m, err := module.GetMap(name) - if err != nil { - return err - } - - if err = m.Resize(uint32(size)); err != nil { - return err - } - - if current := m.GetMaxEntries(); current != uint32(size) { - return fmt.Errorf("failed to resize map %s, expected %d, returned %d", name, size, current) - } - - return nil -} - -// for an unkown reason, the GetValueAndDeleteBatch never return the error (os.IsNotExist) that indicates the end of the table -// but it is not a big problem since we request all possible keys that the map can store in a single request -func (e *exporter) libbpfCollectProcessBatchSingleHash(processes *bpf.BPFMap) ([]ProcessBPFMetrics, error) { - start := time.Now() - var err error - entries := processes.MaxEntries() - keys := make([]uint32, entries) - nextKey := uint32(0) - - values, err := processes.GetValueAndDeleteBatch(unsafe.Pointer(&keys[0]), nil, unsafe.Pointer(&nextKey), uint32(entries)) - if err != nil { - // os.IsNotExist means we reached the end of the table - if !os.IsNotExist(err) { - klog.V(5).Infof("GetValueAndDeleteBatch failed: %v. 
A partial value might have been collected.", err) - } - } - - processesData := []ProcessBPFMetrics{} - for _, value := range values { - var ct ProcessBPFMetrics - if err := binary.Read(bytes.NewReader(value), e.byteOrder, &ct); err != nil { - klog.Warningf("failed to decode received data: %v\n", err) - continue - } - - if ct != e.emptyct { - processesData = append(processesData, ct) - } - } - klog.V(5).Infof("successfully get data with batch get and delete with %d pids in %v", len(processesData), time.Since(start)) - return processesData, err -} - -func (e *exporter) libbpfCollectProcessSingleHash(processes *bpf.BPFMap) (processesData []ProcessBPFMetrics, err error) { - iterator := processes.Iterator() - var ct ProcessBPFMetrics - keys := []uint32{} - retry := 0 - next := iterator.Next() - for next { - keyBytes := iterator.Key() - key := e.byteOrder.Uint32(keyBytes) - data, getErr := processes.GetValue(unsafe.Pointer(&key)) - if getErr != nil { - retry += 1 - if retry > config.MaxLookupRetry { - klog.V(5).Infof("failed to get data: %v with max retry: %d \n", getErr, config.MaxLookupRetry) - next = iterator.Next() - retry = 0 - } - continue - } - getErr = binary.Read(bytes.NewReader(data), e.byteOrder, &ct) - if getErr != nil { - klog.V(5).Infof("failed to decode received data: %v\n", getErr) - next = iterator.Next() - retry = 0 - continue - } - if retry > 0 { - klog.V(5).Infof("successfully get data with retry=%d \n", retry) - } - processesData = append(processesData, ct) - keys = append(keys, key) - next = iterator.Next() - retry = 0 - } - for _, key := range keys { - // TODO delete keys in batch - processes.DeleteKey(unsafe.Pointer(&key)) - } - return -} diff --git a/pkg/bpf/exporter_stub.go b/pkg/bpf/exporter_stub.go index ab7fc7291a..a515107a48 100644 --- a/pkg/bpf/exporter_stub.go +++ b/pkg/bpf/exporter_stub.go @@ -37,7 +37,7 @@ func (a *stubAttacher) SupportedMetrics() SupportedMetrics { func (a *stubAttacher) Detach() { } -func (a *stubAttacher) CollectProcesses() (processesData []ProcessBPFMetrics, err error) { +func (a *stubAttacher) CollectProcesses() (processesData []ProcessMetrics, err error) { return nil, nil } diff --git a/pkg/bpf/gen.go b/pkg/bpf/gen.go new file mode 100644 index 0000000000..3925d5dfcd --- /dev/null +++ b/pkg/bpf/gen.go @@ -0,0 +1,3 @@ +package bpf + +//go:generate go run github.com/cilium/ebpf/cmd/bpf2go@v0.15.0 kepler ./src/kepler.bpf.c -- -I./include diff --git a/bpfassets/libbpf/include/bpf/LICENSE.BSD-2-Clause b/pkg/bpf/include/bpf/LICENSE.BSD-2-Clause similarity index 100% rename from bpfassets/libbpf/include/bpf/LICENSE.BSD-2-Clause rename to pkg/bpf/include/bpf/LICENSE.BSD-2-Clause diff --git a/bpfassets/libbpf/include/bpf/bpf_helper_defs.h b/pkg/bpf/include/bpf/bpf_helper_defs.h similarity index 100% rename from bpfassets/libbpf/include/bpf/bpf_helper_defs.h rename to pkg/bpf/include/bpf/bpf_helper_defs.h diff --git a/bpfassets/libbpf/include/bpf/bpf_helpers.h b/pkg/bpf/include/bpf/bpf_helpers.h similarity index 100% rename from bpfassets/libbpf/include/bpf/bpf_helpers.h rename to pkg/bpf/include/bpf/bpf_helpers.h diff --git a/pkg/bpf/kepler_bpfeb.go b/pkg/bpf/kepler_bpfeb.go new file mode 100644 index 0000000000..01c013ae63 --- /dev/null +++ b/pkg/bpf/kepler_bpfeb.go @@ -0,0 +1,168 @@ +// Code generated by bpf2go; DO NOT EDIT. 
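The rewritten `CollectProcesses` above is the heart of the migration: libbpfgo's `GetValueAndDeleteBatch` never reliably returned the end-of-table error (see the deleted comment above), so the old code needed a single-hash fallback path. With cilium/ebpf, `ebpf.ErrKeyNotExist` is the documented termination signal for a cursor-based batch drain, and the final (possibly partial) batch arrives together with it. A minimal sketch of the same pattern against a hypothetical per-PID counter map — the key and value types here are illustrative, not Kepler's generated definitions:

```go
package main

import (
	"errors"
	"fmt"

	"github.com/cilium/ebpf"
)

// drainHash batch-reads and deletes every entry of a BPF hash map in one
// pass. count is accumulated before the error is inspected because the
// final batch is delivered together with ebpf.ErrKeyNotExist. Batch map
// operations need kernel support (roughly Linux 5.6+); older kernels
// would need an iterate-and-delete loop instead.
func drainHash(m *ebpf.Map) (map[uint32]uint64, error) {
	n := int(m.MaxEntries())
	keys := make([]uint32, n) // must match the map's key type
	vals := make([]uint64, n) // must match the map's value type

	out := make(map[uint32]uint64, n)
	var cursor ebpf.MapBatchCursor
	for {
		count, err := m.BatchLookupAndDelete(&cursor, keys, vals, &ebpf.BatchOptions{})
		for i := 0; i < count; i++ {
			out[keys[i]] = vals[i]
		}
		if errors.Is(err, ebpf.ErrKeyNotExist) {
			return out, nil // map fully drained
		}
		if err != nil {
			return nil, fmt.Errorf("batch lookup and delete: %w", err)
		}
	}
}
```

The `//go:generate` directive in `gen.go` is what keeps the C `process_metrics_t` struct and the generated Go `keplerProcessMetricsT` below in sync: re-running `go generate ./pkg/bpf/...` after editing `kepler.bpf.c` regenerates both the bytecode objects and the loader code that follows.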
+//go:build mips || mips64 || ppc64 || s390x + +package bpf + +import ( + "bytes" + _ "embed" + "fmt" + "io" + + "github.com/cilium/ebpf" +) + +type keplerProcessMetricsT struct { + CgroupId uint64 + Pid uint64 + ProcessRunTime uint64 + CpuCycles uint64 + CpuInstr uint64 + CacheMiss uint64 + PageCacheHit uint64 + VecNr [10]uint16 + Pad uint32 + Comm [16]int8 +} + +// loadKepler returns the embedded CollectionSpec for kepler. +func loadKepler() (*ebpf.CollectionSpec, error) { + reader := bytes.NewReader(_KeplerBytes) + spec, err := ebpf.LoadCollectionSpecFromReader(reader) + if err != nil { + return nil, fmt.Errorf("can't load kepler: %w", err) + } + + return spec, err +} + +// loadKeplerObjects loads kepler and converts it into a struct. +// +// The following types are suitable as obj argument: +// +// *keplerObjects +// *keplerPrograms +// *keplerMaps +// +// See ebpf.CollectionSpec.LoadAndAssign documentation for details. +func loadKeplerObjects(obj interface{}, opts *ebpf.CollectionOptions) error { + spec, err := loadKepler() + if err != nil { + return err + } + + return spec.LoadAndAssign(obj, opts) +} + +// keplerSpecs contains maps and programs before they are loaded into the kernel. +// +// It can be passed ebpf.CollectionSpec.Assign. +type keplerSpecs struct { + keplerProgramSpecs + keplerMapSpecs +} + +// keplerSpecs contains programs before they are loaded into the kernel. +// +// It can be passed ebpf.CollectionSpec.Assign. +type keplerProgramSpecs struct { + KeplerIrqTrace *ebpf.ProgramSpec `ebpf:"kepler_irq_trace"` + KeplerReadPageTrace *ebpf.ProgramSpec `ebpf:"kepler_read_page_trace"` + KeplerSchedSwitchTrace *ebpf.ProgramSpec `ebpf:"kepler_sched_switch_trace"` + KeplerWritePageTrace *ebpf.ProgramSpec `ebpf:"kepler_write_page_trace"` +} + +// keplerMapSpecs contains maps before they are loaded into the kernel. +// +// It can be passed ebpf.CollectionSpec.Assign. +type keplerMapSpecs struct { + CacheMiss *ebpf.MapSpec `ebpf:"cache_miss"` + CacheMissEventReader *ebpf.MapSpec `ebpf:"cache_miss_event_reader"` + CpuCycles *ebpf.MapSpec `ebpf:"cpu_cycles"` + CpuCyclesEventReader *ebpf.MapSpec `ebpf:"cpu_cycles_event_reader"` + CpuFreqArray *ebpf.MapSpec `ebpf:"cpu_freq_array"` + CpuInstructions *ebpf.MapSpec `ebpf:"cpu_instructions"` + CpuInstructionsEventReader *ebpf.MapSpec `ebpf:"cpu_instructions_event_reader"` + PidTgidMap *ebpf.MapSpec `ebpf:"pid_tgid_map"` + PidTimeMap *ebpf.MapSpec `ebpf:"pid_time_map"` + Processes *ebpf.MapSpec `ebpf:"processes"` +} + +// keplerObjects contains all objects after they have been loaded into the kernel. +// +// It can be passed to loadKeplerObjects or ebpf.CollectionSpec.LoadAndAssign. +type keplerObjects struct { + keplerPrograms + keplerMaps +} + +func (o *keplerObjects) Close() error { + return _KeplerClose( + &o.keplerPrograms, + &o.keplerMaps, + ) +} + +// keplerMaps contains all maps after they have been loaded into the kernel. +// +// It can be passed to loadKeplerObjects or ebpf.CollectionSpec.LoadAndAssign. 
+type keplerMaps struct { + CacheMiss *ebpf.Map `ebpf:"cache_miss"` + CacheMissEventReader *ebpf.Map `ebpf:"cache_miss_event_reader"` + CpuCycles *ebpf.Map `ebpf:"cpu_cycles"` + CpuCyclesEventReader *ebpf.Map `ebpf:"cpu_cycles_event_reader"` + CpuFreqArray *ebpf.Map `ebpf:"cpu_freq_array"` + CpuInstructions *ebpf.Map `ebpf:"cpu_instructions"` + CpuInstructionsEventReader *ebpf.Map `ebpf:"cpu_instructions_event_reader"` + PidTgidMap *ebpf.Map `ebpf:"pid_tgid_map"` + PidTimeMap *ebpf.Map `ebpf:"pid_time_map"` + Processes *ebpf.Map `ebpf:"processes"` +} + +func (m *keplerMaps) Close() error { + return _KeplerClose( + m.CacheMiss, + m.CacheMissEventReader, + m.CpuCycles, + m.CpuCyclesEventReader, + m.CpuFreqArray, + m.CpuInstructions, + m.CpuInstructionsEventReader, + m.PidTgidMap, + m.PidTimeMap, + m.Processes, + ) +} + +// keplerPrograms contains all programs after they have been loaded into the kernel. +// +// It can be passed to loadKeplerObjects or ebpf.CollectionSpec.LoadAndAssign. +type keplerPrograms struct { + KeplerIrqTrace *ebpf.Program `ebpf:"kepler_irq_trace"` + KeplerReadPageTrace *ebpf.Program `ebpf:"kepler_read_page_trace"` + KeplerSchedSwitchTrace *ebpf.Program `ebpf:"kepler_sched_switch_trace"` + KeplerWritePageTrace *ebpf.Program `ebpf:"kepler_write_page_trace"` +} + +func (p *keplerPrograms) Close() error { + return _KeplerClose( + p.KeplerIrqTrace, + p.KeplerReadPageTrace, + p.KeplerSchedSwitchTrace, + p.KeplerWritePageTrace, + ) +} + +func _KeplerClose(closers ...io.Closer) error { + for _, closer := range closers { + if err := closer.Close(); err != nil { + return err + } + } + return nil +} + +// Do not access this directly. +// +//go:embed kepler_bpfeb.o +var _KeplerBytes []byte diff --git a/pkg/bpf/kepler_bpfeb.o b/pkg/bpf/kepler_bpfeb.o new file mode 100644 index 0000000000000000000000000000000000000000..3b3496f8e63ac7c21883ab00da8934afabc79087 GIT binary patch literal 15776 zcmeHOTWnm%d7g82mlsL4*v1#yiQ^+*WJ;pQrQ{3kSn5iVWTuu4Mi;$x}9imbD*t8Ui0@2|b8@$G6*<2TmMu@*fY zi&`3TZ?Anq`a+ez&~st!g48vs&$B*lXU49t--~D7w?rXlOIp-x;#i;H<}+A>O($mV?Fy#nICWcn&!uCGC#7^ zj(lu4HReY~V{iU>aeQiiaR2E%c=K#s{B|B=+_uv0thT;xd|LI3%|2$FWH|D#} z7VhUr!(jL4%vxjblarCbc3pWyk7MkbKi{*zq5G@T=6;Iam3kp*^iB+@o7h^{Z=3Xs zbRE4b^~4?(BSr4#wcKxNzDnpJh8`nli1^LkW13cO_D-|Do$OL~lH=v|Ro@RMuljzv zwWxH4e7VKQ*m2}KT(wI^auwLgqJ1y5$+;@5eMBnh- zTWhOSg!-D)D^g#PdRgkrQZGq;N$QJIU(j_;)`>f19eu;BAB%c)F;9?X>?_w~-gb!} zU5npjZ}2l_U-0s@?mn^iwd5%fJ)xI}b@%h)!`h7U`z;u<`eUgVW&LpSuJfE7RUH)Z z?6l1HtxB~~{{9Sa3DtQzL!^SOnyOTx%k1OK3+ZP9gMEK|KW@OjzCSTflQdxMV18=+ ze`SZhARX^071Q(1;Q#;qRWopCIy)X6JbtMEiQ%Kv@r&;UtYo$7=uwK-7Cnvh0Sbu+ zKhRP*W1)`NK~EMol-@^q_UgoyuySZexXdPN=Jp->hQQPU`7UX*y0=B2P3*tkPL*UH z_GmN0#rY{J)I(_V{z>6_RN7V4zlDqbE#Z1nFYrojMqBrjI7xm+{Ni5x3=GhR3@)xZ z=FU^9BJC9F8EIEhzas4=)UTPgR{tGor%-=i+EvtC7jJ_LOQ;ud;Z6@TsNcbb@d&G^ ze=PoRH~vRyb0>OF0wyVi>u18_UiWipV@Xk~BF7N5|4u%+A2NrGaZThn&bqrJce}{x z{^-Cy5w7dyoo9n8E+_9nT=ISg=-Qm*{YLbDegnNXi{3AIdhZ6_VE$mF_;)X`8~1kLjr`N@c=qn6EQ&wB-F%2W^@`Zz z2}RrUblr&Wdqvld7P@Z0ui&eqYgY?hH{#bR(Y3pUt{d=6eMfZl-=Hq8GgVxi<94iH z5M2YHxnunx;42shxnF}n0bkF1(7%f|*WVP{Tpu{bOKAT!YR-ifw10>iClaa*-o+2( zgA}fRLL2g2LxP{84f$XN*Hu{qh<~mffo;?&$nfA!5_$iQdN1k^w;0+Zfu z{!sFM(2zE-$>Yl*oFUh+pKWgDZ|N5P5#c92{$Ak^3ICAr8H!Hl2J0izy5(7sd(y~l z$zdar^sC=CzHuqpG6uSYq7a@g5q{@{M7VacffC{BWdkL` z)yD=(gp2DmON46=8z>Qcn1l~Dj6<4g*7hT4!$*A$q2@T#=NFw=j)KN=lPXMBR62d& z*s%l8rjLK+YlqV(o*h1%POIs?dqC&!^klhkdNMx}QM58!%&Rl`S@nE&d^)ctvNP%Y zWTjNdmsPPalCGS_&qTJUilwPszFbaE2z*3Hx5N?7ADJ;lFDUsWBK$%p$zTW)A_W)V}*)(KA%fZmQ=c!9Z|Wdi3v6R@PqUh 
[... base85-encoded binary patch data for pkg/bpf/kepler_bpfeb.o omitted ...]
z2lLw{&qpMh;kRd=XGj+F{9O#`hgZ9spl; zF#GF=4rYJNI+*?QGY7MOZaSF#am&H%j|B&}0XH?_24c6cz1Mp<@bF&2jDIVxc3dQ4 zwh`wSiMR!q{YFO2-)#IYE)p?+r;#H7+45zLs@TpGE1&Y@(;lAn@SKO|J-pyyT?z24 zzZO=?!GVX99!`1K?t|7|uP4uWxa{Fc4^Me`+QYLRp7Zd$hZj7oWIo$?&HhDK^6<2WXFWXU;du`)c-U@LmHNwiWpUu)q=!=;?(uN1hwVAW z+An+ZNe@qXc-q6W9-i~?yoVP&tOvd`-dh-$g98sIJ)H8eJ$I4Kq1Nlka~>{xc+$gD z9-j8_tcT}3Jn!KJ58ERc*^C!Qcuf`u9!`2V6@$&w6;y z!}A_q@UW8o(Z*}{Pm2Rj&gWl}m8U%1Z64n1;T{Kbzw7mI&ckI7yXOOzpYr6>9=_pV?x*hlPkh^x zf8=4i|FeG7x9^E9=I>xh7H{t8Os;BuqSO$f3ade zndqOdNcHaZHK6XxdnsGz#~>X;L+6LXbYEW{KML~S=U3nP%a@SK=OFuOq~=#4=Ie;4 zF8Y$O#K#ndw&Vwu4t%>Trq8I3!o)CBe66VYs=>nS#F}p)-3JdQ!{?Pwy7rp%GbY(3 zX_utW3R}`Hopy!0$=)6AVzSF5$q>Js(k6FxnRItju%ul{GeRHi{vH;4*$Qh6{enL% z1idcj1cS`pVDn73;o^QF%L!SUQj+=!=aWkT3g4}~T?cF#nU)r+$55D0i>V67WX5-x zG)3be&T-b$mgkyC;xiB1sYADambJVLlkDHDMyFd+tU6E!lxPj*we;u`=7u6t>=G`56vL^+-s284{e@l zC$4(_Z|*c&3o?)BpMS^T``-iS4c&J}Au8jl&<$CCcouNUkM(^p{INs*sja5eP3ahx N3`foU$Zqfh% literal 0 HcmV?d00001 diff --git a/bpfassets/libbpf/src/kepler.bpf.c b/pkg/bpf/src/kepler.bpf.c similarity index 100% rename from bpfassets/libbpf/src/kepler.bpf.c rename to pkg/bpf/src/kepler.bpf.c diff --git a/bpfassets/libbpf/src/kepler.bpf.h b/pkg/bpf/src/kepler.bpf.h similarity index 99% rename from bpfassets/libbpf/src/kepler.bpf.h rename to pkg/bpf/src/kepler.bpf.h index bfc015b130..d9f2377540 100644 --- a/bpfassets/libbpf/src/kepler.bpf.h +++ b/pkg/bpf/src/kepler.bpf.h @@ -98,7 +98,7 @@ typedef struct process_metrics_t { u64 cache_miss; u64 page_cache_hit; u16 vec_nr[10]; - u32 pad; + u32 _pad; char comm[16]; } process_metrics_t; diff --git a/pkg/bpf/test_utils.go b/pkg/bpf/test_utils.go index 98ae591247..de1ca477cd 100644 --- a/pkg/bpf/test_utils.go +++ b/pkg/bpf/test_utils.go @@ -45,18 +45,19 @@ func (m *mockExporter) SupportedMetrics() SupportedMetrics { func (m *mockExporter) Detach() {} -func (m *mockExporter) CollectProcesses() ([]ProcessBPFMetrics, error) { - return []ProcessBPFMetrics{ +func (m *mockExporter) CollectProcesses() ([]ProcessMetrics, error) { + return []ProcessMetrics{ { - CGroupID: 0, - PID: 0, + CgroupId: 0, + Pid: 0, ProcessRunTime: 0, - CPUCycles: 0, - CPUInstr: 0, - CacheMisses: 0, + CpuCycles: 0, + CpuInstr: 0, + CacheMiss: 0, PageCacheHit: 0, - VecNR: [10]uint16{}, - Command: [16]byte{}, + VecNr: [10]uint16{}, + Pad: 0, + Comm: [16]int8{}, }, }, nil } diff --git a/pkg/bpf/types.go b/pkg/bpf/types.go index 2a3a3b5bd1..28224df822 100644 --- a/pkg/bpf/types.go +++ b/pkg/bpf/types.go @@ -17,7 +17,6 @@ limitations under the License. package bpf import ( - "github.com/sustainable-computing-io/kepler/pkg/config" "k8s.io/apimachinery/pkg/util/sets" ) @@ -31,10 +30,12 @@ const ( IRQBlock = 4 ) +type ProcessMetrics = keplerProcessMetricsT + type Exporter interface { SupportedMetrics() SupportedMetrics Detach() - CollectProcesses() ([]ProcessBPFMetrics, error) + CollectProcesses() ([]ProcessMetrics, error) CollectCPUFreq() (map[int32]uint64, error) } @@ -42,16 +43,3 @@ type SupportedMetrics struct { HardwareCounters sets.Set[string] SoftwareCounters sets.Set[string] } - -// must be in sync with bpf program -type ProcessBPFMetrics struct { - CGroupID uint64 - PID uint64 /* TGID of the threads, i.e. 
user space pid */ - ProcessRunTime uint64 /* in ms */ - CPUCycles uint64 - CPUInstr uint64 - CacheMisses uint64 - PageCacheHit uint64 - VecNR [config.MaxIRQ]uint16 // irq counter, 10 is the max number of irq vectors - Command [16]byte -} diff --git a/pkg/collector/resourceutilization/bpf/process_bpf_collector.go b/pkg/collector/resourceutilization/bpf/process_bpf_collector.go index 08be0c086f..5a14ee4150 100644 --- a/pkg/collector/resourceutilization/bpf/process_bpf_collector.go +++ b/pkg/collector/resourceutilization/bpf/process_bpf_collector.go @@ -30,7 +30,7 @@ import ( "k8s.io/klog/v2" ) -type ProcessBPFMetrics = bpf.ProcessBPFMetrics +type ProcessBPFMetrics = bpf.ProcessMetrics // update software counter metrics func updateSWCounters(key uint64, ct *ProcessBPFMetrics, processStats map[uint64]*stats.ProcessStats, bpfSupportedMetrics bpf.SupportedMetrics) { @@ -43,11 +43,11 @@ func updateSWCounters(key uint64, ct *ProcessBPFMetrics, processStats map[uint64 case config.PageCacheHit: processStats[key].ResourceUsage[config.PageCacheHit].AddDeltaStat(utils.GenericSocketID, ct.PageCacheHit/(1000*1000)) case config.IRQNetTXLabel: - processStats[key].ResourceUsage[config.IRQNetTXLabel].AddDeltaStat(utils.GenericSocketID, uint64(ct.VecNR[bpf.IRQNetTX])) + processStats[key].ResourceUsage[config.IRQNetTXLabel].AddDeltaStat(utils.GenericSocketID, uint64(ct.VecNr[bpf.IRQNetTX])) case config.IRQNetRXLabel: - processStats[key].ResourceUsage[config.IRQNetRXLabel].AddDeltaStat(utils.GenericSocketID, uint64(ct.VecNR[bpf.IRQNetRX])) + processStats[key].ResourceUsage[config.IRQNetRXLabel].AddDeltaStat(utils.GenericSocketID, uint64(ct.VecNr[bpf.IRQNetRX])) case config.IRQBlockLabel: - processStats[key].ResourceUsage[config.IRQBlockLabel].AddDeltaStat(utils.GenericSocketID, uint64(ct.VecNR[bpf.IRQBlock])) + processStats[key].ResourceUsage[config.IRQBlockLabel].AddDeltaStat(utils.GenericSocketID, uint64(ct.VecNr[bpf.IRQBlock])) default: klog.Errorf("counter %s is not supported\n", counterKey) } @@ -61,16 +61,16 @@ func updateHWCounters(key uint64, ct *ProcessBPFMetrics, processStats map[uint64 var event string switch counterKey { case config.CPUCycle: - val = ct.CPUCycles + val = ct.CpuCycles event = config.CPUCycle case config.CPURefCycle: - val = ct.CPUCycles + val = ct.CpuCycles event = config.CPURefCycle case config.CPUInstruction: - val = ct.CPUInstr + val = ct.CpuInstr event = config.CPUInstruction case config.CacheMiss: - val = ct.CacheMisses + val = ct.CacheMiss event = config.CacheMiss default: klog.Errorf("counter %s is not supported\n", counterKey) @@ -87,34 +87,34 @@ func UpdateProcessBPFMetrics(bpfExporter bpf.Exporter, processStats map[uint64]* return } for _, ct := range processesData { - comm := C.GoString((*C.char)(unsafe.Pointer(&ct.Command))) + comm := C.GoString((*C.char)(unsafe.Pointer(&ct.Comm))) - if ct.PID != 0 { - klog.V(6).Infof("process %s (pid=%d, cgroup=%d) has %d cpu time %d CPU cycles, %d instructions, %d cache misses, %d page cache hits", - comm, ct.PID, ct.CGroupID, ct.ProcessRunTime, ct.CPUCycles, ct.CPUInstr, ct.CacheMisses, ct.PageCacheHit) + if ct.Pid != 0 { + klog.V(6).Infof("process %s (pid=%d, cgroup=%d) has %d CPU cycles, %d instructions, %d cache misses, %d page cache hits", + comm, ct.Pid, ct.CgroupId, ct.CpuCycles, ct.CpuInstr, ct.CacheMiss, ct.PageCacheHit) } // skip process without resource utilization - if ct.ProcessRunTime == 0 && ct.CacheMisses == 0 && ct.PageCacheHit == 0 { + if ct.CacheMiss == 0 && ct.PageCacheHit == 0 { continue } // if the pid is within a 
container, it will have a container ID - containerID, err := cgroup.GetContainerID(ct.CGroupID, ct.PID, config.EnabledEBPFCgroupID) + containerID, err := cgroup.GetContainerID(ct.CgroupId, ct.Pid, config.EnabledEBPFCgroupID) if err != nil { - klog.V(6).Infof("failed to resolve container for PID %v (command=%s): %v, set containerID=%s", ct.PID, comm, err, utils.SystemProcessName) + klog.V(6).Infof("failed to resolve container for PID %v (command=%s): %v, set containerID=%s", ct.Pid, comm, err, utils.SystemProcessName) } // if the pid is within a VM, it will have an VM ID vmID := utils.EmptyString if config.IsExposeVMStatsEnabled() { - vmID, err = libvirt.GetVMID(ct.PID) + vmID, err = libvirt.GetVMID(ct.Pid) if err != nil { - klog.V(6).Infof("failed to resolve VM ID for PID %v (command=%s): %v", ct.PID, comm, err) + klog.V(6).Infof("failed to resolve VM ID for PID %v (command=%s): %v", ct.Pid, comm, err) } } - mapKey := ct.PID - if ct.CGroupID == 1 && config.EnabledEBPFCgroupID { + mapKey := ct.Pid + if ct.CgroupId == 1 && config.EnabledEBPFCgroupID { // we aggregate all kernel process to minimize overhead // all kernel process has cgroup id as 1 and pid 1 is also a kernel process mapKey = 1 @@ -124,7 +124,7 @@ func UpdateProcessBPFMetrics(bpfExporter bpf.Exporter, processStats map[uint64]* var ok bool var pStat *stats.ProcessStats if pStat, ok = processStats[mapKey]; !ok { - pStat = stats.NewProcessStats(ct.PID, ct.CGroupID, containerID, vmID, comm, bpfSupportedMetrics) + pStat = stats.NewProcessStats(ct.Pid, ct.CgroupId, containerID, vmID, comm, bpfSupportedMetrics) processStats[mapKey] = pStat } else if pStat.Command == "" { pStat.Command = comm diff --git a/vendor/github.com/aquasecurity/libbpfgo/.clang-format b/vendor/github.com/aquasecurity/libbpfgo/.clang-format deleted file mode 100644 index 1835787a3f..0000000000 --- a/vendor/github.com/aquasecurity/libbpfgo/.clang-format +++ /dev/null @@ -1,130 +0,0 @@ ---- -Language: Cpp -AccessModifierOffset: -2 -AlignAfterOpenBracket: Align -AlignConsecutiveMacros: Consecutive -AlignConsecutiveAssignments: None -AlignConsecutiveBitFields: None -AlignConsecutiveDeclarations: None -AlignEscapedNewlines: Right -AlignOperands: Align -AlignTrailingComments: true -AllowAllArgumentsOnNextLine: true -AllowAllConstructorInitializersOnNextLine: true -AllowAllParametersOfDeclarationOnNextLine: true -AllowShortEnumsOnASingleLine: false -AllowShortBlocksOnASingleLine: Never -AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: Empty -AllowShortLambdasOnASingleLine: Empty -AllowShortIfStatementsOnASingleLine: Never -AllowShortLoopsOnASingleLine: false -AlwaysBreakAfterDefinitionReturnType: None -AlwaysBreakAfterReturnType: None -PenaltyReturnTypeOnItsOwnLine: 1000000 -AlwaysBreakBeforeMultilineStrings: false -AlwaysBreakTemplateDeclarations: MultiLine -AttributeMacros: - - __capability -BinPackArguments: false -BinPackParameters: false -BraceWrapping: - AfterCaseLabel: false - AfterClass: false - AfterControlStatement: Never - AfterEnum: false - AfterFunction: false - AfterNamespace: false - AfterStruct: false - AfterUnion: false - AfterExternBlock: false - BeforeCatch: false - BeforeElse: false - BeforeLambdaBody: false - BeforeWhile: false - IndentBraces: false - SplitEmptyFunction: true - SplitEmptyRecord: true - SplitEmptyNamespace: true -BreakBeforeBinaryOperators: None -BreakBeforeBraces: Linux -BreakBeforeInheritanceComma: false -BreakInheritanceList: BeforeColon -BreakBeforeTernaryOperators: true 
-BreakConstructorInitializersBeforeComma: false -BreakConstructorInitializers: BeforeColon -BreakStringLiterals: true -ColumnLimit: 100 -CommentPragmas: '^ IWYU pragma:' -CompactNamespaces: false -ConstructorInitializerAllOnOneLineOrOnePerLine: false -ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: true -DeriveLineEnding: true -DerivePointerAlignment: true -DisableFormat: false -ExperimentalAutoDetectBinPacking: false -FixNamespaceComments: true -IncludeBlocks: Preserve -IncludeIsMainRegex: '(Test)?$' -IncludeIsMainSourceRegex: '' -IndentCaseLabels: true -IndentCaseBlocks: false -IndentGotoLabels: true -IndentPPDirectives: BeforeHash -IndentExternBlock: AfterExternBlock -IndentRequires: false -IndentWidth: 4 -IndentWrappedFunctionNames: false -InsertTrailingCommas: None -KeepEmptyLinesAtTheStartOfBlocks: false -MacroBlockBegin: '' -MacroBlockEnd: '' -MaxEmptyLinesToKeep: 1 -NamespaceIndentation: None -PenaltyBreakAssignment: 2 -PenaltyBreakBeforeFirstCallParameter: 19 -PenaltyBreakComment: 300 -PenaltyBreakFirstLessLess: 120 -PenaltyBreakString: 1000 -PenaltyBreakTemplateDeclaration: 10 -PenaltyExcessCharacter: 1000000 -PointerAlignment: Right -ReflowComments: true -SortIncludes: false -SortUsingDeclarations: true -SpaceAfterCStyleCast: true -SpaceAfterLogicalNot: false -SpaceAfterTemplateKeyword: true -SpaceBeforeAssignmentOperators: true -SpaceBeforeCaseColon: false -SpaceBeforeCpp11BracedList: false -SpaceBeforeCtorInitializerColon: false -SpaceBeforeInheritanceColon: false -SpaceBeforeParens: ControlStatements -SpaceAroundPointerQualifiers: Default -SpaceBeforeRangeBasedForLoopColon: false -SpaceInEmptyBlock: false -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 1 -SpacesInAngles: false -SpacesInConditionalStatement: false -SpacesInContainerLiterals: true -SpacesInCStyleCastParentheses: false -SpacesInParentheses: false -SpacesInSquareBrackets: false -SpaceBeforeSquareBrackets: false -BitFieldColonSpacing: Both -Standard: Latest -TabWidth: 8 -UseCRLF: false -UseTab: Never -WhitespaceSensitiveMacros: - - STRINGIZE - - PP_STRINGIZE - - BOOST_PP_STRINGIZE - - NS_SWIFT_NAME - - CF_SWIFT_NAME -... 
- diff --git a/vendor/github.com/aquasecurity/libbpfgo/.clang-tidy b/vendor/github.com/aquasecurity/libbpfgo/.clang-tidy deleted file mode 100644 index fb4b07245a..0000000000 --- a/vendor/github.com/aquasecurity/libbpfgo/.clang-tidy +++ /dev/null @@ -1,163 +0,0 @@ -Checks: ', - -*, - abseil-duration-addition, - abseil-duration-comparison, - abseil-duration-conversion-cast, - abseil-duration-division, - abseil-duration-factory-float, - abseil-duration-factory-scale, - abseil-duration-subtraction, - abseil-duration-unnecessary-conversion, - abseil-faster-strsplit-delimiter, - abseil-no-internal-dependencies, - abseil-no-namespace, - abseil-redundant-strcat-calls, - abseil-str-cat-append, - abseil-string-find-startswith, - abseil-string-find-str-contains, - abseil-time-comparison, - abseil-time-subtraction, - abseil-upgrade-duration-conversions, - , - boost-use-to-string, - , - bugprone-argument-comment, - bugprone-assert-side-effect, - bugprone-bad-signal-to-kill-thread, - bugprone-bool-pointer-implicit-conversion, - -bugprone-branch-clone, - bugprone-copy-constructor-init, - bugprone-dangling-handle, - bugprone-dynamic-static-initializers, - bugprone-exception-escape, - bugprone-fold-init-type, - bugprone-forward-declaration-namespace, - bugprone-forwarding-reference-overload, - -bugprone-implicit-widening-of-multiplication-result, # TODO: check - bugprone-inaccurate-erase, - bugprone-incorrect-roundings, - bugprone-infinite-loop, - bugprone-integer-division, - bugprone-lambda-function-name, - bugprone-macro-repeated-side-effects, - bugprone-misplaced-operator-in-strlen-in-alloc, - bugprone-misplaced-pointer-arithmetic-in-alloc, - bugprone-misplaced-widening-cast, - bugprone-move-forwarding-reference, - bugprone-multiple-statement-macro, - bugprone-no-escape, - bugprone-not-null-terminated-result, - bugprone-parent-virtual-call, - bugprone-posix-return, - bugprone-redundant-branch-condition, - bugprone-signal-handler, - bugprone-signed-char-misuse, - bugprone-sizeof-container, - bugprone-spuriously-wake-up-functions, - bugprone-string-constructor, - bugprone-string-integer-assignment, - bugprone-string-literal-with-embedded-nul, - bugprone-suspicious-enum-usage, - bugprone-suspicious-include, - bugprone-suspicious-memset-usage, - bugprone-suspicious-missing-comma, - bugprone-suspicious-semicolon, - bugprone-suspicious-string-compare, - bugprone-swapped-arguments, - bugprone-terminating-continue, - bugprone-throw-keyword-missing, - bugprone-too-small-loop-variable, - bugprone-undefined-memory-manipulation, - bugprone-undelegated-constructor, - bugprone-unhandled-exception-at-new, - bugprone-unhandled-self-assignment, - bugprone-unused-raii, - bugprone-unused-return-value, - bugprone-use-after-move, - bugprone-virtual-near-miss, - , - cert-con36-c, - cert-env33-c, - cert-err34-c, - cert-fio38-c, - cert-flp30-c, - cert-msc30-c, - cert-msc32-c, - cert-pos44-c, - cert-pos47-c, - cert-sig30-c, - cert-str34-c, - , - fuchsia-default-arguments-calls, - fuchsia-default-arguments-declarations, - fuchsia-overloaded-operator, - fuchsia-trailing-return, - fuchsia-virtual-inheritance, - , - hicpp-avoid-c-arrays, - hicpp-avoid-goto, - -hicpp-deprecated-headers, - hicpp-exception-baseclass, - hicpp-explicit-conversions, - hicpp-function-size, - hicpp-invalid-access-moved, - hicpp-member-init, - hicpp-move-const-arg, - -hicpp-multiway-paths-covered, - hicpp-named-parameter, - hicpp-no-array-decay, - -hicpp-no-assembler, - -hicpp-no-malloc, - hicpp-noexcept-move, - hicpp-static-assert, - 
hicpp-uppercase-literal-suffix, - hicpp-use-auto, - hicpp-use-emplace, - hicpp-use-equals-default, - hicpp-use-equals-delete, - hicpp-use-noexcept, - hicpp-use-nullptr, - hicpp-use-override, - hicpp-vararg, - , - readability-avoid-const-params-in-decls, - -readability-braces-around-statements, - -readability-const-return-type, - readability-container-size-empty, - readability-convert-member-functions-to-static, - readability-delete-null-pointer, - -readability-else-after-return, - -readability-function-cognitive-complexity, - readability-function-size, - readability-identifier-naming, - readability-implicit-bool-conversion, - readability-inconsistent-declaration-parameter-name, - -readability-isolate-declaration, - -readability-magic-numbers, - readability-make-member-function-const, - readability-misleading-indentation, - readability-misplaced-array-index, - readability-named-parameter, - readability-non-const-parameter, - readability-qualified-auto, - readability-redundant-access-specifiers, - readability-redundant-control-flow, - readability-redundant-declaration, - readability-redundant-function-ptr-dereference, - readability-redundant-member-init, - readability-redundant-preprocessor, - readability-redundant-smartptr-get, - readability-redundant-string-cstr, - readability-redundant-string-init, - readability-simplify-boolean-expr, - readability-simplify-subscript-expr, - readability-static-accessed-through-instance, - readability-static-definition-in-anonymous-namespace, - readability-string-compare, - readability-suspicious-call-argument, - readability-uniqueptr-delete-release, - readability-uppercase-literal-suffix, - readability-use-anyofallof, - , - ' -FormatStyle: 'file' diff --git a/vendor/github.com/aquasecurity/libbpfgo/.gitignore b/vendor/github.com/aquasecurity/libbpfgo/.gitignore deleted file mode 100644 index e2e03e39d9..0000000000 --- a/vendor/github.com/aquasecurity/libbpfgo/.gitignore +++ /dev/null @@ -1,9 +0,0 @@ -output* -selftest/*/*.o -selftest/*/*.skel.h -selftest/*/*-static -selftest/*/*-dynamic -selftest/uprobe/ctest -selftest/uprobe/gotest -testing/tracee -.vagrant* diff --git a/vendor/github.com/aquasecurity/libbpfgo/.gitmodules b/vendor/github.com/aquasecurity/libbpfgo/.gitmodules deleted file mode 100644 index 1706b4a8fc..0000000000 --- a/vendor/github.com/aquasecurity/libbpfgo/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "libbpf"] - path = libbpf - url = https://github.com/libbpf/libbpf.git diff --git a/vendor/github.com/aquasecurity/libbpfgo/.revive.toml b/vendor/github.com/aquasecurity/libbpfgo/.revive.toml deleted file mode 100644 index bd0e9870ad..0000000000 --- a/vendor/github.com/aquasecurity/libbpfgo/.revive.toml +++ /dev/null @@ -1,154 +0,0 @@ -severity = "warning" -confidence = 0.8 -errorCode = 1 -warningCode = 1 - -# Enable all available rules -enableAllRules = true - -[rule.add-constant] - Disabled = true -[rule.argument-limit] - Disabled = false - Arguments = [6] -[rule.atomic] - Disabled = false -[rule.bare-return] - Disabled = false -[rule.banned-characters] - Disabled = true -[rule.blank-imports] - Disabled = false -[rule.bool-literal-in-expr] - Disabled = false -[rule.call-to-gc] - Disabled = false -[rule.confusing-naming] - Disabled = true -[rule.comment-spacings] - Disabled = false -[rule.confusing-results] - Disabled = true -[rule.cognitive-complexity] - Disabled = true -[rule.constant-logical-expr] - Disabled = false -[rule.context-as-argument] - Disabled = false -[rule.context-keys-type] - Disabled = false -[rule.cyclomatic] - 
Disabled = true -[rule.datarace] - Disabled = false -[rule.deep-exit] - Disabled = true -[rule.defer] - Disabled = false -[rule.dot-imports] - Disabled = false -[rule.duplicated-imports] - Disabled = false -[rule.early-return] - Disabled = false -[rule.empty-block] - Disabled = false -[rule.empty-lines] - Disabled = false -[rule.error-naming] - Disabled = false -[rule.error-return] - Disabled = false -[rule.error-strings] - Disabled = false -[rule.errorf] - Disabled = false -[rule.exported] - Disabled = true # TODO: add comments to exported functions -[rule.file-header] - Disabled = true -[rule.flag-parameter] - Disabled = true -[rule.function-result-limit] - Disabled = false - Arguments = [3] -[rule.function-length] - Disabled = true -[rule.get-return] - Disabled = false -[rule.identical-branches] - Disabled = false -[rule.if-return] - Disabled = false -[rule.increment-decrement] - Disabled = false -[rule.indent-error-flow] - Disabled = false -[rule.imports-blacklist] - Disabled = true -[rule.import-shadowing] - Disabled = false -[rule.line-length-limit] - Disabled = true # TODO: set max length to 100 max -[rule.max-public-structs] - Disabled = true -[rule.modifies-parameter] - Disabled = false -[rule.modifies-value-receiver] - Disabled = true -[rule.nested-structs] - Disabled = true -[rule.optimize-operands-order] - Disabled = false -[rule.package-comments] - Disabled = true # TODO: add comments to all packages -[rule.range] - Disabled = false -[rule.range-val-in-closure] - Disabled = false -[rule.range-val-address] - Disabled = false -[rule.receiver-naming] - Disabled = false -[rule.redefines-builtin-id] - Disabled = true -[rule.string-of-int] - Disabled = true -[rule.struct-tag] - Disabled = true # TODO: interesting to check -[rule.string-format] - Disabled = true # TODO: interesting to check -[rule.superfluous-else] - Disabled = false -[rule.time-equal] - Disabled = false -[rule.time-naming] - Disabled = false -[rule.var-naming] - Disabled = true # TODO: rename variables correctly -[rule.var-declaration] - Disabled = false -[rule.unchecked-type-assertion] - Disabled = true -[rule.unconditional-recursion] - Disabled = false -[rule.unexported-naming] - Disabled = false -[rule.unexported-return] - Disabled = true # TODO: consider enabling this -[rule.unhandled-error] - Disabled = true # TODO: already being done by errcheck (change ?) -[rule.unnecessary-stmt] - Disabled = true -[rule.unreachable-code] - Disabled = false -[rule.unused-parameter] - Disabled = true -[rule.unused-receiver] - Disabled = true -[rule.use-any] - Disabled = true # TODO: should we rename interface{} to any ? -[rule.useless-break] - Disabled = false -[rule.waitgroup-by-value] - Disabled = false diff --git a/vendor/github.com/aquasecurity/libbpfgo/LICENSE b/vendor/github.com/aquasecurity/libbpfgo/LICENSE deleted file mode 100644 index d645695673..0000000000 --- a/vendor/github.com/aquasecurity/libbpfgo/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
diff --git a/vendor/github.com/aquasecurity/libbpfgo/Makefile b/vendor/github.com/aquasecurity/libbpfgo/Makefile deleted file mode 100644 index 0e6620a125..0000000000 --- a/vendor/github.com/aquasecurity/libbpfgo/Makefile +++ /dev/null @@ -1,252 +0,0 @@ - -.ONESHELL: -SHELL = /bin/bash - -BASEDIR = $(abspath ./) - -OUTPUT = ./output -SELFTEST = ./selftest -HELPERS = ./helpers - -CLANG := clang -CC := $(CLANG) -GO := go -VAGRANT := vagrant -CLANG_FMT := clang-format-12 -GIT := $(shell which git || /bin/false) -REVIVE := revive - -HOSTOS = $(shell uname) -ARCH ?= $(shell uname -m | sed 's/x86_64/amd64/g; s/aarch64/arm64/g') - - -# libbpf - -LIBBPF_SRC = $(abspath ./libbpf/src) -LIBBPF_OBJ = $(abspath ./$(OUTPUT)/libbpf.a) -LIBBPF_OBJDIR = $(abspath ./$(OUTPUT)/libbpf) -LIBBPF_DESTDIR = $(abspath ./$(OUTPUT)) - -CFLAGS = -g -O2 -Wall -fpie -I$(abspath $(OUTPUT)) -LDFLAGS = - -# golang - -CGO_CFLAGS_STATIC = "-I$(abspath $(OUTPUT))" -CGO_LDFLAGS_STATIC = "-lelf -lz $(LIBBPF_OBJ)" -CGO_EXTLDFLAGS_STATIC = '-w -extldflags "-static"' - -CGO_CFLAGS_DYN = "-I. -I/usr/include/" -CGO_LDFLAGS_DYN = "-lelf -lz -lbpf" - -# default == shared lib from OS package - -all: libbpfgo-static -test: libbpfgo-static-test - -# libbpf uapi - -.PHONY: libbpf-uapi - -libbpf-uapi: $(LIBBPF_SRC) -# UAPI headers can be installed by a different package so they're not installed -# in by (libbpf) install rule. - UAPIDIR=$(LIBBPF_DESTDIR) \ - $(MAKE) -C $(LIBBPF_SRC) install_uapi_headers - -# libbpfgo test object - -libbpfgo-test-bpf-static: libbpfgo-static # needed for serialization - $(MAKE) -C $(SELFTEST)/build - -libbpfgo-test-bpf-dynamic: libbpfgo-dynamic # needed for serialization - $(MAKE) -C $(SELFTEST)/build - -libbpfgo-test-bpf-clean: - $(MAKE) -C $(SELFTEST)/build clean - -# libbpf: shared - -libbpfgo-dynamic: $(OUTPUT)/libbpf - CC=$(CLANG) \ - CGO_CFLAGS=$(CGO_CFLAGS_DYN) \ - CGO_LDFLAGS=$(CGO_LDFLAGS_DYN) \ - $(GO) build . - -libbpfgo-dynamic-test: libbpfgo-test-bpf-dynamic - CC=$(CLANG) \ - CGO_CFLAGS=$(CGO_CFLAGS_DYN) \ - CGO_LDFLAGS=$(CGO_LDFLAGS_DYN) \ - sudo -E $(GO) test . - -# libbpf: static - -libbpfgo-static: libbpf-uapi $(LIBBPF_OBJ) - CC=$(CLANG) \ - CGO_CFLAGS=$(CGO_CFLAGS_STATIC) \ - CGO_LDFLAGS=$(CGO_LDFLAGS_STATIC) \ - GOOS=linux GOARCH=$(ARCH) \ - $(GO) build \ - -tags netgo -ldflags $(CGO_EXTLDFLAGS_STATIC) \ - . - -libbpfgo-static-test: libbpfgo-test-bpf-static - sudo env PATH=$(PATH) \ - CC=$(CLANG) \ - CGO_CFLAGS=$(CGO_CFLAGS_STATIC) \ - CGO_LDFLAGS=$(CGO_LDFLAGS_STATIC) \ - GOOS=linux GOARCH=$(ARCH) \ - $(GO) test \ - -v -tags netgo -ldflags $(CGO_EXTLDFLAGS_STATIC) \ - . - -# static libbpf generation for the git submodule - -.PHONY: libbpf-static -libbpf-static: $(LIBBPF_OBJ) - -$(LIBBPF_OBJ): $(LIBBPF_SRC) $(wildcard $(LIBBPF_SRC)/*.[ch]) | $(OUTPUT)/libbpf - CC="$(CC)" CFLAGS="$(CFLAGS)" LD_FLAGS="$(LDFLAGS)" \ - $(MAKE) -C $(LIBBPF_SRC) \ - BUILD_STATIC_ONLY=1 \ - OBJDIR=$(LIBBPF_OBJDIR) \ - DESTDIR=$(LIBBPF_DESTDIR) \ - INCLUDEDIR= LIBDIR= UAPIDIR= install - -$(LIBBPF_SRC): -ifeq ($(wildcard $@), ) - echo "INFO: updating submodule 'libbpf'" - $(GIT) submodule update --init --recursive -endif - -# selftests - -SELFTESTS = $(shell find $(SELFTEST) -mindepth 1 -maxdepth 1 -type d ! -name 'common' ! 
-name 'build') - -define FOREACH - SELFTESTERR=0; \ - for DIR in $(SELFTESTS); do \ - echo "INFO: entering $$DIR..."; \ - $(MAKE) -j1 -C $$DIR $(1) || SELFTESTERR=1; \ - done; \ - if [ $$SELFTESTERR -eq 1 ]; then \ - exit 1; \ - fi -endef - -.PHONY: selftest -.PHONY: selftest-static -.PHONY: selftest-dynamic -.PHONY: selftest-run -.PHONY: selftest-static-run -.PHONY: selftest-dynamic-run -.PHONY: selftest-clean - -selftest: selftest-static - -selftest-static: - $(call FOREACH, main-static) -selftest-dynamic: - $(call FOREACH, main-dynamic) - -selftest-run: selftest-static-run - -selftest-static-run: - $(call FOREACH, run-static) -selftest-dynamic-run: - $(call FOREACH, run-dynamic) - -selftest-clean: - $(call FOREACH, clean) - -# helpers test - -.PHONY: helpers-test-run -.PHONY: helpers-test-static-run -.PHONY: helpers-test-dynamic-run - -helpers-test-run: helpers-test-static-run - -helpers-test-static-run: libbpfgo-static - CC=$(CLANG) \ - CGO_CFLAGS=$(CGO_CFLAGS_STATIC) \ - CGO_LDFLAGS=$(CGO_LDFLAGS_STATIC) \ - sudo -E env PATH=$(PATH) $(GO) test -v $(HELPERS)/... - -helpers-test-dynamic-run: libbpfgo-dynamic - sudo $(GO) test -v $(HELPERS)/... - -# vagrant - -VAGRANT_DIR = $(abspath ./builder) - -.PHONY: vagrant-up -.PHONY: vagrant-destroy -.PHONY: vagrant-halt -.PHONY: vagrant-ssh - -vagrant-up: .vagrant-up -vagrant-destroy: .vagrant-destroy -vagrant-halt: .vagrant-halt -vagrant-ssh: .vagrant-ssh - -.vagrant-%: - VAGRANT_VAGRANTFILE=$(VAGRANT_DIR)/Vagrantfile-ubuntu \ - ARCH=$(ARCH) \ - HOSTOS=$(HOSTOS) \ - $(VAGRANT) $* - -# -# code check and linting -# - -# fmt-check - -C_FILES_TO_BE_CHECKED = $(shell find -regextype posix-extended -regex '.*\.(h|c)' ! -regex '.*(libbpf|output)\/.*' | xargs) - -fmt-check: - @errors=0 - echo "Checking C and eBPF files and headers formatting..." - $(CLANG_FMT) --dry-run -i $(C_FILES_TO_BE_CHECKED) > /tmp/check-c-fmt 2>&1 - clangfmtamount=$$(cat /tmp/check-c-fmt | wc -l) - if [[ $$clangfmtamount -ne 0 ]]; then - head -n30 /tmp/check-c-fmt - errors=1 - fi - rm -f /tmp/check-c-fmt -# - if [[ $$errors -ne 0 ]]; then - echo - echo "Please fix formatting errors above!" - echo "Use: $(MAKE) fmt-fix target". - echo - exit 1 - fi - -# fmt-fix - -fmt-fix: - @echo "Fixing C and eBPF files and headers formatting..." - $(CLANG_FMT) -i --verbose $(C_FILES_TO_BE_CHECKED) - -# lint-check - -.PHONY: lint-check -lint-check: -# - @errors=0 - echo "Linting golang code..." - $(REVIVE) -config .revive.toml ./... 
- -# output - -$(OUTPUT): - mkdir -p $(OUTPUT) - -$(OUTPUT)/libbpf: - mkdir -p $(OUTPUT)/libbpf - -# cleanup - -clean: selftest-clean libbpfgo-test-bpf-clean - rm -rf $(OUTPUT) diff --git a/vendor/github.com/aquasecurity/libbpfgo/Readme.md b/vendor/github.com/aquasecurity/libbpfgo/Readme.md deleted file mode 100644 index ec9306e00d..0000000000 --- a/vendor/github.com/aquasecurity/libbpfgo/Readme.md +++ /dev/null @@ -1,139 +0,0 @@ -# libbpfgo - - - -[![GitHub release (latest by date)](https://img.shields.io/github/v/release/aquasecurity/libbpfgo)](https://github.com/aquasecurity/libbpfgo/releases) -[![Go Report Card](https://goreportcard.com/badge/github.com/aquasecurity/libbpfgo)](https://goreportcard.com/report/github.com/aquasecurity/libbpfgo) -[![License](https://img.shields.io/github/license/aquasecurity/libbpfgo)](https://github.com/aquasecurity/libbpfgo/blob/main/LICENSE) - ----- - -* [Installing](#installing) -* [Building](#building) -* [Concepts](#concepts) -* [Example](#example) -* [Releases](#releases) -* [Learn more](#learn-more) - - -libbpfgo is a Go library for Linux's [eBPF](https://ebpf.io/) project. It was created for [Tracee](https://github.com/aquasecurity/tracee), our open source Runtime Security, and eBPF tracing tool, written in Go. If you are interested in eBPF and its applications, check out Tracee at Github: [https://github.com/aquasecurity/tracee](https://github.com/aquasecurity/tracee). - -libbpfgo is built around [libbpf](https://github.com/libbpf/libbpf) - the standard library for interacting with eBPF programs from userspace - which is a C library maintained in Linux upstream. We have created libbpfgo as a thin Go wrapper around the libbpf project. - -## Installing - -libbpfgo uses CGO to interop with libbpf and will expect to be linked with libbpf at run or link time. Simply importing libbpfgo is not enough to get started, and you will need to fulfill the required dependency in one of the following ways: - -1. Install libbpf as a shared object in the system. Libbpf may already be packaged for your distribution and, if not, you can build and install from source. More info [here](https://github.com/libbpf/libbpf). -1. Embed libbpf into your Go project as a vendored dependency. This means that the libbpf code is statically linked into the resulting binary, and there are no runtime dependencies. [Tracee](https://github.com/aquasecurity/tracee) takes this approach. 
- -## Building - -Currently you will find the following GNU Makefile rules: - -| Makefile Rule | Description | -|--------------------------|-----------------------------------| -| all | builds libbpfgo (dynamic) | -| clean | cleans entire tree | -| selftest | builds all selftests (static) | -| selftest-run | runs all selftests (static) | -| helpers-test-run | runs all helpers tests (static) | - -* libbpf dynamically linked (libbpf from OS) - -| Makefile Rule | Description | -|--------------------------|-----------------------------------| -| libbpfgo-dynamic | builds dynamic libbpfgo (libbpf) | -| libbpfgo-dynamic-test | 'go test' with dynamic libbpfgo | -| selftest-dynamic | build tests with dynamic libbpfgo | -| selftest-dynamic-run | run tests using dynamic libbpfgo | -| helpers-test-dynamic-run | run helpers package unit tests using dynamic libbpfgo | - -* statically compiled (libbpf submodule) - -| Makefile Rule | Description | -|--------------------------|-----------------------------------| -| libbpfgo-static | builds static libbpfgo (libbpf) | -| libbpfgo-static-test | 'go test' with static libbpfgo | -| selftest-static | build tests with static libbpfgo | -| selftest-static-run | run tests using static libbpfgo | -| helpers-test-static-run | run helpers package unit tests using static libbpfgo | - -* examples - -``` -$ make libbpfgo-static => libbpfgo statically linked with libbpf -$ make -C selftest/perfbuffers => single selftest build (static libbpf) -$ make -C selftest/perfbuffers run-dynamic => single selftest run (dynamic libbpf) -$ make selftest-static-run => will build & run all static selftests -``` - -> Note 01: dynamic builds need your OS to have a *recent enough* libbpf package (and its headers) installed. Sometimes, recent features might require the use of backported OS packages in order for your OS to contain latest *libbpf* features (sometimes required by libbpfgo). -> Note 02: static builds need `git submodule init` first. Make sure to sync the *libbpf* git submodule before trying to statically compile or test the *libbpfgo* repository. - -## Concepts - -libbpfgo tries to make it natural for Go developers to use, by abstracting away C technicalities. For example, it will translate low level return codes into Go `error`, it will organize functionality around Go `struct`, and it will use `channel` as to let you consume events. - -In a high level, this is a typical workflow for working with the library: - -1. Compile your bpf program into an object file. -1. Initialize a `Module` struct - that is a unit of BPF functionality around your compiled object file. -1. Load bpf programs from the object file using the `BPFProg` struct. -1. Attach `BPFProg` to system facilities, for example to "raw tracepoints" or "kprobes" using the `BPFProg`'s associated functions. -1. Instantiate and manipulate BPF Maps via the `BPFMap` struct and it's associated methods. -1. Instantiate and manipulate Perf Buffer for communicating events from your BPF program to the driving userspace program, using the `RingBuffer` struct and it's associated objects. - -## Example - -```go -// initializing -import bpf "github.com/aquasecurity/libbpfgo" -... 
-bpfModule, _ := bpf.NewModuleFromFile(bpfObjectPath) 
-bpfModule.BPFLoadObject() 
- 
-// maps 
-mymap, _ := bpfModule.GetMap("mymap") 
-mymap.Update(key, value) 
- 
-// ring buffer 
-rb, _ := bpfModule.InitRingBuffer("events", eventsChannel, buffSize) 
-rb.Poll(300) 
-e := <-eventsChannel 
-``` 
- 
-## Releases 
- 
-libbpfgo does not yet have a regular schedule for cutting releases. There has not yet been a major release, but API backwards compatibility will be maintained for all releases with the same major release number. Milestones are created when preparing for a release. 
- 
-- __Major releases__ are cut when backwards compatibility is broken or major milestones are completed, such as reaching parity with libbpf's API. 
-- __Minor releases__ are cut to incorporate new support for libbpf APIs. 
-- __Patch releases__ are cut to incorporate important individual bug fixes or groups of bug fixes. 
-- __libbpf support numbering__ indicates the _minimum_ required libbpf version that must be linked in order to ensure libbpfgo compatibility. For example, `v0.2.1-libbpf-0.4.0` means that version 0.2.1 of libbpfgo requires v0.4.0 or newer of libbpf. 
- 
-*Note*: some distributions might have local changes to their libbpf package, and their version might include backports and/or fixes that differ from upstream versions. In those cases we recommend that libbpfgo be statically compiled. 
- 
-## Contributing 
- 
-To make contributing easier, libbpfgo provides GNU Makefile rules for Vagrant machines (amd64/arm64) that can be used to compile and test on Linux and Darwin hosts: 
- 
-| Makefile Rule | Description | 
-|-------------------|-----------------------------------------------------| 
-| vagrant-up | starts and provisions the vagrant environment | 
-| vagrant-ssh | connects to machine via SSH | 
-| vagrant-halt | stops the vagrant machine | 
-| vagrant-destroy | stops and deletes all traces of the vagrant machine | 
- 
-Once connected to the Vagrant box, you are ready to [build](#building) libbpfgo (e.g. `make libbpfgo-static`). 
- 
-For further information, check [Vagrantfile.md](./docs/Vagrantfile.md). 
- 
-## Learn more 
- 
-Please check our GitHub milestones for an idea of the project roadmap. The general goal is to fully implement/expose libbpf's API in Go as seamlessly as possible. 
- 
-- [How to Build eBPF Programs with libbpfgo](https://blog.aquasec.com/libbpf-ebpf-programs). 
-- [selftests](./selftest) are small programs using libbpfgo and make good usage examples. 
-- [tracee-ebpf](https://github.com/aquasecurity/tracee/tree/main/cmd/tracee-ebpf) is a robust consumer of this project. 
-- Feel free to ask questions by creating a new [Discussion](https://github.com/aquasecurity/libbpfgo/discussions); we'd love to help. 
diff --git a/vendor/github.com/aquasecurity/libbpfgo/btf.go b/vendor/github.com/aquasecurity/libbpfgo/btf.go 
deleted file mode 100644 
index 8a60abb653..0000000000 
--- a/vendor/github.com/aquasecurity/libbpfgo/btf.go 
+++ /dev/null 
@@ -1,26 +0,0 @@ 
-package libbpfgo 
- 
-/* 
-#cgo LDFLAGS: -lelf -lz 
-#include "libbpfgo.h" 
-*/ 
-import "C" 
- 
-import ( 
-	"fmt" 
-	"syscall" 
-) 
- 
-// 
-// BTF (low-level API) 
-// 
- 
-// GetBTFFDByID returns a file descriptor for the BTF with the given ID.
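-// The caller owns the returned file descriptor and should close it (e.g. with 
-// syscall.Close) when it is no longer needed.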
-func GetBTFFDByID(id uint32) (int, error) { 
-	fdC := C.bpf_btf_get_fd_by_id(C.uint(id)) 
-	if fdC < 0 { 
-		return int(fdC), fmt.Errorf("could not find BTF id %d: %w", id, syscall.Errno(-fdC)) 
-	} 
- 
-	return int(fdC), nil 
-} 
diff --git a/vendor/github.com/aquasecurity/libbpfgo/buf-common.go b/vendor/github.com/aquasecurity/libbpfgo/buf-common.go 
deleted file mode 100644 
index 1a381aed51..0000000000 
--- a/vendor/github.com/aquasecurity/libbpfgo/buf-common.go 
+++ /dev/null 
@@ -1,16 +0,0 @@ 
-package libbpfgo 
- 
-/* 
-#cgo LDFLAGS: -lelf -lz 
-#include "libbpfgo.h" 
-*/ 
-import "C" 
- 
-const ( 
-	// Maximum number of channels (RingBuffers + PerfBuffers) supported 
-	maxEventChannels = 512 
-) 
- 
-var ( 
-	eventChannels = newRWArray(maxEventChannels) 
-) 
diff --git a/vendor/github.com/aquasecurity/libbpfgo/buf-perf.go b/vendor/github.com/aquasecurity/libbpfgo/buf-perf.go 
deleted file mode 100644 
index 463c6b0443..0000000000 
--- a/vendor/github.com/aquasecurity/libbpfgo/buf-perf.go 
+++ /dev/null 
@@ -1,112 +0,0 @@ 
-package libbpfgo 
- 
-/* 
-#cgo LDFLAGS: -lelf -lz 
-#include "libbpfgo.h" 
-*/ 
-import "C" 
- 
-import ( 
-	"fmt" 
-	"sync" 
-	"syscall" 
-) 
- 
-// 
-// PerfBuffer 
-// 
- 
-type PerfBuffer struct { 
-	pb         *C.struct_perf_buffer 
-	bpfMap     *BPFMap 
-	slot       uint 
-	eventsChan chan []byte 
-	lostChan   chan uint64 
-	stop       chan struct{} 
-	closed     bool 
-	wg         sync.WaitGroup 
-} 
- 
-// Poll waits up to timeout (in milliseconds) to gather 
-// data from the perf buffer. 
-func (pb *PerfBuffer) Poll(timeout int) { 
-	pb.stop = make(chan struct{}) 
-	pb.wg.Add(1) 
-	go pb.poll(timeout) 
-} 
- 
-// Deprecated: use PerfBuffer.Poll() instead. 
-func (pb *PerfBuffer) Start() { 
-	pb.Poll(300) 
-} 
- 
-func (pb *PerfBuffer) Stop() { 
-	if pb.stop == nil { 
-		return 
-	} 
- 
-	// Signal the poll goroutine to exit 
-	close(pb.stop) 
- 
-	// The event and lost channels should be drained here since the consumer 
-	// may have stopped at this point. Failure to drain them will 
-	// result in a deadlock: the channel will fill up and the poll 
-	// goroutine will block in the callback. 
-	go func() { 
-		// revive:disable:empty-block 
-		for range pb.eventsChan { 
-		} 
- 
-		if pb.lostChan != nil { 
-			for range pb.lostChan { 
-			} 
-		} 
-		// revive:enable:empty-block 
-	}() 
- 
-	// Wait for the poll goroutine to exit 
-	pb.wg.Wait() 
- 
-	// Close the channel -- this is useful for the consumer but 
-	// also to terminate the drain goroutine above.
-	close(pb.eventsChan) 
-	if pb.lostChan != nil { 
-		close(pb.lostChan) 
-	} 
- 
-	// Reset pb.stop to allow multiple safe calls to Stop() 
-	pb.stop = nil 
-} 
- 
-func (pb *PerfBuffer) Close() { 
-	if pb.closed { 
-		return 
-	} 
- 
-	pb.Stop() 
-	C.perf_buffer__free(pb.pb) 
-	eventChannels.remove(pb.slot) 
-	pb.closed = true 
-} 
- 
-// TODO: consider writing the perf polling in Go, as C-to-Go calls (callbacks) are expensive 
-func (pb *PerfBuffer) poll(timeout int) error { 
-	defer pb.wg.Done() 
- 
-	for { 
-		select { 
-		case <-pb.stop: 
-			return nil 
-		default: 
-			retC := C.perf_buffer__poll(pb.pb, C.int(timeout)) 
-			if retC < 0 { 
-				errno := syscall.Errno(-retC) 
-				if errno == syscall.EINTR { 
-					continue 
-				} 
- 
-				return fmt.Errorf("error polling perf buffer: %w", errno) 
-			} 
-		} 
-	} 
-} 
diff --git a/vendor/github.com/aquasecurity/libbpfgo/buf-ring.go b/vendor/github.com/aquasecurity/libbpfgo/buf-ring.go 
deleted file mode 100644 
index 8e83b30474..0000000000 
--- a/vendor/github.com/aquasecurity/libbpfgo/buf-ring.go 
+++ /dev/null 
@@ -1,112 +0,0 @@ 
-package libbpfgo 
- 
-/* 
-#cgo LDFLAGS: -lelf -lz 
-#include "libbpfgo.h" 
-*/ 
-import "C" 
- 
-import ( 
-	"fmt" 
-	"sync" 
-	"syscall" 
-) 
- 
-// 
-// RingBuffer 
-// 
- 
-type RingBuffer struct { 
-	rb     *C.struct_ring_buffer 
-	bpfMap *BPFMap 
-	slot   uint 
-	stop   chan struct{} 
-	closed bool 
-	wg     sync.WaitGroup 
-} 
- 
-// Poll waits up to timeout (in milliseconds) to gather 
-// data from the ring buffer. 
-func (rb *RingBuffer) Poll(timeout int) { 
-	rb.stop = make(chan struct{}) 
-	rb.wg.Add(1) 
-	go rb.poll(timeout) 
-} 
- 
-// Deprecated: use RingBuffer.Poll() instead. 
-func (rb *RingBuffer) Start() { 
-	rb.Poll(300) 
-} 
- 
-func (rb *RingBuffer) Stop() { 
-	if rb.stop == nil { 
-		return 
-	} 
- 
-	// Signal the poll goroutine to exit 
-	close(rb.stop) 
- 
-	// The event channel should be drained here since the consumer 
-	// may have stopped at this point. Failure to drain it will 
-	// result in a deadlock: the channel will fill up and the poll 
-	// goroutine will block in the callback. 
-	eventChan := eventChannels.get(rb.slot).(chan []byte) 
-	go func() { 
-		// revive:disable:empty-block 
-		for range eventChan { 
-		} 
-		// revive:enable:empty-block 
-	}() 
- 
-	// Wait for the poll goroutine to exit 
-	rb.wg.Wait() 
- 
-	// Close the channel -- this is useful for the consumer but 
-	// also to terminate the drain goroutine above.
-	close(eventChan) 
- 
-	// Reset rb.stop to allow multiple safe calls to Stop() 
-	rb.stop = nil 
-} 
- 
-func (rb *RingBuffer) Close() { 
-	if rb.closed { 
-		return 
-	} 
- 
-	rb.Stop() 
-	C.ring_buffer__free(rb.rb) 
-	eventChannels.remove(rb.slot) 
-	rb.closed = true 
-} 
- 
-func (rb *RingBuffer) isStopped() bool { 
-	select { 
-	case <-rb.stop: 
-		return true 
-	default: 
-		return false 
-	} 
-} 
- 
-func (rb *RingBuffer) poll(timeout int) error { 
-	defer rb.wg.Done() 
- 
-	for { 
-		retC := C.ring_buffer__poll(rb.rb, C.int(timeout)) 
-		if rb.isStopped() { 
-			break 
-		} 
- 
-		if retC < 0 { 
-			errno := syscall.Errno(-retC) 
-			if errno == syscall.EINTR { 
-				continue 
-			} 
- 
-			return fmt.Errorf("error polling ring buffer: %w", errno) 
-		} 
-	} 
- 
-	return nil 
-} 
diff --git a/vendor/github.com/aquasecurity/libbpfgo/elf.go b/vendor/github.com/aquasecurity/libbpfgo/elf.go 
deleted file mode 100644 
index d328a4f3b3..0000000000 
--- a/vendor/github.com/aquasecurity/libbpfgo/elf.go 
+++ /dev/null 
@@ -1,62 +0,0 @@ 
-package libbpfgo 
- 
-import ( 
-	"debug/elf" 
-	"encoding/binary" 
-	"errors" 
-	"strings" 
-) 
- 
-type Symbol struct { 
-	name   string 
-	size   int 
-	offset int 
- 
-	sectionName string 
-	byteOrder   binary.ByteOrder 
-} 
- 
-func getGlobalVariableSymbol(e *elf.File, varName string) (*Symbol, error) { 
-	regularSymbols, err := e.Symbols() 
-	if err != nil { 
-		return nil, err 
-	} 
- 
-	var symbols []Symbol 
-	for _, s := range regularSymbols { 
-		i := int(s.Section) 
-		if i >= len(e.Sections) { 
-			continue 
-		} 
-		sectionName := e.Sections[i].Name 
-		if isGlobalVariableSection(sectionName) { 
-			symbols = append(symbols, Symbol{ 
-				name:        s.Name, 
-				size:        int(s.Size), 
-				offset:      int(s.Value), 
-				sectionName: sectionName, 
-				byteOrder:   e.ByteOrder, 
-			}) 
-		} 
-	} 
- 
-	for _, s := range symbols { 
-		if s.name == varName { 
-			return &s, nil 
-		} 
-	} 
- 
-	return nil, errors.New("symbol not found") 
-} 
- 
-func isGlobalVariableSection(sectionName string) bool { 
-	if sectionName == ".data" || sectionName == ".rodata" { 
-		return true 
-	} 
-	if strings.HasPrefix(sectionName, ".data.") || 
-		strings.HasPrefix(sectionName, ".rodata.") { 
-		return true 
-	} 
- 
-	return false 
-} 
diff --git a/vendor/github.com/aquasecurity/libbpfgo/go.work b/vendor/github.com/aquasecurity/libbpfgo/go.work 
deleted file mode 100644 
index 28f130255c..0000000000 
--- a/vendor/github.com/aquasecurity/libbpfgo/go.work 
+++ /dev/null 
@@ -1,6 +0,0 @@ 
-go 1.18 
- 
-use ( 
-	.
-	./helpers 
-) 
diff --git a/vendor/github.com/aquasecurity/libbpfgo/libbpf_cb.go b/vendor/github.com/aquasecurity/libbpfgo/libbpf_cb.go 
deleted file mode 100644 
index 3a58190ee6..0000000000 
--- a/vendor/github.com/aquasecurity/libbpfgo/libbpf_cb.go 
+++ /dev/null 
@@ -1,35 +0,0 @@ 
-package libbpfgo 
- 
-import ( 
-	"C" 
-	"unsafe" 
-) 
- 
-// revive:disable 
- 
-// This callback definition needs to be in a different file from where it is declared in C. 
-// Otherwise, a multiple definition compilation error will occur. 
- 
-//export perfCallback 
-func perfCallback(ctx unsafe.Pointer, cpu C.int, data unsafe.Pointer, size C.int) { 
-	pb := eventChannels.get(uint(uintptr(ctx))).(*PerfBuffer) 
-	pb.eventsChan <- C.GoBytes(data, size) 
-} 
- 
-//export perfLostCallback 
-func perfLostCallback(ctx unsafe.Pointer, cpu C.int, cnt C.ulonglong) { 
-	pb := eventChannels.get(uint(uintptr(ctx))).(*PerfBuffer) 
-	if pb.lostChan != nil { 
-		pb.lostChan <- uint64(cnt) 
-	} 
-} 
- 
-//export ringbufferCallback 
-func ringbufferCallback(ctx unsafe.Pointer, data unsafe.Pointer, size C.int) C.int { 
-	ch := eventChannels.get(uint(uintptr(ctx))).(chan []byte) 
-	ch <- C.GoBytes(data, size) 
- 
-	return C.int(0) 
-} 
- 
-// revive:enable 
diff --git a/vendor/github.com/aquasecurity/libbpfgo/libbpfgo.c b/vendor/github.com/aquasecurity/libbpfgo/libbpfgo.c 
deleted file mode 100644 
index 67cd88ebe4..0000000000 
--- a/vendor/github.com/aquasecurity/libbpfgo/libbpfgo.c 
+++ /dev/null 
@@ -1,469 +0,0 @@ 
-#include "libbpfgo.h" 
- 
-extern void loggerCallback(enum libbpf_print_level level, char *output); 
-extern void perfCallback(void *ctx, int cpu, void *data, __u32 size); 
-extern void perfLostCallback(void *ctx, int cpu, __u64 cnt); 
-extern int ringbufferCallback(void *ctx, void *data, size_t size); 
- 
-int libbpf_print_fn(enum libbpf_print_level level, // libbpf print level 
-                    const char *format,            // format used for the msg 
-                    va_list args)                  // args used by format 
-{ 
-    int ret; 
-    size_t len; 
-    char *out; 
-    va_list check; 
- 
-    va_copy(check, args); 
-    ret = vsnprintf(NULL, 0, format, check); // get output length 
-    va_end(check); 
- 
-    if (ret < 0) 
-        return ret; 
- 
-    len = ret + 1; // add 1 for NUL 
-    out = malloc(len); 
-    if (!out) 
-        return -ENOMEM; 
- 
-    va_copy(check, args); 
-    ret = vsnprintf(out, len, format, check); 
-    va_end(check); 
- 
-    if (ret > 0) 
-        loggerCallback(level, out); 
- 
-    free(out); 
- 
-    return ret; 
-} 
- 
-void cgo_libbpf_set_print_fn() 
-{ 
-    libbpf_set_print(libbpf_print_fn); 
-} 
- 
-struct ring_buffer *cgo_init_ring_buf(int map_fd, uintptr_t ctx) 
-{ 
-    struct ring_buffer *rb = NULL; 
- 
-    rb = ring_buffer__new(map_fd, ringbufferCallback, (void *) ctx, NULL); 
-    if (!rb) { 
-        int saved_errno = errno; 
-        fprintf(stderr, "Failed to initialize ring buffer: %s\n", strerror(errno)); 
-        errno = saved_errno; 
- 
-        return NULL; 
-    } 
- 
-    return rb; 
-} 
- 
-struct perf_buffer *cgo_init_perf_buf(int map_fd, int page_cnt, uintptr_t ctx) 
-{ 
-    struct perf_buffer_opts pb_opts = {}; 
-    struct perf_buffer *pb = NULL; 
- 
-    pb_opts.sz = sizeof(struct perf_buffer_opts); 
- 
-    pb = perf_buffer__new(map_fd, page_cnt, perfCallback, perfLostCallback, (void *) ctx, &pb_opts); 
-    if (!pb) { 
-        int saved_errno = errno; 
-        fprintf(stderr, "Failed to initialize perf buffer: %s\n", strerror(errno)); 
-        errno = saved_errno; 
- 
-        return NULL; 
-    } 
- 
-    return pb; 
-} 
- 
-void cgo_bpf_map__initial_value(struct bpf_map *map, void *value) 
-{ 
-    size_t psize; 
-    const void *data; 
- 
-    data = bpf_map__initial_value(map, &psize); 
-    if (!data) 
-        return; 
- 
-    memcpy(value, data, psize); 
-} 
- 
-int
cgo_bpf_prog_attach_cgroup_legacy(int prog_fd, // eBPF program file descriptor - int target_fd, // cgroup directory file descriptor - int type) // BPF_CGROUP_INET_{INGRESS,EGRESS}, ... -{ - union bpf_attr attr; - memset(&attr, 0, sizeof(attr)); - attr.target_fd = target_fd; - attr.attach_bpf_fd = prog_fd; - attr.attach_type = type; - attr.attach_flags = BPF_F_ALLOW_MULTI; // or BPF_F_ALLOW_OVERRIDE - - return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr)); -} - -int cgo_bpf_prog_detach_cgroup_legacy(int prog_fd, // eBPF program file descriptor - int target_fd, // cgroup directory file descriptor - int type) // BPF_CGROUP_INET_{INGRESS,EGRESS}, ... -{ - union bpf_attr attr; - memset(&attr, 0, sizeof(attr)); - attr.target_fd = target_fd; - attr.attach_bpf_fd = prog_fd; - attr.attach_type = type; - - return syscall(__NR_bpf, BPF_PROG_DETACH, &attr, sizeof(attr)); -} - -// -// struct handlers -// - -struct bpf_iter_attach_opts *cgo_bpf_iter_attach_opts_new(__u32 map_fd, - enum bpf_cgroup_iter_order order, - __u32 cgroup_fd, - __u64 cgroup_id, - __u32 tid, - __u32 pid, - __u32 pid_fd) -{ - union bpf_iter_link_info *linfo; - linfo = calloc(1, sizeof(*linfo)); - if (!linfo) - return NULL; - - linfo->map.map_fd = map_fd; - linfo->cgroup.order = order; - linfo->cgroup.cgroup_fd = cgroup_fd; - linfo->cgroup.cgroup_id = cgroup_id; - linfo->task.tid = tid; - linfo->task.pid = pid; - linfo->task.pid_fd = pid_fd; - - struct bpf_iter_attach_opts *opts; - opts = calloc(1, sizeof(*opts)); - if (!opts) { - free(linfo); - return NULL; - } - - opts->sz = sizeof(*opts); - opts->link_info_len = sizeof(*linfo); - opts->link_info = linfo; - - return opts; -} - -void cgo_bpf_iter_attach_opts_free(struct bpf_iter_attach_opts *opts) -{ - if (!opts) - return; - - free(opts->link_info); - free(opts); -} - -struct bpf_object_open_opts *cgo_bpf_object_open_opts_new(const char *btf_file_path, - const char *kconfig_path, - const char *bpf_obj_name) -{ - struct bpf_object_open_opts *opts; - opts = calloc(1, sizeof(*opts)); - if (!opts) - return NULL; - - opts->sz = sizeof(*opts); - opts->btf_custom_path = btf_file_path; - opts->kconfig = kconfig_path; - opts->object_name = bpf_obj_name; - - return opts; -} - -void cgo_bpf_object_open_opts_free(struct bpf_object_open_opts *opts) -{ - free(opts); -} - -struct bpf_map_create_opts *cgo_bpf_map_create_opts_new(__u32 btf_fd, - __u32 btf_key_type_id, - __u32 btf_value_type_id, - __u32 btf_vmlinux_value_type_id, - __u32 inner_map_fd, - __u32 map_flags, - __u64 map_extra, - __u32 numa_node, - __u32 map_ifindex) -{ - struct bpf_map_create_opts *opts; - opts = calloc(1, sizeof(*opts)); - if (!opts) - return NULL; - - opts->sz = sizeof(*opts); - opts->btf_fd = btf_fd; - opts->btf_key_type_id = btf_key_type_id; - opts->btf_value_type_id = btf_value_type_id; - opts->btf_vmlinux_value_type_id = btf_vmlinux_value_type_id; - opts->inner_map_fd = inner_map_fd; - opts->map_flags = map_flags; - opts->map_extra = map_extra; - opts->numa_node = numa_node; - opts->map_ifindex = map_ifindex; - - return opts; -} - -void cgo_bpf_map_create_opts_free(struct bpf_map_create_opts *opts) -{ - free(opts); -} - -struct bpf_map_batch_opts *cgo_bpf_map_batch_opts_new(__u64 elem_flags, __u64 flags) -{ - struct bpf_map_batch_opts *opts; - opts = calloc(1, sizeof(*opts)); - if (!opts) - return NULL; - - opts->sz = sizeof(*opts); - opts->elem_flags = elem_flags; - opts->flags = flags; - - return opts; -} - -void cgo_bpf_map_batch_opts_free(struct bpf_map_batch_opts *opts) -{ - free(opts); -} - -struct 
bpf_map_info *cgo_bpf_map_info_new() -{ - struct bpf_map_info *info; - info = calloc(1, sizeof(*info)); - if (!info) - return NULL; - - return info; -} - -__u32 cgo_bpf_map_info_size() -{ - return sizeof(struct bpf_map_info); -} - -void cgo_bpf_map_info_free(struct bpf_map_info *info) -{ - free(info); -} - -struct bpf_tc_opts *cgo_bpf_tc_opts_new( - int prog_fd, __u32 flags, __u32 prog_id, __u32 handle, __u32 priority) -{ - struct bpf_tc_opts *opts; - opts = calloc(1, sizeof(*opts)); - if (!opts) - return NULL; - - opts->sz = sizeof(*opts); - opts->prog_fd = prog_fd; - opts->flags = flags; - opts->prog_id = prog_id; - opts->handle = handle; - opts->priority = priority; - - return opts; -} - -void cgo_bpf_tc_opts_free(struct bpf_tc_opts *opts) -{ - free(opts); -} - -struct bpf_tc_hook *cgo_bpf_tc_hook_new() -{ - struct bpf_tc_hook *hook; - hook = calloc(1, sizeof(*hook)); - if (!hook) - return NULL; - - hook->sz = sizeof(*hook); - - return hook; -} - -void cgo_bpf_tc_hook_free(struct bpf_tc_hook *hook) -{ - free(hook); -} - -// -// struct getters -// - -// bpf_map_info - -__u32 cgo_bpf_map_info_type(struct bpf_map_info *info) -{ - if (!info) - return 0; - - return info->type; -} - -__u32 cgo_bpf_map_info_id(struct bpf_map_info *info) -{ - if (!info) - return 0; - - return info->id; -} - -__u32 cgo_bpf_map_info_key_size(struct bpf_map_info *info) -{ - if (!info) - return 0; - - return info->key_size; -} - -__u32 cgo_bpf_map_info_value_size(struct bpf_map_info *info) -{ - if (!info) - return 0; - - return info->value_size; -} - -__u32 cgo_bpf_map_info_max_entries(struct bpf_map_info *info) -{ - if (!info) - return 0; - - return info->max_entries; -} - -__u32 cgo_bpf_map_info_map_flags(struct bpf_map_info *info) -{ - if (!info) - return 0; - - return info->map_flags; -} - -char *cgo_bpf_map_info_name(struct bpf_map_info *info) -{ - if (!info) - return NULL; - - return info->name; -} - -__u32 cgo_bpf_map_info_ifindex(struct bpf_map_info *info) -{ - if (!info) - return 0; - - return info->ifindex; -} - -__u32 cgo_bpf_map_info_btf_vmlinux_value_type_id(struct bpf_map_info *info) -{ - if (!info) - return 0; - - return info->btf_vmlinux_value_type_id; -} - -__u64 cgo_bpf_map_info_netns_dev(struct bpf_map_info *info) -{ - if (!info) - return 0; - - return info->netns_dev; -} - -__u64 cgo_bpf_map_info_netns_ino(struct bpf_map_info *info) -{ - if (!info) - return 0; - - return info->netns_ino; -} - -__u32 cgo_bpf_map_info_btf_id(struct bpf_map_info *info) -{ - if (!info) - return 0; - - return info->btf_id; -} - -__u32 cgo_bpf_map_info_btf_key_type_id(struct bpf_map_info *info) -{ - if (!info) - return 0; - - return info->btf_key_type_id; -} - -__u32 cgo_bpf_map_info_btf_value_type_id(struct bpf_map_info *info) -{ - if (!info) - return 0; - - return info->btf_value_type_id; -} - -__u64 cgo_bpf_map_info_map_extra(struct bpf_map_info *info) -{ - if (!info) - return 0; - - return info->map_extra; -} - -// bpf_tc_opts - -int cgo_bpf_tc_opts_prog_fd(struct bpf_tc_opts *opts) -{ - if (!opts) - return 0; - - return opts->prog_fd; -} - -__u32 cgo_bpf_tc_opts_flags(struct bpf_tc_opts *opts) -{ - if (!opts) - return 0; - - return opts->flags; -} - -__u32 cgo_bpf_tc_opts_prog_id(struct bpf_tc_opts *opts) -{ - if (!opts) - return 0; - - return opts->prog_id; -} - -__u32 cgo_bpf_tc_opts_handle(struct bpf_tc_opts *opts) -{ - if (!opts) - return 0; - - return opts->handle; -} - -__u32 cgo_bpf_tc_opts_priority(struct bpf_tc_opts *opts) -{ - if (!opts) - return 0; - - return opts->priority; -} diff --git 
a/vendor/github.com/aquasecurity/libbpfgo/libbpfgo.go b/vendor/github.com/aquasecurity/libbpfgo/libbpfgo.go 
deleted file mode 100644 
index 9d017fee01..0000000000 
--- a/vendor/github.com/aquasecurity/libbpfgo/libbpfgo.go 
+++ /dev/null 
@@ -1,110 +0,0 @@ 
-package libbpfgo 
- 
-/* 
-#cgo LDFLAGS: -lelf -lz 
-#include "libbpfgo.h" 
-*/ 
-import "C" 
- 
-import ( 
-	"fmt" 
-	"syscall" 
-) 
- 
-// 
-// Version 
-// 
- 
-// MajorVersion returns the major semver version of libbpf. 
-func MajorVersion() int { 
-	return C.LIBBPF_MAJOR_VERSION 
-} 
- 
-// MinorVersion returns the minor semver version of libbpf. 
-func MinorVersion() int { 
-	return C.LIBBPF_MINOR_VERSION 
-} 
- 
-// LibbpfVersionString returns the string representation of the libbpf version which 
-// libbpfgo is linked against. 
-func LibbpfVersionString() string { 
-	return fmt.Sprintf("v%d.%d", MajorVersion(), MinorVersion()) 
-} 
- 
-// 
-// Strict Mode 
-// 
- 
-// LibbpfStrictMode is an enum as defined in https://github.com/libbpf/libbpf/blob/2cd2d03f63242c048a896179398c68d2dbefe3d6/src/libbpf_legacy.h#L23 
-type LibbpfStrictMode uint32 
- 
-const ( 
-	LibbpfStrictModeAll               LibbpfStrictMode = C.LIBBPF_STRICT_ALL 
-	LibbpfStrictModeNone              LibbpfStrictMode = C.LIBBPF_STRICT_NONE 
-	LibbpfStrictModeCleanPtrs         LibbpfStrictMode = C.LIBBPF_STRICT_CLEAN_PTRS 
-	LibbpfStrictModeDirectErrs        LibbpfStrictMode = C.LIBBPF_STRICT_DIRECT_ERRS 
-	LibbpfStrictModeSecName           LibbpfStrictMode = C.LIBBPF_STRICT_SEC_NAME 
-	LibbpfStrictModeNoObjectList      LibbpfStrictMode = C.LIBBPF_STRICT_NO_OBJECT_LIST 
-	LibbpfStrictModeAutoRlimitMemlock LibbpfStrictMode = C.LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK 
-	LibbpfStrictModeMapDefinitions    LibbpfStrictMode = C.LIBBPF_STRICT_MAP_DEFINITIONS 
-) 
- 
-func (b LibbpfStrictMode) String() (str string) { 
-	x := map[LibbpfStrictMode]string{ 
-		LibbpfStrictModeAll:               "LIBBPF_STRICT_ALL", 
-		LibbpfStrictModeNone:              "LIBBPF_STRICT_NONE", 
-		LibbpfStrictModeCleanPtrs:         "LIBBPF_STRICT_CLEAN_PTRS", 
-		LibbpfStrictModeDirectErrs:        "LIBBPF_STRICT_DIRECT_ERRS", 
-		LibbpfStrictModeSecName:           "LIBBPF_STRICT_SEC_NAME", 
-		LibbpfStrictModeNoObjectList:      "LIBBPF_STRICT_NO_OBJECT_LIST", 
-		LibbpfStrictModeAutoRlimitMemlock: "LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK", 
-		LibbpfStrictModeMapDefinitions:    "LIBBPF_STRICT_MAP_DEFINITIONS", 
-	} 
- 
-	str, ok := x[b] 
-	if !ok { 
-		str = LibbpfStrictModeNone.String() 
-	} 
- 
-	return str 
-} 
- 
-// SetStrictMode is a no-op as of libbpf v1.0. 
-func SetStrictMode(mode LibbpfStrictMode) { 
-	C.libbpf_set_strict_mode(uint32(mode)) 
-} 
- 
-// 
-// Support 
-// 
- 
-func BPFProgramTypeIsSupported(progType BPFProgType) (bool, error) { 
-	supportedC := C.libbpf_probe_bpf_prog_type(C.enum_bpf_prog_type(int(progType)), nil) 
-	if supportedC < 1 { 
-		return false, syscall.Errno(-supportedC) 
-	} 
- 
-	return supportedC == 1, nil 
-} 
- 
-func BPFMapTypeIsSupported(mapType MapType) (bool, error) { 
-	supportedC := C.libbpf_probe_bpf_map_type(C.enum_bpf_map_type(int(mapType)), nil) 
-	if supportedC < 1 { 
-		return false, syscall.Errno(-supportedC) 
-	} 
- 
-	return supportedC == 1, nil 
-} 
- 
-// 
-// Misc 
-// 
- 
-func NumPossibleCPUs() (int, error) { 
-	nCPUsC := C.libbpf_num_possible_cpus() 
-	if nCPUsC < 0 { 
-		return 0, fmt.Errorf("failed to retrieve the number of CPUs: %w", syscall.Errno(-nCPUsC)) 
-	} 
- 
-	return int(nCPUsC), nil 
-} 
diff --git a/vendor/github.com/aquasecurity/libbpfgo/libbpfgo.h b/vendor/github.com/aquasecurity/libbpfgo/libbpfgo.h 
deleted file mode 100644 
index b6feba268e..0000000000 
--- a/vendor/github.com/aquasecurity/libbpfgo/libbpfgo.h 
+++ /dev/null 
@@ -1,104 +0,0 @@ 
-#ifndef __LIBBPF_GO_H__ 
-#define
__LIBBPF_GO_H__ 
- 
-#ifdef __powerpc64__ 
-    #define __SANE_USERSPACE_TYPES__ 1 
-#endif 
- 
-#include <errno.h> 
-#include <stdarg.h> 
-#include <stdbool.h> 
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <string.h> 
-#include <sys/syscall.h> 
-#include <unistd.h> 
- 
-#include <bpf/bpf.h> 
-#include <bpf/libbpf.h> 
-#include <linux/bpf.h> // uapi 
- 
-void cgo_libbpf_set_print_fn(); 
- 
-struct ring_buffer *cgo_init_ring_buf(int map_fd, uintptr_t ctx); 
-struct perf_buffer *cgo_init_perf_buf(int map_fd, int page_cnt, uintptr_t ctx); 
- 
-void cgo_bpf_map__initial_value(struct bpf_map *map, void *value); 
- 
-int cgo_bpf_prog_attach_cgroup_legacy(int prog_fd, int target_fd, int type); 
-int cgo_bpf_prog_detach_cgroup_legacy(int prog_fd, int target_fd, int type); 
- 
-// 
-// struct handlers 
-// 
- 
-struct bpf_iter_attach_opts *cgo_bpf_iter_attach_opts_new(__u32 map_fd, 
-                                                          enum bpf_cgroup_iter_order order, 
-                                                          __u32 cgroup_fd, 
-                                                          __u64 cgroup_id, 
-                                                          __u32 tid, 
-                                                          __u32 pid, 
-                                                          __u32 pid_fd); 
-void cgo_bpf_iter_attach_opts_free(struct bpf_iter_attach_opts *opts); 
- 
-struct bpf_object_open_opts *cgo_bpf_object_open_opts_new(const char *btf_file_path, 
-                                                          const char *kconfig_path, 
-                                                          const char *bpf_obj_name); 
-void cgo_bpf_object_open_opts_free(struct bpf_object_open_opts *opts); 
- 
-struct bpf_map_create_opts *cgo_bpf_map_create_opts_new(__u32 btf_fd, 
-                                                        __u32 btf_key_type_id, 
-                                                        __u32 btf_value_type_id, 
-                                                        __u32 btf_vmlinux_value_type_id, 
-                                                        __u32 inner_map_fd, 
-                                                        __u32 map_flags, 
-                                                        __u64 map_extra, 
-                                                        __u32 numa_node, 
-                                                        __u32 map_ifindex); 
-void cgo_bpf_map_create_opts_free(struct bpf_map_create_opts *opts); 
- 
-struct bpf_map_batch_opts *cgo_bpf_map_batch_opts_new(__u64 elem_flags, __u64 flags); 
-void cgo_bpf_map_batch_opts_free(struct bpf_map_batch_opts *opts); 
- 
-struct bpf_map_info *cgo_bpf_map_info_new(); 
-__u32 cgo_bpf_map_info_size(); 
-void cgo_bpf_map_info_free(struct bpf_map_info *info); 
- 
-struct bpf_tc_opts *cgo_bpf_tc_opts_new( 
-    int prog_fd, __u32 flags, __u32 prog_id, __u32 handle, __u32 priority); 
-void cgo_bpf_tc_opts_free(struct bpf_tc_opts *opts); 
- 
-struct bpf_tc_hook *cgo_bpf_tc_hook_new(); 
-void cgo_bpf_tc_hook_free(struct bpf_tc_hook *hook); 
- 
-// 
-// struct getters 
-// 
- 
-// bpf_map_info 
- 
-__u32 cgo_bpf_map_info_type(struct bpf_map_info *info); 
-__u32 cgo_bpf_map_info_id(struct bpf_map_info *info); 
-__u32 cgo_bpf_map_info_key_size(struct bpf_map_info *info); 
-__u32 cgo_bpf_map_info_value_size(struct bpf_map_info *info); 
-__u32 cgo_bpf_map_info_max_entries(struct bpf_map_info *info); 
-__u32 cgo_bpf_map_info_map_flags(struct bpf_map_info *info); 
-char *cgo_bpf_map_info_name(struct bpf_map_info *info); 
-__u32 cgo_bpf_map_info_ifindex(struct bpf_map_info *info); 
-__u32 cgo_bpf_map_info_btf_vmlinux_value_type_id(struct bpf_map_info *info); 
-__u64 cgo_bpf_map_info_netns_dev(struct bpf_map_info *info); 
-__u64 cgo_bpf_map_info_netns_ino(struct bpf_map_info *info); 
-__u32 cgo_bpf_map_info_btf_id(struct bpf_map_info *info); 
-__u32 cgo_bpf_map_info_btf_key_type_id(struct bpf_map_info *info); 
-__u32 cgo_bpf_map_info_btf_value_type_id(struct bpf_map_info *info); 
-__u64 cgo_bpf_map_info_map_extra(struct bpf_map_info *info); 
- 
-// bpf_tc_opts 
- 
-int cgo_bpf_tc_opts_prog_fd(struct bpf_tc_opts *opts); 
-__u32 cgo_bpf_tc_opts_flags(struct bpf_tc_opts *opts); 
-__u32 cgo_bpf_tc_opts_prog_id(struct bpf_tc_opts *opts); 
-__u32 cgo_bpf_tc_opts_handle(struct bpf_tc_opts *opts); 
-__u32 cgo_bpf_tc_opts_priority(struct bpf_tc_opts *opts); 
- 
-#endif 
diff --git a/vendor/github.com/aquasecurity/libbpfgo/link-reader.go b/vendor/github.com/aquasecurity/libbpfgo/link-reader.go 
deleted file mode 100644 
index da458c078b..0000000000 
---
a/vendor/github.com/aquasecurity/libbpfgo/link-reader.go 
+++ /dev/null 
@@ -1,27 +0,0 @@ 
-package libbpfgo 
- 
-/* 
-#cgo LDFLAGS: -lelf -lz 
-#include "libbpfgo.h" 
-*/ 
-import "C" 
- 
-import "syscall" 
- 
-// 
-// BPFLinkReader 
-// 
- 
-// BPFLinkReader reads data from a BPF link 
-type BPFLinkReader struct { 
-	l  *BPFLink 
-	fd int 
-} 
- 
-func (i *BPFLinkReader) Read(p []byte) (n int, err error) { 
-	return syscall.Read(i.fd, p) 
-} 
- 
-func (i *BPFLinkReader) Close() error { 
-	return syscall.Close(i.fd) 
-} 
diff --git a/vendor/github.com/aquasecurity/libbpfgo/link.go b/vendor/github.com/aquasecurity/libbpfgo/link.go 
deleted file mode 100644 
index a599087270..0000000000 
--- a/vendor/github.com/aquasecurity/libbpfgo/link.go 
+++ /dev/null 
@@ -1,124 +0,0 @@ 
-package libbpfgo 
- 
-/* 
-#cgo LDFLAGS: -lelf -lz 
-#include "libbpfgo.h" 
-*/ 
-import "C" 
- 
-import ( 
-	"fmt" 
-	"syscall" 
-	"unsafe" 
-) 
- 
-// 
-// LinkType 
-// 
- 
-type LinkType int 
- 
-const ( 
-	Tracepoint LinkType = iota 
-	RawTracepoint 
-	Kprobe 
-	Kretprobe 
-	LSM 
-	PerfEvent 
-	Uprobe 
-	Uretprobe 
-	Tracing 
-	XDP 
-	Cgroup 
-	CgroupLegacy 
-	Netns 
-	Iter 
-) 
- 
-// 
-// BPFLink 
-// 
- 
-type bpfLinkLegacy struct { 
-	attachType BPFAttachType 
-	cgroupDir  string 
-} 
- 
-type BPFLink struct { 
-	link      *C.struct_bpf_link 
-	prog      *BPFProg 
-	linkType  LinkType 
-	eventName string 
-	legacy    *bpfLinkLegacy // if set, this is a fake BPFLink 
-} 
- 
-func (l *BPFLink) DestroyLegacy(linkType LinkType) error { 
-	switch l.linkType { 
-	case CgroupLegacy: 
-		return l.prog.DetachCgroupLegacy( 
-			l.legacy.cgroupDir, 
-			l.legacy.attachType, 
-		) 
-	} 
- 
-	return fmt.Errorf("unable to destroy legacy link") 
-} 
- 
-func (l *BPFLink) Destroy() error { 
-	if l.legacy != nil { 
-		return l.DestroyLegacy(l.linkType) 
-	} 
-	if retC := C.bpf_link__destroy(l.link); retC < 0 { 
-		return syscall.Errno(-retC) 
-	} 
- 
-	l.link = nil 
- 
-	return nil 
-} 
- 
-func (l *BPFLink) FileDescriptor() int { 
-	return int(C.bpf_link__fd(l.link)) 
-} 
- 
-// Deprecated: use BPFLink.FileDescriptor() instead.
-func (l *BPFLink) GetFd() int { 
-	return l.FileDescriptor() 
-} 
- 
-func (l *BPFLink) Pin(pinPath string) error { 
-	pathC := C.CString(pinPath) 
-	defer C.free(unsafe.Pointer(pathC)) 
- 
-	retC := C.bpf_link__pin(l.link, pathC) 
-	if retC < 0 { 
-		return fmt.Errorf("failed to pin link %s to path %s: %w", l.eventName, pinPath, syscall.Errno(-retC)) 
-	} 
- 
-	return nil 
-} 
- 
-func (l *BPFLink) Unpin() error { 
-	retC := C.bpf_link__unpin(l.link) 
-	if retC < 0 { 
-		return fmt.Errorf("failed to unpin link %s: %w", l.eventName, syscall.Errno(-retC)) 
-	} 
- 
-	return nil 
-} 
- 
-// 
-// BPF Link Reader (low-level) 
-// 
- 
-func (l *BPFLink) Reader() (*BPFLinkReader, error) { 
-	fdC := C.bpf_iter_create(C.int(l.FileDescriptor())) 
-	if fdC < 0 { 
-		return nil, fmt.Errorf("failed to create reader: %w", syscall.Errno(-fdC)) 
-	} 
- 
-	return &BPFLinkReader{ 
-		l:  l, 
-		fd: int(fdC), 
-	}, nil 
-} 
diff --git a/vendor/github.com/aquasecurity/libbpfgo/logger_cb.go b/vendor/github.com/aquasecurity/libbpfgo/logger_cb.go 
deleted file mode 100644 
index 2c66ddeb71..0000000000 
--- a/vendor/github.com/aquasecurity/libbpfgo/logger_cb.go 
+++ /dev/null 
@@ -1,71 +0,0 @@ 
-package libbpfgo 
- 
-/* 
-#include <bpf/libbpf.h> 
-*/ 
-import "C" 
- 
-import ( 
-	"fmt" 
-	"os" 
-) 
- 
-// This callback definition needs to be in a different file from where it is 
-// declared in C. Otherwise, a multiple definition compilation error will occur. 
- 
-// loggerCallback is called by libbpf_print_fn() which in turn is called by 
-// libbpf 
-// 
-// revive:disable 
-// 
-//export loggerCallback 
-func loggerCallback(libbpfPrintLevel int, libbpfOutput *C.char) { 
-	goOutput := C.GoString(libbpfOutput) 
- 
-	for _, fnFilterOut := range callbacks.LogFilters { 
-		if fnFilterOut != nil { 
-			if fnFilterOut(libbpfPrintLevel, goOutput) { 
-				return 
-			} 
-		} 
-	} 
- 
-	// pass received output to callback, leaving formatting to consumer 
-	callbacks.Log(libbpfPrintLevel, goOutput) 
-} 
- 
-const ( 
-	// libbpf print levels 
-	LibbpfWarnLevel  = int(C.LIBBPF_WARN) 
-	LibbpfInfoLevel  = int(C.LIBBPF_INFO) 
-	LibbpfDebugLevel = int(C.LIBBPF_DEBUG) 
-) 
- 
-// Callbacks stores the callbacks to be used by libbpfgo 
-type Callbacks struct { 
-	Log        func(level int, msg string) 
-	LogFilters []func(libLevel int, msg string) bool 
-} 
- 
-// callbacks is initialized with default callbacks, but can be changed by SetLoggerCbs 
-var callbacks = Callbacks{ 
-	Log:        logFallback, 
-	LogFilters: []func(libLevel int, msg string) bool{}, 
-} 
- 
-// SetLoggerCbs receives a Callbacks type to be used to log libbpf outputs and to filter out those outputs 
-func SetLoggerCbs(cbs Callbacks) { 
-	if cbs.Log == nil { // guarantee that there is always an outputter 
-		cbs.Log = logFallback 
-	} 
- 
-	callbacks = cbs 
-} 
- 
-// logFallback is the default logger callback 
-// - level is ignored 
-func logFallback(level int, msg string) { 
-	fmt.Fprint(os.Stderr, msg) 
-} 
- 
-// revive:enable 
diff --git a/vendor/github.com/aquasecurity/libbpfgo/map-common.go b/vendor/github.com/aquasecurity/libbpfgo/map-common.go 
deleted file mode 100644 
index 6526e28bd9..0000000000 
--- a/vendor/github.com/aquasecurity/libbpfgo/map-common.go 
+++ /dev/null 
@@ -1,207 +0,0 @@ 
-package libbpfgo 
- 
-/* 
-#cgo LDFLAGS: -lelf -lz 
-#include "libbpfgo.h" 
-*/ 
-import "C" 
- 
-import ( 
-	"fmt" 
-	"syscall" 
-) 
- 
-// 
-// MapType 
-// 
- 
-type MapType uint32 
- 
-const ( 
-	MapTypeUnspec             MapType = C.BPF_MAP_TYPE_UNSPEC 
-	MapTypeHash               MapType = C.BPF_MAP_TYPE_HASH 
-	MapTypeArray              MapType = C.BPF_MAP_TYPE_ARRAY 
-	MapTypeProgArray          MapType = C.BPF_MAP_TYPE_PROG_ARRAY 
-	MapTypePerfEventArray     MapType =
C.BPF_MAP_TYPE_PERF_EVENT_ARRAY - MapTypePerCPUHash MapType = C.BPF_MAP_TYPE_PERCPU_HASH - MapTypePerCPUArray MapType = C.BPF_MAP_TYPE_PERCPU_ARRAY - MapTypeStackTrace MapType = C.BPF_MAP_TYPE_STACK_TRACE - MapTypeCgroupArray MapType = C.BPF_MAP_TYPE_CGROUP_ARRAY - MapTypeLRUHash MapType = C.BPF_MAP_TYPE_LRU_HASH - MapTypeLRUPerCPUHash MapType = C.BPF_MAP_TYPE_LRU_PERCPU_HASH - MapTypeLPMTrie MapType = C.BPF_MAP_TYPE_LPM_TRIE - MapTypeArrayOfMaps MapType = C.BPF_MAP_TYPE_ARRAY_OF_MAPS - MapTypeHashOfMaps MapType = C.BPF_MAP_TYPE_HASH_OF_MAPS - MapTypeDevMap MapType = C.BPF_MAP_TYPE_DEVMAP - MapTypeSockMap MapType = C.BPF_MAP_TYPE_SOCKMAP - MapTypeCPUMap MapType = C.BPF_MAP_TYPE_CPUMAP - MapTypeXSKMap MapType = C.BPF_MAP_TYPE_XSKMAP - MapTypeSockHash MapType = C.BPF_MAP_TYPE_SOCKHASH - MapTypeCgroupStorage MapType = C.BPF_MAP_TYPE_CGROUP_STORAGE - MapTypeReusePortSockArray MapType = C.BPF_MAP_TYPE_REUSEPORT_SOCKARRAY - MapTypePerCPUCgroupStorage MapType = C.BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE - MapTypeQueue MapType = C.BPF_MAP_TYPE_QUEUE - MapTypeStack MapType = C.BPF_MAP_TYPE_STACK - MapTypeSKStorage MapType = C.BPF_MAP_TYPE_SK_STORAGE - MapTypeDevmapHash MapType = C.BPF_MAP_TYPE_DEVMAP_HASH - MapTypeStructOps MapType = C.BPF_MAP_TYPE_STRUCT_OPS - MapTypeRingbuf MapType = C.BPF_MAP_TYPE_RINGBUF - MapTypeInodeStorage MapType = C.BPF_MAP_TYPE_INODE_STORAGE - MapTypeTaskStorage MapType = C.BPF_MAP_TYPE_TASK_STORAGE - MapTypeBloomFilter MapType = C.BPF_MAP_TYPE_BLOOM_FILTER -) - -var mapTypeToString = map[MapType]string{ - MapTypeUnspec: "BPF_MAP_TYPE_UNSPEC", - MapTypeHash: "BPF_MAP_TYPE_HASH", - MapTypeArray: "BPF_MAP_TYPE_ARRAY", - MapTypeProgArray: "BPF_MAP_TYPE_PROG_ARRAY", - MapTypePerfEventArray: "BPF_MAP_TYPE_PERF_EVENT_ARRAY", - MapTypePerCPUHash: "BPF_MAP_TYPE_PERCPU_HASH", - MapTypePerCPUArray: "BPF_MAP_TYPE_PERCPU_ARRAY", - MapTypeStackTrace: "BPF_MAP_TYPE_STACK_TRACE", - MapTypeCgroupArray: "BPF_MAP_TYPE_CGROUP_ARRAY", - MapTypeLRUHash: "BPF_MAP_TYPE_LRU_HASH", - MapTypeLRUPerCPUHash: "BPF_MAP_TYPE_LRU_PERCPU_HASH", - MapTypeLPMTrie: "BPF_MAP_TYPE_LPM_TRIE", - MapTypeArrayOfMaps: "BPF_MAP_TYPE_ARRAY_OF_MAPS", - MapTypeHashOfMaps: "BPF_MAP_TYPE_HASH_OF_MAPS", - MapTypeDevMap: "BPF_MAP_TYPE_DEVMAP", - MapTypeSockMap: "BPF_MAP_TYPE_SOCKMAP", - MapTypeCPUMap: "BPF_MAP_TYPE_CPUMAP", - MapTypeXSKMap: "BPF_MAP_TYPE_XSKMAP", - MapTypeSockHash: "BPF_MAP_TYPE_SOCKHASH", - MapTypeCgroupStorage: "BPF_MAP_TYPE_CGROUP_STORAGE", - MapTypeReusePortSockArray: "BPF_MAP_TYPE_REUSEPORT_SOCKARRAY", - MapTypePerCPUCgroupStorage: "BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE", - MapTypeQueue: "BPF_MAP_TYPE_QUEUE", - MapTypeStack: "BPF_MAP_TYPE_STACK", - MapTypeSKStorage: "BPF_MAP_TYPE_SK_STORAGE", - MapTypeDevmapHash: "BPF_MAP_TYPE_DEVMAP_HASH", - MapTypeStructOps: "BPF_MAP_TYPE_STRUCT_OPS", - MapTypeRingbuf: "BPF_MAP_TYPE_RINGBUF", - MapTypeInodeStorage: "BPF_MAP_TYPE_INODE_STORAGE", - MapTypeTaskStorage: "BPF_MAP_TYPE_TASK_STORAGE", - MapTypeBloomFilter: "BPF_MAP_TYPE_BLOOM_FILTER", -} - -func (t MapType) String() string { - str, ok := mapTypeToString[t] - if !ok { - // MapTypeUnspec must exist in mapTypeToString to avoid infinite recursion. 
-		return MapTypeUnspec.String() 
-	} 
- 
-	return str 
-} 
- 
-func (t MapType) Name() string { 
-	return C.GoString(C.libbpf_bpf_map_type_str(C.enum_bpf_map_type(t))) 
-} 
- 
-// 
-// MapFlag 
-// 
- 
-type MapFlag uint32 
- 
-const ( 
-	MapFlagUpdateAny     MapFlag = iota // create new element or update existing 
-	MapFlagUpdateNoExist                // create new element if it didn't exist 
-	MapFlagUpdateExist                  // update existing element 
-	MapFlagFLock                        // spin_lock-ed map_lookup/map_update 
-) 
- 
-// 
-// BPFMapInfo 
-// 
- 
-// BPFMapInfo mirrors the C structure bpf_map_info. 
-type BPFMapInfo struct { 
-	Type                  MapType 
-	ID                    uint32 
-	KeySize               uint32 
-	ValueSize             uint32 
-	MaxEntries            uint32 
-	MapFlags              uint32 
-	Name                  string 
-	IfIndex               uint32 
-	BTFVmlinuxValueTypeID uint32 
-	NetnsDev              uint64 
-	NetnsIno              uint64 
-	BTFID                 uint32 
-	BTFKeyTypeID          uint32 
-	BTFValueTypeID        uint32 
-	MapExtra              uint64 
-} 
- 
-// GetMapFDByID returns a file descriptor for the map with the given ID. 
-func GetMapFDByID(id uint32) (int, error) { 
-	fdC := C.bpf_map_get_fd_by_id(C.uint(id)) 
-	if fdC < 0 { 
-		return int(fdC), fmt.Errorf("could not find map id %d: %w", id, syscall.Errno(-fdC)) 
-	} 
- 
-	return int(fdC), nil 
-} 
- 
-// GetMapInfoByFD returns the BPFMapInfo for the map with the given file descriptor. 
-func GetMapInfoByFD(fd int) (*BPFMapInfo, error) { 
-	infoC := C.cgo_bpf_map_info_new() 
-	defer C.cgo_bpf_map_info_free(infoC) 
- 
-	infoLenC := C.cgo_bpf_map_info_size() 
-	retC := C.bpf_map_get_info_by_fd(C.int(fd), infoC, &infoLenC) 
-	if retC < 0 { 
-		return nil, fmt.Errorf("failed to get map info for fd %d: %w", fd, syscall.Errno(-retC)) 
-	} 
- 
-	return &BPFMapInfo{ 
-		Type:                  MapType(C.cgo_bpf_map_info_type(infoC)), 
-		ID:                    uint32(C.cgo_bpf_map_info_id(infoC)), 
-		KeySize:               uint32(C.cgo_bpf_map_info_key_size(infoC)), 
-		ValueSize:             uint32(C.cgo_bpf_map_info_value_size(infoC)), 
-		MaxEntries:            uint32(C.cgo_bpf_map_info_max_entries(infoC)), 
-		MapFlags:              uint32(C.cgo_bpf_map_info_map_flags(infoC)), 
-		Name:                  C.GoString(C.cgo_bpf_map_info_name(infoC)), 
-		IfIndex:               uint32(C.cgo_bpf_map_info_ifindex(infoC)), 
-		BTFVmlinuxValueTypeID: uint32(C.cgo_bpf_map_info_btf_vmlinux_value_type_id(infoC)), 
-		NetnsDev:              uint64(C.cgo_bpf_map_info_netns_dev(infoC)), 
-		NetnsIno:              uint64(C.cgo_bpf_map_info_netns_ino(infoC)), 
-		BTFID:                 uint32(C.cgo_bpf_map_info_btf_id(infoC)), 
-		BTFKeyTypeID:          uint32(C.cgo_bpf_map_info_btf_key_type_id(infoC)), 
-		BTFValueTypeID:        uint32(C.cgo_bpf_map_info_btf_value_type_id(infoC)), 
-		MapExtra:              uint64(C.cgo_bpf_map_info_map_extra(infoC)), 
-	}, nil 
-} 
- 
-// 
-// Map misc internal 
-// 
- 
-// calcMapValueSize calculates the size of the value for a map. 
-// For per-CPU maps, it is calculated based on the number of possible CPUs. 
-func calcMapValueSize(valueSize int, mapType MapType) (int, error) { 
-	if valueSize <= 0 { 
-		return 0, fmt.Errorf("value size must be greater than 0") 
-	} 
- 
-	switch mapType { 
-	case MapTypePerCPUArray, 
-		MapTypePerCPUHash, 
-		MapTypeLRUPerCPUHash, 
-		MapTypePerCPUCgroupStorage: 
-		// per-CPU maps have a value size calculated using a round-up of the 
-		// element size multiplied by the number of possible CPUs. 
-		elemSize := roundUp(uint64(valueSize), 8) 
-		numCPU, err := NumPossibleCPUs() 
-		if err != nil { 
-			return 0, err 
-		} 
- 
-		return int(elemSize) * numCPU, nil 
-	default: 
-		// For other maps, the value size does not change.
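-		// (Illustrative: a per-CPU map with an 11-byte value on a 4-CPU 
-		// system takes the branch above instead, yielding 
-		// roundUp(11, 8) * 4 = 64 bytes per looked-up element.)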
-		return valueSize, nil 
-	} 
-} 
diff --git a/vendor/github.com/aquasecurity/libbpfgo/map-iterator.go b/vendor/github.com/aquasecurity/libbpfgo/map-iterator.go 
deleted file mode 100644 
index bf694dadff..0000000000 
--- a/vendor/github.com/aquasecurity/libbpfgo/map-iterator.go 
+++ /dev/null 
@@ -1,66 +0,0 @@ 
-package libbpfgo 
- 
-/* 
-#cgo LDFLAGS: -lelf -lz 
-#include "libbpfgo.h" 
-*/ 
-import "C" 
- 
-import ( 
-	"syscall" 
-	"unsafe" 
-) 
- 
-// 
-// BPFMapIterator (low-level API) 
-// 
- 
-// BPFMapIterator iterates over keys in a BPF map. 
-type BPFMapIterator struct { 
-	mapFD   int 
-	keySize int 
-	err     error 
-	prev    []byte 
-	next    []byte 
-} 
- 
-// Next advances the iterator to the next key in the map. 
-func (it *BPFMapIterator) Next() bool { 
-	if it.err != nil { 
-		return false 
-	} 
- 
-	prevPtr := unsafe.Pointer(nil) 
-	if it.next != nil { 
-		prevPtr = unsafe.Pointer(&it.next[0]) 
-	} 
- 
-	next := make([]byte, it.keySize) 
-	nextPtr := unsafe.Pointer(&next[0]) 
- 
-	retC := C.bpf_map_get_next_key(C.int(it.mapFD), prevPtr, nextPtr) 
-	if retC < 0 { 
-		if err := syscall.Errno(-retC); err != syscall.ENOENT { 
-			it.err = err 
-		} 
- 
-		return false 
-	} 
- 
-	it.prev = it.next 
-	it.next = next 
- 
-	return true 
-} 
- 
-// Key returns the current key value of the iterator, if the most recent call 
-// to Next returned true. 
-// The slice is valid only until the next call to Next. 
-func (it *BPFMapIterator) Key() []byte { 
-	return it.next 
-} 
- 
-// Err returns the last error that occurred during iteration with Next. 
-func (it *BPFMapIterator) Err() error { 
-	return it.err 
-} 
diff --git a/vendor/github.com/aquasecurity/libbpfgo/map-low.go b/vendor/github.com/aquasecurity/libbpfgo/map-low.go 
deleted file mode 100644 
index 523a856850..0000000000 
--- a/vendor/github.com/aquasecurity/libbpfgo/map-low.go 
+++ /dev/null 
@@ -1,476 +0,0 @@ 
-package libbpfgo 
- 
-/* 
-#cgo LDFLAGS: -lelf -lz 
-#include "libbpfgo.h" 
-*/ 
-import "C" 
- 
-import ( 
-	"errors" 
-	"fmt" 
-	"syscall" 
-	"unsafe" 
-) 
- 
-// 
-// BPFMapLow (low-level API) 
-// 
- 
-// BPFMapLow provides a low-level interface to BPF maps. 
-// Its methods follow the BPFMap naming convention. 
-type BPFMapLow struct { 
-	fd   int 
-	info *BPFMapInfo 
-} 
- 
-// BPFMapCreateOpts mirrors the C structure bpf_map_create_opts. 
-type BPFMapCreateOpts struct { 
-	BTFFD                 uint32 
-	BTFKeyTypeID          uint32 
-	BTFValueTypeID        uint32 
-	BTFVmlinuxValueTypeID uint32 
-	InnerMapFD            uint32 
-	MapFlags              uint32 
-	MapExtra              uint64 
-	NumaNode              uint32 
-	MapIfIndex            uint32 
-} 
- 
-// CreateMap creates a new BPF map with the given parameters.
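-// 
-// For example (illustrative; creates a hash map with 4-byte keys and values): 
-// 
-//	m, err := CreateMap(MapTypeHash, "mymap", 4, 4, 128, nil)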
-func CreateMap(mapType MapType, mapName string, keySize, valueSize, maxEntries int, opts *BPFMapCreateOpts) (*BPFMapLow, error) { 
-	mapNameC := C.CString(mapName) 
-	defer C.free(unsafe.Pointer(mapNameC)) 
-	var optsC *C.struct_bpf_map_create_opts 
-	var errno error 
- 
-	if opts != nil { 
-		optsC, errno = C.cgo_bpf_map_create_opts_new( 
-			C.uint(opts.BTFFD), 
-			C.uint(opts.BTFKeyTypeID), 
-			C.uint(opts.BTFValueTypeID), 
-			C.uint(opts.BTFVmlinuxValueTypeID), 
-			C.uint(opts.InnerMapFD), 
-			C.uint(opts.MapFlags), 
-			C.ulonglong(opts.MapExtra), 
-			C.uint(opts.NumaNode), 
-			C.uint(opts.MapIfIndex), 
-		) 
-		if optsC == nil { 
-			return nil, fmt.Errorf("failed to create bpf_map_create_opts: %w", errno) 
-		} 
-		defer C.cgo_bpf_map_create_opts_free(optsC) 
-	} 
- 
-	fdC := C.bpf_map_create(uint32(mapType), mapNameC, C.uint(keySize), C.uint(valueSize), C.uint(maxEntries), optsC) 
-	if fdC < 0 { 
-		return nil, fmt.Errorf("could not create map %s: %w", mapName, syscall.Errno(-fdC)) 
-	} 
- 
-	info, errInfo := GetMapInfoByFD(int(fdC)) 
-	if errInfo != nil { 
-		if errClose := syscall.Close(int(fdC)); errClose != nil { 
-			return nil, fmt.Errorf("could not create map %s: %w, %v", mapName, errInfo, errClose) 
-		} 
- 
-		return nil, fmt.Errorf("could not create map %s: %w", mapName, errInfo) 
-	} 
- 
-	return &BPFMapLow{ 
-		fd:   int(fdC), 
-		info: info, 
-	}, nil 
-} 
- 
-// GetMapByID returns a BPFMapLow instance for the map with the given ID. 
-func GetMapByID(id uint32) (*BPFMapLow, error) { 
-	fd, err := GetMapFDByID(id) 
-	if err != nil { 
-		return nil, err 
-	} 
- 
-	info, err := GetMapInfoByFD(fd) 
-	if err != nil { 
-		return nil, err 
-	} 
- 
-	return &BPFMapLow{ 
-		fd:   fd, 
-		info: info, 
-	}, nil 
-} 
- 
-// GetMapNextID retrieves the next available map ID after the given startID. 
-// It returns the next map ID and an error if one occurs during the operation. 
-func GetMapNextID(startId uint32) (uint32, error) { 
-	startIDC := C.uint(startId) 
-	retC := C.bpf_map_get_next_id(startIDC, &startIDC) 
-	if retC == 0 { 
-		return uint32(startIDC), nil 
-	} 
- 
-	return uint32(startIDC), fmt.Errorf("failed to get next map id: %w", syscall.Errno(-retC)) 
-} 
- 
-// GetMapsIDsByName searches for maps with a specified name and collects their IDs. 
-// It starts the search from the given 'startId' and continues until no more matching maps are found. 
-// The function returns a slice of unsigned 32-bit integers representing the IDs of matching maps. 
-// If no maps with the provided 'name' are found, it returns an empty slice and no error. 
-// The 'startId' is modified and returned as the last processed map ID. 
-// 
-// Example Usage: 
-// 
-//	name := "myMap"      // The name of the map you want to find. 
-//	startId := uint32(0) // The map ID to start the search from. 
-// 
-//	var mapIDs []uint32 // Initialize an empty slice to collect map IDs. 
-//	var err error       // Initialize an error variable. 
-// 
-//	// Retry mechanism in case of errors using the last processed 'startId'. 
-//	for { 
-//		mapIDs, err = GetMapsIDsByName(name, &startId) 
-//		if err != nil { 
-//			// Handle other errors, possibly with a retry mechanism. 
-//			// You can use the 'startId' which contains the last processed map ID to continue the search. 
-//		} else { 
-//			// Successful search, use the 'mapIDs' slice containing the IDs of matching maps. 
-//			// Update 'startId' to the last processed map ID to continue the search.
-// } -// } -func GetMapsIDsByName(name string, startId *uint32) ([]uint32, error) { - var ( - bpfMapsIds []uint32 - err error - ) - - for { - *startId, err = GetMapNextID(*startId) - if err != nil { - if errors.Is(err, syscall.ENOENT) { - return bpfMapsIds, nil - } - - return bpfMapsIds, err - } - - bpfMapLow, err := GetMapByID(*startId) - if err != nil { - return bpfMapsIds, err - } - - if err := syscall.Close(bpfMapLow.FileDescriptor()); err != nil { - return bpfMapsIds, err - } - - if bpfMapLow.Name() != name { - continue - } - - bpfMapsIds = append(bpfMapsIds, bpfMapLow.info.ID) - } -} - -// -// BPFMapLow Specs -// - -func (m *BPFMapLow) FileDescriptor() int { - return m.fd -} - -func (m *BPFMapLow) ReuseFD(fd int) error { - info, err := GetMapInfoByFD(fd) - if err != nil { - return fmt.Errorf("failed to reuse fd %d: %w", fd, err) - } - - newFD, err := syscall.Open("/", syscall.O_RDONLY|syscall.O_CLOEXEC, 0) - if err != nil { - return fmt.Errorf("failed to reuse fd %d: %w", fd, err) - } - - err = syscall.Dup3(fd, newFD, syscall.O_CLOEXEC) - if err != nil { - _ = syscall.Close(newFD) - return fmt.Errorf("failed to reuse fd %d: %w", fd, err) - } - - err = syscall.Close(m.FileDescriptor()) - if err != nil { - _ = syscall.Close(newFD) - return fmt.Errorf("failed to reuse fd %d: %w", fd, err) - } - - m.fd = newFD - m.info = info - - return nil -} - -func (m *BPFMapLow) Name() string { - return m.info.Name -} - -func (m *BPFMapLow) Type() MapType { - return MapType(m.info.Type) -} - -func (m *BPFMapLow) MaxEntries() uint32 { - return m.info.MaxEntries -} - -// TODO: implement `bpf_map__map_flags` -// func (m *BPFMapLow) MapFlags() MapFlag { -// } - -// TODO: implement `bpf_map__numa_node` -// func (m *BPFMapLow) NUMANode() uint32 { -// } - -func (m *BPFMapLow) KeySize() int { - return int(m.info.KeySize) -} - -func (m *BPFMapLow) ValueSize() int { - return int(m.info.ValueSize) -} - -// TODO: implement `bpf_map__btf_key_type_id` -// func (m *BPFMapLow) BTFKeyTypeID() uint32 { -// } - -// TODO: implement `bpf_map__btf_value_type_id` -// func (m *BPFMapLow) BTFValueTypeID() uint32 { -// } - -// TODO: implement `bpf_map__ifindex` -// func (m *BPFMapLow) IfIndex() uint32 { -// } - -// TODO: implement `bpf_map__map_extra` -// func (m *BPFMapLow) MapExtra() uint64 { -// } - -// -// BPFMapLow Operations -// - -func (m *BPFMapLow) GetValue(key unsafe.Pointer) ([]byte, error) { - return m.GetValueFlags(key, MapFlagUpdateAny) -} - -func (m *BPFMapLow) GetValueFlags(key unsafe.Pointer, flags MapFlag) ([]byte, error) { - valueSize, err := calcMapValueSize(m.ValueSize(), m.Type()) - if err != nil { - return nil, fmt.Errorf("map %s %w", m.Name(), err) - } - - value := make([]byte, valueSize) - retC := C.bpf_map_lookup_elem_flags( - C.int(m.FileDescriptor()), - key, - unsafe.Pointer(&value[0]), - C.ulonglong(flags), - ) - if retC < 0 { - return nil, fmt.Errorf("failed to lookup value %v in map %s: %w", key, m.Name(), syscall.Errno(-retC)) - } - - return value, nil -} - -// TODO: implement `bpf_map__lookup_and_delete_elem` -// func (m *BPFMapLow) GetValueAndDeleteKey(key unsafe.Pointer) ([]byte, error) { -// } - -func (m *BPFMapLow) Update(key, value unsafe.Pointer) error { - return m.UpdateValueFlags(key, value, MapFlagUpdateAny) -} - -func (m *BPFMapLow) UpdateValueFlags(key, value unsafe.Pointer, flags MapFlag) error { - retC := C.bpf_map_update_elem( - C.int(m.FileDescriptor()), - key, - value, - C.ulonglong(flags), - ) - if retC < 0 { - return fmt.Errorf("failed to update map %s: %w", m.Name(), 
syscall.Errno(-retC)) 
-	} 
- 
-	return nil 
-} 
- 
-func (m *BPFMapLow) DeleteKey(key unsafe.Pointer) error { 
-	retC := C.bpf_map_delete_elem(C.int(m.FileDescriptor()), key) 
-	if retC < 0 { 
-		return fmt.Errorf("failed to delete key %d in map %s: %w", key, m.Name(), syscall.Errno(-retC)) 
-	} 
- 
-	return nil 
-} 
- 
-// TODO: implement `bpf_map__get_next_key` 
-// func (m *BPFMapLow) GetNextKey(key unsafe.Pointer) (unsafe.Pointer, error) { 
-// } 
- 
-// 
-// BPFMapLow Batch Operations 
-// 
- 
-func (m *BPFMapLow) GetValueBatch(keys unsafe.Pointer, startKey, nextKey unsafe.Pointer, count uint32) ([][]byte, error) { 
-	valueSize, err := calcMapValueSize(m.ValueSize(), m.Type()) 
-	if err != nil { 
-		return nil, fmt.Errorf("map %s %w", m.Name(), err) 
-	} 
- 
-	var ( 
-		values    = make([]byte, valueSize*int(count)) 
-		valuesPtr = unsafe.Pointer(&values[0]) 
-		countC    = C.uint(count) 
-	) 
- 
-	optsC, errno := C.cgo_bpf_map_batch_opts_new(C.BPF_ANY, C.BPF_ANY) 
-	if optsC == nil { 
-		return nil, fmt.Errorf("failed to create bpf_map_batch_opts: %w", errno) 
-	} 
-	defer C.cgo_bpf_map_batch_opts_free(optsC) 
- 
-	// The batch APIs are a bit different in that they can return an error, but 
-	// depending on the errno code, it might mean a complete error (nothing was 
-	// done) or a partial success (some elements were processed). 
-	// 
-	// - On complete success, it will return 0, and errno won't be set. 
-	// - On partial success, it will return -1, and errno will be set to ENOENT. 
-	// - On error, it will return -1, and an errno different from ENOENT. 
-	retC := C.bpf_map_lookup_batch( 
-		C.int(m.FileDescriptor()), 
-		startKey, 
-		nextKey, 
-		keys, 
-		valuesPtr, 
-		&countC, 
-		optsC, 
-	) 
-	errno = syscall.Errno(-retC) 
-	if retC < 0 && errno != syscall.ENOENT { 
-		return nil, fmt.Errorf("failed to batch get value %v in map %s: %w", keys, m.Name(), errno) 
-	} 
- 
-	// Either some or all entries were read. 
-	// retC < 0 && errno == syscall.ENOENT indicates a partial read. 
-	return collectBatchValues(values, uint32(countC), valueSize), nil 
-} 
- 
-func (m *BPFMapLow) GetValueAndDeleteBatch(keys, startKey, nextKey unsafe.Pointer, count uint32) ([][]byte, error) { 
-	valueSize, err := calcMapValueSize(m.ValueSize(), m.Type()) 
-	if err != nil { 
-		return nil, fmt.Errorf("map %s %w", m.Name(), err) 
-	} 
- 
-	var ( 
-		values    = make([]byte, valueSize*int(count)) 
-		valuesPtr = unsafe.Pointer(&values[0]) 
-		countC    = C.uint(count) 
-	) 
- 
-	optsC, errno := C.cgo_bpf_map_batch_opts_new(C.BPF_ANY, C.BPF_ANY) 
-	if optsC == nil { 
-		return nil, fmt.Errorf("failed to create bpf_map_batch_opts: %w", errno) 
-	} 
-	defer C.cgo_bpf_map_batch_opts_free(optsC) 
- 
-	retC := C.bpf_map_lookup_and_delete_batch( 
-		C.int(m.FileDescriptor()), 
-		startKey, 
-		nextKey, 
-		keys, 
-		valuesPtr, 
-		&countC, 
-		optsC, 
-	) 
-	errno = syscall.Errno(-retC) 
-	if retC < 0 && errno != syscall.ENOENT { 
-		return nil, fmt.Errorf("failed to batch lookup and delete values %v in map %s: %w", keys, m.Name(), errno) 
-	} 
- 
-	// Either some or all entries were read and deleted. 
-	// retC < 0 && errno == syscall.ENOENT indicates a partial read and delete.
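-	// (Illustrative: asking for count=100 when only 60 entries remain sets 
-	// errno to ENOENT and countC to 60; the 60 values read and deleted are 
-	// still returned to the caller.)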
- return collectBatchValues(values, uint32(countC), valueSize), nil -} - -func (m *BPFMapLow) UpdateBatch(keys, values unsafe.Pointer, count uint32) error { - countC := C.uint(count) - - optsC, errno := C.cgo_bpf_map_batch_opts_new(C.BPF_ANY, C.BPF_ANY) - if optsC == nil { - return fmt.Errorf("failed to create bpf_map_batch_opts: %w", errno) - } - defer C.cgo_bpf_map_batch_opts_free(optsC) - - retC := C.bpf_map_update_batch( - C.int(m.FileDescriptor()), - keys, - values, - &countC, - optsC, - ) - errno = syscall.Errno(-retC) - if retC < 0 { - if errno != syscall.EFAULT && uint32(countC) != count { - return fmt.Errorf("failed to update ALL elements in map %s, updated (%d/%d): %w", m.Name(), uint32(countC), count, errno) - } - return fmt.Errorf("failed to batch update elements in map %s: %w", m.Name(), errno) - } - - return nil -} - -func (m *BPFMapLow) DeleteKeyBatch(keys unsafe.Pointer, count uint32) error { - countC := C.uint(count) - - optsC, errno := C.cgo_bpf_map_batch_opts_new(C.BPF_ANY, C.BPF_ANY) - if optsC == nil { - return fmt.Errorf("failed to create bpf_map_batch_opts: %w", errno) - } - defer C.cgo_bpf_map_batch_opts_free(optsC) - - retC := C.bpf_map_delete_batch( - C.int(m.FileDescriptor()), - keys, - &countC, - optsC, - ) - errno = syscall.Errno(-retC) - if retC < 0 && errno != syscall.ENOENT { - return fmt.Errorf("failed to batch delete keys %v in map %s: %w", keys, m.Name(), errno) - } - - // retC < 0 && errno == syscall.ENOENT indicates a partial deletion. - return nil -} - -func collectBatchValues(values []byte, count uint32, valueSize int) [][]byte { - var value []byte - var collected [][]byte - - for i := 0; i < int(count*uint32(valueSize)); i += valueSize { - value = values[i : i+valueSize] - collected = append(collected, value) - } - - return collected -} - -// -// BPFMapLow Iterator -// - -func (m *BPFMapLow) Iterator() *BPFMapIterator { - return &BPFMapIterator{ - mapFD: m.FileDescriptor(), - keySize: m.KeySize(), - prev: nil, - next: nil, - } -} diff --git a/vendor/github.com/aquasecurity/libbpfgo/map.go b/vendor/github.com/aquasecurity/libbpfgo/map.go deleted file mode 100644 index f9e4c0e50a..0000000000 --- a/vendor/github.com/aquasecurity/libbpfgo/map.go +++ /dev/null @@ -1,576 +0,0 @@ -package libbpfgo - -/* -#cgo LDFLAGS: -lelf -lz -#include "libbpfgo.h" -*/ -import "C" - -import ( - "fmt" - "syscall" - "unsafe" -) - -// -// BPFMap (high-level API - `bpf_map__*`) -// - -// BPFMap is a wrapper around a libbpf bpf_map. -type BPFMap struct { - bpfMap *C.struct_bpf_map - bpfMapLow *BPFMapLow - module *Module -} - -// -// BPFMap Specs -// - -func (m *BPFMap) Module() *Module { - return m.module -} - -// Deprecated: use BPFMap.Module() instead. -func (m *BPFMap) GetModule() *Module { - return m.Module() -} - -func (m *BPFMap) FileDescriptor() int { - return int(C.bpf_map__fd(m.bpfMap)) -} - -// Deprecated: use BPFMap.FileDescriptor() instead. -func (m *BPFMap) GetFd() int { - return m.FileDescriptor() -} - -// ReuseFD associates the BPFMap instance with the provided map file descriptor. -// -// This function is useful for reusing a map that was previously created by a -// different process. By passing the file descriptor of the existing map, the -// current BPFMap instance becomes linked to that map. -// -// NOTE: The function closes the current file descriptor associated with the -// BPFMap instance and replaces it with a duplicated descriptor pointing to the -// given fd. 
As a result, the instance's original file descriptor becomes invalid, 
-// and all associated information is overwritten. 
-func (m *BPFMap) ReuseFD(fd int) error { 
-	retC := C.bpf_map__reuse_fd(m.bpfMap, C.int(fd)) 
-	if retC < 0 { 
-		return fmt.Errorf("failed to reuse fd %d: %w", fd, syscall.Errno(-retC)) 
-	} 
- 
-	newFD := m.FileDescriptor() 
-	info, err := GetMapInfoByFD(newFD) 
-	if err != nil { 
-		return err 
-	} 
- 
-	m.bpfMapLow.fd = newFD 
-	m.bpfMapLow.info = info 
- 
-	return nil 
-} 
- 
-func (m *BPFMap) Name() string { 
-	return C.GoString(C.bpf_map__name(m.bpfMap)) 
-} 
- 
-// Deprecated: use BPFMap.Name() instead. 
-func (m *BPFMap) GetName() string { 
-	return m.Name() 
-} 
- 
-func (m *BPFMap) Type() MapType { 
-	return MapType(C.bpf_map__type(m.bpfMap)) 
-} 
- 
-// SetType assigns a specific type to a BPFMap instance that is not yet associated 
-// with a file descriptor. 
-func (m *BPFMap) SetType(mapType MapType) error { 
-	retC := C.bpf_map__set_type(m.bpfMap, C.enum_bpf_map_type(int(mapType))) 
-	if retC < 0 { 
-		return fmt.Errorf("could not set bpf map type: %w", syscall.Errno(-retC)) 
-	} 
- 
-	return nil 
-} 
- 
-// MaxEntries returns the capacity of the BPFMap. 
-// 
-// For ring and perf buffer types, this returns the capacity in bytes. 
-func (m *BPFMap) MaxEntries() uint32 { 
-	return uint32(C.bpf_map__max_entries(m.bpfMap)) 
-} 
- 
-// Deprecated: use BPFMap.MaxEntries() instead. 
-func (m *BPFMap) GetMaxEntries() uint32 { 
-	return m.MaxEntries() 
-} 
- 
-// SetMaxEntries sets the capacity of the BPFMap to the given maxEntries value. 
-// 
-// This function must be called after BPF module initialization and before loading 
-// the module with BPFLoadObject, enabling customization of the map capacity. 
-// 
-// For ring and perf buffer types, maxEntries represents the capacity in bytes. 
-func (m *BPFMap) SetMaxEntries(maxEntries uint32) error { 
-	retC := C.bpf_map__set_max_entries(m.bpfMap, C.uint(maxEntries)) 
-	if retC < 0 { 
-		return fmt.Errorf("failed to set map %s max entries to %v: %w", m.Name(), maxEntries, syscall.Errno(-retC)) 
-	} 
- 
-	return nil 
-} 
- 
-// Deprecated: use BPFMap.SetMaxEntries() instead. 
-func (m *BPFMap) Resize(maxEntries uint32) error { 
-	return m.SetMaxEntries(maxEntries) 
-} 
- 
-func (m *BPFMap) MapFlags() MapFlag { 
-	return MapFlag(C.bpf_map__map_flags(m.bpfMap)) 
-} 
- 
-// TODO: implement `bpf_map__set_map_flags` wrapper 
-// func (m *BPFMap) SetMapFlags(flags MapFlag) error { 
-// } 
- 
-// TODO: implement `bpf_map__numa_node` wrapper 
-// func (m *BPFMap) NUMANode() uint32 { 
-// } 
- 
-// TODO: implement `bpf_map__set_numa_node` wrapper 
-// func (m *BPFMap) SetNUMANode(node uint32) error { 
-// } 
- 
-func (m *BPFMap) KeySize() int { 
-	return int(C.bpf_map__key_size(m.bpfMap)) 
-} 
- 
-// SetKeySize sets the key size to a BPFMap instance that is not yet associated 
-// with a file descriptor. 
-func (m *BPFMap) SetKeySize(size uint32) error { 
-	retC := C.bpf_map__set_key_size(m.bpfMap, C.uint(size)) 
-	if retC < 0 { 
-		return fmt.Errorf("could not set map key size: %w", syscall.Errno(-retC)) 
-	} 
- 
-	return nil 
-} 
- 
-func (m *BPFMap) ValueSize() int { 
-	return int(C.bpf_map__value_size(m.bpfMap)) 
-} 
- 
-// SetValueSize sets the value size to a BPFMap instance that is not yet associated 
-// with a file descriptor.
-func (m *BPFMap) SetValueSize(size uint32) error { - retC := C.bpf_map__set_value_size(m.bpfMap, C.uint(size)) - if retC < 0 { - return fmt.Errorf("could not set map value size: %w", syscall.Errno(-retC)) - } - - return nil -} - -func (m *BPFMap) Autocreate() bool { - return bool(C.bpf_map__autocreate(m.bpfMap)) -} - -// SetAutocreate sets whether libbpf should auto-create the BPF map during the -// BPF object load phase. -func (m *BPFMap) SetAutocreate(autocreate bool) error { - retC := C.bpf_map__set_autocreate(m.bpfMap, C.bool(autocreate)) - if retC < 0 { - return fmt.Errorf("could not set map autocreate: %w", syscall.Errno(-retC)) - } - - return nil -} - -func (m *BPFMap) BTFKeyTypeID() uint32 { - return uint32(C.bpf_map__btf_key_type_id(m.bpfMap)) -} - -func (m *BPFMap) BTFValueTypeID() uint32 { - return uint32(C.bpf_map__btf_value_type_id(m.bpfMap)) -} - -func (m *BPFMap) IfIndex() uint32 { - return uint32(C.bpf_map__ifindex(m.bpfMap)) -} - -// TODO: implement `bpf_map__set_ifindex` wrapper -// func (m *BPFMap) SetIfIndex(ifIndex uint32) error { -// } - -func (m *BPFMap) MapExtra() uint64 { - return uint64(C.bpf_map__map_extra(m.bpfMap)) -} - -// TODO: implement `bpf_map__set_map_extra` wrapper -// func (m *BPFMap) SetMapExtra(extra uint64) error { -// } - -func (m *BPFMap) InitialValue() ([]byte, error) { - valueSize, err := calcMapValueSize(m.ValueSize(), m.Type()) - if err != nil { - return nil, fmt.Errorf("map %s %w", m.Name(), err) - } - - value := make([]byte, valueSize) - C.cgo_bpf_map__initial_value(m.bpfMap, unsafe.Pointer(&value[0])) - - return value, nil -} - -func (m *BPFMap) SetInitialValue(value unsafe.Pointer) error { - valueSize, err := calcMapValueSize(m.ValueSize(), m.Type()) - if err != nil { - return fmt.Errorf("map %s %w", m.Name(), err) - } - - retC := C.bpf_map__set_initial_value(m.bpfMap, value, C.ulong(valueSize)) - if retC < 0 { - return fmt.Errorf("failed to set initial value for map %s: %w", m.Name(), syscall.Errno(-retC)) - } - - return nil -} - -// TODO: implement `bpf_map__is_internal` wrapper -// func (m *BPFMap) IsInternal() bool { -// } - -// -// BPFMap Pinning -// - -func (m *BPFMap) PinPath() string { - return C.GoString(C.bpf_map__pin_path(m.bpfMap)) -} - -// Deprecated: use BPFMap.PinPath() instead. -func (m *BPFMap) GetPinPath() string { - return m.PinPath() -} - -func (m *BPFMap) SetPinPath(pinPath string) error { - pathC := C.CString(pinPath) - defer C.free(unsafe.Pointer(pathC)) - - retC := C.bpf_map__set_pin_path(m.bpfMap, pathC) - if retC < 0 { - return fmt.Errorf("failed to set pin for map %s to path %s: %w", m.Name(), pinPath, syscall.Errno(-retC)) - } - - return nil -} - -func (m *BPFMap) IsPinned() bool { - return bool(C.bpf_map__is_pinned(m.bpfMap)) -} - -func (m *BPFMap) Pin(pinPath string) error { - pathC := C.CString(pinPath) - defer C.free(unsafe.Pointer(pathC)) - - retC := C.bpf_map__pin(m.bpfMap, pathC) - if retC < 0 { - return fmt.Errorf("failed to pin map %s to path %s: %w", m.Name(), pinPath, syscall.Errno(-retC)) - } - - return nil -} - -func (m *BPFMap) Unpin(pinPath string) error { - pathC := C.CString(pinPath) - defer C.free(unsafe.Pointer(pathC)) - - retC := C.bpf_map__unpin(m.bpfMap, pathC) - if retC < 0 { - return fmt.Errorf("failed to unpin map %s from path %s: %w", m.Name(), pinPath, syscall.Errno(-retC)) - } - - return nil -} - -// -// BPFMap Map of Maps -// - -// InnerMapInfo retrieves the inner map prototype information associated with a -// BPFMap that represents a map of maps.
-// -// NOTE: It must be called before the module is loaded, since it is a prototype -// destroyed right after the outer map is created. -// -// Reference: -// https://lore.kernel.org/bpf/20200429002739.48006-4-andriin@fb.com/ -func (m *BPFMap) InnerMapInfo() (*BPFMapInfo, error) { - innerMapC, errno := C.bpf_map__inner_map(m.bpfMap) - if innerMapC == nil { - return nil, fmt.Errorf("failed to get inner map for %s: %w", m.Name(), errno) - } - - innerBPFMap := &BPFMap{ - bpfMap: innerMapC, - module: m.module, - } - - return &BPFMapInfo{ - // as it is a prototype, some values are not available - Type: innerBPFMap.Type(), - ID: 0, - KeySize: uint32(innerBPFMap.KeySize()), - ValueSize: uint32(innerBPFMap.ValueSize()), - MaxEntries: innerBPFMap.MaxEntries(), - MapFlags: uint32(innerBPFMap.MapFlags()), - Name: innerBPFMap.Name(), - IfIndex: innerBPFMap.IfIndex(), - BTFVmlinuxValueTypeID: 0, - NetnsDev: 0, - NetnsIno: 0, - BTFID: 0, - BTFKeyTypeID: innerBPFMap.BTFKeyTypeID(), - BTFValueTypeID: innerBPFMap.BTFValueTypeID(), - MapExtra: innerBPFMap.MapExtra(), - }, nil -} - -// SetInnerMap configures the inner map prototype for a BPFMap that represents -// a map of maps. -// -// This function accepts the file descriptor of another map, which will serve as -// a prototype. -// -// NOTE: It must be called before the module is loaded. -func (m *BPFMap) SetInnerMap(templateMapFD int) error { - if templateMapFD < 0 { - return fmt.Errorf("invalid inner map fd %d", templateMapFD) - } - - retC := C.bpf_map__set_inner_map_fd(m.bpfMap, C.int(templateMapFD)) - if retC < 0 { - return fmt.Errorf("failed to set inner map for %s: %w", m.Name(), syscall.Errno(-retC)) - } - - return nil -} - -// -// BPFMap Operations -// - -// GetValue retrieves the value associated with a given key in the BPFMap. -// -// This function accepts an unsafe.Pointer to the key value to be searched -// in the map, and it returns the corresponding value as a slice of bytes. -// All basic types and structs are supported as keys. -// -// NOTE: Slices and arrays are supported, but references should point to the first -// element in the slice or array, instead of the slice or array itself. This is -// crucial to prevent undefined behavior. -// -// For example: -// -// key := []byte{'a', 'b', 'c'} -// keyPtr := unsafe.Pointer(&key[0]) -// bpfmap.GetValue(keyPtr) -func (m *BPFMap) GetValue(key unsafe.Pointer) ([]byte, error) { - return m.GetValueFlags(key, MapFlagUpdateAny) -} - -func (m *BPFMap) GetValueFlags(key unsafe.Pointer, flags MapFlag) ([]byte, error) { - valueSize, err := calcMapValueSize(m.ValueSize(), m.Type()) - if err != nil { - return nil, fmt.Errorf("map %s %w", m.Name(), err) - } - - value := make([]byte, valueSize) - retC := C.bpf_map__lookup_elem( - m.bpfMap, - key, - C.ulong(m.KeySize()), - unsafe.Pointer(&value[0]), - C.ulong(valueSize), - C.ulonglong(flags), - ) - if retC < 0 { - return nil, fmt.Errorf("failed to lookup value %v in map %s: %w", key, m.Name(), syscall.Errno(-retC)) - } - - return value, nil -} - -// TODO: implement `bpf_map__lookup_and_delete_elem` wrapper -// func (m *BPFMap) GetValueAndDeleteKey(key unsafe.Pointer) ([]byte, error) { -// } - -// Deprecated: use BPFMap.GetValue() or BPFMap.GetValueFlags() instead, since -// they already calculate the value size for per-cpu maps.
-func (m *BPFMap) GetValueReadInto(key unsafe.Pointer, value *[]byte) error { - valuePtr := unsafe.Pointer(&(*value)[0]) - retC := C.bpf_map__lookup_elem(m.bpfMap, key, C.ulong(m.KeySize()), valuePtr, C.ulong(len(*value)), 0) - if retC < 0 { - return fmt.Errorf("failed to lookup value %v in map %s: %w", key, m.Name(), syscall.Errno(-retC)) - } - - return nil -} - -// Update inserts or updates the value in the BPFMap that corresponds to a given key. -// -// This function accepts unsafe.Pointer references to both the key and value. -// All basic types and structs are supported. -// -// NOTE: Slices and arrays are supported, but references should point to the first -// element in the slice or array, instead of the slice or array itself. This is -// crucial to prevent undefined behavior. -// -// For example: -// -// key := 1 -// value := []byte{'a', 'b', 'c'} -// keyPtr := unsafe.Pointer(&key) -// valuePtr := unsafe.Pointer(&value[0]) -// bpfmap.Update(keyPtr, valuePtr) -func (m *BPFMap) Update(key, value unsafe.Pointer) error { - return m.UpdateValueFlags(key, value, MapFlagUpdateAny) -} - -func (m *BPFMap) UpdateValueFlags(key, value unsafe.Pointer, flags MapFlag) error { - valueSize, err := calcMapValueSize(m.ValueSize(), m.Type()) - if err != nil { - return fmt.Errorf("map %s %w", m.Name(), err) - } - - retC := C.bpf_map__update_elem( - m.bpfMap, - key, - C.ulong(m.KeySize()), - value, - C.ulong(valueSize), - C.ulonglong(flags), - ) - if retC < 0 { - return fmt.Errorf("failed to update map %s: %w", m.Name(), syscall.Errno(-retC)) - } - - return nil -} - -// DeleteKey removes a specified key and its associated value from the BPFMap. -// -// This function accepts an unsafe.Pointer that references the key to be -// removed from the map. -// All basic types and structs are supported as keys. -// -// NOTE: Slices and arrays are supported, but references should point to the first -// element in the slice or array, instead of the slice or array itself. This is -// crucial to prevent undefined behavior. -func (m *BPFMap) DeleteKey(key unsafe.Pointer) error { - retC := C.bpf_map__delete_elem(m.bpfMap, key, C.ulong(m.KeySize()), 0) - if retC < 0 { - return fmt.Errorf("failed to delete key %v in map %s: %w", key, m.Name(), syscall.Errno(-retC)) - } - - return nil -} - -// TODO: implement `bpf_map__get_next_key` wrapper -// func (m *BPFMap) GetNextKey(key unsafe.Pointer) (unsafe.Pointer, error) { -// } - -// -// BPFMap Batch Operations (low-level API) -// - -// GetValueBatch allows for batch lookups of multiple keys from the map. -// -// The first argument, keys, is a pointer to an array or slice of keys which will -// be populated with the keys returned from this operation. -// It returns the associated values as a slice of slices of bytes. -// -// This API allows for batch lookups of multiple keys, potentially in steps over -// multiple iterations. For example, you provide the last key seen (or nil) for -// the startKey, and the first key to start the next iteration with in nextKey. -// Once the first iteration is complete, you can provide the last key seen in the -// previous iteration as the startKey for the next iteration and repeat until -// nextKey is nil. -// -// The last argument, count, is the number of keys to look up. The kernel will -// update it with the count of the elements that were retrieved. -// -// The API can return partial results even though -1 is returned.
In this case, -// errno will be set to `ENOENT` and the values slice and count will be filled in -// with the elements that were read. See the comment in `BPFMapLow.GetValueBatch` -// for more context. -func (m *BPFMap) GetValueBatch(keys unsafe.Pointer, startKey, nextKey unsafe.Pointer, count uint32) ([][]byte, error) { - return m.bpfMapLow.GetValueBatch(keys, startKey, nextKey, count) -} - -// GetValueAndDeleteBatch allows for batch lookup and deletion of elements where -// each element is deleted after being retrieved from the map. -// -// The first argument, keys, is a pointer to an array or slice of keys which will -// be populated with the keys returned from this operation. -// It returns the associated values as a slice of slices of bytes. -// -// This API allows for batch lookups and deletion of multiple keys, potentially -// in steps over multiple iterations. For example, you provide the last key seen -// (or nil) for the startKey, and the first key to start the next iteration -// with in nextKey. -// Once the first iteration is complete, you can provide the last key seen in the -// previous iteration as the startKey for the next iteration and repeat until -// nextKey is nil. -// -// The last argument, count, is the number of keys to look up and delete. The kernel -// will update it with the count of the elements that were retrieved and deleted. -// -// The API can return partial results even though -1 is returned. In this case, -// errno will be set to `ENOENT` and the values slice and count will be filled in -// with the elements that were read. See the comment in `BPFMapLow.GetValueBatch` -// for more context. -func (m *BPFMap) GetValueAndDeleteBatch(keys, startKey, nextKey unsafe.Pointer, count uint32) ([][]byte, error) { - return m.bpfMapLow.GetValueAndDeleteBatch(keys, startKey, nextKey, count) -} - -// UpdateBatch updates multiple elements in the map by specified keys and their -// corresponding values. -// -// The first argument, keys, is a pointer to an array or slice of keys which will -// be updated using the second argument, values. -// It returns the associated error if any occurred. -// -// The last argument, count, is the number of keys to update. Passing an argument -// that is greater than the number of keys in the map will cause the function to -// return a syscall.EPERM as an error. -func (m *BPFMap) UpdateBatch(keys, values unsafe.Pointer, count uint32) error { - return m.bpfMapLow.UpdateBatch(keys, values, count) -} - -// DeleteKeyBatch allows for batch deletion of multiple elements in the map. -// -// `count` number of keys will be deleted from the map. Passing an argument that -// is greater than the number of keys in the map will cause the function to delete -// fewer keys than requested. See the comment in `BPFMapLow.GetValueBatch` -// for more context, and the iteration sketch just below.
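A minimal sketch of the startKey/nextKey iteration protocol documented above, assuming a map with uint32 keys and a batch size of 32 (the variable names and sizes are illustrative, not part of the original API):

	keys := make([]uint32, 32)
	var next uint32
	var start unsafe.Pointer // nil on the first call: start from the beginning
	for {
		vals, err := bpfmap.GetValueBatch(unsafe.Pointer(&keys[0]), start,
			unsafe.Pointer(&next), uint32(len(keys)))
		if err != nil {
			break // real failure; ENOENT partial batches are returned as values
		}
		// process vals ([][]byte) together with the populated keys here
		if len(vals) < len(keys) {
			break // short batch: the map has been fully traversed
		}
		start = unsafe.Pointer(&next) // resume after the last key seen
	}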
-func (m *BPFMap) DeleteKeyBatch(keys unsafe.Pointer, count uint32) error { - return m.bpfMapLow.DeleteKeyBatch(keys, count) -} - -// -// BPFMap Iterator (low-level API) -// - -func (m *BPFMap) Iterator() *BPFMapIterator { - return &BPFMapIterator{ - mapFD: m.FileDescriptor(), - keySize: m.KeySize(), - prev: nil, - next: nil, - } -} diff --git a/vendor/github.com/aquasecurity/libbpfgo/misc.go b/vendor/github.com/aquasecurity/libbpfgo/misc.go deleted file mode 100644 index 66fd0d7b60..0000000000 --- a/vendor/github.com/aquasecurity/libbpfgo/misc.go +++ /dev/null @@ -1,16 +0,0 @@ -package libbpfgo - -/* -#cgo LDFLAGS: -lelf -lz -#include "libbpfgo.h" -*/ -import "C" - -// -// Misc generic helpers -// - -// roundUp rounds x up to the nearest multiple of y. -func roundUp(x, y uint64) uint64 { - return ((x + (y - 1)) / y) * y -} diff --git a/vendor/github.com/aquasecurity/libbpfgo/module-iterator.go b/vendor/github.com/aquasecurity/libbpfgo/module-iterator.go deleted file mode 100644 index 3bdf0aec47..0000000000 --- a/vendor/github.com/aquasecurity/libbpfgo/module-iterator.go +++ /dev/null @@ -1,81 +0,0 @@ -package libbpfgo - -/* -#cgo LDFLAGS: -lelf -lz -#include "libbpfgo.h" -*/ -import "C" - -// -// BPFObjectIterator (Module Iterator) -// - -// BPFObjectProgramIterator iterates over programs and maps in a BPF object -type BPFObjectIterator struct { - m *Module - prevProg *BPFProg - prevMap *BPFMap -} - -func (it *BPFObjectIterator) NextMap() *BPFMap { - var startMapC *C.struct_bpf_map - if it.prevMap != nil && it.prevMap.bpfMap != nil { - startMapC = it.prevMap.bpfMap - } - - bpfMapC, errno := C.bpf_object__next_map(it.m.obj, startMapC) - if bpfMapC == nil { - _ = errno // intentionally ignored - return nil - } - - bpfMap := &BPFMap{ - bpfMap: bpfMapC, - module: it.m, - } - it.prevMap = bpfMap - - if !bpfMap.module.loaded { - bpfMap.bpfMapLow = &BPFMapLow{ - fd: -1, - info: &BPFMapInfo{}, - } - - return bpfMap - } - - fd := bpfMap.FileDescriptor() - info, err := GetMapInfoByFD(fd) - if err != nil { - return nil - } - - bpfMap.bpfMapLow = &BPFMapLow{ - fd: fd, - info: info, - } - - return bpfMap -} - -func (it *BPFObjectIterator) NextProgram() *BPFProg { - var startProg *C.struct_bpf_program - if it.prevProg != nil && it.prevProg.prog != nil { - startProg = it.prevProg.prog - } - - progC, errno := C.bpf_object__next_program(it.m.obj, startProg) - if progC == nil { - _ = errno // intentionally ignored - return nil - } - - prog := &BPFProg{ - prog: progC, - module: it.m, - } - - it.prevProg = prog - - return prog -} diff --git a/vendor/github.com/aquasecurity/libbpfgo/module.go b/vendor/github.com/aquasecurity/libbpfgo/module.go deleted file mode 100644 index 139503dc1a..0000000000 --- a/vendor/github.com/aquasecurity/libbpfgo/module.go +++ /dev/null @@ -1,393 +0,0 @@ -package libbpfgo - -/* -#cgo LDFLAGS: -lelf -lz -#include "libbpfgo.h" -*/ -import "C" - -import ( - "bytes" - "debug/elf" - "encoding/binary" - "errors" - "fmt" - "syscall" - "unsafe" -) - -// -// Module (BPF Object) -// - -type Module struct { - obj *C.struct_bpf_object - links []*BPFLink - perfBufs []*PerfBuffer - ringBufs []*RingBuffer - elf *elf.File - loaded bool -} - -// -// New Module Helpers -// - -type NewModuleArgs struct { - KConfigFilePath string - BTFObjPath string - BPFObjName string - BPFObjPath string - BPFObjBuff []byte - SkipMemlockBump bool -} - -func NewModuleFromFile(bpfObjPath string) (*Module, error) { - return NewModuleFromFileArgs(NewModuleArgs{ - BPFObjPath: bpfObjPath, - }) -} - -func 
NewModuleFromFileArgs(args NewModuleArgs) (*Module, error) { - f, err := elf.Open(args.BPFObjPath) - if err != nil { - return nil, err - } - C.cgo_libbpf_set_print_fn() - - // If skipped, we rely on libbpf to do the bumping if deemed necessary - if !args.SkipMemlockBump { - // TODO: remove this once libbpf memory limit bump issue is solved - if err := bumpMemlockRlimit(); err != nil { - return nil, err - } - } - - var btfFilePathC *C.char - var kconfigPathC *C.char - - // instruct libbpf to use user provided kernel BTF file - if args.BTFObjPath != "" { - btfFilePathC = C.CString(args.BTFObjPath) - defer C.free(unsafe.Pointer(btfFilePathC)) - } - // instruct libbpf to use user provided KConfigFile - if args.KConfigFilePath != "" { - kconfigPathC = C.CString(args.KConfigFilePath) - defer C.free(unsafe.Pointer(kconfigPathC)) - } - - optsC, errno := C.cgo_bpf_object_open_opts_new(btfFilePathC, kconfigPathC, nil) - if optsC == nil { - return nil, fmt.Errorf("failed to create bpf_object_open_opts: %w", errno) - } - defer C.cgo_bpf_object_open_opts_free(optsC) - - bpfFileC := C.CString(args.BPFObjPath) - defer C.free(unsafe.Pointer(bpfFileC)) - - objC, errno := C.bpf_object__open_file(bpfFileC, optsC) - if objC == nil { - return nil, fmt.Errorf("failed to open BPF object at path %s: %w", args.BPFObjPath, errno) - } - - return &Module{ - obj: objC, - elf: f, - }, nil -} - -func NewModuleFromBuffer(bpfObjBuff []byte, bpfObjName string) (*Module, error) { - return NewModuleFromBufferArgs(NewModuleArgs{ - BPFObjBuff: bpfObjBuff, - BPFObjName: bpfObjName, - }) -} - -func NewModuleFromBufferArgs(args NewModuleArgs) (*Module, error) { - f, err := elf.NewFile(bytes.NewReader(args.BPFObjBuff)) - if err != nil { - return nil, err - } - C.cgo_libbpf_set_print_fn() - - // TODO: remove this once libbpf memory limit bump issue is solved - if err := bumpMemlockRlimit(); err != nil { - return nil, err - } - - if args.BTFObjPath == "" { - args.BTFObjPath = "/sys/kernel/btf/vmlinux" - } - - btfFilePathC := C.CString(args.BTFObjPath) - defer C.free(unsafe.Pointer(btfFilePathC)) - kConfigPathC := C.CString(args.KConfigFilePath) - defer C.free(unsafe.Pointer(kConfigPathC)) - bpfObjNameC := C.CString(args.BPFObjName) - defer C.free(unsafe.Pointer(bpfObjNameC)) - bpfBuffC := unsafe.Pointer(C.CBytes(args.BPFObjBuff)) - defer C.free(bpfBuffC) - bpfBuffSizeC := C.size_t(len(args.BPFObjBuff)) - - if len(args.KConfigFilePath) <= 2 { - kConfigPathC = nil - } - - optsC, errno := C.cgo_bpf_object_open_opts_new(btfFilePathC, kConfigPathC, bpfObjNameC) - if optsC == nil { - return nil, fmt.Errorf("failed to create bpf_object_open_opts: %w", errno) - } - defer C.cgo_bpf_object_open_opts_free(optsC) - - objC, errno := C.bpf_object__open_mem(bpfBuffC, bpfBuffSizeC, optsC) - if objC == nil { - return nil, fmt.Errorf("failed to open BPF object %s: %w", args.BPFObjName, errno) - } - - return &Module{ - obj: objC, - elf: f, - }, nil -} - -// NOTE: libbpf has started raising limits by default but, unfortunately, that -// seems to be failing in current libbpf version. The memory limit bump might be -// removed once this is sorted out. 
-func bumpMemlockRlimit() error { - var rLimit syscall.Rlimit - rLimit.Max = 512 << 20 /* 512 MBs */ - rLimit.Cur = 512 << 20 /* 512 MBs */ - err := syscall.Setrlimit(C.RLIMIT_MEMLOCK, &rLimit) - if err != nil { - return fmt.Errorf("error setting rlimit: %v", err) - } - return nil -} - -// -// Module Methods -// - -func (m *Module) Close() { - for _, pb := range m.perfBufs { - pb.Close() - } - for _, rb := range m.ringBufs { - rb.Close() - } - for _, link := range m.links { - if link.link != nil { - link.Destroy() - } - } - C.bpf_object__close(m.obj) -} - -func (m *Module) BPFLoadObject() error { - retC := C.bpf_object__load(m.obj) - if retC < 0 { - return fmt.Errorf("failed to load BPF object: %w", syscall.Errno(-retC)) - } - m.loaded = true - m.elf.Close() - - return nil -} - -// InitGlobalVariable sets global variables (defined in .data or .rodata) -// in bpf code. It must be called before the BPF object is loaded. -func (m *Module) InitGlobalVariable(name string, value interface{}) error { - if m.loaded { - return errors.New("must be called before the BPF object is loaded") - } - s, err := getGlobalVariableSymbol(m.elf, name) - if err != nil { - return err - } - bpfMap, err := m.GetMap(s.sectionName) - if err != nil { - return err - } - - // get current value - currMapValue, err := bpfMap.InitialValue() - if err != nil { - return err - } - - // generate new value - newMapValue := make([]byte, bpfMap.ValueSize()) - copy(newMapValue, currMapValue) - data := bytes.NewBuffer(nil) - if err := binary.Write(data, s.byteOrder, value); err != nil { - return err - } - varValue := data.Bytes() - start := s.offset - end := s.offset + len(varValue) - if len(varValue) > s.size || end > bpfMap.ValueSize() { - return errors.New("invalid value") - } - copy(newMapValue[start:end], varValue) - - // save new value - err = bpfMap.SetInitialValue(unsafe.Pointer(&newMapValue[0])) - return err -} - -func (m *Module) GetMap(mapName string) (*BPFMap, error) { - mapNameC := C.CString(mapName) - defer C.free(unsafe.Pointer(mapNameC)) - - bpfMapC, errno := C.bpf_object__find_map_by_name(m.obj, mapNameC) - if bpfMapC == nil { - return nil, fmt.Errorf("failed to find BPF map %s: %w", mapName, errno) - } - - bpfMap := &BPFMap{ - bpfMap: bpfMapC, - module: m, - } - - if !m.loaded { - bpfMap.bpfMapLow = &BPFMapLow{ - fd: -1, - info: &BPFMapInfo{}, - } - - return bpfMap, nil - } - - fd := bpfMap.FileDescriptor() - info, err := GetMapInfoByFD(fd) - if err != nil { - // Compatibility Note: Some older kernels lack BTF (BPF Type Format) - // support for specific BPF map types. In such scenarios, libbpf may - // fail (EPERM) when attempting to retrieve information for these maps. - // Reference: https://elixir.bootlin.com/linux/v5.15.75/source/tools/lib/bpf/gen_loader.c#L401 - // - // However, we can still get some map info from the BPF map high level API. 
- bpfMap.bpfMapLow = &BPFMapLow{ - fd: fd, - info: &BPFMapInfo{ - Type: bpfMap.Type(), - ID: 0, - KeySize: uint32(bpfMap.KeySize()), - ValueSize: uint32(bpfMap.ValueSize()), - MaxEntries: bpfMap.MaxEntries(), - MapFlags: uint32(bpfMap.MapFlags()), - Name: bpfMap.Name(), - IfIndex: bpfMap.IfIndex(), - BTFVmlinuxValueTypeID: 0, - NetnsDev: 0, - NetnsIno: 0, - BTFID: 0, - BTFKeyTypeID: 0, - BTFValueTypeID: 0, - MapExtra: bpfMap.MapExtra(), - }, - } - - return bpfMap, nil - } - - bpfMap.bpfMapLow = &BPFMapLow{ - fd: fd, - info: info, - } - - return bpfMap, nil -} - -func (m *Module) GetProgram(progName string) (*BPFProg, error) { - progNameC := C.CString(progName) - defer C.free(unsafe.Pointer(progNameC)) - - progC, errno := C.bpf_object__find_program_by_name(m.obj, progNameC) - if progC == nil { - return nil, fmt.Errorf("failed to find BPF program %s: %w", progName, errno) - } - - return &BPFProg{ - prog: progC, - module: m, - }, nil -} - -func (m *Module) InitRingBuf(mapName string, eventsChan chan []byte) (*RingBuffer, error) { - bpfMap, err := m.GetMap(mapName) - if err != nil { - return nil, err - } - - if eventsChan == nil { - return nil, fmt.Errorf("events channel can not be nil") - } - - slot := eventChannels.put(eventsChan) - if slot == -1 { - return nil, fmt.Errorf("max ring buffers reached") - } - - rbC, errno := C.cgo_init_ring_buf(C.int(bpfMap.FileDescriptor()), C.uintptr_t(slot)) - if rbC == nil { - return nil, fmt.Errorf("failed to initialize ring buffer: %w", errno) - } - - ringBuf := &RingBuffer{ - rb: rbC, - bpfMap: bpfMap, - slot: uint(slot), - } - m.ringBufs = append(m.ringBufs, ringBuf) - return ringBuf, nil -} - -func (m *Module) InitPerfBuf(mapName string, eventsChan chan []byte, lostChan chan uint64, pageCnt int) (*PerfBuffer, error) { - bpfMap, err := m.GetMap(mapName) - if err != nil { - return nil, fmt.Errorf("failed to init perf buffer: %v", err) - } - - if eventsChan == nil { - return nil, fmt.Errorf("failed to init perf buffer: events channel can not be nil") - } - - perfBuf := &PerfBuffer{ - bpfMap: bpfMap, - eventsChan: eventsChan, - lostChan: lostChan, - } - - slot := eventChannels.put(perfBuf) - if slot == -1 { - return nil, fmt.Errorf("max number of ring/perf buffers reached") - } - - pbC, errno := C.cgo_init_perf_buf(C.int(bpfMap.FileDescriptor()), C.int(pageCnt), C.uintptr_t(slot)) - if pbC == nil { - eventChannels.remove(uint(slot)) - return nil, fmt.Errorf("failed to initialize perf buffer: %w", errno) - } - - perfBuf.pb = pbC - perfBuf.slot = uint(slot) - - m.perfBufs = append(m.perfBufs, perfBuf) - return perfBuf, nil -} - -func (m *Module) TcHookInit() *TcHook { - return &TcHook{ - hook: C.cgo_bpf_tc_hook_new(), - } -} - -func (m *Module) Iterator() *BPFObjectIterator { - return &BPFObjectIterator{ - m: m, - prevProg: nil, - prevMap: nil, - } -} diff --git a/vendor/github.com/aquasecurity/libbpfgo/prog-common.go b/vendor/github.com/aquasecurity/libbpfgo/prog-common.go deleted file mode 100644 index a1f4d88aaf..0000000000 --- a/vendor/github.com/aquasecurity/libbpfgo/prog-common.go +++ /dev/null @@ -1,239 +0,0 @@ -package libbpfgo - -/* -#cgo LDFLAGS: -lelf -lz -#include "libbpfgo.h" -*/ -import "C" - -// -// BPFProgType -// - -// BPFProgType is an enum as defined in https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/bpf.h -type BPFProgType uint32 - -const ( - BPFProgTypeUnspec BPFProgType = C.BPF_PROG_TYPE_UNSPEC - BPFProgTypeSocketFilter BPFProgType = C.BPF_PROG_TYPE_SOCKET_FILTER - BPFProgTypeKprobe BPFProgType = C.BPF_PROG_TYPE_KPROBE 
- BPFProgTypeSchedCls BPFProgType = C.BPF_PROG_TYPE_SCHED_CLS - BPFProgTypeSchedAct BPFProgType = C.BPF_PROG_TYPE_SCHED_ACT - BPFProgTypeTracepoint BPFProgType = C.BPF_PROG_TYPE_TRACEPOINT - BPFProgTypeXdp BPFProgType = C.BPF_PROG_TYPE_XDP - BPFProgTypePerfEvent BPFProgType = C.BPF_PROG_TYPE_PERF_EVENT - BPFProgTypeCgroupSkb BPFProgType = C.BPF_PROG_TYPE_CGROUP_SKB - BPFProgTypeCgroupSock BPFProgType = C.BPF_PROG_TYPE_CGROUP_SOCK - BPFProgTypeLwtIn BPFProgType = C.BPF_PROG_TYPE_LWT_IN - BPFProgTypeLwtOut BPFProgType = C.BPF_PROG_TYPE_LWT_OUT - BPFProgTypeLwtXmit BPFProgType = C.BPF_PROG_TYPE_LWT_XMIT - BPFProgTypeSockOps BPFProgType = C.BPF_PROG_TYPE_SOCK_OPS - BPFProgTypeSkSkb BPFProgType = C.BPF_PROG_TYPE_SK_SKB - BPFProgTypeCgroupDevice BPFProgType = C.BPF_PROG_TYPE_CGROUP_DEVICE - BPFProgTypeSkMsg BPFProgType = C.BPF_PROG_TYPE_SK_MSG - BPFProgTypeRawTracepoint BPFProgType = C.BPF_PROG_TYPE_RAW_TRACEPOINT - BPFProgTypeCgroupSockAddr BPFProgType = C.BPF_PROG_TYPE_CGROUP_SOCK_ADDR - BPFProgTypeLwtSeg6Local BPFProgType = C.BPF_PROG_TYPE_LWT_SEG6LOCAL - BPFProgTypeLircMode2 BPFProgType = C.BPF_PROG_TYPE_LIRC_MODE2 - BPFProgTypeSkReuseport BPFProgType = C.BPF_PROG_TYPE_SK_REUSEPORT - BPFProgTypeFlowDissector BPFProgType = C.BPF_PROG_TYPE_FLOW_DISSECTOR - BPFProgTypeCgroupSysctl BPFProgType = C.BPF_PROG_TYPE_CGROUP_SYSCTL - BPFProgTypeRawTracepointWritable BPFProgType = C.BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE - BPFProgTypeCgroupSockopt BPFProgType = C.BPF_PROG_TYPE_CGROUP_SOCKOPT - BPFProgTypeTracing BPFProgType = C.BPF_PROG_TYPE_TRACING - BPFProgTypeStructOps BPFProgType = C.BPF_PROG_TYPE_STRUCT_OPS - BPFProgTypeExt BPFProgType = C.BPF_PROG_TYPE_EXT - BPFProgTypeLsm BPFProgType = C.BPF_PROG_TYPE_LSM - BPFProgTypeSkLookup BPFProgType = C.BPF_PROG_TYPE_SK_LOOKUP - BPFProgTypeSyscall BPFProgType = C.BPF_PROG_TYPE_SYSCALL -) - -// Deprecated: Convert type directly instead. 
-func (t BPFProgType) Value() uint64 { return uint64(t) } - -var bpfProgTypeToString = map[BPFProgType]string{ - BPFProgTypeUnspec: "BPF_PROG_TYPE_UNSPEC", - BPFProgTypeSocketFilter: "BPF_PROG_TYPE_SOCKET_FILTER", - BPFProgTypeKprobe: "BPF_PROG_TYPE_KPROBE", - BPFProgTypeSchedCls: "BPF_PROG_TYPE_SCHED_CLS", - BPFProgTypeSchedAct: "BPF_PROG_TYPE_SCHED_ACT", - BPFProgTypeTracepoint: "BPF_PROG_TYPE_TRACEPOINT", - BPFProgTypeXdp: "BPF_PROG_TYPE_XDP", - BPFProgTypePerfEvent: "BPF_PROG_TYPE_PERF_EVENT", - BPFProgTypeCgroupSkb: "BPF_PROG_TYPE_CGROUP_SKB", - BPFProgTypeCgroupSock: "BPF_PROG_TYPE_CGROUP_SOCK", - BPFProgTypeLwtIn: "BPF_PROG_TYPE_LWT_IN", - BPFProgTypeLwtOut: "BPF_PROG_TYPE_LWT_OUT", - BPFProgTypeLwtXmit: "BPF_PROG_TYPE_LWT_XMIT", - BPFProgTypeSockOps: "BPF_PROG_TYPE_SOCK_OPS", - BPFProgTypeSkSkb: "BPF_PROG_TYPE_SK_SKB", - BPFProgTypeCgroupDevice: "BPF_PROG_TYPE_CGROUP_DEVICE", - BPFProgTypeSkMsg: "BPF_PROG_TYPE_SK_MSG", - BPFProgTypeRawTracepoint: "BPF_PROG_TYPE_RAW_TRACEPOINT", - BPFProgTypeCgroupSockAddr: "BPF_PROG_TYPE_CGROUP_SOCK_ADDR", - BPFProgTypeLwtSeg6Local: "BPF_PROG_TYPE_LWT_SEG6LOCAL", - BPFProgTypeLircMode2: "BPF_PROG_TYPE_LIRC_MODE2", - BPFProgTypeSkReuseport: "BPF_PROG_TYPE_SK_REUSEPORT", - BPFProgTypeFlowDissector: "BPF_PROG_TYPE_FLOW_DISSECTOR", - BPFProgTypeCgroupSysctl: "BPF_PROG_TYPE_CGROUP_SYSCTL", - BPFProgTypeRawTracepointWritable: "BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE", - BPFProgTypeCgroupSockopt: "BPF_PROG_TYPE_CGROUP_SOCKOPT", - BPFProgTypeTracing: "BPF_PROG_TYPE_TRACING", - BPFProgTypeStructOps: "BPF_PROG_TYPE_STRUCT_OPS", - BPFProgTypeExt: "BPF_PROG_TYPE_EXT", - BPFProgTypeLsm: "BPF_PROG_TYPE_LSM", - BPFProgTypeSkLookup: "BPF_PROG_TYPE_SK_LOOKUP", - BPFProgTypeSyscall: "BPF_PROG_TYPE_SYSCALL", -} - -func (t BPFProgType) String() string { - str, ok := bpfProgTypeToString[t] - if !ok { - // BPFProgTypeUnspec must exist in bpfProgTypeToString to avoid infinite recursion. 
- return BPFProgTypeUnspec.String() - } - - return str -} - -func (t BPFProgType) Name() string { - return C.GoString(C.libbpf_bpf_prog_type_str(C.enum_bpf_prog_type(t))) -} - -// -// BPFAttachType -// - -type BPFAttachType uint32 - -const ( - BPFAttachTypeCgroupInetIngress BPFAttachType = C.BPF_CGROUP_INET_INGRESS - BPFAttachTypeCgroupInetEgress BPFAttachType = C.BPF_CGROUP_INET_EGRESS - BPFAttachTypeCgroupInetSockCreate BPFAttachType = C.BPF_CGROUP_INET_SOCK_CREATE - BPFAttachTypeCgroupSockOps BPFAttachType = C.BPF_CGROUP_SOCK_OPS - BPFAttachTypeSKSKBStreamParser BPFAttachType = C.BPF_SK_SKB_STREAM_PARSER - BPFAttachTypeSKSKBStreamVerdict BPFAttachType = C.BPF_SK_SKB_STREAM_VERDICT - BPFAttachTypeCgroupDevice BPFAttachType = C.BPF_CGROUP_DEVICE - BPFAttachTypeSKMSGVerdict BPFAttachType = C.BPF_SK_MSG_VERDICT - BPFAttachTypeCgroupInet4Bind BPFAttachType = C.BPF_CGROUP_INET4_BIND - BPFAttachTypeCgroupInet6Bind BPFAttachType = C.BPF_CGROUP_INET6_BIND - BPFAttachTypeCgroupInet4Connect BPFAttachType = C.BPF_CGROUP_INET4_CONNECT - BPFAttachTypeCgroupInet6Connect BPFAttachType = C.BPF_CGROUP_INET6_CONNECT - BPFAttachTypeCgroupInet4PostBind BPFAttachType = C.BPF_CGROUP_INET4_POST_BIND - BPFAttachTypeCgroupInet6PostBind BPFAttachType = C.BPF_CGROUP_INET6_POST_BIND - BPFAttachTypeCgroupUDP4SendMsg BPFAttachType = C.BPF_CGROUP_UDP4_SENDMSG - BPFAttachTypeCgroupUDP6SendMsg BPFAttachType = C.BPF_CGROUP_UDP6_SENDMSG - BPFAttachTypeLircMode2 BPFAttachType = C.BPF_LIRC_MODE2 - BPFAttachTypeFlowDissector BPFAttachType = C.BPF_FLOW_DISSECTOR - BPFAttachTypeCgroupSysctl BPFAttachType = C.BPF_CGROUP_SYSCTL - BPFAttachTypeCgroupUDP4RecvMsg BPFAttachType = C.BPF_CGROUP_UDP4_RECVMSG - BPFAttachTypeCgroupUDP6RecvMsg BPFAttachType = C.BPF_CGROUP_UDP6_RECVMSG - BPFAttachTypeCgroupGetSockOpt BPFAttachType = C.BPF_CGROUP_GETSOCKOPT - BPFAttachTypeCgroupSetSockOpt BPFAttachType = C.BPF_CGROUP_SETSOCKOPT - BPFAttachTypeTraceRawTP BPFAttachType = C.BPF_TRACE_RAW_TP - BPFAttachTypeTraceFentry BPFAttachType = C.BPF_TRACE_FENTRY - BPFAttachTypeTraceFexit BPFAttachType = C.BPF_TRACE_FEXIT - BPFAttachTypeModifyReturn BPFAttachType = C.BPF_MODIFY_RETURN - BPFAttachTypeLSMMac BPFAttachType = C.BPF_LSM_MAC - BPFAttachTypeTraceIter BPFAttachType = C.BPF_TRACE_ITER - BPFAttachTypeCgroupInet4GetPeerName BPFAttachType = C.BPF_CGROUP_INET4_GETPEERNAME - BPFAttachTypeCgroupInet6GetPeerName BPFAttachType = C.BPF_CGROUP_INET6_GETPEERNAME - BPFAttachTypeCgroupInet4GetSockName BPFAttachType = C.BPF_CGROUP_INET4_GETSOCKNAME - BPFAttachTypeCgroupInet6GetSockName BPFAttachType = C.BPF_CGROUP_INET6_GETSOCKNAME - BPFAttachTypeXDPDevMap BPFAttachType = C.BPF_XDP_DEVMAP - BPFAttachTypeCgroupInetSockRelease BPFAttachType = C.BPF_CGROUP_INET_SOCK_RELEASE - BPFAttachTypeXDPCPUMap BPFAttachType = C.BPF_XDP_CPUMAP - BPFAttachTypeSKLookup BPFAttachType = C.BPF_SK_LOOKUP - BPFAttachTypeXDP BPFAttachType = C.BPF_XDP - BPFAttachTypeSKSKBVerdict BPFAttachType = C.BPF_SK_SKB_VERDICT - BPFAttachTypeSKReusePortSelect BPFAttachType = C.BPF_SK_REUSEPORT_SELECT - BPFAttachTypeSKReusePortSelectorMigrate BPFAttachType = C.BPF_SK_REUSEPORT_SELECT_OR_MIGRATE - BPFAttachTypePerfEvent BPFAttachType = C.BPF_PERF_EVENT - BPFAttachTypeTraceKprobeMulti BPFAttachType = C.BPF_TRACE_KPROBE_MULTI -) - -var bpfAttachTypeToString = map[BPFAttachType]string{ - BPFAttachTypeCgroupInetIngress: "BPF_CGROUP_INET_INGRESS", - BPFAttachTypeCgroupInetEgress: "BPF_CGROUP_INET_EGRESS", - BPFAttachTypeCgroupInetSockCreate: "BPF_CGROUP_INET_SOCK_CREATE", - BPFAttachTypeCgroupSockOps: 
"BPF_CGROUP_SOCK_OPS", - BPFAttachTypeSKSKBStreamParser: "BPF_SK_SKB_STREAM_PARSER", - BPFAttachTypeSKSKBStreamVerdict: "BPF_SK_SKB_STREAM_VERDICT", - BPFAttachTypeCgroupDevice: "BPF_CGROUP_DEVICE", - BPFAttachTypeSKMSGVerdict: "BPF_SK_MSG_VERDICT", - BPFAttachTypeCgroupInet4Bind: "BPF_CGROUP_INET4_BIND", - BPFAttachTypeCgroupInet6Bind: "BPF_CGROUP_INET6_BIND", - BPFAttachTypeCgroupInet4Connect: "BPF_CGROUP_INET4_CONNECT", - BPFAttachTypeCgroupInet6Connect: "BPF_CGROUP_INET6_CONNECT", - BPFAttachTypeCgroupInet4PostBind: "BPF_CGROUP_INET4_POST_BIND", - BPFAttachTypeCgroupInet6PostBind: "BPF_CGROUP_INET6_POST_BIND", - BPFAttachTypeCgroupUDP4SendMsg: "BPF_CGROUP_UDP4_SENDMSG", - BPFAttachTypeCgroupUDP6SendMsg: "BPF_CGROUP_UDP6_SENDMSG", - BPFAttachTypeLircMode2: "BPF_LIRC_MODE2", - BPFAttachTypeFlowDissector: "BPF_FLOW_DISSECTOR", - BPFAttachTypeCgroupSysctl: "BPF_CGROUP_SYSCTL", - BPFAttachTypeCgroupUDP4RecvMsg: "BPF_CGROUP_UDP4_RECVMSG", - BPFAttachTypeCgroupUDP6RecvMsg: "BPF_CGROUP_UDP6_RECVMSG", - BPFAttachTypeCgroupGetSockOpt: "BPF_CGROUP_GETSOCKOPT", - BPFAttachTypeCgroupSetSockOpt: "BPF_CGROUP_SETSOCKOPT", - BPFAttachTypeTraceRawTP: "BPF_TRACE_RAW_TP", - BPFAttachTypeTraceFentry: "BPF_TRACE_FENTRY", - BPFAttachTypeTraceFexit: "BPF_TRACE_FEXIT", - BPFAttachTypeModifyReturn: "BPF_MODIFY_RETURN", - BPFAttachTypeLSMMac: "BPF_LSM_MAC", - BPFAttachTypeTraceIter: "BPF_TRACE_ITER", - BPFAttachTypeCgroupInet4GetPeerName: "BPF_CGROUP_INET4_GETPEERNAME", - BPFAttachTypeCgroupInet6GetPeerName: "BPF_CGROUP_INET6_GETPEERNAME", - BPFAttachTypeCgroupInet4GetSockName: "BPF_CGROUP_INET4_GETSOCKNAME", - BPFAttachTypeCgroupInet6GetSockName: "BPF_CGROUP_INET6_GETSOCKNAME", - BPFAttachTypeXDPDevMap: "BPF_XDP_DEVMAP", - BPFAttachTypeCgroupInetSockRelease: "BPF_CGROUP_INET_SOCK_RELEASE", - BPFAttachTypeXDPCPUMap: "BPF_XDP_CPUMAP", - BPFAttachTypeSKLookup: "BPF_SK_LOOKUP", - BPFAttachTypeXDP: "BPF_XDP", - BPFAttachTypeSKSKBVerdict: "BPF_SK_SKB_VERDICT", - BPFAttachTypeSKReusePortSelect: "BPF_SK_REUSEPORT_SELECT", - BPFAttachTypeSKReusePortSelectorMigrate: "BPF_SK_REUSEPORT_SELECT_OR_MIGRATE", - BPFAttachTypePerfEvent: "BPF_PERF_EVENT", - BPFAttachTypeTraceKprobeMulti: "BPF_TRACE_KPROBE_MULTI", -} - -func (t BPFAttachType) String() string { - str, ok := bpfAttachTypeToString[t] - if !ok { - return "BPFAttachType unspecified" - } - - return str -} - -func (t BPFAttachType) Name() string { - return C.GoString(C.libbpf_bpf_attach_type_str(C.enum_bpf_attach_type(t))) -} - -// -// BPFCgroupIterOrder -// - -type BPFCgroupIterOrder uint32 - -const ( - BPFIterOrderUnspec BPFCgroupIterOrder = iota - BPFIterSelfOnly - BPFIterDescendantsPre - BPFIterDescendantsPost - BPFIterAncestorsUp -) - -// -// AttachFlag -// - -type AttachFlag uint32 - -const ( - BPFFNone AttachFlag = 0 - BPFFAllowOverride AttachFlag = C.BPF_F_ALLOW_OVERRIDE - BPFFAllowMulti AttachFlag = C.BPF_F_ALLOW_MULTI - BPFFReplace AttachFlag = C.BPF_F_REPLACE -) diff --git a/vendor/github.com/aquasecurity/libbpfgo/prog.go b/vendor/github.com/aquasecurity/libbpfgo/prog.go deleted file mode 100644 index 9d78eb06aa..0000000000 --- a/vendor/github.com/aquasecurity/libbpfgo/prog.go +++ /dev/null @@ -1,559 +0,0 @@ -package libbpfgo - -/* -#cgo LDFLAGS: -lelf -lz -#include "libbpfgo.h" -*/ -import "C" - -import ( - "fmt" - "net" - "path/filepath" - "strings" - "syscall" - "unsafe" -) - -// -// BPFProg -// - -type BPFProg struct { - prog *C.struct_bpf_program - module *Module - pinnedPath string -} - -func (p *BPFProg) FileDescriptor() int { - return 
int(C.bpf_program__fd(p.prog)) -} - -// Deprecated: use BPFProg.FileDescriptor() instead. -func (p *BPFProg) GetFd() int { - return p.FileDescriptor() -} - -func (p *BPFProg) Pin(path string) error { - absPath, err := filepath.Abs(path) - if err != nil { - return fmt.Errorf("invalid path: %s: %v", path, err) - } - - absPathC := C.CString(absPath) - defer C.free(unsafe.Pointer(absPathC)) - - retC := C.bpf_program__pin(p.prog, absPathC) - if retC < 0 { - return fmt.Errorf("failed to pin program %s to %s: %w", p.Name(), path, syscall.Errno(-retC)) - } - - p.pinnedPath = absPath - - return nil -} - -func (p *BPFProg) Unpin(path string) error { - pathC := C.CString(path) - defer C.free(unsafe.Pointer(pathC)) - - retC := C.bpf_program__unpin(p.prog, pathC) - if retC < 0 { - return fmt.Errorf("failed to unpin program %s to %s: %w", p.Name(), path, syscall.Errno(-retC)) - } - - p.pinnedPath = "" - - return nil -} - -func (p *BPFProg) GetModule() *Module { - return p.module -} - -func (p *BPFProg) Name() string { - return C.GoString(C.bpf_program__name(p.prog)) -} - -// Deprecated: use BPFProg.Name() instead. -func (p *BPFProg) GetName() string { - return p.Name() -} - -func (p *BPFProg) SectionName() string { - return C.GoString(C.bpf_program__section_name(p.prog)) -} - -// Deprecated: use BPFProg.SectionName() instead. -func (p *BPFProg) GetSectionName() string { - return p.SectionName() -} - -func (p *BPFProg) PinPath() string { - return p.pinnedPath // There's no LIBBPF_API for bpf program -} - -// Deprecated: use BPFProg.PinPath() instead. -func (p *BPFProg) GetPinPath() string { - return p.PinPath() -} - -func (p *BPFProg) GetType() BPFProgType { - return BPFProgType(C.bpf_program__type(p.prog)) -} - -func (p *BPFProg) SetAutoload(autoload bool) error { - retC := C.bpf_program__set_autoload(p.prog, C.bool(autoload)) - if retC < 0 { - return fmt.Errorf("failed to set bpf program autoload: %w", syscall.Errno(-retC)) - } - - return nil -} - -// AttachGeneric is used to attach the BPF program using autodetection -// for the attach target. You can specify the destination in BPF code -// via the SEC() such as `SEC("fentry/some_kernel_func")` -func (p *BPFProg) AttachGeneric() (*BPFLink, error) { - linkC, errno := C.bpf_program__attach(p.prog) - if linkC == nil { - return nil, fmt.Errorf("failed to attach program: %w", errno) - } - - return &BPFLink{ - link: linkC, - prog: p, - linkType: Tracing, - eventName: fmt.Sprintf("tracing-%s", p.Name()), - }, nil -} - -// SetAttachTarget can be used to specify the program and/or function to attach -// the BPF program to. 
To attach to a kernel function specify attachProgFD as 0 -func (p *BPFProg) SetAttachTarget(attachProgFD int, attachFuncName string) error { - attachFuncNameC := C.CString(attachFuncName) - defer C.free(unsafe.Pointer(attachFuncNameC)) - - retC := C.bpf_program__set_attach_target(p.prog, C.int(attachProgFD), attachFuncNameC) - if retC < 0 { - return fmt.Errorf("failed to set attach target for program %s %s %w", p.Name(), attachFuncName, syscall.Errno(-retC)) - } - - return nil -} - -// TODO: fix API to return error -func (p *BPFProg) SetProgramType(progType BPFProgType) { - C.bpf_program__set_type(p.prog, C.enum_bpf_prog_type(int(progType))) -} - -// TODO: fix API to return error -func (p *BPFProg) SetAttachType(attachType BPFAttachType) { - C.bpf_program__set_expected_attach_type(p.prog, C.enum_bpf_attach_type(int(attachType))) -} - -// getCgroupDirFD returns a file descriptor for a given cgroup2 directory path -func getCgroupDirFD(cgroupV2DirPath string) (int, error) { - // revive:disable - const ( - O_DIRECTORY int = syscall.O_DIRECTORY - O_RDONLY int = syscall.O_RDONLY - ) - // revive:enable - - fd, err := syscall.Open(cgroupV2DirPath, O_DIRECTORY|O_RDONLY, 0) - if fd < 0 { - return 0, fmt.Errorf("failed to open cgroupv2 directory path %s: %w", cgroupV2DirPath, err) - } - - return fd, nil -} - -// AttachCgroup attaches the BPFProg to a cgroup described by given fd. -func (p *BPFProg) AttachCgroup(cgroupV2DirPath string) (*BPFLink, error) { - cgroupDirFD, err := getCgroupDirFD(cgroupV2DirPath) - if err != nil { - return nil, err - } - defer syscall.Close(cgroupDirFD) - - linkC, errno := C.bpf_program__attach_cgroup(p.prog, C.int(cgroupDirFD)) - if linkC == nil { - return nil, fmt.Errorf("failed to attach cgroup on cgroupv2 %s to program %s: %w", cgroupV2DirPath, p.Name(), errno) - } - - // dirName will be used in bpfLink.eventName. eventName follows a format - // convention and is used to better identify link types and what they are - // linked with in case of errors or similar needs. Having eventName as: - // cgroup-progName-/sys/fs/cgroup/unified/ would look weird so replace it - // to be cgroup-progName-sys-fs-cgroup-unified instead. - dirName := strings.ReplaceAll(cgroupV2DirPath[1:], "/", "-") - - bpfLink := &BPFLink{ - link: linkC, - prog: p, - linkType: Cgroup, - eventName: fmt.Sprintf("cgroup-%s-%s", p.Name(), dirName), - } - p.module.links = append(p.module.links, bpfLink) - - return bpfLink, nil -} - -// AttachCgroupLegacy attaches the BPFProg to a cgroup described by the given -// fd. It first tries to use the most recent attachment method and, if that does -// not work, instead of failing, it tries the legacy way: to attach the cgroup -// eBPF program without previously creating a link. This allows attaching cgroup -// eBPF ingress/egress in older kernels. Note: the first attempt error message -// is filtered out inside libbpf_print_fn() as it is actually a feature probe -// attempt as well. 
-// -// Related kernel commit: https://github.com/torvalds/linux/commit/af6eea57437a -func (p *BPFProg) AttachCgroupLegacy(cgroupV2DirPath string, attachType BPFAttachType) (*BPFLink, error) { - bpfLink, err := p.AttachCgroup(cgroupV2DirPath) - if err == nil { - return bpfLink, nil - } - - // Try the legacy attachment method before fully failing - cgroupDirFD, err := getCgroupDirFD(cgroupV2DirPath) - if err != nil { - return nil, err - } - defer syscall.Close(cgroupDirFD) - - retC, errno := C.cgo_bpf_prog_attach_cgroup_legacy( - C.int(p.FileDescriptor()), - C.int(cgroupDirFD), - C.int(attachType), - ) - if retC < 0 { - return nil, fmt.Errorf("failed to attach (legacy) program %s to cgroupv2 %s: %w", p.Name(), cgroupV2DirPath, errno) - } - - dirName := strings.ReplaceAll(cgroupV2DirPath[1:], "/", "-") - - bpfLinkLegacy := &bpfLinkLegacy{ - attachType: attachType, - cgroupDir: cgroupV2DirPath, - } - fakeBpfLink := &BPFLink{ - link: nil, // detach/destroy made with progfd - prog: p, - eventName: fmt.Sprintf("cgroup-%s-%s", p.Name(), dirName), - // info below needed for detach (there isn't a real ebpf link) - linkType: CgroupLegacy, - legacy: bpfLinkLegacy, - } - - return fakeBpfLink, nil -} - -// DetachCgroupLegacy detaches the BPFProg from a cgroup described by the given -// path. This is needed because in legacy attachment there is no BPFLink, just a -// fake one (kernel did not support it, nor libbpf). This function should be -// called by the (*BPFLink)->Destroy() function, since BPFLink is emulated (so -// users don't need to distinguish between regular and legacy cgroup -// detachments). -func (p *BPFProg) DetachCgroupLegacy(cgroupV2DirPath string, attachType BPFAttachType) error { - cgroupDirFD, err := getCgroupDirFD(cgroupV2DirPath) - if err != nil { - return err - } - defer syscall.Close(cgroupDirFD) - - retC, errno := C.cgo_bpf_prog_detach_cgroup_legacy( - C.int(p.FileDescriptor()), - C.int(cgroupDirFD), - C.int(attachType), - ) - if retC < 0 { - return fmt.Errorf("failed to detach (legacy) program %s from cgroupv2 %s: %w", p.Name(), cgroupV2DirPath, errno) - } - - return nil -} - -func (p *BPFProg) AttachXDP(deviceName string) (*BPFLink, error) { - iface, err := net.InterfaceByName(deviceName) - if err != nil { - return nil, fmt.Errorf("failed to find device by name %s: %w", deviceName, err) - } - - linkC, errno := C.bpf_program__attach_xdp(p.prog, C.int(iface.Index)) - if linkC == nil { - return nil, fmt.Errorf("failed to attach xdp on device %s to program %s: %w", deviceName, p.Name(), errno) - } - - bpfLink := &BPFLink{ - link: linkC, - prog: p, - linkType: XDP, - eventName: fmt.Sprintf("xdp-%s-%s", p.Name(), deviceName), - } - p.module.links = append(p.module.links, bpfLink) - - return bpfLink, nil -} - -func (p *BPFProg) AttachTracepoint(category, name string) (*BPFLink, error) { - tpCategoryC := C.CString(category) - defer C.free(unsafe.Pointer(tpCategoryC)) - tpNameC := C.CString(name) - defer C.free(unsafe.Pointer(tpNameC)) - - linkC, errno := C.bpf_program__attach_tracepoint(p.prog, tpCategoryC, tpNameC) - if linkC == nil { - return nil, fmt.Errorf("failed to attach tracepoint %s to program %s: %w", name, p.Name(), errno) - } - - bpfLink := &BPFLink{ - link: linkC, - prog: p, - linkType: Tracepoint, - eventName: name, - } - p.module.links = append(p.module.links, bpfLink) - - return bpfLink, nil -} - -func (p *BPFProg) AttachRawTracepoint(tpEvent string) (*BPFLink, error) { - tpEventC := C.CString(tpEvent) - defer C.free(unsafe.Pointer(tpEventC)) - - linkC, errno :=
C.bpf_program__attach_raw_tracepoint(p.prog, tpEventC) - if linkC == nil { - return nil, fmt.Errorf("failed to attach raw tracepoint %s to program %s: %w", tpEvent, p.Name(), errno) - } - - bpfLink := &BPFLink{ - link: linkC, - prog: p, - linkType: RawTracepoint, - eventName: tpEvent, - } - p.module.links = append(p.module.links, bpfLink) - - return bpfLink, nil -} - -func (p *BPFProg) AttachLSM() (*BPFLink, error) { - linkC, errno := C.bpf_program__attach_lsm(p.prog) - if linkC == nil { - return nil, fmt.Errorf("failed to attach lsm to program %s: %w", p.Name(), errno) - } - - bpfLink := &BPFLink{ - link: linkC, - prog: p, - linkType: LSM, - } - p.module.links = append(p.module.links, bpfLink) - - return bpfLink, nil -} - -func (p *BPFProg) AttachPerfEvent(fd int) (*BPFLink, error) { - linkC, errno := C.bpf_program__attach_perf_event(p.prog, C.int(fd)) - if linkC == nil { - return nil, fmt.Errorf("failed to attach perf event to program %s: %w", p.Name(), errno) - } - - bpfLink := &BPFLink{ - link: linkC, - prog: p, - linkType: PerfEvent, - } - p.module.links = append(p.module.links, bpfLink) - - return bpfLink, nil -} - -// this API should be used for kernels > 4.17 -func (p *BPFProg) AttachKprobe(kp string) (*BPFLink, error) { - return doAttachKprobe(p, kp, false) -} - -// this API should be used for kernels > 4.17 -func (p *BPFProg) AttachKretprobe(kp string) (*BPFLink, error) { - return doAttachKprobe(p, kp, true) -} - -func doAttachKprobe(prog *BPFProg, kp string, isKretprobe bool) (*BPFLink, error) { - kpC := C.CString(kp) - defer C.free(unsafe.Pointer(kpC)) - - linkC, errno := C.bpf_program__attach_kprobe(prog.prog, C.bool(isKretprobe), kpC) - if linkC == nil { - return nil, fmt.Errorf("failed to attach %s k(ret)probe to program %s: %w", kp, prog.Name(), errno) - } - - kpType := Kprobe - if isKretprobe { - kpType = Kretprobe - } - - bpfLink := &BPFLink{ - link: linkC, - prog: prog, - linkType: kpType, - eventName: kp, - } - prog.module.links = append(prog.module.links, bpfLink) - - return bpfLink, nil -} - -func (p *BPFProg) AttachNetns(networkNamespacePath string) (*BPFLink, error) { - fd, err := syscall.Open(networkNamespacePath, syscall.O_RDONLY, 0) - if fd < 0 { - return nil, fmt.Errorf("failed to open network namespace path %s: %w", networkNamespacePath, err) - } - - linkC, errno := C.bpf_program__attach_netns(p.prog, C.int(fd)) - if linkC == nil { - return nil, fmt.Errorf("failed to attach network namespace on %s to program %s: %w", networkNamespacePath, p.Name(), errno) - } - - // fileName will be used in bpfLink.eventName. eventName follows a format - // convention and is used to better identify link types and what they are - // linked with in case of errors or similar needs. Having eventName as: - // netns-progName-/proc/self/ns/net would look weird so replace it - // to be netns-progName-proc-self-ns-net instead. 
- fileName := strings.ReplaceAll(networkNamespacePath[1:], "/", "-") - - bpfLink := &BPFLink{ - link: linkC, - prog: p, - linkType: Netns, - eventName: fmt.Sprintf("netns-%s-%s", p.Name(), fileName), - } - p.module.links = append(p.module.links, bpfLink) - - return bpfLink, nil -} - -type IterOpts struct { - MapFd int - CgroupIterOrder BPFCgroupIterOrder - CgroupFd int - CgroupId uint64 - Tid int - Pid int - PidFd int -} - -func (p *BPFProg) AttachIter(opts IterOpts) (*BPFLink, error) { - optsC, errno := C.cgo_bpf_iter_attach_opts_new( - C.uint(opts.MapFd), - uint32(opts.CgroupIterOrder), - C.uint(opts.CgroupFd), - C.ulonglong(opts.CgroupId), - C.uint(opts.Tid), - C.uint(opts.Pid), - C.uint(opts.PidFd), - ) - if optsC == nil { - return nil, fmt.Errorf("failed to create iter_attach_opts to program %s: %w", p.Name(), errno) - } - defer C.cgo_bpf_iter_attach_opts_free(optsC) - - linkC, errno := C.bpf_program__attach_iter(p.prog, optsC) - if linkC == nil { - return nil, fmt.Errorf("failed to attach iter to program %s: %w", p.Name(), errno) - } - - bpfLink := &BPFLink{ - link: linkC, - prog: p, - linkType: Iter, - eventName: fmt.Sprintf("iter-%s-%d", p.Name(), opts.MapFd), - } - p.module.links = append(p.module.links, bpfLink) - - return bpfLink, nil -} - -// AttachUprobe attaches the BPFProgram to entry of the symbol in the library or binary at 'path' -// which can be relative or absolute. A pid can be provided to attach to, or -1 can be specified -// to attach to all processes -func (p *BPFProg) AttachUprobe(pid int, path string, offset uint32) (*BPFLink, error) { - absPath, err := filepath.Abs(path) - if err != nil { - return nil, err - } - - return doAttachUprobe(p, false, pid, absPath, offset) -} - -// AttachURetprobe attaches the BPFProgram to exit of the symbol in the library or binary at 'path' -// which can be relative or absolute. A pid can be provided to attach to, or -1 can be specified -// to attach to all processes -func (p *BPFProg) AttachURetprobe(pid int, path string, offset uint32) (*BPFLink, error) { - absPath, err := filepath.Abs(path) - if err != nil { - return nil, err - } - - return doAttachUprobe(p, true, pid, absPath, offset) -} - -func doAttachUprobe(prog *BPFProg, isUretprobe bool, pid int, path string, offset uint32) (*BPFLink, error) { - pathC := C.CString(path) - defer C.free(unsafe.Pointer(pathC)) - - linkC, errno := C.bpf_program__attach_uprobe( - prog.prog, - C.bool(isUretprobe), - C.int(pid), - pathC, - C.size_t(offset), - ) - if linkC == nil { - return nil, fmt.Errorf("failed to attach u(ret)probe to program %s:%d with pid %d: %w ", path, offset, pid, errno) - } - - upType := Uprobe - if isUretprobe { - upType = Uretprobe - } - - bpfLink := &BPFLink{ - link: linkC, - prog: prog, - linkType: upType, - eventName: fmt.Sprintf("%s:%d:%d", path, pid, offset), - } - - return bpfLink, nil -} - -// AttachGenericFD attaches the BPFProgram to a targetFd at the specified attachType hook. -func (p *BPFProg) AttachGenericFD(targetFd int, attachType BPFAttachType, flags AttachFlag) error { - retC := C.bpf_prog_attach( - C.int(p.FileDescriptor()), - C.int(targetFd), - C.enum_bpf_attach_type(int(attachType)), - C.uint(uint(flags)), - ) - if retC < 0 { - return fmt.Errorf("failed to attach: %w", syscall.Errno(-retC)) - } - - return nil -} - -// DetachGenericFD detaches the BPFProgram associated with the targetFd at the hook specified by attachType. 
-func (p *BPFProg) DetachGenericFD(targetFd int, attachType BPFAttachType) error { - retC := C.bpf_prog_detach2( - C.int(p.FileDescriptor()), - C.int(targetFd), - C.enum_bpf_attach_type(int(attachType)), - ) - if retC < 0 { - return fmt.Errorf("failed to detach: %w", syscall.Errno(-retC)) - } - - return nil -} diff --git a/vendor/github.com/aquasecurity/libbpfgo/rwArray.go b/vendor/github.com/aquasecurity/libbpfgo/rwArray.go deleted file mode 100644 index 211a8ca22e..0000000000 --- a/vendor/github.com/aquasecurity/libbpfgo/rwArray.go +++ /dev/null @@ -1,75 +0,0 @@ -package libbpfgo - -import ( - "sync" -) - -type slot struct { - value interface{} - used bool -} - -// rwArray allows for multiple concurrent readers but -// only a single writer. The writers lock a mutex while the readers -// are lock free. -// It is implemented as an array of slots where each slot holds a -// value (of type interface{}) and a boolean marker to indicate if it's -// in use or not. The insertion (Put) performs a linear probe -// looking for an available slot as indicated by the in-use marker. -// While probing, it is not touching the value itself, as it's -// being read without a lock by the readers. -type rwArray struct { - slots []slot - mux sync.Mutex -} - -func newRWArray(capacity uint) rwArray { - return rwArray{ - slots: make([]slot, capacity), - } -} - -func (a *rwArray) put(v interface{}) int { - a.mux.Lock() - defer a.mux.Unlock() - - limit := len(a.slots) - - for i := 0; i < limit; i++ { - if !a.slots[i].used { - a.slots[i].value = v - a.slots[i].used = true - - return i - } - } - - return -1 -} - -func (a *rwArray) remove(index uint) { - a.mux.Lock() - defer a.mux.Unlock() - - if int(index) >= len(a.slots) { - return - } - - a.slots[index].value = nil - a.slots[index].used = false -} - -func (a *rwArray) get(index uint) interface{} { - if int(index) >= len(a.slots) { - return nil - } - - // N.B. If slot[index].used == false, this is technically - // a race since put() might be putting the value in there - // at the same time. 
- return a.slots[index].value -} - -func (a *rwArray) capacity() uint { - return uint(len(a.slots)) -} diff --git a/vendor/github.com/aquasecurity/libbpfgo/tchook-common.go b/vendor/github.com/aquasecurity/libbpfgo/tchook-common.go deleted file mode 100644 index af06c90c2d..0000000000 --- a/vendor/github.com/aquasecurity/libbpfgo/tchook-common.go +++ /dev/null @@ -1,30 +0,0 @@ -package libbpfgo - -/* -#cgo LDFLAGS: -lelf -lz -#include "libbpfgo.h" -*/ -import "C" - -// -// TcAttachPoint -// - -type TcAttachPoint uint32 - -const ( - BPFTcIngress TcAttachPoint = C.BPF_TC_INGRESS - BPFTcEgress TcAttachPoint = C.BPF_TC_EGRESS - BPFTcIngressEgress TcAttachPoint = C.BPF_TC_INGRESS | C.BPF_TC_EGRESS - BPFTcCustom TcAttachPoint = C.BPF_TC_CUSTOM -) - -// -// TcFlags -// - -type TcFlags uint32 - -const ( - BpfTcFReplace TcFlags = C.BPF_TC_F_REPLACE -) diff --git a/vendor/github.com/aquasecurity/libbpfgo/tchook.go b/vendor/github.com/aquasecurity/libbpfgo/tchook.go deleted file mode 100644 index a30366b836..0000000000 --- a/vendor/github.com/aquasecurity/libbpfgo/tchook.go +++ /dev/null @@ -1,161 +0,0 @@ -package libbpfgo - -/* -#cgo LDFLAGS: -lelf -lz -#include "libbpfgo.h" -*/ -import "C" - -import ( - "fmt" - "net" - "syscall" -) - -// -// TcHook -// - -type TcHook struct { - hook *C.struct_bpf_tc_hook -} - -func (hook *TcHook) SetInterfaceByIndex(ifaceIdx int) { - hook.hook.ifindex = C.int(ifaceIdx) -} - -func (hook *TcHook) SetInterfaceByName(ifaceName string) error { - iface, err := net.InterfaceByName(ifaceName) - if err != nil { - return err - } - hook.hook.ifindex = C.int(iface.Index) - - return nil -} - -func (hook *TcHook) GetInterfaceIndex() int { - return int(hook.hook.ifindex) -} - -func (hook *TcHook) SetAttachPoint(attachPoint TcAttachPoint) { - hook.hook.attach_point = uint32(attachPoint) -} - -func (hook *TcHook) SetParent(a int, b int) { - parent := (((a) << 16) & 0xFFFF0000) | ((b) & 0x0000FFFF) - hook.hook.parent = C.uint(parent) -} - -func (hook *TcHook) Create() error { - retC := C.bpf_tc_hook_create(hook.hook) - if retC < 0 { - return fmt.Errorf("failed to create tc hook: %w", syscall.Errno(-retC)) - } - - return nil -} - -func (hook *TcHook) Destroy() error { - retC := C.bpf_tc_hook_destroy(hook.hook) - if retC < 0 { - return fmt.Errorf("failed to destroy tc hook: %w", syscall.Errno(-retC)) - } - - C.cgo_bpf_tc_hook_free(hook.hook) - - return nil -} - -type TcOpts struct { - ProgFd int - Flags TcFlags - ProgId uint - Handle uint - Priority uint -} - -func tcOptsToC(tcOpts *TcOpts) (*C.struct_bpf_tc_opts, error) { - if tcOpts == nil { - return nil, nil - } - - optsC, errno := C.cgo_bpf_tc_opts_new( - C.int(tcOpts.ProgFd), - C.uint(tcOpts.Flags), - C.uint(tcOpts.ProgId), - C.uint(tcOpts.Handle), - C.uint(tcOpts.Priority), - ) - if optsC == nil { - return nil, fmt.Errorf("failed to create bpf_tc_opts: %w", errno) - } - - return optsC, nil -} - -func tcOptsFromC(tcOpts *TcOpts, optsC *C.struct_bpf_tc_opts) { - if optsC == nil { - return - } - - tcOpts.ProgFd = int(C.cgo_bpf_tc_opts_prog_fd(optsC)) - tcOpts.Flags = TcFlags(C.cgo_bpf_tc_opts_flags(optsC)) - tcOpts.ProgId = uint(C.cgo_bpf_tc_opts_prog_id(optsC)) - tcOpts.Handle = uint(C.cgo_bpf_tc_opts_handle(optsC)) - tcOpts.Priority = uint(C.cgo_bpf_tc_opts_priority(optsC)) -} - -func (hook *TcHook) Attach(tcOpts *TcOpts) error { - optsC, err := tcOptsToC(tcOpts) - if err != nil { - return err - } - defer C.cgo_bpf_tc_opts_free(optsC) - - retC := C.bpf_tc_attach(hook.hook, optsC) - if retC < 0 { - return fmt.Errorf("failed to 
attach tc hook: %w", syscall.Errno(-retC)) - } - - // update tcOpts with the values from the libbpf - tcOptsFromC(tcOpts, optsC) - - return nil -} - -func (hook *TcHook) Detach(tcOpts *TcOpts) error { - optsC, err := tcOptsToC(tcOpts) - if err != nil { - return err - } - defer C.cgo_bpf_tc_opts_free(optsC) - - retC := C.bpf_tc_detach(hook.hook, optsC) - if retC < 0 { - return fmt.Errorf("failed to detach tc hook: %w", syscall.Errno(-retC)) - } - - // update tcOpts with the values from the libbpf - tcOptsFromC(tcOpts, optsC) - - return nil -} - -func (hook *TcHook) Query(tcOpts *TcOpts) error { - optsC, err := tcOptsToC(tcOpts) - if err != nil { - return err - } - defer C.cgo_bpf_tc_opts_free(optsC) - - retC := C.bpf_tc_query(hook.hook, optsC) - if retC < 0 { - return fmt.Errorf("failed to query tc hook: %w", syscall.Errno(-retC)) - } - - // update tcOpts with the values from the libbpf - tcOptsFromC(tcOpts, optsC) - - return nil -} diff --git a/vendor/github.com/cilium/ebpf/.clang-format b/vendor/github.com/cilium/ebpf/.clang-format index 4eb94b1baa..0ff4257606 100644 --- a/vendor/github.com/cilium/ebpf/.clang-format +++ b/vendor/github.com/cilium/ebpf/.clang-format @@ -4,6 +4,9 @@ BasedOnStyle: LLVM AlignAfterOpenBracket: DontAlign AlignConsecutiveAssignments: true AlignEscapedNewlines: DontAlign +# mkdocs annotations in source code are written as trailing comments +# and alignment pushes these really far away from the content. +AlignTrailingComments: false AlwaysBreakBeforeMultilineStrings: true AlwaysBreakTemplateDeclarations: false AllowAllParametersOfDeclarationOnNextLine: false @@ -14,4 +17,9 @@ KeepEmptyLinesAtTheStartOfBlocks: false TabWidth: 4 UseTab: ForContinuationAndIndentation ColumnLimit: 1000 +# Go compiler comments need to stay unindented. +CommentPragmas: '^go:.*' +# linux/bpf.h needs to be included before bpf/bpf_helpers.h for types like __u64 +# and sorting makes this impossible. +SortIncludes: false ... diff --git a/vendor/github.com/cilium/ebpf/.gitattributes b/vendor/github.com/cilium/ebpf/.gitattributes new file mode 100644 index 0000000000..113f97b980 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/.gitattributes @@ -0,0 +1 @@ +internal/sys/types.go linguist-generated=false diff --git a/vendor/github.com/cilium/ebpf/.golangci.yaml b/vendor/github.com/cilium/ebpf/.golangci.yaml index dc62dd6d0f..65f91b910b 100644 --- a/vendor/github.com/cilium/ebpf/.golangci.yaml +++ b/vendor/github.com/cilium/ebpf/.golangci.yaml @@ -1,28 +1,13 @@ --- -issues: - exclude-rules: - # syscall param structs will have unused fields in Go code. - - path: syscall.*.go - linters: - - structcheck - linters: disable-all: true enable: - - deadcode - - errcheck - goimports - gosimple - govet - ineffassign - misspell - staticcheck - - structcheck - typecheck - unused - - varcheck - - # Could be enabled later: - # - gocyclo - # - maligned - # - gosec + - gofmt diff --git a/vendor/github.com/cilium/ebpf/ARCHITECTURE.md b/vendor/github.com/cilium/ebpf/ARCHITECTURE.md deleted file mode 100644 index 8cd7e2486e..0000000000 --- a/vendor/github.com/cilium/ebpf/ARCHITECTURE.md +++ /dev/null @@ -1,86 +0,0 @@ -Architecture of the library -=== - - ELF -> Specifications -> Objects -> Links - -ELF ---- - -BPF is usually produced by using Clang to compile a subset of C. Clang outputs -an ELF file which contains program byte code (aka BPF), but also metadata for -maps used by the program. The metadata follows the conventions set by libbpf -shipped with the kernel. 
Certain ELF sections have special meaning -and contain structures defined by libbpf. Newer versions of clang emit -additional metadata in BPF Type Format (aka BTF). - -The library aims to be compatible with libbpf so that moving from a C toolchain -to a Go one creates little friction. To that end, the [ELF reader](elf_reader.go) -is tested against the Linux selftests and avoids introducing custom behaviour -if possible. - -The output of the ELF reader is a `CollectionSpec` which encodes -all of the information contained in the ELF in a form that is easy to work with -in Go. - -### BTF - -The BPF Type Format describes more than just the types used by a BPF program. It -includes debug aids like which source line corresponds to which instructions and -what global variables are used. - -[BTF parsing](internal/btf/) lives in a separate internal package since exposing -it would mean an additional maintenance burden, and because the API still -has sharp corners. The most important concept is the `btf.Type` interface, which -also describes things that aren't really types like `.rodata` or `.bss` sections. -`btf.Type`s can form cyclical graphs, which can easily lead to infinite loops if -one is not careful. Hopefully a safe pattern to work with `btf.Type` emerges as -we write more code that deals with it. - -Specifications ---- - -`CollectionSpec`, `ProgramSpec` and `MapSpec` are blueprints for in-kernel -objects and contain everything necessary to execute the relevant `bpf(2)` -syscalls. Since the ELF reader outputs a `CollectionSpec` it's possible to -modify clang-compiled BPF code, for example to rewrite constants. At the same -time the [asm](asm/) package provides an assembler that can be used to generate -`ProgramSpec` on the fly. - -Creating a spec should never require any privileges or be restricted in any way, -for example by only allowing programs in native endianness. This ensures that -the library stays flexible. - -Objects ---- - -`Program` and `Map` are the result of loading specs into the kernel. Sometimes -loading a spec will fail because the kernel is too old, or a feature is not -enabled. There are multiple ways the library deals with that: - -* Fallback: older kernels don't allow naming programs and maps. The library - automatically detects support for names, and omits them during load if - necessary. This works since name is primarily a debug aid. - -* Sentinel error: sometimes it's possible to detect that a feature isn't available. - In that case the library will return an error wrapping `ErrNotSupported`. - This is also useful to skip tests that can't run on the current kernel. - -Once program and map objects are loaded they expose the kernel's low-level API, -e.g. `NextKey`. Often this API is awkward to use in Go, so there are safer -wrappers on top of the low-level API, like `MapIterator`. The low-level API is -useful when our higher-level API doesn't support a particular use case. - -Links ---- - -BPF can be attached to many different points in the kernel and newer BPF hooks -tend to use bpf_link to do so. Older hooks unfortunately use a combination of -syscalls, netlink messages, etc. Adding support for a new link type should not -pull in large dependencies like netlink, so XDP programs or tracepoints are -out of scope. - -Each bpf_link_type has one corresponding Go type, e.g. `link.tracing` corresponds -to BPF_LINK_TRACING. In general, these types should be unexported as long as they -don't export methods outside of the Link interface. 
Each Go type may have multiple -exported constructors. For example `AttachTracing` and `AttachLSM` create a -tracing link, but are distinct functions since they may require different arguments. diff --git a/vendor/github.com/cilium/ebpf/CODEOWNERS b/vendor/github.com/cilium/ebpf/CODEOWNERS new file mode 100644 index 0000000000..ad13437ea2 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/CODEOWNERS @@ -0,0 +1,9 @@ +* @cilium/ebpf-lib-maintainers + +features/ @rgo3 +link/ @mmat11 + +perf/ @florianl +ringbuf/ @florianl + +btf/ @dylandreimerink diff --git a/vendor/github.com/cilium/ebpf/CONTRIBUTING.md b/vendor/github.com/cilium/ebpf/CONTRIBUTING.md index 0d29eae81e..673a9ac290 100644 --- a/vendor/github.com/cilium/ebpf/CONTRIBUTING.md +++ b/vendor/github.com/cilium/ebpf/CONTRIBUTING.md @@ -1,40 +1,5 @@ -# How to contribute +# Contributing to ebpf-go -Development is on [GitHub](https://github.com/cilium/ebpf) and contributions in -the form of pull requests and issues reporting bugs or suggesting new features -are welcome. Please take a look at [the architecture](ARCHITECTURE.md) to get -a better understanding for the high-level goals. - -New features must be accompanied by tests. Before starting work on any large -feature, please [join](https://ebpf.io/slack) the -[#ebpf-go](https://cilium.slack.com/messages/ebpf-go) channel on Slack to -discuss the design first. - -When submitting pull requests, consider writing details about what problem you -are solving and why the proposed approach solves that problem in commit messages -and/or pull request description to help future library users and maintainers to -reason about the proposed changes. - -## Running the tests - -Many of the tests require privileges to set resource limits and load eBPF code. -The easiest way to obtain these is to run the tests with `sudo`. - -To test the current package with your local kernel you can simply run: -``` -go test -exec sudo ./... -``` - -To test the current package with a different kernel version you can use the [run-tests.sh](run-tests.sh) script. -It requires [virtme](https://github.com/amluto/virtme) and qemu to be installed. - -Examples: - -```bash -# Run all tests on a 5.4 kernel -./run-tests.sh 5.4 - -# Run a subset of tests: -./run-tests.sh 5.4 go test ./link -``` +Want to contribute to ebpf-go? There are a few things you need to know. +We wrote a [contribution guide](https://ebpf-go.dev/contributing/) to help you get started. diff --git a/vendor/github.com/cilium/ebpf/MAINTAINERS.md b/vendor/github.com/cilium/ebpf/MAINTAINERS.md index 9c18e7e76f..a56a03e394 100644 --- a/vendor/github.com/cilium/ebpf/MAINTAINERS.md +++ b/vendor/github.com/cilium/ebpf/MAINTAINERS.md @@ -1,8 +1,3 @@ # Maintainers - * [Lorenz Bauer] - * [Timo Beckers] (Isovalent) - - -[Lorenz Bauer]: https://github.com/lmb -[Timo Beckers]: https://github.com/ti-mo +Maintainers can be found in the [Cilium Maintainers file](https://github.com/cilium/community/blob/main/roles/Maintainers.md) diff --git a/vendor/github.com/cilium/ebpf/Makefile b/vendor/github.com/cilium/ebpf/Makefile index 2d5f04c370..c55a93d9cb 100644 --- a/vendor/github.com/cilium/ebpf/Makefile +++ b/vendor/github.com/cilium/ebpf/Makefile @@ -1,9 +1,9 @@ # The development version of clang is distributed as the 'clang' binary, # while stable/released versions have a version number attached. # Pin the default clang to a stable version. 
-CLANG ?= clang-14 -STRIP ?= llvm-strip-14 -OBJCOPY ?= llvm-objcopy-14 +CLANG ?= clang-17 +STRIP ?= llvm-strip-17 +OBJCOPY ?= llvm-objcopy-17 CFLAGS := -O2 -g -Wall -Werror $(CFLAGS) CI_KERNEL_URL ?= https://github.com/cilium/ci-kernels/raw/master/ @@ -21,13 +21,11 @@ CONTAINER_RUN_ARGS ?= $(if $(filter ${CONTAINER_ENGINE}, podman), --log-driver=n IMAGE := $(shell cat ${REPODIR}/testdata/docker/IMAGE) VERSION := $(shell cat ${REPODIR}/testdata/docker/VERSION) - -# clang <8 doesn't tag relocs properly (STT_NOTYPE) -# clang 9 is the first version emitting BTF TARGETS := \ - testdata/loader-clang-7 \ - testdata/loader-clang-9 \ + testdata/loader-clang-11 \ + testdata/loader-clang-14 \ testdata/loader-$(CLANG) \ + testdata/manyprogs \ testdata/btf_map_init \ testdata/invalid_map \ testdata/raw_tracepoint \ @@ -35,13 +33,23 @@ TARGETS := \ testdata/invalid_btf_map_init \ testdata/strings \ testdata/freplace \ + testdata/fentry_fexit \ testdata/iproute2_map_compat \ testdata/map_spin_lock \ testdata/subprog_reloc \ testdata/fwd_decl \ + testdata/kconfig \ + testdata/kconfig_config \ + testdata/kfunc \ + testdata/invalid-kfunc \ + testdata/kfunc-kmod \ + testdata/constants \ + testdata/errors \ btf/testdata/relocs \ btf/testdata/relocs_read \ - btf/testdata/relocs_read_tgt + btf/testdata/relocs_read_tgt \ + btf/testdata/relocs_enum \ + cmd/bpf2go/testdata/minimal .PHONY: all clean container-all container-shell generate @@ -49,22 +57,26 @@ TARGETS := \ # Build all ELF binaries using a containerized LLVM toolchain. container-all: - ${CONTAINER_ENGINE} run --rm ${CONTAINER_RUN_ARGS} \ + +${CONTAINER_ENGINE} run --rm -t ${CONTAINER_RUN_ARGS} \ -v "${REPODIR}":/ebpf -w /ebpf --env MAKEFLAGS \ - --env CFLAGS="-fdebug-prefix-map=/ebpf=." \ --env HOME="/tmp" \ + --env BPF2GO_CC="$(CLANG)" \ + --env BPF2GO_FLAGS="-fdebug-prefix-map=/ebpf=. $(CFLAGS)" \ "${IMAGE}:${VERSION}" \ - $(MAKE) all + make all # (debug) Drop the user into a shell inside the container as root. +# Set BPF2GO_ envs to make 'make generate' just work. container-shell: ${CONTAINER_ENGINE} run --rm -ti \ -v "${REPODIR}":/ebpf -w /ebpf \ + --env BPF2GO_CC="$(CLANG)" \ + --env BPF2GO_FLAGS="-fdebug-prefix-map=/ebpf=. $(CFLAGS)" \ "${IMAGE}:${VERSION}" clean: - -$(RM) testdata/*.elf - -$(RM) btf/testdata/*.elf + find "$(CURDIR)" -name "*.elf" -delete + find "$(CURDIR)" -name "*.o" -delete format: find . -type f -name "*.c" | xargs clang-format -i @@ -73,13 +85,9 @@ all: format $(addsuffix -el.elf,$(TARGETS)) $(addsuffix -eb.elf,$(TARGETS)) gene ln -srf testdata/loader-$(CLANG)-el.elf testdata/loader-el.elf ln -srf testdata/loader-$(CLANG)-eb.elf testdata/loader-eb.elf -# $BPF_CLANG is used in go:generate invocations. -generate: export BPF_CLANG := $(CLANG) -generate: export BPF_CFLAGS := $(CFLAGS) generate: - go generate ./cmd/bpf2go/test - go generate ./internal/sys - cd examples/ && go generate ./... + go generate -run "internal/cmd/gentypes" ./... + go generate -skip "internal/cmd/gentypes" ./... 
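The `BPF2GO_CC` and `BPF2GO_FLAGS` variables exported above are read by bpf2go when `go generate` runs inside the container. For context, a rough sketch of how a consuming package wires this up — the package name, identifier and source path below are illustrative, not taken from this patch:

```go
// Package bpf is a hypothetical bpf2go consumer. The go:generate
// directive compiles the C source once per endianness and emits the
// bytecode objects (ident_bpfel.o, ident_bpfeb.o) together with Go
// scaffolding for loading them.
package bpf

//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -target bpfel,bpfeb ident src/program.bpf.c
```

With a directive like that in place, `make generate` (or a plain `go generate ./...`) rebuilds the checked-in objects whenever the C source changes.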
testdata/loader-%-el.elf: testdata/loader.c $* $(CFLAGS) -target bpfel -c $< -o $@ @@ -97,14 +105,8 @@ testdata/loader-%-eb.elf: testdata/loader.c $(CLANG) $(CFLAGS) -target bpfeb -c $< -o $@ $(STRIP) -g $@ -.PHONY: generate-btf -generate-btf: KERNEL_VERSION?=5.18 -generate-btf: - $(eval TMP := $(shell mktemp -d)) - curl -fL "$(CI_KERNEL_URL)/linux-$(KERNEL_VERSION).bz" -o "$(TMP)/bzImage" - ./testdata/extract-vmlinux "$(TMP)/bzImage" > "$(TMP)/vmlinux" - $(OBJCOPY) --dump-section .BTF=/dev/stdout "$(TMP)/vmlinux" /dev/null | gzip > "btf/testdata/vmlinux.btf.gz" - curl -fL "$(CI_KERNEL_URL)/linux-$(KERNEL_VERSION)-selftests-bpf.tgz" -o "$(TMP)/selftests.tgz" - tar -xf "$(TMP)/selftests.tgz" --to-stdout tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.ko | \ - $(OBJCOPY) --dump-section .BTF="btf/testdata/btf_testmod.btf" - /dev/null - $(RM) -r "$(TMP)" +.PHONY: update-kernel-deps +update-kernel-deps: export KERNEL_VERSION?=6.7 +update-kernel-deps: + ./testdata/sh/update-kernel-deps.sh + $(MAKE) container-all diff --git a/vendor/github.com/cilium/ebpf/README.md b/vendor/github.com/cilium/ebpf/README.md index 3e490de711..85871db1ae 100644 --- a/vendor/github.com/cilium/ebpf/README.md +++ b/vendor/github.com/cilium/ebpf/README.md @@ -2,35 +2,38 @@ [![PkgGoDev](https://pkg.go.dev/badge/github.com/cilium/ebpf)](https://pkg.go.dev/github.com/cilium/ebpf) -![HoneyGopher](.github/images/cilium-ebpf.png) +![HoneyGopher](docs/ebpf/ebpf-go.png) -eBPF is a pure Go library that provides utilities for loading, compiling, and +ebpf-go is a pure Go library that provides utilities for loading, compiling, and debugging eBPF programs. It has minimal external dependencies and is intended to be used in long running processes. -The library is maintained by [Cloudflare](https://www.cloudflare.com) and -[Cilium](https://www.cilium.io). - -See [ebpf.io](https://ebpf.io) for other projects from the eBPF ecosystem. +See [ebpf.io](https://ebpf.io) for complementary projects from the wider eBPF +ecosystem. ## Getting Started -A small collection of Go and eBPF programs that serve as examples for building -your own tools can be found under [examples/](examples/). +Please take a look at our [Getting Started] guide. -Contributions are highly encouraged, as they highlight certain use cases of +[Contributions](https://ebpf-go.dev/contributing) are highly encouraged, as they highlight certain use cases of eBPF and the library, and help shape the future of the project. ## Getting Help -Please -[join](https://ebpf.io/slack) the +The community actively monitors our [GitHub Discussions](https://github.com/cilium/ebpf/discussions) page. +Please search for existing threads before starting a new one. Refrain from +opening issues on the bug tracker if you're just starting out or if you're not +sure if something is a bug in the library code. + +Alternatively, [join](https://ebpf.io/slack) the [#ebpf-go](https://cilium.slack.com/messages/ebpf-go) channel on Slack if you -have questions regarding the library. +have other questions regarding the project. Note that this channel is ephemeral +and has its history erased past a certain point, which is less helpful for +others running into the same problem later. 
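Before the package list, here is what the "loading" part of that description looks like in practice — a minimal, hypothetical sketch in which the object file name is invented and error handling is reduced to `log.Fatal`:

```go
package main

import (
	"log"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/rlimit"
)

func main() {
	// Lift RLIMIT_MEMLOCK so map creation succeeds on kernels < 5.11.
	if err := rlimit.RemoveMemlock(); err != nil {
		log.Fatal(err)
	}

	// "program.o" is a placeholder for clang-compiled BPF bytecode.
	spec, err := ebpf.LoadCollectionSpec("program.o")
	if err != nil {
		log.Fatal(err)
	}

	// NewCollection loads every map and program in the spec into the kernel.
	coll, err := ebpf.NewCollection(spec)
	if err != nil {
		log.Fatal(err)
	}
	defer coll.Close()
}
```

The `rlimit` call is only needed on kernels before 5.11, matching the package description below.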
## Packages -This library includes the following packages: +This library includes the following packages: * [asm](https://pkg.go.dev/github.com/cilium/ebpf/asm) contains a basic assembler, allowing you to write eBPF assembly instructions directly @@ -38,7 +41,7 @@ This library includes the following packages: * [cmd/bpf2go](https://pkg.go.dev/github.com/cilium/ebpf/cmd/bpf2go) allows compiling and embedding eBPF programs written in C within Go code. As well as compiling the C code, it auto-generates Go code for loading and manipulating - the eBPF program and map objects. + the eBPF program and map objects. * [link](https://pkg.go.dev/github.com/cilium/ebpf/link) allows attaching eBPF to various hooks * [perf](https://pkg.go.dev/github.com/cilium/ebpf/perf) allows reading from a @@ -49,24 +52,14 @@ This library includes the following packages: of `bpftool feature probe` for discovering BPF-related kernel features using native Go. * [rlimit](https://pkg.go.dev/github.com/cilium/ebpf/rlimit) provides a convenient API to lift the `RLIMIT_MEMLOCK` constraint on kernels before 5.11. +* [btf](https://pkg.go.dev/github.com/cilium/ebpf/btf) allows reading the BPF Type Format. ## Requirements * A version of Go that is [supported by upstream](https://golang.org/doc/devel/release.html#policy) -* Linux >= 4.9. CI is run against kernel.org LTS releases. 4.4 should work but is - not tested against. - -## Regenerating Testdata - -Run `make` in the root of this repository to rebuild testdata in all -subpackages. This requires Docker, as it relies on a standardized build -environment to keep the build output stable. - -It is possible to regenerate data using Podman by overriding the `CONTAINER_*` -variables: `CONTAINER_ENGINE=podman CONTAINER_RUN_ARGS= make`. - -The toolchain image build files are kept in [testdata/docker/](testdata/docker/). +* CI is run against kernel.org LTS releases. >= 4.4 should work but EOL'ed versions + are not supported. ## License @@ -75,3 +68,5 @@ MIT ### eBPF Gopher The eBPF honeygopher is based on the Go gopher designed by Renee French. + +[Getting Started]: https://ebpf-go.dev/guides/getting-started/ diff --git a/vendor/github.com/cilium/ebpf/asm/alu.go b/vendor/github.com/cilium/ebpf/asm/alu.go index 70ccc4d151..282233d327 100644 --- a/vendor/github.com/cilium/ebpf/asm/alu.go +++ b/vendor/github.com/cilium/ebpf/asm/alu.go @@ -1,26 +1,26 @@ package asm -//go:generate stringer -output alu_string.go -type=Source,Endianness,ALUOp +//go:generate go run golang.org/x/tools/cmd/stringer@latest -output alu_string.go -type=Source,Endianness,ALUOp // Source of ALU / ALU64 / Branch operations // -// msb lsb -// +----+-+---+ -// |op |S|cls| -// +----+-+---+ -type Source uint8 +// msb lsb +// +------------+-+---+ +// | op |S|cls| +// +------------+-+---+ +type Source uint16 -const sourceMask OpCode = 0x08 +const sourceMask OpCode = 0x0008 // Source bitmask const ( // InvalidSource is returned by getters when invoked // on non ALU / branch OpCodes. - InvalidSource Source = 0xff + InvalidSource Source = 0xffff // ImmSource src is from constant - ImmSource Source = 0x00 + ImmSource Source = 0x0000 // RegSource src is from register - RegSource Source = 0x08 + RegSource Source = 0x0008 ) // The Endianness of a byte swap instruction. 
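The widening of `Source` (and of `ALUOp` in the next hunk) from 8 to 16 bits keeps the low byte identical to the kernel's encoding while leaving room for the extended opcodes introduced below. The S bit itself still only selects where the second operand comes from, which is easiest to see when assembling instructions by hand — a small sketch, with arbitrary registers and constants:

```go
package main

import (
	"fmt"

	"github.com/cilium/ebpf/asm"
)

func main() {
	// S bit clear (ImmSource): the second operand is the Constant field.
	addImm := asm.Instruction{
		OpCode:   asm.Add.Op(asm.ImmSource),
		Dst:      asm.R0,
		Constant: 1,
	}

	// S bit set (RegSource): the second operand is the Src register.
	addReg := asm.Instruction{
		OpCode: asm.Add.Op(asm.RegSource),
		Dst:    asm.R0,
		Src:    asm.R1,
	}

	// Prints the mnemonic form of each instruction.
	fmt.Println(addImm)
	fmt.Println(addReg)
}
```

The two opcodes differ only in the S bit: `Add.Op(ImmSource)` is 0x07 (ALU64|ADD|K) and `Add.Op(RegSource)` is 0x0f (ALU64|ADD|X).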
@@ -39,46 +39,56 @@ const ( // ALUOp are ALU / ALU64 operations // -// msb lsb -// +----+-+---+ -// |OP |s|cls| -// +----+-+---+ -type ALUOp uint8 +// msb lsb +// +-------+----+-+---+ +// | EXT | OP |s|cls| +// +-------+----+-+---+ +type ALUOp uint16 -const aluMask OpCode = 0xf0 +const aluMask OpCode = 0x3ff0 const ( // InvalidALUOp is returned by getters when invoked // on non ALU OpCodes - InvalidALUOp ALUOp = 0xff + InvalidALUOp ALUOp = 0xffff // Add - addition - Add ALUOp = 0x00 + Add ALUOp = 0x0000 // Sub - subtraction - Sub ALUOp = 0x10 + Sub ALUOp = 0x0010 // Mul - multiplication - Mul ALUOp = 0x20 + Mul ALUOp = 0x0020 // Div - division - Div ALUOp = 0x30 + Div ALUOp = 0x0030 + // SDiv - signed division + SDiv ALUOp = Div + 0x0100 // Or - bitwise or - Or ALUOp = 0x40 + Or ALUOp = 0x0040 // And - bitwise and - And ALUOp = 0x50 + And ALUOp = 0x0050 // LSh - bitwise shift left - LSh ALUOp = 0x60 + LSh ALUOp = 0x0060 // RSh - bitwise shift right - RSh ALUOp = 0x70 + RSh ALUOp = 0x0070 // Neg - sign/unsign signing bit - Neg ALUOp = 0x80 + Neg ALUOp = 0x0080 // Mod - modulo - Mod ALUOp = 0x90 + Mod ALUOp = 0x0090 + // SMod - signed modulo + SMod ALUOp = Mod + 0x0100 // Xor - bitwise xor - Xor ALUOp = 0xa0 + Xor ALUOp = 0x00a0 // Mov - move value from one place to another - Mov ALUOp = 0xb0 - // ArSh - arithmatic shift - ArSh ALUOp = 0xc0 + Mov ALUOp = 0x00b0 + // MovSX8 - move lower 8 bits, sign extended upper bits of target + MovSX8 ALUOp = Mov + 0x0100 + // MovSX16 - move lower 16 bits, sign extended upper bits of target + MovSX16 ALUOp = Mov + 0x0200 + // MovSX32 - move lower 32 bits, sign extended upper bits of target + MovSX32 ALUOp = Mov + 0x0300 + // ArSh - arithmetic shift + ArSh ALUOp = 0x00c0 // Swap - endian conversions - Swap ALUOp = 0xd0 + Swap ALUOp = 0x00d0 ) // HostTo converts from host to another endianness. @@ -102,6 +112,27 @@ func HostTo(endian Endianness, dst Register, size Size) Instruction { } } +// BSwap unconditionally reverses the order of bytes in a register. +func BSwap(dst Register, size Size) Instruction { + var imm int64 + switch size { + case Half: + imm = 16 + case Word: + imm = 32 + case DWord: + imm = 64 + default: + return Instruction{OpCode: InvalidOpCode} + } + + return Instruction{ + OpCode: OpCode(ALU64Class).SetALUOp(Swap), + Dst: dst, + Constant: imm, + } +} + // Op returns the OpCode for an ALU operation with a given source. func (op ALUOp) Op(source Source) OpCode { return OpCode(ALU64Class).SetALUOp(op).SetSource(source) diff --git a/vendor/github.com/cilium/ebpf/asm/alu_string.go b/vendor/github.com/cilium/ebpf/asm/alu_string.go index 72d3fe6292..35b406bf3f 100644 --- a/vendor/github.com/cilium/ebpf/asm/alu_string.go +++ b/vendor/github.com/cilium/ebpf/asm/alu_string.go @@ -8,7 +8,7 @@ func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. var x [1]struct{} - _ = x[InvalidSource-255] + _ = x[InvalidSource-65535] _ = x[ImmSource-0] _ = x[RegSource-8] } @@ -25,7 +25,7 @@ func (i Source) String() string { return _Source_name_0 case i == 8: return _Source_name_1 - case i == 255: + case i == 65535: return _Source_name_2 default: return "Source(" + strconv.FormatInt(int64(i), 10) + ")" @@ -62,41 +62,51 @@ func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. 
var x [1]struct{} - _ = x[InvalidALUOp-255] + _ = x[InvalidALUOp-65535] _ = x[Add-0] _ = x[Sub-16] _ = x[Mul-32] _ = x[Div-48] + _ = x[SDiv-304] _ = x[Or-64] _ = x[And-80] _ = x[LSh-96] _ = x[RSh-112] _ = x[Neg-128] _ = x[Mod-144] + _ = x[SMod-400] _ = x[Xor-160] _ = x[Mov-176] + _ = x[MovSX8-432] + _ = x[MovSX16-688] + _ = x[MovSX32-944] _ = x[ArSh-192] _ = x[Swap-208] } -const _ALUOp_name = "AddSubMulDivOrAndLShRShNegModXorMovArShSwapInvalidALUOp" +const _ALUOp_name = "AddSubMulDivOrAndLShRShNegModXorMovArShSwapSDivSModMovSX8MovSX16MovSX32InvalidALUOp" var _ALUOp_map = map[ALUOp]string{ - 0: _ALUOp_name[0:3], - 16: _ALUOp_name[3:6], - 32: _ALUOp_name[6:9], - 48: _ALUOp_name[9:12], - 64: _ALUOp_name[12:14], - 80: _ALUOp_name[14:17], - 96: _ALUOp_name[17:20], - 112: _ALUOp_name[20:23], - 128: _ALUOp_name[23:26], - 144: _ALUOp_name[26:29], - 160: _ALUOp_name[29:32], - 176: _ALUOp_name[32:35], - 192: _ALUOp_name[35:39], - 208: _ALUOp_name[39:43], - 255: _ALUOp_name[43:55], + 0: _ALUOp_name[0:3], + 16: _ALUOp_name[3:6], + 32: _ALUOp_name[6:9], + 48: _ALUOp_name[9:12], + 64: _ALUOp_name[12:14], + 80: _ALUOp_name[14:17], + 96: _ALUOp_name[17:20], + 112: _ALUOp_name[20:23], + 128: _ALUOp_name[23:26], + 144: _ALUOp_name[26:29], + 160: _ALUOp_name[29:32], + 176: _ALUOp_name[32:35], + 192: _ALUOp_name[35:39], + 208: _ALUOp_name[39:43], + 304: _ALUOp_name[43:47], + 400: _ALUOp_name[47:51], + 432: _ALUOp_name[51:57], + 688: _ALUOp_name[57:64], + 944: _ALUOp_name[64:71], + 65535: _ALUOp_name[71:83], } func (i ALUOp) String() string { diff --git a/vendor/github.com/cilium/ebpf/asm/func.go b/vendor/github.com/cilium/ebpf/asm/func.go index a14e9e2c3c..84a40b2277 100644 --- a/vendor/github.com/cilium/ebpf/asm/func.go +++ b/vendor/github.com/cilium/ebpf/asm/func.go @@ -1,6 +1,6 @@ package asm -//go:generate stringer -output func_string.go -type=BuiltinFunc +//go:generate go run golang.org/x/tools/cmd/stringer@latest -output func_string.go -type=BuiltinFunc // BuiltinFunc is a built-in eBPF function. type BuiltinFunc int32 @@ -13,15 +13,15 @@ func (_ BuiltinFunc) Max() BuiltinFunc { // // You can regenerate this list using the following gawk script: // -// /FN\(.+\),/ { -// match($1, /\((.+)\)/, r) -// split(r[1], p, "_") -// printf "Fn" -// for (i in p) { -// printf "%s%s", toupper(substr(p[i], 1, 1)), substr(p[i], 2) -// } -// print "" -// } +// /FN\(.+\),/ { +// match($1, /\(([a-z_0-9]+),/, r) +// split(r[1], p, "_") +// printf "Fn" +// for (i in p) { +// printf "%s%s", toupper(substr(p[i], 1, 1)), substr(p[i], 2) +// } +// print "" +// } // // The script expects include/uapi/linux/bpf.h as it's input. 
const ( @@ -229,6 +229,14 @@ const ( FnDynptrRead FnDynptrWrite FnDynptrData + FnTcpRawGenSyncookieIpv4 + FnTcpRawGenSyncookieIpv6 + FnTcpRawCheckSyncookieIpv4 + FnTcpRawCheckSyncookieIpv6 + FnKtimeGetTaiNs + FnUserRingbufDrain + FnCgrpStorageGet + FnCgrpStorageDelete maxBuiltinFunc ) diff --git a/vendor/github.com/cilium/ebpf/asm/func_string.go b/vendor/github.com/cilium/ebpf/asm/func_string.go index b7431b7f60..47150bc4f2 100644 --- a/vendor/github.com/cilium/ebpf/asm/func_string.go +++ b/vendor/github.com/cilium/ebpf/asm/func_string.go @@ -212,12 +212,20 @@ func _() { _ = x[FnDynptrRead-201] _ = x[FnDynptrWrite-202] _ = x[FnDynptrData-203] - _ = x[maxBuiltinFunc-204] + _ = x[FnTcpRawGenSyncookieIpv4-204] + _ = x[FnTcpRawGenSyncookieIpv6-205] + _ = x[FnTcpRawCheckSyncookieIpv4-206] + _ = x[FnTcpRawCheckSyncookieIpv6-207] + _ = x[FnKtimeGetTaiNs-208] + _ = x[FnUserRingbufDrain-209] + _ = x[FnCgrpStorageGet-210] + _ = x[FnCgrpStorageDelete-211] + _ = x[maxBuiltinFunc-212] } -const _BuiltinFunc_name = "FnUnspecFnMapLookupElemFnMapUpdateElemFnMapDeleteElemFnProbeReadFnKtimeGetNsFnTracePrintkFnGetPrandomU32FnGetSmpProcessorIdFnSkbStoreBytesFnL3CsumReplaceFnL4CsumReplaceFnTailCallFnCloneRedirectFnGetCurrentPidTgidFnGetCurrentUidGidFnGetCurrentCommFnGetCgroupClassidFnSkbVlanPushFnSkbVlanPopFnSkbGetTunnelKeyFnSkbSetTunnelKeyFnPerfEventReadFnRedirectFnGetRouteRealmFnPerfEventOutputFnSkbLoadBytesFnGetStackidFnCsumDiffFnSkbGetTunnelOptFnSkbSetTunnelOptFnSkbChangeProtoFnSkbChangeTypeFnSkbUnderCgroupFnGetHashRecalcFnGetCurrentTaskFnProbeWriteUserFnCurrentTaskUnderCgroupFnSkbChangeTailFnSkbPullDataFnCsumUpdateFnSetHashInvalidFnGetNumaNodeIdFnSkbChangeHeadFnXdpAdjustHeadFnProbeReadStrFnGetSocketCookieFnGetSocketUidFnSetHashFnSetsockoptFnSkbAdjustRoomFnRedirectMapFnSkRedirectMapFnSockMapUpdateFnXdpAdjustMetaFnPerfEventReadValueFnPerfProgReadValueFnGetsockoptFnOverrideReturnFnSockOpsCbFlagsSetFnMsgRedirectMapFnMsgApplyBytesFnMsgCorkBytesFnMsgPullDataFnBindFnXdpAdjustTailFnSkbGetXfrmStateFnGetStackFnSkbLoadBytesRelativeFnFibLookupFnSockHashUpdateFnMsgRedirectHashFnSkRedirectHashFnLwtPushEncapFnLwtSeg6StoreBytesFnLwtSeg6AdjustSrhFnLwtSeg6ActionFnRcRepeatFnRcKeydownFnSkbCgroupIdFnGetCurrentCgroupIdFnGetLocalStorageFnSkSelectReuseportFnSkbAncestorCgroupIdFnSkLookupTcpFnSkLookupUdpFnSkReleaseFnMapPushElemFnMapPopElemFnMapPeekElemFnMsgPushDataFnMsgPopDataFnRcPointerRelFnSpinLockFnSpinUnlockFnSkFullsockFnTcpSockFnSkbEcnSetCeFnGetListenerSockFnSkcLookupTcpFnTcpCheckSyncookieFnSysctlGetNameFnSysctlGetCurrentValueFnSysctlGetNewValueFnSysctlSetNewValueFnStrtolFnStrtoulFnSkStorageGetFnSkStorageDeleteFnSendSignalFnTcpGenSyncookieFnSkbOutputFnProbeReadUserFnProbeReadKernelFnProbeReadUserStrFnProbeReadKernelStrFnTcpSendAckFnSendSignalThreadFnJiffies64FnReadBranchRecordsFnGetNsCurrentPidTgidFnXdpOutputFnGetNetnsCookieFnGetCurrentAncestorCgroupIdFnSkAssignFnKtimeGetBootNsFnSeqPrintfFnSeqWriteFnSkCgroupIdFnSkAncestorCgroupIdFnRingbufOutputFnRingbufReserveFnRingbufSubmitFnRingbufDiscardFnRingbufQueryFnCsumLevelFnSkcToTcp6SockFnSkcToTcpSockFnSkcToTcpTimewaitSockFnSkcToTcpRequestSockFnSkcToUdp6SockFnGetTaskStackFnLoadHdrOptFnStoreHdrOptFnReserveHdrOptFnInodeStorageGetFnInodeStorageDeleteFnDPathFnCopyFromUserFnSnprintfBtfFnSeqPrintfBtfFnSkbCgroupClassidFnRedirectNeighFnPerCpuPtrFnThisCpuPtrFnRedirectPeerFnTaskStorageGetFnTaskStorageDeleteFnGetCurrentTaskBtfFnBprmOptsSetFnKtimeGetCoarseNsFnImaInodeHashFnSockFromFileFnCheckMtuFnForEachMapElemFnSnprintfFnSysBpfFnBtfFindByNameKindFnSysCloseFnTimerInitFnTimerSetCallbackFnTimerStart
FnTimerCancelFnGetFuncIpFnGetAttachCookieFnTaskPtRegsFnGetBranchSnapshotFnTraceVprintkFnSkcToUnixSockFnKallsymsLookupNameFnFindVmaFnLoopFnStrncmpFnGetFuncArgFnGetFuncRetFnGetFuncArgCntFnGetRetvalFnSetRetvalFnXdpGetBuffLenFnXdpLoadBytesFnXdpStoreBytesFnCopyFromUserTaskFnSkbSetTstampFnImaFileHashFnKptrXchgFnMapLookupPercpuElemFnSkcToMptcpSockFnDynptrFromMemFnRingbufReserveDynptrFnRingbufSubmitDynptrFnRingbufDiscardDynptrFnDynptrReadFnDynptrWriteFnDynptrDatamaxBuiltinFunc" +const _BuiltinFunc_name = "FnUnspecFnMapLookupElemFnMapUpdateElemFnMapDeleteElemFnProbeReadFnKtimeGetNsFnTracePrintkFnGetPrandomU32FnGetSmpProcessorIdFnSkbStoreBytesFnL3CsumReplaceFnL4CsumReplaceFnTailCallFnCloneRedirectFnGetCurrentPidTgidFnGetCurrentUidGidFnGetCurrentCommFnGetCgroupClassidFnSkbVlanPushFnSkbVlanPopFnSkbGetTunnelKeyFnSkbSetTunnelKeyFnPerfEventReadFnRedirectFnGetRouteRealmFnPerfEventOutputFnSkbLoadBytesFnGetStackidFnCsumDiffFnSkbGetTunnelOptFnSkbSetTunnelOptFnSkbChangeProtoFnSkbChangeTypeFnSkbUnderCgroupFnGetHashRecalcFnGetCurrentTaskFnProbeWriteUserFnCurrentTaskUnderCgroupFnSkbChangeTailFnSkbPullDataFnCsumUpdateFnSetHashInvalidFnGetNumaNodeIdFnSkbChangeHeadFnXdpAdjustHeadFnProbeReadStrFnGetSocketCookieFnGetSocketUidFnSetHashFnSetsockoptFnSkbAdjustRoomFnRedirectMapFnSkRedirectMapFnSockMapUpdateFnXdpAdjustMetaFnPerfEventReadValueFnPerfProgReadValueFnGetsockoptFnOverrideReturnFnSockOpsCbFlagsSetFnMsgRedirectMapFnMsgApplyBytesFnMsgCorkBytesFnMsgPullDataFnBindFnXdpAdjustTailFnSkbGetXfrmStateFnGetStackFnSkbLoadBytesRelativeFnFibLookupFnSockHashUpdateFnMsgRedirectHashFnSkRedirectHashFnLwtPushEncapFnLwtSeg6StoreBytesFnLwtSeg6AdjustSrhFnLwtSeg6ActionFnRcRepeatFnRcKeydownFnSkbCgroupIdFnGetCurrentCgroupIdFnGetLocalStorageFnSkSelectReuseportFnSkbAncestorCgroupIdFnSkLookupTcpFnSkLookupUdpFnSkReleaseFnMapPushElemFnMapPopElemFnMapPeekElemFnMsgPushDataFnMsgPopDataFnRcPointerRelFnSpinLockFnSpinUnlockFnSkFullsockFnTcpSockFnSkbEcnSetCeFnGetListenerSockFnSkcLookupTcpFnTcpCheckSyncookieFnSysctlGetNameFnSysctlGetCurrentValueFnSysctlGetNewValueFnSysctlSetNewValueFnStrtolFnStrtoulFnSkStorageGetFnSkStorageDeleteFnSendSignalFnTcpGenSyncookieFnSkbOutputFnProbeReadUserFnProbeReadKernelFnProbeReadUserStrFnProbeReadKernelStrFnTcpSendAckFnSendSignalThreadFnJiffies64FnReadBranchRecordsFnGetNsCurrentPidTgidFnXdpOutputFnGetNetnsCookieFnGetCurrentAncestorCgroupIdFnSkAssignFnKtimeGetBootNsFnSeqPrintfFnSeqWriteFnSkCgroupIdFnSkAncestorCgroupIdFnRingbufOutputFnRingbufReserveFnRingbufSubmitFnRingbufDiscardFnRingbufQueryFnCsumLevelFnSkcToTcp6SockFnSkcToTcpSockFnSkcToTcpTimewaitSockFnSkcToTcpRequestSockFnSkcToUdp6SockFnGetTaskStackFnLoadHdrOptFnStoreHdrOptFnReserveHdrOptFnInodeStorageGetFnInodeStorageDeleteFnDPathFnCopyFromUserFnSnprintfBtfFnSeqPrintfBtfFnSkbCgroupClassidFnRedirectNeighFnPerCpuPtrFnThisCpuPtrFnRedirectPeerFnTaskStorageGetFnTaskStorageDeleteFnGetCurrentTaskBtfFnBprmOptsSetFnKtimeGetCoarseNsFnImaInodeHashFnSockFromFileFnCheckMtuFnForEachMapElemFnSnprintfFnSysBpfFnBtfFindByNameKindFnSysCloseFnTimerInitFnTimerSetCallbackFnTimerStartFnTimerCancelFnGetFuncIpFnGetAttachCookieFnTaskPtRegsFnGetBranchSnapshotFnTraceVprintkFnSkcToUnixSockFnKallsymsLookupNameFnFindVmaFnLoopFnStrncmpFnGetFuncArgFnGetFuncRetFnGetFuncArgCntFnGetRetvalFnSetRetvalFnXdpGetBuffLenFnXdpLoadBytesFnXdpStoreBytesFnCopyFromUserTaskFnSkbSetTstampFnImaFileHashFnKptrXchgFnMapLookupPercpuElemFnSkcToMptcpSockFnDynptrFromMemFnRingbufReserveDynptrFnRingbufSubmitDynptrFnRingbufDiscardDynptrFnDynptrReadFnDynptrWriteFnDynptrDataFnTcpRawGenSyncookieIpv4FnTcpRawGenSyncookieIpv6FnTcpRa
wCheckSyncookieIpv4FnTcpRawCheckSyncookieIpv6FnKtimeGetTaiNsFnUserRingbufDrainFnCgrpStorageGetFnCgrpStorageDeletemaxBuiltinFunc" -var _BuiltinFunc_index = [...]uint16{0, 8, 23, 38, 53, 64, 76, 89, 104, 123, 138, 153, 168, 178, 193, 212, 230, 246, 264, 277, 289, 306, 323, 338, 348, 363, 380, 394, 406, 416, 433, 450, 466, 481, 497, 512, 528, 544, 568, 583, 596, 608, 624, 639, 654, 669, 683, 700, 714, 723, 735, 750, 763, 778, 793, 808, 828, 847, 859, 875, 894, 910, 925, 939, 952, 958, 973, 990, 1000, 1022, 1033, 1049, 1066, 1082, 1096, 1115, 1133, 1148, 1158, 1169, 1182, 1202, 1219, 1238, 1259, 1272, 1285, 1296, 1309, 1321, 1334, 1347, 1359, 1373, 1383, 1395, 1407, 1416, 1429, 1446, 1460, 1479, 1494, 1517, 1536, 1555, 1563, 1572, 1586, 1603, 1615, 1632, 1643, 1658, 1675, 1693, 1713, 1725, 1743, 1754, 1773, 1794, 1805, 1821, 1849, 1859, 1875, 1886, 1896, 1908, 1928, 1943, 1959, 1974, 1990, 2004, 2015, 2030, 2044, 2066, 2087, 2102, 2116, 2128, 2141, 2156, 2173, 2193, 2200, 2214, 2227, 2241, 2259, 2274, 2285, 2297, 2311, 2327, 2346, 2365, 2378, 2396, 2410, 2424, 2434, 2450, 2460, 2468, 2487, 2497, 2508, 2526, 2538, 2551, 2562, 2579, 2591, 2610, 2624, 2639, 2659, 2668, 2674, 2683, 2695, 2707, 2722, 2733, 2744, 2759, 2773, 2788, 2806, 2820, 2833, 2843, 2864, 2880, 2895, 2917, 2938, 2960, 2972, 2985, 2997, 3011} +var _BuiltinFunc_index = [...]uint16{0, 8, 23, 38, 53, 64, 76, 89, 104, 123, 138, 153, 168, 178, 193, 212, 230, 246, 264, 277, 289, 306, 323, 338, 348, 363, 380, 394, 406, 416, 433, 450, 466, 481, 497, 512, 528, 544, 568, 583, 596, 608, 624, 639, 654, 669, 683, 700, 714, 723, 735, 750, 763, 778, 793, 808, 828, 847, 859, 875, 894, 910, 925, 939, 952, 958, 973, 990, 1000, 1022, 1033, 1049, 1066, 1082, 1096, 1115, 1133, 1148, 1158, 1169, 1182, 1202, 1219, 1238, 1259, 1272, 1285, 1296, 1309, 1321, 1334, 1347, 1359, 1373, 1383, 1395, 1407, 1416, 1429, 1446, 1460, 1479, 1494, 1517, 1536, 1555, 1563, 1572, 1586, 1603, 1615, 1632, 1643, 1658, 1675, 1693, 1713, 1725, 1743, 1754, 1773, 1794, 1805, 1821, 1849, 1859, 1875, 1886, 1896, 1908, 1928, 1943, 1959, 1974, 1990, 2004, 2015, 2030, 2044, 2066, 2087, 2102, 2116, 2128, 2141, 2156, 2173, 2193, 2200, 2214, 2227, 2241, 2259, 2274, 2285, 2297, 2311, 2327, 2346, 2365, 2378, 2396, 2410, 2424, 2434, 2450, 2460, 2468, 2487, 2497, 2508, 2526, 2538, 2551, 2562, 2579, 2591, 2610, 2624, 2639, 2659, 2668, 2674, 2683, 2695, 2707, 2722, 2733, 2744, 2759, 2773, 2788, 2806, 2820, 2833, 2843, 2864, 2880, 2895, 2917, 2938, 2960, 2972, 2985, 2997, 3021, 3045, 3071, 3097, 3112, 3130, 3146, 3165, 3179} func (i BuiltinFunc) String() string { if i < 0 || i >= BuiltinFunc(len(_BuiltinFunc_index)-1) { diff --git a/vendor/github.com/cilium/ebpf/asm/instruction.go b/vendor/github.com/cilium/ebpf/asm/instruction.go index f17d88b518..67cd39d6f6 100644 --- a/vendor/github.com/cilium/ebpf/asm/instruction.go +++ b/vendor/github.com/cilium/ebpf/asm/instruction.go @@ -60,6 +60,34 @@ func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder) (uint64, err } ins.Offset = int16(bo.Uint16(data[2:4])) + + if ins.OpCode.Class().IsALU() { + switch ins.OpCode.ALUOp() { + case Div: + if ins.Offset == 1 { + ins.OpCode = ins.OpCode.SetALUOp(SDiv) + ins.Offset = 0 + } + case Mod: + if ins.Offset == 1 { + ins.OpCode = ins.OpCode.SetALUOp(SMod) + ins.Offset = 0 + } + case Mov: + switch ins.Offset { + case 8: + ins.OpCode = ins.OpCode.SetALUOp(MovSX8) + ins.Offset = 0 + case 16: + ins.OpCode = ins.OpCode.SetALUOp(MovSX16) + ins.Offset = 0 + case 32: + ins.OpCode = 
ins.OpCode.SetALUOp(MovSX32) + ins.Offset = 0 + } + } + } + // Convert to int32 before widening to int64 // to ensure the signed bit is carried over. ins.Constant = int64(int32(bo.Uint32(data[4:8]))) @@ -106,8 +134,38 @@ func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error) return 0, fmt.Errorf("can't marshal registers: %s", err) } + if ins.OpCode.Class().IsALU() { + newOffset := int16(0) + switch ins.OpCode.ALUOp() { + case SDiv: + ins.OpCode = ins.OpCode.SetALUOp(Div) + newOffset = 1 + case SMod: + ins.OpCode = ins.OpCode.SetALUOp(Mod) + newOffset = 1 + case MovSX8: + ins.OpCode = ins.OpCode.SetALUOp(Mov) + newOffset = 8 + case MovSX16: + ins.OpCode = ins.OpCode.SetALUOp(Mov) + newOffset = 16 + case MovSX32: + ins.OpCode = ins.OpCode.SetALUOp(Mov) + newOffset = 32 + } + if newOffset != 0 && ins.Offset != 0 { + return 0, fmt.Errorf("extended ALU opcodes should have an .Offset of 0: %s", ins) + } + ins.Offset = newOffset + } + + op, err := ins.OpCode.bpfOpCode() + if err != nil { + return 0, err + } + data := make([]byte, InstructionSize) - data[0] = byte(ins.OpCode) + data[0] = op data[1] = byte(regs) bo.PutUint16(data[2:4], uint16(ins.Offset)) bo.PutUint32(data[4:8], uint32(cons)) @@ -226,6 +284,13 @@ func (ins *Instruction) IsFunctionCall() bool { return ins.OpCode.JumpOp() == Call && ins.Src == PseudoCall } +// IsKfuncCall returns true if the instruction calls a kfunc. +// +// This is not the same thing as a BPF helper call. +func (ins *Instruction) IsKfuncCall() bool { + return ins.OpCode.JumpOp() == Call && ins.Src == PseudoKfuncCall +} + // IsLoadOfFunctionPointer returns true if the instruction loads a function pointer. func (ins *Instruction) IsLoadOfFunctionPointer() bool { return ins.OpCode.IsDWordLoad() && ins.Src == PseudoFunc @@ -291,9 +356,9 @@ func (ins Instruction) Format(f fmt.State, c rune) { goto ref } - fmt.Fprintf(f, "%v ", op) switch cls := op.Class(); { case cls.isLoadOrStore(): + fmt.Fprintf(f, "%v ", op) switch op.Mode() { case ImmMode: fmt.Fprintf(f, "dst: %s imm: %d", ins.Dst, ins.Constant) @@ -301,30 +366,50 @@ func (ins Instruction) Format(f fmt.State, c rune) { fmt.Fprintf(f, "imm: %d", ins.Constant) case IndMode: fmt.Fprintf(f, "dst: %s src: %s imm: %d", ins.Dst, ins.Src, ins.Constant) - case MemMode: + case MemMode, MemSXMode: fmt.Fprintf(f, "dst: %s src: %s off: %d imm: %d", ins.Dst, ins.Src, ins.Offset, ins.Constant) case XAddMode: fmt.Fprintf(f, "dst: %s src: %s", ins.Dst, ins.Src) } case cls.IsALU(): - fmt.Fprintf(f, "dst: %s ", ins.Dst) - if op.ALUOp() == Swap || op.Source() == ImmSource { + fmt.Fprintf(f, "%v", op) + if op == Swap.Op(ImmSource) { + fmt.Fprintf(f, "%d", ins.Constant) + } + + fmt.Fprintf(f, " dst: %s ", ins.Dst) + switch { + case op.ALUOp() == Swap: + break + case op.Source() == ImmSource: fmt.Fprintf(f, "imm: %d", ins.Constant) - } else { + default: fmt.Fprintf(f, "src: %s", ins.Src) } case cls.IsJump(): + fmt.Fprintf(f, "%v ", op) switch jop := op.JumpOp(); jop { case Call: - if ins.Src == PseudoCall { + switch ins.Src { + case PseudoCall: // bpf-to-bpf call fmt.Fprint(f, ins.Constant) - } else { + case PseudoKfuncCall: + // kfunc call + fmt.Fprintf(f, "Kfunc(%d)", ins.Constant) + default: fmt.Fprint(f, BuiltinFunc(ins.Constant)) } + case Ja: + if ins.OpCode.Class() == Jump32Class { + fmt.Fprintf(f, "imm: %d", ins.Constant) + } else { + fmt.Fprintf(f, "off: %d", ins.Offset) + } + default: fmt.Fprintf(f, "dst: %s off: %d ", ins.Dst, ins.Offset) if op.Source() == ImmSource { @@ -333,6 +418,8 @@ func (ins 
Instruction) Format(f fmt.State, c rune) { fmt.Fprintf(f, "src: %s", ins.Src) } } + default: + fmt.Fprintf(f, "%v ", op) } ref: @@ -354,6 +441,13 @@ func (ins Instruction) Size() uint64 { return uint64(InstructionSize * ins.OpCode.rawInstructions()) } +// WithMetadata sets the given Metadata on the Instruction. e.g. to copy +// Metadata from another Instruction when replacing it. +func (ins Instruction) WithMetadata(meta Metadata) Instruction { + ins.Metadata = meta + return ins +} + type symbolMeta struct{} // WithSymbol marks the Instruction as a Symbol, which other Instructions @@ -754,7 +848,8 @@ func (insns Instructions) encodeFunctionReferences() error { } switch { - case ins.IsFunctionReference() && ins.Constant == -1: + case ins.IsFunctionReference() && ins.Constant == -1, + ins.OpCode == Ja.opCode(Jump32Class, ImmSource) && ins.Constant == -1: symOffset, ok := symbolOffsets[ins.Reference()] if !ok { return fmt.Errorf("%s at insn %d: symbol %q: %w", ins.OpCode, i, ins.Reference(), ErrUnsatisfiedProgramReference) diff --git a/vendor/github.com/cilium/ebpf/asm/jump.go b/vendor/github.com/cilium/ebpf/asm/jump.go index e31e42cac5..2738d736b2 100644 --- a/vendor/github.com/cilium/ebpf/asm/jump.go +++ b/vendor/github.com/cilium/ebpf/asm/jump.go @@ -1,16 +1,16 @@ package asm -//go:generate stringer -output jump_string.go -type=JumpOp +//go:generate go run golang.org/x/tools/cmd/stringer@latest -output jump_string.go -type=JumpOp // JumpOp affect control flow. // -// msb lsb -// +----+-+---+ -// |OP |s|cls| -// +----+-+---+ +// msb lsb +// +----+-+---+ +// |OP |s|cls| +// +----+-+---+ type JumpOp uint8 -const jumpMask OpCode = aluMask +const jumpMask OpCode = 0xf0 const ( // InvalidJumpOp is returned by getters when invoked @@ -103,13 +103,21 @@ func (op JumpOp) Reg32(dst, src Register, label string) Instruction { } func (op JumpOp) opCode(class Class, source Source) OpCode { - if op == Exit || op == Call || op == Ja { + if op == Exit || op == Call { return InvalidOpCode } return OpCode(class).SetJumpOp(op).SetSource(source) } +// LongJump returns a jump always instruction with a range of [-2^31, 2^31 - 1]. +func LongJump(label string) Instruction { + return Instruction{ + OpCode: Ja.opCode(Jump32Class, ImmSource), + Constant: -1, + }.WithReference(label) +} + // Label adjusts PC to the address of the label. func (op JumpOp) Label(label string) Instruction { if op == Call { diff --git a/vendor/github.com/cilium/ebpf/asm/load_store.go b/vendor/github.com/cilium/ebpf/asm/load_store.go index 85ed286b02..cdb5c5cfa4 100644 --- a/vendor/github.com/cilium/ebpf/asm/load_store.go +++ b/vendor/github.com/cilium/ebpf/asm/load_store.go @@ -1,13 +1,13 @@ package asm -//go:generate stringer -output load_store_string.go -type=Mode,Size +//go:generate go run golang.org/x/tools/cmd/stringer@latest -output load_store_string.go -type=Mode,Size // Mode for load and store operations // -// msb lsb -// +---+--+---+ -// |MDE|sz|cls| -// +---+--+---+ +// msb lsb +// +---+--+---+ +// |MDE|sz|cls| +// +---+--+---+ type Mode uint8 const modeMask OpCode = 0xe0 @@ -24,16 +24,18 @@ const ( IndMode Mode = 0x40 // MemMode - load from memory MemMode Mode = 0x60 + // MemSXMode - load from memory, sign extension + MemSXMode Mode = 0x80 // XAddMode - add atomically across processors. 
XAddMode Mode = 0xc0 ) // Size of load and store operations // -// msb lsb -// +---+--+---+ -// |mde|SZ|cls| -// +---+--+---+ +// msb lsb +// +---+--+---+ +// |mde|SZ|cls| +// +---+--+---+ type Size uint8 const sizeMask OpCode = 0x18 @@ -73,6 +75,11 @@ func LoadMemOp(size Size) OpCode { return OpCode(LdXClass).SetMode(MemMode).SetSize(size) } +// LoadMemSXOp returns the OpCode to load a value of given size from memory sign extended. +func LoadMemSXOp(size Size) OpCode { + return OpCode(LdXClass).SetMode(MemSXMode).SetSize(size) +} + // LoadMem emits `dst = *(size *)(src + offset)`. func LoadMem(dst, src Register, offset int16, size Size) Instruction { return Instruction{ @@ -83,6 +90,20 @@ func LoadMem(dst, src Register, offset int16, size Size) Instruction { } } +// LoadMemSX emits `dst = *(size *)(src + offset)` but sign extends dst. +func LoadMemSX(dst, src Register, offset int16, size Size) Instruction { + if size == DWord { + return Instruction{OpCode: InvalidOpCode} + } + + return Instruction{ + OpCode: LoadMemSXOp(size), + Dst: dst, + Src: src, + Offset: offset, + } +} + // LoadImmOp returns the OpCode to load an immediate of given size. // // As of kernel 4.20, only DWord size is accepted. diff --git a/vendor/github.com/cilium/ebpf/asm/load_store_string.go b/vendor/github.com/cilium/ebpf/asm/load_store_string.go index 76d29a0756..c48080327c 100644 --- a/vendor/github.com/cilium/ebpf/asm/load_store_string.go +++ b/vendor/github.com/cilium/ebpf/asm/load_store_string.go @@ -13,6 +13,7 @@ func _() { _ = x[AbsMode-32] _ = x[IndMode-64] _ = x[MemMode-96] + _ = x[MemSXMode-128] _ = x[XAddMode-192] } @@ -21,8 +22,9 @@ const ( _Mode_name_1 = "AbsMode" _Mode_name_2 = "IndMode" _Mode_name_3 = "MemMode" - _Mode_name_4 = "XAddMode" - _Mode_name_5 = "InvalidMode" + _Mode_name_4 = "MemSXMode" + _Mode_name_5 = "XAddMode" + _Mode_name_6 = "InvalidMode" ) func (i Mode) String() string { @@ -35,10 +37,12 @@ func (i Mode) String() string { return _Mode_name_2 case i == 96: return _Mode_name_3 - case i == 192: + case i == 128: return _Mode_name_4 - case i == 255: + case i == 192: return _Mode_name_5 + case i == 255: + return _Mode_name_6 default: return "Mode(" + strconv.FormatInt(int64(i), 10) + ")" } diff --git a/vendor/github.com/cilium/ebpf/asm/opcode.go b/vendor/github.com/cilium/ebpf/asm/opcode.go index b11917e18b..1dfd0b171a 100644 --- a/vendor/github.com/cilium/ebpf/asm/opcode.go +++ b/vendor/github.com/cilium/ebpf/asm/opcode.go @@ -5,14 +5,14 @@ import ( "strings" ) -//go:generate stringer -output opcode_string.go -type=Class +//go:generate go run golang.org/x/tools/cmd/stringer@latest -output opcode_string.go -type=Class // Class of operations // -// msb lsb -// +---+--+---+ -// | ?? |CLS| -// +---+--+---+ +// msb lsb +// +---+--+---+ +// | ?? |CLS| +// +---+--+---+ type Class uint8 const classMask OpCode = 0x07 @@ -66,18 +66,43 @@ func (cls Class) isJumpOrALU() bool { return cls.IsJump() || cls.IsALU() } -// OpCode is a packed eBPF opcode. +// OpCode represents a single operation. +// It is not a 1:1 mapping to real eBPF opcodes. // -// Its encoding is defined by a Class value: +// The encoding varies based on a 3-bit Class: // -// msb lsb -// +----+-+---+ -// | ???? |CLS| -// +----+-+---+ -type OpCode uint8 +// 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 +// ??? 
| CLS
+//
+// For ALUClass and ALU64Class:
+//
+// 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
+// OPC |S| CLS
+//
+// For LdClass, LdXClass, StClass and StXClass:
+//
+// 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
+// 0 | MDE |SIZ| CLS
+//
+// For JumpClass, Jump32Class:
+//
+// 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
+// 0 | OPC |S| CLS
+type OpCode uint16

 // InvalidOpCode is returned by setters on OpCode
-const InvalidOpCode OpCode = 0xff
+const InvalidOpCode OpCode = 0xffff
+
+// bpfOpCode returns the actual BPF opcode.
+func (op OpCode) bpfOpCode() (byte, error) {
+	const opCodeMask = 0xff
+
+	if !valid(op, opCodeMask) {
+		return 0, fmt.Errorf("invalid opcode %x", op)
+	}
+
+	return byte(op & opCodeMask), nil
+}

 // rawInstructions returns the number of BPF instructions required
 // to encode this opcode.
@@ -147,7 +172,7 @@ func (op OpCode) JumpOp() JumpOp {
 	jumpOp := JumpOp(op & jumpMask)

 	// Some JumpOps are only supported by JumpClass, not Jump32Class.
-	if op.Class() == Jump32Class && (jumpOp == Exit || jumpOp == Call || jumpOp == Ja) {
+	if op.Class() == Jump32Class && (jumpOp == Exit || jumpOp == Call) {
 		return InvalidJumpOp
 	}

@@ -234,17 +259,24 @@ func (op OpCode) String() string {
 	}

 	case class.IsALU():
+		if op.ALUOp() == Swap && op.Class() == ALU64Class {
+			// B to make BSwap, unconditional byte swap
+			f.WriteString("B")
+		}
+
 		f.WriteString(op.ALUOp().String())
 		if op.ALUOp() == Swap {
-			// Width for Endian is controlled by Constant
-			f.WriteString(op.Endianness().String())
+			if op.Class() == ALUClass {
+				// Width for Endian is controlled by Constant
+				f.WriteString(op.Endianness().String())
+			}
 		} else {
+			f.WriteString(strings.TrimSuffix(op.Source().String(), "Source"))
+
 			if class == ALUClass {
 				f.WriteString("32")
 			}
-
-			f.WriteString(strings.TrimSuffix(op.Source().String(), "Source"))
 		}

 	case class.IsJump():
@@ -254,7 +286,7 @@
 			f.WriteString("32")
 		}

-		if jop := op.JumpOp(); jop != Exit && jop != Call {
+		if jop := op.JumpOp(); jop != Exit && jop != Call && jop != Ja {
 			f.WriteString(strings.TrimSuffix(op.Source().String(), "Source"))
 		}

diff --git a/vendor/github.com/cilium/ebpf/asm/register.go b/vendor/github.com/cilium/ebpf/asm/register.go
index dd5d44f1c1..457a3b8a88 100644
--- a/vendor/github.com/cilium/ebpf/asm/register.go
+++ b/vendor/github.com/cilium/ebpf/asm/register.go
@@ -35,10 +35,11 @@ const (

 // Pseudo registers used by 64bit loads and jumps
 const (
-	PseudoMapFD = R1 // BPF_PSEUDO_MAP_FD
-	PseudoMapValue = R2 // BPF_PSEUDO_MAP_VALUE
-	PseudoCall = R1 // BPF_PSEUDO_CALL
-	PseudoFunc = R4 // BPF_PSEUDO_FUNC
+	PseudoMapFD = R1 // BPF_PSEUDO_MAP_FD
+	PseudoMapValue = R2 // BPF_PSEUDO_MAP_VALUE
+	PseudoCall = R1 // BPF_PSEUDO_CALL
+	PseudoFunc = R4 // BPF_PSEUDO_FUNC
+	PseudoKfuncCall = R2 // BPF_PSEUDO_KFUNC_CALL
 )

 func (r Register) String() string {
diff --git a/vendor/github.com/cilium/ebpf/attachtype_string.go b/vendor/github.com/cilium/ebpf/attachtype_string.go
index de355ed909..bece896bb6 100644
--- a/vendor/github.com/cilium/ebpf/attachtype_string.go
+++ b/vendor/github.com/cilium/ebpf/attachtype_string.go
@@ -51,11 +51,25 @@ func _() {
 	_ = x[AttachSkReuseportSelect-39]
 	_ = x[AttachSkReuseportSelectOrMigrate-40]
 	_ = x[AttachPerfEvent-41]
+	_ = x[AttachTraceKprobeMulti-42]
+	_ = x[AttachLSMCgroup-43]
+	_ = x[AttachStructOps-44]
+	_ = x[AttachNetfilter-45]
+	_ = x[AttachTCXIngress-46]
+	_ = x[AttachTCXEgress-47]
+	_ = x[AttachTraceUprobeMulti-48]
+	_ = x[AttachCgroupUnixConnect-49]
+	_ = x[AttachCgroupUnixSendmsg-50]
+	_ = x[AttachCgroupUnixRecvmsg-51]
+	_ = 
x[AttachCgroupUnixGetpeername-52] + _ = x[AttachCgroupUnixGetsockname-53] + _ = x[AttachNetkitPrimary-54] + _ = x[AttachNetkitPeer-55] } -const _AttachType_name = "NoneCGroupInetEgressCGroupInetSockCreateCGroupSockOpsSkSKBStreamParserSkSKBStreamVerdictCGroupDeviceSkMsgVerdictCGroupInet4BindCGroupInet6BindCGroupInet4ConnectCGroupInet6ConnectCGroupInet4PostBindCGroupInet6PostBindCGroupUDP4SendmsgCGroupUDP6SendmsgLircMode2FlowDissectorCGroupSysctlCGroupUDP4RecvmsgCGroupUDP6RecvmsgCGroupGetsockoptCGroupSetsockoptTraceRawTpTraceFEntryTraceFExitModifyReturnLSMMacTraceIterCgroupInet4GetPeernameCgroupInet6GetPeernameCgroupInet4GetSocknameCgroupInet6GetSocknameXDPDevMapCgroupInetSockReleaseXDPCPUMapSkLookupXDPSkSKBVerdictSkReuseportSelectSkReuseportSelectOrMigratePerfEvent" +const _AttachType_name = "NoneCGroupInetEgressCGroupInetSockCreateCGroupSockOpsSkSKBStreamParserSkSKBStreamVerdictCGroupDeviceSkMsgVerdictCGroupInet4BindCGroupInet6BindCGroupInet4ConnectCGroupInet6ConnectCGroupInet4PostBindCGroupInet6PostBindCGroupUDP4SendmsgCGroupUDP6SendmsgLircMode2FlowDissectorCGroupSysctlCGroupUDP4RecvmsgCGroupUDP6RecvmsgCGroupGetsockoptCGroupSetsockoptTraceRawTpTraceFEntryTraceFExitModifyReturnLSMMacTraceIterCgroupInet4GetPeernameCgroupInet6GetPeernameCgroupInet4GetSocknameCgroupInet6GetSocknameXDPDevMapCgroupInetSockReleaseXDPCPUMapSkLookupXDPSkSKBVerdictSkReuseportSelectSkReuseportSelectOrMigratePerfEventTraceKprobeMultiLSMCgroupStructOpsNetfilterTCXIngressTCXEgressTraceUprobeMultiCgroupUnixConnectCgroupUnixSendmsgCgroupUnixRecvmsgCgroupUnixGetpeernameCgroupUnixGetsocknameNetkitPrimaryNetkitPeer" -var _AttachType_index = [...]uint16{0, 4, 20, 40, 53, 70, 88, 100, 112, 127, 142, 160, 178, 197, 216, 233, 250, 259, 272, 284, 301, 318, 334, 350, 360, 371, 381, 393, 399, 408, 430, 452, 474, 496, 505, 526, 535, 543, 546, 558, 575, 601, 610} +var _AttachType_index = [...]uint16{0, 4, 20, 40, 53, 70, 88, 100, 112, 127, 142, 160, 178, 197, 216, 233, 250, 259, 272, 284, 301, 318, 334, 350, 360, 371, 381, 393, 399, 408, 430, 452, 474, 496, 505, 526, 535, 543, 546, 558, 575, 601, 610, 626, 635, 644, 653, 663, 672, 688, 705, 722, 739, 760, 781, 794, 804} func (i AttachType) String() string { if i >= AttachType(len(_AttachType_index)-1) { diff --git a/vendor/github.com/cilium/ebpf/btf/btf.go b/vendor/github.com/cilium/ebpf/btf/btf.go index a5969332aa..204757dbf6 100644 --- a/vendor/github.com/cilium/ebpf/btf/btf.go +++ b/vendor/github.com/cilium/ebpf/btf/btf.go @@ -2,7 +2,6 @@ package btf import ( "bufio" - "bytes" "debug/elf" "encoding/binary" "errors" @@ -11,66 +10,169 @@ import ( "math" "os" "reflect" + "sync" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/sys" - "github.com/cilium/ebpf/internal/unix" ) const btfMagic = 0xeB9F // Errors returned by BTF functions. var ( - ErrNotSupported = internal.ErrNotSupported - ErrNotFound = errors.New("not found") - ErrNoExtendedInfo = errors.New("no extended info") + ErrNotSupported = internal.ErrNotSupported + ErrNotFound = errors.New("not found") + ErrNoExtendedInfo = errors.New("no extended info") + ErrMultipleMatches = errors.New("multiple matching types") ) // ID represents the unique ID of a BTF object. type ID = sys.BTFID -// Spec represents decoded BTF. -type Spec struct { - // Data from .BTF. - rawTypes []rawType - strings *stringTable - - // All types contained by the spec. For the base type, the position of - // a type in the slice is its ID. - types types +// immutableTypes is a set of types which musn't be changed. 
+type immutableTypes struct { + // All types contained by the spec, not including types from the base in + // case the spec was parsed from split BTF. + types []Type // Type IDs indexed by type. typeIDs map[Type]TypeID + // The ID of the first type in types. + firstTypeID TypeID + // Types indexed by essential name. // Includes all struct flavors and types with the same name. - namedTypes map[essentialName][]Type + namedTypes map[essentialName][]TypeID + // Byte order of the types. This affects things like struct member order + // when using bitfields. byteOrder binary.ByteOrder } -type btfHeader struct { - Magic uint16 - Version uint8 - Flags uint8 - HdrLen uint32 +func (s *immutableTypes) typeByID(id TypeID) (Type, bool) { + if id < s.firstTypeID { + return nil, false + } + + index := int(id - s.firstTypeID) + if index >= len(s.types) { + return nil, false + } + + return s.types[index], true +} + +// mutableTypes is a set of types which may be changed. +type mutableTypes struct { + imm immutableTypes + mu *sync.RWMutex // protects copies below + copies map[Type]Type // map[orig]copy + copiedTypeIDs map[Type]TypeID // map[copy]origID +} + +// add a type to the set of mutable types. +// +// Copies type and all of its children once. Repeated calls with the same type +// do not copy again. +func (mt *mutableTypes) add(typ Type, typeIDs map[Type]TypeID) Type { + mt.mu.RLock() + cpy, ok := mt.copies[typ] + mt.mu.RUnlock() + + if ok { + // Fast path: the type has been copied before. + return cpy + } + + // modifyGraphPreorder copies the type graph node by node, so we can't drop + // the lock in between. + mt.mu.Lock() + defer mt.mu.Unlock() + + return copyType(typ, typeIDs, mt.copies, mt.copiedTypeIDs) +} + +// copy a set of mutable types. +func (mt *mutableTypes) copy() mutableTypes { + mtCopy := mutableTypes{ + mt.imm, + &sync.RWMutex{}, + make(map[Type]Type, len(mt.copies)), + make(map[Type]TypeID, len(mt.copiedTypeIDs)), + } + + // Prevent concurrent modification of mt.copiedTypeIDs. + mt.mu.RLock() + defer mt.mu.RUnlock() + + copiesOfCopies := make(map[Type]Type, len(mt.copies)) + for orig, copy := range mt.copies { + // NB: We make a copy of copy, not orig, so that changes to mutable types + // are preserved. + copyOfCopy := copyType(copy, mt.copiedTypeIDs, copiesOfCopies, mtCopy.copiedTypeIDs) + mtCopy.copies[orig] = copyOfCopy + } + + return mtCopy +} + +func (mt *mutableTypes) typeID(typ Type) (TypeID, error) { + if _, ok := typ.(*Void); ok { + // Equality is weird for void, since it is a zero sized type. + return 0, nil + } + + mt.mu.RLock() + defer mt.mu.RUnlock() + + id, ok := mt.copiedTypeIDs[typ] + if !ok { + return 0, fmt.Errorf("no ID for type %s: %w", typ, ErrNotFound) + } + + return id, nil +} + +func (mt *mutableTypes) typeByID(id TypeID) (Type, bool) { + immT, ok := mt.imm.typeByID(id) + if !ok { + return nil, false + } - TypeOff uint32 - TypeLen uint32 - StringOff uint32 - StringLen uint32 + return mt.add(immT, mt.imm.typeIDs), true } -// typeStart returns the offset from the beginning of the .BTF section -// to the start of its type entries. -func (h *btfHeader) typeStart() int64 { - return int64(h.HdrLen + h.TypeOff) +func (mt *mutableTypes) anyTypesByName(name string) ([]Type, error) { + immTypes := mt.imm.namedTypes[newEssentialName(name)] + if len(immTypes) == 0 { + return nil, fmt.Errorf("type name %s: %w", name, ErrNotFound) + } + + // Return a copy to prevent changes to namedTypes. 
+ result := make([]Type, 0, len(immTypes)) + for _, id := range immTypes { + immT, ok := mt.imm.typeByID(id) + if !ok { + return nil, fmt.Errorf("no type with ID %d", id) + } + + // Match against the full name, not just the essential one + // in case the type being looked up is a struct flavor. + if immT.TypeName() == name { + result = append(result, mt.add(immT, mt.imm.typeIDs)) + } + } + return result, nil } -// stringStart returns the offset from the beginning of the .BTF section -// to the start of its string table. -func (h *btfHeader) stringStart() int64 { - return int64(h.HdrLen + h.StringOff) +// Spec allows querying a set of Types and loading the set into the +// kernel. +type Spec struct { + mutableTypes + + // String table from ELF. + strings *stringTable } // LoadSpec opens file and calls LoadSpecFromReader on it. @@ -92,10 +194,7 @@ func LoadSpecFromReader(rd io.ReaderAt) (*Spec, error) { file, err := internal.NewSafeELFFile(rd) if err != nil { if bo := guessRawBTFByteOrder(rd); bo != nil { - // Try to parse a naked BTF blob. This will return an error if - // we encounter a Datasec, since we can't fix it up. - spec, err := loadRawSpec(io.NewSectionReader(rd, 0, math.MaxInt64), bo, nil, nil) - return spec, err + return loadRawSpec(io.NewSectionReader(rd, 0, math.MaxInt64), bo, nil) } return nil, err @@ -106,7 +205,7 @@ func LoadSpecFromReader(rd io.ReaderAt) (*Spec, error) { // LoadSpecAndExtInfosFromReader reads from an ELF. // -// ExtInfos may be nil if the ELF doesn't contain section metadta. +// ExtInfos may be nil if the ELF doesn't contain section metadata. // Returns ErrNotFound if the ELF contains no BTF. func LoadSpecAndExtInfosFromReader(rd io.ReaderAt) (*Spec, *ExtInfos, error) { file, err := internal.NewSafeELFFile(rd) @@ -119,7 +218,7 @@ func LoadSpecAndExtInfosFromReader(rd io.ReaderAt) (*Spec, *ExtInfos, error) { return nil, nil, err } - extInfos, err := loadExtInfosFromELF(file, spec.types, spec.strings) + extInfos, err := loadExtInfosFromELF(file, spec) if err != nil && !errors.Is(err, ErrNotFound) { return nil, nil, err } @@ -127,40 +226,40 @@ func LoadSpecAndExtInfosFromReader(rd io.ReaderAt) (*Spec, *ExtInfos, error) { return spec, extInfos, nil } -// variableOffsets extracts all symbols offsets from an ELF and indexes them by +// symbolOffsets extracts all symbols offsets from an ELF and indexes them by // section and variable name. // // References to variables in BTF data sections carry unsigned 32-bit offsets. // Some ELF symbols (e.g. in vmlinux) may point to virtual memory that is well // beyond this range. Since these symbols cannot be described by BTF info, // ignore them here. -func variableOffsets(file *internal.SafeELFFile) (map[variable]uint32, error) { +func symbolOffsets(file *internal.SafeELFFile) (map[symbol]uint32, error) { symbols, err := file.Symbols() if err != nil { return nil, fmt.Errorf("can't read symbols: %v", err) } - variableOffsets := make(map[variable]uint32) - for _, symbol := range symbols { - if idx := symbol.Section; idx >= elf.SHN_LORESERVE && idx <= elf.SHN_HIRESERVE { + offsets := make(map[symbol]uint32) + for _, sym := range symbols { + if idx := sym.Section; idx >= elf.SHN_LORESERVE && idx <= elf.SHN_HIRESERVE { // Ignore things like SHN_ABS continue } - if symbol.Value > math.MaxUint32 { + if sym.Value > math.MaxUint32 { // VarSecinfo offset is u32, cannot reference symbols in higher regions. 
continue } - if int(symbol.Section) >= len(file.Sections) { - return nil, fmt.Errorf("symbol %s: invalid section %d", symbol.Name, symbol.Section) + if int(sym.Section) >= len(file.Sections) { + return nil, fmt.Errorf("symbol %s: invalid section %d", sym.Name, sym.Section) } - secName := file.Sections[symbol.Section].Name - variableOffsets[variable{secName, symbol.Name}] = uint32(symbol.Value) + secName := file.Sections[sym.Section].Name + offsets[symbol{secName, sym.Name}] = uint32(sym.Value) } - return variableOffsets, nil + return offsets, nil } func loadSpecFromELF(file *internal.SafeELFFile) (*Spec, error) { @@ -190,7 +289,7 @@ func loadSpecFromELF(file *internal.SafeELFFile) (*Spec, error) { return nil, fmt.Errorf("btf: %w", ErrNotFound) } - vars, err := variableOffsets(file) + offsets, err := symbolOffsets(file) if err != nil { return nil, err } @@ -199,51 +298,64 @@ func loadSpecFromELF(file *internal.SafeELFFile) (*Spec, error) { return nil, fmt.Errorf("compressed BTF is not supported") } - rawTypes, rawStrings, err := parseBTF(btfSection.ReaderAt, file.ByteOrder, nil) + spec, err := loadRawSpec(btfSection.ReaderAt, file.ByteOrder, nil) if err != nil { return nil, err } - err = fixupDatasec(rawTypes, rawStrings, sectionSizes, vars) + err = fixupDatasec(spec.imm.types, sectionSizes, offsets) if err != nil { return nil, err } - return inflateSpec(rawTypes, rawStrings, file.ByteOrder, nil) + return spec, nil } -func loadRawSpec(btf io.ReaderAt, bo binary.ByteOrder, - baseTypes types, baseStrings *stringTable) (*Spec, error) { +func loadRawSpec(btf io.ReaderAt, bo binary.ByteOrder, base *Spec) (*Spec, error) { + var ( + baseStrings *stringTable + firstTypeID TypeID + err error + ) - rawTypes, rawStrings, err := parseBTF(btf, bo, baseStrings) - if err != nil { - return nil, err - } + if base != nil { + if base.imm.firstTypeID != 0 { + return nil, fmt.Errorf("can't use split BTF as base") + } - return inflateSpec(rawTypes, rawStrings, bo, baseTypes) -} + baseStrings = base.strings -func inflateSpec(rawTypes []rawType, rawStrings *stringTable, bo binary.ByteOrder, - baseTypes types) (*Spec, error) { + firstTypeID, err = base.nextTypeID() + if err != nil { + return nil, err + } + } - types, err := inflateRawTypes(rawTypes, baseTypes, rawStrings) + types, rawStrings, err := parseBTF(btf, bo, baseStrings, base) if err != nil { return nil, err } - typeIDs, typesByName := indexTypes(types, TypeID(len(baseTypes))) + typeIDs, typesByName := indexTypes(types, firstTypeID) return &Spec{ - rawTypes: rawTypes, - namedTypes: typesByName, - typeIDs: typeIDs, - types: types, - strings: rawStrings, - byteOrder: bo, + mutableTypes{ + immutableTypes{ + types, + typeIDs, + firstTypeID, + typesByName, + bo, + }, + &sync.RWMutex{}, + make(map[Type]Type), + make(map[Type]TypeID), + }, + rawStrings, }, nil } -func indexTypes(types []Type, typeIDOffset TypeID) (map[Type]TypeID, map[essentialName][]Type) { +func indexTypes(types []Type, firstTypeID TypeID) (map[Type]TypeID, map[essentialName][]TypeID) { namedTypes := 0 for _, typ := range types { if typ.TypeName() != "" { @@ -255,100 +367,20 @@ func indexTypes(types []Type, typeIDOffset TypeID) (map[Type]TypeID, map[essenti } typeIDs := make(map[Type]TypeID, len(types)) - typesByName := make(map[essentialName][]Type, namedTypes) + typesByName := make(map[essentialName][]TypeID, namedTypes) for i, typ := range types { + id := firstTypeID + TypeID(i) + typeIDs[typ] = id + if name := newEssentialName(typ.TypeName()); name != "" { - typesByName[name] = 
append(typesByName[name], typ) + typesByName[name] = append(typesByName[name], id) } - typeIDs[typ] = TypeID(i) + typeIDOffset } return typeIDs, typesByName } -// LoadKernelSpec returns the current kernel's BTF information. -// -// Defaults to /sys/kernel/btf/vmlinux and falls back to scanning the file system -// for vmlinux ELFs. Returns an error wrapping ErrNotSupported if BTF is not enabled. -func LoadKernelSpec() (*Spec, error) { - fh, err := os.Open("/sys/kernel/btf/vmlinux") - if err == nil { - defer fh.Close() - - return loadRawSpec(fh, internal.NativeEndian, nil, nil) - } - - file, err := findVMLinux() - if err != nil { - return nil, err - } - defer file.Close() - - return loadSpecFromELF(file) -} - -// findVMLinux scans multiple well-known paths for vmlinux kernel images. -func findVMLinux() (*internal.SafeELFFile, error) { - release, err := internal.KernelRelease() - if err != nil { - return nil, err - } - - // use same list of locations as libbpf - // https://github.com/libbpf/libbpf/blob/9a3a42608dbe3731256a5682a125ac1e23bced8f/src/btf.c#L3114-L3122 - locations := []string{ - "/boot/vmlinux-%s", - "/lib/modules/%s/vmlinux-%[1]s", - "/lib/modules/%s/build/vmlinux", - "/usr/lib/modules/%s/kernel/vmlinux", - "/usr/lib/debug/boot/vmlinux-%s", - "/usr/lib/debug/boot/vmlinux-%s.debug", - "/usr/lib/debug/lib/modules/%s/vmlinux", - } - - for _, loc := range locations { - file, err := internal.OpenSafeELFFile(fmt.Sprintf(loc, release)) - if errors.Is(err, os.ErrNotExist) { - continue - } - return file, err - } - - return nil, fmt.Errorf("no BTF found for kernel version %s: %w", release, internal.ErrNotSupported) -} - -// parseBTFHeader parses the header of the .BTF section. -func parseBTFHeader(r io.Reader, bo binary.ByteOrder) (*btfHeader, error) { - var header btfHeader - if err := binary.Read(r, bo, &header); err != nil { - return nil, fmt.Errorf("can't read header: %v", err) - } - - if header.Magic != btfMagic { - return nil, fmt.Errorf("incorrect magic value %v", header.Magic) - } - - if header.Version != 1 { - return nil, fmt.Errorf("unexpected version %v", header.Version) - } - - if header.Flags != 0 { - return nil, fmt.Errorf("unsupported flags %v", header.Flags) - } - - remainder := int64(header.HdrLen) - int64(binary.Size(&header)) - if remainder < 0 { - return nil, errors.New("header length shorter than btfHeader size") - } - - if _, err := io.CopyN(internal.DiscardZeroes{}, r, remainder); err != nil { - return nil, fmt.Errorf("header padding: %v", err) - } - - return &header, nil -} - func guessRawBTFByteOrder(r io.ReaderAt) binary.ByteOrder { buf := new(bufio.Reader) for _, bo := range []binary.ByteOrder{ @@ -366,7 +398,7 @@ func guessRawBTFByteOrder(r io.ReaderAt) binary.ByteOrder { // parseBTF reads a .BTF section into memory and parses it into a list of // raw types and a string table. 
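The loaders above are the public entry points; before the parseBTF internals below, a minimal usage sketch (not part of the vendored file) of how downstream code consumes them. The object path is illustrative — any BTF-carrying ELF works — and LoadKernelSpec, which this version moves into btf/kernel.go, keeps its signature:

package main

import (
	"fmt"

	"github.com/cilium/ebpf/btf"
)

func main() {
	// Decode the types embedded in a compiled BPF object. LoadSpec falls
	// back to guessRawBTFByteOrder for naked BTF blobs, as shown above.
	spec, err := btf.LoadSpec("kepler.bpfel.o")
	if err != nil {
		panic(err)
	}
	fmt.Println("object BTF loaded:", spec != nil)

	// Kernel BTF, typically used as the CO-RE relocation target.
	if kernel, err := btf.LoadKernelSpec(); err == nil {
		fmt.Println("kernel BTF loaded:", kernel != nil)
	}
}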
-func parseBTF(btf io.ReaderAt, bo binary.ByteOrder, baseStrings *stringTable) ([]rawType, *stringTable, error) { +func parseBTF(btf io.ReaderAt, bo binary.ByteOrder, baseStrings *stringTable, base *Spec) ([]Type, *stringTable, error) { buf := internal.NewBufferedSectionReader(btf, 0, math.MaxInt64) header, err := parseBTFHeader(buf, bo) if err != nil { @@ -380,146 +412,120 @@ func parseBTF(btf io.ReaderAt, bo binary.ByteOrder, baseStrings *stringTable) ([ } buf.Reset(io.NewSectionReader(btf, header.typeStart(), int64(header.TypeLen))) - rawTypes, err := readTypes(buf, bo, header.TypeLen) + types, err := readAndInflateTypes(buf, bo, header.TypeLen, rawStrings, base) if err != nil { - return nil, nil, fmt.Errorf("can't read types: %w", err) + return nil, nil, err } - return rawTypes, rawStrings, nil + return types, rawStrings, nil } -type variable struct { +type symbol struct { section string name string } -func fixupDatasec(rawTypes []rawType, rawStrings *stringTable, sectionSizes map[string]uint32, variableOffsets map[variable]uint32) error { - for i, rawType := range rawTypes { - if rawType.Kind() != kindDatasec { +// fixupDatasec attempts to patch up missing info in Datasecs and its members by +// supplementing them with information from the ELF headers and symbol table. +func fixupDatasec(types []Type, sectionSizes map[string]uint32, offsets map[symbol]uint32) error { + for _, typ := range types { + ds, ok := typ.(*Datasec) + if !ok { continue } - name, err := rawStrings.Lookup(rawType.NameOff) - if err != nil { - return err - } + name := ds.Name + + // Some Datasecs are virtual and don't have corresponding ELF sections. + switch name { + case ".ksyms": + // .ksyms describes forward declarations of kfunc signatures. + // Nothing to fix up, all sizes and offsets are 0. + for _, vsi := range ds.Vars { + _, ok := vsi.Type.(*Func) + if !ok { + // Only Funcs are supported in the .ksyms Datasec. + return fmt.Errorf("data section %s: expected *btf.Func, not %T: %w", name, vsi.Type, ErrNotSupported) + } + } - if name == ".kconfig" || name == ".ksyms" { - return fmt.Errorf("reference to %s: %w", name, ErrNotSupported) + continue + case ".kconfig": + // .kconfig has a size of 0 and has all members' offsets set to 0. + // Fix up all offsets and set the Datasec's size. + if err := fixupDatasecLayout(ds); err != nil { + return err + } + + // Fix up extern to global linkage to avoid a BTF verifier error. 
+ for _, vsi := range ds.Vars { + vsi.Type.(*Var).Linkage = GlobalVar + } + + continue } - if rawTypes[i].SizeType != 0 { + if ds.Size != 0 { continue } - size, ok := sectionSizes[name] + ds.Size, ok = sectionSizes[name] if !ok { return fmt.Errorf("data section %s: missing size", name) } - rawTypes[i].SizeType = size - - secinfos := rawType.data.([]btfVarSecinfo) - for j, secInfo := range secinfos { - id := int(secInfo.Type - 1) - if id >= len(rawTypes) { - return fmt.Errorf("data section %s: invalid type id %d for variable %d", name, id, j) - } - - varName, err := rawStrings.Lookup(rawTypes[id].NameOff) - if err != nil { - return fmt.Errorf("data section %s: can't get name for type %d: %w", name, id, err) - } - - offset, ok := variableOffsets[variable{name, varName}] + for i := range ds.Vars { + symName := ds.Vars[i].Type.TypeName() + ds.Vars[i].Offset, ok = offsets[symbol{name, symName}] if !ok { - return fmt.Errorf("data section %s: missing offset for variable %s", name, varName) + return fmt.Errorf("data section %s: missing offset for symbol %s", name, symName) } - - secinfos[j].Offset = offset } } return nil } -// Copy creates a copy of Spec. -func (s *Spec) Copy() *Spec { - types := copyTypes(s.types, nil) - - typeIDOffset := TypeID(0) - if len(s.types) != 0 { - typeIDOffset = s.typeIDs[s.types[0]] - } - typeIDs, typesByName := indexTypes(types, typeIDOffset) - - // NB: Other parts of spec are not copied since they are immutable. - return &Spec{ - s.rawTypes, - s.strings, - types, - typeIDs, - typesByName, - s.byteOrder, - } -} - -type marshalOpts struct { - ByteOrder binary.ByteOrder - StripFuncLinkage bool -} - -func (s *Spec) marshal(opts marshalOpts) ([]byte, error) { - var ( - buf bytes.Buffer - header = new(btfHeader) - headerLen = binary.Size(header) - ) - - // Reserve space for the header. We have to write it last since - // we don't know the size of the type section yet. - _, _ = buf.Write(make([]byte, headerLen)) +// fixupDatasecLayout populates ds.Vars[].Offset according to var sizes and +// alignment. Calculate and set ds.Size. +func fixupDatasecLayout(ds *Datasec) error { + var off uint32 - // Write type section, just after the header. - for _, raw := range s.rawTypes { - switch { - case opts.StripFuncLinkage && raw.Kind() == kindFunc: - raw.SetLinkage(StaticFunc) + for i, vsi := range ds.Vars { + v, ok := vsi.Type.(*Var) + if !ok { + return fmt.Errorf("member %d: unsupported type %T", i, vsi.Type) } - if err := raw.Marshal(&buf, opts.ByteOrder); err != nil { - return nil, fmt.Errorf("can't marshal BTF: %w", err) + size, err := Sizeof(v.Type) + if err != nil { + return fmt.Errorf("variable %s: getting size: %w", v.Name, err) + } + align, err := alignof(v.Type) + if err != nil { + return fmt.Errorf("variable %s: getting alignment: %w", v.Name, err) } - } - typeLen := uint32(buf.Len() - headerLen) + // Align the current member based on the offset of the end of the previous + // member and the alignment of the current member. + off = internal.Align(off, uint32(align)) - // Write string section after type section. - stringsLen := s.strings.Length() - buf.Grow(stringsLen) - if err := s.strings.Marshal(&buf); err != nil { - return nil, err - } + ds.Vars[i].Offset = off - // Fill out the header, and write it out. 
- header = &btfHeader{ - Magic: btfMagic, - Version: 1, - Flags: 0, - HdrLen: uint32(headerLen), - TypeOff: 0, - TypeLen: typeLen, - StringOff: typeLen, - StringLen: uint32(stringsLen), + off += uint32(size) } - raw := buf.Bytes() - err := binary.Write(sliceWriter(raw[:headerLen]), opts.ByteOrder, header) - if err != nil { - return nil, fmt.Errorf("can't write header: %v", err) - } + ds.Size = off + + return nil +} - return raw, nil +// Copy creates a copy of Spec. +func (s *Spec) Copy() *Spec { + return &Spec{ + s.mutableTypes.copy(), + s.strings, + } } type sliceWriter []byte @@ -532,29 +538,34 @@ func (sw sliceWriter) Write(p []byte) (int, error) { return copy(sw, p), nil } +// nextTypeID returns the next unallocated type ID or an error if there are no +// more type IDs. +func (s *Spec) nextTypeID() (TypeID, error) { + id := s.imm.firstTypeID + TypeID(len(s.imm.types)) + if id < s.imm.firstTypeID { + return 0, fmt.Errorf("no more type IDs") + } + return id, nil +} + // TypeByID returns the BTF Type with the given type ID. // // Returns an error wrapping ErrNotFound if a Type with the given ID // does not exist in the Spec. func (s *Spec) TypeByID(id TypeID) (Type, error) { - return s.types.ByID(id) + typ, ok := s.typeByID(id) + if !ok { + return nil, fmt.Errorf("look up type with ID %d (first ID is %d): %w", id, s.imm.firstTypeID, ErrNotFound) + } + + return typ, nil } // TypeID returns the ID for a given Type. // -// Returns an error wrapping ErrNoFound if the type isn't part of the Spec. +// Returns an error wrapping [ErrNotFound] if the type isn't part of the Spec. func (s *Spec) TypeID(typ Type) (TypeID, error) { - if _, ok := typ.(*Void); ok { - // Equality is weird for void, since it is a zero sized type. - return 0, nil - } - - id, ok := s.typeIDs[typ] - if !ok { - return 0, fmt.Errorf("no ID for type %s: %w", typ, ErrNotFound) - } - - return id, nil + return s.mutableTypes.typeID(typ) } // AnyTypesByName returns a list of BTF Types with the given name. @@ -565,21 +576,7 @@ func (s *Spec) TypeID(typ Type) (TypeID, error) { // // Returns an error wrapping ErrNotFound if no matching Type exists in the Spec. func (s *Spec) AnyTypesByName(name string) ([]Type, error) { - types := s.namedTypes[newEssentialName(name)] - if len(types) == 0 { - return nil, fmt.Errorf("type name %s: %w", name, ErrNotFound) - } - - // Return a copy to prevent changes to namedTypes. - result := make([]Type, 0, len(types)) - for _, t := range types { - // Match against the full name, not just the essential one - // in case the type being looked up is a struct flavor. - if t.TypeName() == name { - result = append(result, t) - } - } - return result, nil + return s.mutableTypes.anyTypesByName(name) } // AnyTypeByName returns a Type with the given name. @@ -598,17 +595,19 @@ func (s *Spec) AnyTypeByName(name string) (Type, error) { return types[0], nil } -// TypeByName searches for a Type with a specific name. Since multiple -// Types with the same name can exist, the parameter typ is taken to -// narrow down the search in case of a clash. +// TypeByName searches for a Type with a specific name. Since multiple Types +// with the same name can exist, the parameter typ is taken to narrow down the +// search in case of a clash. // -// typ must be a non-nil pointer to an implementation of a Type. -// On success, the address of the found Type will be copied to typ. +// typ must be a non-nil pointer to an implementation of a Type. On success, the +// address of the found Type will be copied to typ. 
// -// Returns an error wrapping ErrNotFound if no matching -// Type exists in the Spec. If multiple candidates are found, -// an error is returned. +// Returns an error wrapping ErrNotFound if no matching Type exists in the Spec. +// Returns an error wrapping ErrMultipleTypes if multiple candidates are found. func (s *Spec) TypeByName(name string, typ interface{}) error { + typeInterface := reflect.TypeOf((*Type)(nil)).Elem() + + // typ may be **T or *Type typValue := reflect.ValueOf(typ) if typValue.Kind() != reflect.Ptr { return fmt.Errorf("%T is not a pointer", typ) @@ -620,7 +619,12 @@ func (s *Spec) TypeByName(name string, typ interface{}) error { } wanted := typPtr.Type() - if !wanted.AssignableTo(reflect.TypeOf((*Type)(nil)).Elem()) { + if wanted == typeInterface { + // This is *Type. Unwrap the value's type. + wanted = typPtr.Elem().Type() + } + + if !wanted.AssignableTo(typeInterface) { return fmt.Errorf("%T does not satisfy Type interface", typ) } @@ -636,14 +640,14 @@ func (s *Spec) TypeByName(name string, typ interface{}) error { } if candidate != nil { - return fmt.Errorf("type %s: multiple candidates for %T", name, typ) + return fmt.Errorf("type %s(%T): %w", name, typ, ErrMultipleMatches) } candidate = typ } if candidate == nil { - return fmt.Errorf("type %s: %w", name, ErrNotFound) + return fmt.Errorf("%s %s: %w", wanted, name, ErrNotFound) } typPtr.Set(reflect.ValueOf(candidate)) @@ -656,242 +660,32 @@ func (s *Spec) TypeByName(name string, typ interface{}) error { // Types from base are used to resolve references in the split BTF. // The returned Spec only contains types from the split BTF, not from the base. func LoadSplitSpecFromReader(r io.ReaderAt, base *Spec) (*Spec, error) { - return loadRawSpec(r, internal.NativeEndian, base.types, base.strings) + return loadRawSpec(r, internal.NativeEndian, base) } // TypesIterator iterates over types of a given spec. type TypesIterator struct { - spec *Spec - index int + spec *Spec + id TypeID + done bool // The last visited type in the spec. Type Type } // Iterate returns the types iterator. func (s *Spec) Iterate() *TypesIterator { - return &TypesIterator{spec: s, index: 0} + return &TypesIterator{spec: s, id: s.imm.firstTypeID} } // Next returns true as long as there are any remaining types. func (iter *TypesIterator) Next() bool { - if len(iter.spec.types) <= iter.index { + if iter.done { return false } - iter.Type = iter.spec.types[iter.index] - iter.index++ - return true -} - -// Handle is a reference to BTF loaded into the kernel. -type Handle struct { - fd *sys.FD - - // Size of the raw BTF in bytes. - size uint32 + var ok bool + iter.Type, ok = iter.spec.typeByID(iter.id) + iter.id++ + iter.done = !ok + return !iter.done } - -// NewHandle loads BTF into the kernel. -// -// Returns ErrNotSupported if BTF is not supported. 
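Before the Handle plumbing removed below, a short sketch of the reworked lookup and iteration APIs above. "task_struct" is only an example name, and the sketch assumes kernel BTF is available:

package main

import (
	"errors"
	"fmt"

	"github.com/cilium/ebpf/btf"
)

func main() {
	spec, err := btf.LoadKernelSpec()
	if err != nil {
		panic(err)
	}

	// TypeByName narrows by the pointee: **btf.Struct requests a struct.
	// The new unwrap above also accepts a *btf.Type to match any kind.
	var task *btf.Struct
	switch err := spec.TypeByName("task_struct", &task); {
	case errors.Is(err, btf.ErrMultipleMatches):
		fmt.Println("ambiguous name, disambiguate via AnyTypesByName")
	case err != nil:
		panic(err)
	default:
		fmt.Println("found", task.Name)
	}

	// The iterator now resolves types lazily by ID instead of walking a
	// materialised slice.
	iter := spec.Iterate()
	count := 0
	for iter.Next() {
		count++
	}
	fmt.Println("types visited:", count)
}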
-func NewHandle(spec *Spec) (*Handle, error) { - if err := haveBTF(); err != nil { - return nil, err - } - - if spec.byteOrder != internal.NativeEndian { - return nil, fmt.Errorf("can't load %s BTF on %s", spec.byteOrder, internal.NativeEndian) - } - - btf, err := spec.marshal(marshalOpts{ - ByteOrder: internal.NativeEndian, - StripFuncLinkage: haveFuncLinkage() != nil, - }) - if err != nil { - return nil, fmt.Errorf("can't marshal BTF: %w", err) - } - - if uint64(len(btf)) > math.MaxUint32 { - return nil, errors.New("BTF exceeds the maximum size") - } - - attr := &sys.BtfLoadAttr{ - Btf: sys.NewSlicePointer(btf), - BtfSize: uint32(len(btf)), - } - - fd, err := sys.BtfLoad(attr) - if err != nil { - logBuf := make([]byte, 64*1024) - attr.BtfLogBuf = sys.NewSlicePointer(logBuf) - attr.BtfLogSize = uint32(len(logBuf)) - attr.BtfLogLevel = 1 - // NB: The syscall will never return ENOSPC as of 5.18-rc4. - _, _ = sys.BtfLoad(attr) - return nil, internal.ErrorWithLog(err, logBuf) - } - - return &Handle{fd, attr.BtfSize}, nil -} - -// NewHandleFromID returns the BTF handle for a given id. -// -// Prefer calling [ebpf.Program.Handle] or [ebpf.Map.Handle] if possible. -// -// Returns ErrNotExist, if there is no BTF with the given id. -// -// Requires CAP_SYS_ADMIN. -func NewHandleFromID(id ID) (*Handle, error) { - fd, err := sys.BtfGetFdById(&sys.BtfGetFdByIdAttr{ - Id: uint32(id), - }) - if err != nil { - return nil, fmt.Errorf("get FD for ID %d: %w", id, err) - } - - info, err := newHandleInfoFromFD(fd) - if err != nil { - _ = fd.Close() - return nil, err - } - - return &Handle{fd, info.size}, nil -} - -// Spec parses the kernel BTF into Go types. -// -// base is used to decode split BTF and may be nil. -func (h *Handle) Spec(base *Spec) (*Spec, error) { - var btfInfo sys.BtfInfo - btfBuffer := make([]byte, h.size) - btfInfo.Btf, btfInfo.BtfSize = sys.NewSlicePointerLen(btfBuffer) - - if err := sys.ObjInfo(h.fd, &btfInfo); err != nil { - return nil, err - } - - var baseTypes types - var baseStrings *stringTable - if base != nil { - baseTypes = base.types - baseStrings = base.strings - } - - return loadRawSpec(bytes.NewReader(btfBuffer), internal.NativeEndian, baseTypes, baseStrings) -} - -// Close destroys the handle. -// -// Subsequent calls to FD will return an invalid value. -func (h *Handle) Close() error { - if h == nil { - return nil - } - - return h.fd.Close() -} - -// FD returns the file descriptor for the handle. -func (h *Handle) FD() int { - return h.fd.Int() -} - -// Info returns metadata about the handle. -func (h *Handle) Info() (*HandleInfo, error) { - return newHandleInfoFromFD(h.fd) -} - -func marshalBTF(types interface{}, strings []byte, bo binary.ByteOrder) []byte { - const minHeaderLength = 24 - - typesLen := uint32(binary.Size(types)) - header := btfHeader{ - Magic: btfMagic, - Version: 1, - HdrLen: minHeaderLength, - TypeOff: 0, - TypeLen: typesLen, - StringOff: typesLen, - StringLen: uint32(len(strings)), - } - - buf := new(bytes.Buffer) - _ = binary.Write(buf, bo, &header) - _ = binary.Write(buf, bo, types) - buf.Write(strings) - - return buf.Bytes() -} - -var haveBTF = internal.FeatureTest("BTF", "5.1", func() error { - var ( - types struct { - Integer btfType - Var btfType - btfVar struct{ Linkage uint32 } - } - strings = []byte{0, 'a', 0} - ) - - // We use a BTF_KIND_VAR here, to make sure that - // the kernel understands BTF at least as well as we - // do. BTF_KIND_VAR was introduced ~5.1. 
- types.Integer.SetKind(kindPointer) - types.Var.NameOff = 1 - types.Var.SetKind(kindVar) - types.Var.SizeType = 1 - - btf := marshalBTF(&types, strings, internal.NativeEndian) - - fd, err := sys.BtfLoad(&sys.BtfLoadAttr{ - Btf: sys.NewSlicePointer(btf), - BtfSize: uint32(len(btf)), - }) - if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) { - // Treat both EINVAL and EPERM as not supported: loading the program - // might still succeed without BTF. - return internal.ErrNotSupported - } - if err != nil { - return err - } - - fd.Close() - return nil -}) - -var haveFuncLinkage = internal.FeatureTest("BTF func linkage", "5.6", func() error { - if err := haveBTF(); err != nil { - return err - } - - var ( - types struct { - FuncProto btfType - Func btfType - } - strings = []byte{0, 'a', 0} - ) - - types.FuncProto.SetKind(kindFuncProto) - types.Func.SetKind(kindFunc) - types.Func.SizeType = 1 // aka FuncProto - types.Func.NameOff = 1 - types.Func.SetLinkage(GlobalFunc) - - btf := marshalBTF(&types, strings, internal.NativeEndian) - - fd, err := sys.BtfLoad(&sys.BtfLoadAttr{ - Btf: sys.NewSlicePointer(btf), - BtfSize: uint32(len(btf)), - }) - if errors.Is(err, unix.EINVAL) { - return internal.ErrNotSupported - } - if err != nil { - return err - } - - fd.Close() - return nil -}) diff --git a/vendor/github.com/cilium/ebpf/btf/btf_types.go b/vendor/github.com/cilium/ebpf/btf/btf_types.go index 4810180494..f0e327abc0 100644 --- a/vendor/github.com/cilium/ebpf/btf/btf_types.go +++ b/vendor/github.com/cilium/ebpf/btf/btf_types.go @@ -2,37 +2,46 @@ package btf import ( "encoding/binary" + "errors" "fmt" "io" + "unsafe" + + "github.com/cilium/ebpf/internal" ) -//go:generate stringer -linecomment -output=btf_types_string.go -type=FuncLinkage,VarLinkage +//go:generate go run golang.org/x/tools/cmd/stringer@latest -linecomment -output=btf_types_string.go -type=FuncLinkage,VarLinkage,btfKind // btfKind describes a Type. type btfKind uint8 // Equivalents of the BTF_KIND_* constants. const ( - kindUnknown btfKind = iota - kindInt - kindPointer - kindArray - kindStruct - kindUnion - kindEnum - kindForward - kindTypedef - kindVolatile - kindConst - kindRestrict + kindUnknown btfKind = iota // Unknown + kindInt // Int + kindPointer // Pointer + kindArray // Array + kindStruct // Struct + kindUnion // Union + kindEnum // Enum + kindForward // Forward + kindTypedef // Typedef + kindVolatile // Volatile + kindConst // Const + kindRestrict // Restrict // Added ~4.20 - kindFunc - kindFuncProto + kindFunc // Func + kindFuncProto // FuncProto // Added ~5.1 - kindVar - kindDatasec + kindVar // Var + kindDatasec // Datasec // Added ~5.13 - kindFloat + kindFloat // Float + // Added 5.16 + kindDeclTag // DeclTag + kindTypeTag // TypeTag + // Added 6.0 + kindEnum64 // Enum64 ) // FuncLinkage describes BTF function linkage metadata. @@ -63,6 +72,65 @@ const ( btfTypeKindFlagMask = 1 ) +var btfHeaderLen = binary.Size(&btfHeader{}) + +type btfHeader struct { + Magic uint16 + Version uint8 + Flags uint8 + HdrLen uint32 + + TypeOff uint32 + TypeLen uint32 + StringOff uint32 + StringLen uint32 +} + +// typeStart returns the offset from the beginning of the .BTF section +// to the start of its type entries. +func (h *btfHeader) typeStart() int64 { + return int64(h.HdrLen + h.TypeOff) +} + +// stringStart returns the offset from the beginning of the .BTF section +// to the start of its string table. 
+func (h *btfHeader) stringStart() int64 { + return int64(h.HdrLen + h.StringOff) +} + +// parseBTFHeader parses the header of the .BTF section. +func parseBTFHeader(r io.Reader, bo binary.ByteOrder) (*btfHeader, error) { + var header btfHeader + if err := binary.Read(r, bo, &header); err != nil { + return nil, fmt.Errorf("can't read header: %v", err) + } + + if header.Magic != btfMagic { + return nil, fmt.Errorf("incorrect magic value %v", header.Magic) + } + + if header.Version != 1 { + return nil, fmt.Errorf("unexpected version %v", header.Version) + } + + if header.Flags != 0 { + return nil, fmt.Errorf("unsupported flags %v", header.Flags) + } + + remainder := int64(header.HdrLen) - int64(binary.Size(&header)) + if remainder < 0 { + return nil, errors.New("header length shorter than btfHeader size") + } + + if _, err := io.CopyN(internal.DiscardZeroes{}, r, remainder); err != nil { + return nil, fmt.Errorf("header padding: %v", err) + } + + return &header, nil +} + +var btfTypeLen = binary.Size(btfType{}) + // btfType is equivalent to struct btf_type in Documentation/bpf/btf.rst. type btfType struct { NameOff uint32 @@ -85,45 +153,17 @@ type btfType struct { SizeType uint32 } -func (k btfKind) String() string { - switch k { - case kindUnknown: - return "Unknown" - case kindInt: - return "Integer" - case kindPointer: - return "Pointer" - case kindArray: - return "Array" - case kindStruct: - return "Struct" - case kindUnion: - return "Union" - case kindEnum: - return "Enumeration" - case kindForward: - return "Forward" - case kindTypedef: - return "Typedef" - case kindVolatile: - return "Volatile" - case kindConst: - return "Const" - case kindRestrict: - return "Restrict" - case kindFunc: - return "Function" - case kindFuncProto: - return "Function Proto" - case kindVar: - return "Variable" - case kindDatasec: - return "Section" - case kindFloat: - return "Float" - default: - return fmt.Sprintf("Unknown (%d)", k) +var btfTypeSize = int(unsafe.Sizeof(btfType{})) + +func unmarshalBtfType(bt *btfType, b []byte, bo binary.ByteOrder) (int, error) { + if len(b) < btfTypeSize { + return 0, fmt.Errorf("not enough bytes to unmarshal btfType") } + + bt.NameOff = bo.Uint32(b[0:]) + bt.Info = bo.Uint32(b[4:]) + bt.SizeType = bo.Uint32(b[8:]) + return btfTypeSize, nil } func mask(len uint32) uint32 { @@ -164,10 +204,43 @@ func (bt *btfType) SetVlen(vlen int) { bt.setInfo(uint32(vlen), btfTypeVlenMask, btfTypeVlenShift) } -func (bt *btfType) KindFlag() bool { +func (bt *btfType) kindFlagBool() bool { return bt.info(btfTypeKindFlagMask, btfTypeKindFlagShift) == 1 } +func (bt *btfType) setKindFlagBool(set bool) { + var value uint32 + if set { + value = 1 + } + bt.setInfo(value, btfTypeKindFlagMask, btfTypeKindFlagShift) +} + +// Bitfield returns true if the struct or union contain a bitfield. 
+func (bt *btfType) Bitfield() bool { + return bt.kindFlagBool() +} + +func (bt *btfType) SetBitfield(isBitfield bool) { + bt.setKindFlagBool(isBitfield) +} + +func (bt *btfType) FwdKind() FwdKind { + return FwdKind(bt.info(btfTypeKindFlagMask, btfTypeKindFlagShift)) +} + +func (bt *btfType) SetFwdKind(kind FwdKind) { + bt.setInfo(uint32(kind), btfTypeKindFlagMask, btfTypeKindFlagShift) +} + +func (bt *btfType) Signed() bool { + return bt.kindFlagBool() +} + +func (bt *btfType) SetSigned(signed bool) { + bt.setKindFlagBool(signed) +} + func (bt *btfType) Linkage() FuncLinkage { return FuncLinkage(bt.info(btfTypeVlenMask, btfTypeVlenShift)) } @@ -181,6 +254,10 @@ func (bt *btfType) Type() TypeID { return TypeID(bt.SizeType) } +func (bt *btfType) SetType(id TypeID) { + bt.SizeType = uint32(id) +} + func (bt *btfType) Size() uint32 { // TODO: Panic here if wrong kind? return bt.SizeType @@ -190,13 +267,22 @@ func (bt *btfType) SetSize(size uint32) { bt.SizeType = size } +func (bt *btfType) Marshal(w io.Writer, bo binary.ByteOrder) error { + buf := make([]byte, unsafe.Sizeof(*bt)) + bo.PutUint32(buf[0:], bt.NameOff) + bo.PutUint32(buf[4:], bt.Info) + bo.PutUint32(buf[8:], bt.SizeType) + _, err := w.Write(buf) + return err +} + type rawType struct { btfType data interface{} } func (rt *rawType) Marshal(w io.Writer, bo binary.ByteOrder) error { - if err := binary.Write(w, bo, &rt.btfType); err != nil { + if err := rt.btfType.Marshal(w, bo); err != nil { return err } @@ -209,11 +295,11 @@ func (rt *rawType) Marshal(w io.Writer, bo binary.ByteOrder) error { // btfInt encodes additional data for integers. // -// ? ? ? ? e e e e o o o o o o o o ? ? ? ? ? ? ? ? b b b b b b b b -// ? = undefined -// e = encoding -// o = offset (bitfields?) -// b = bits (bitfields) +// ? ? ? ? e e e e o o o o o o o o ? ? ? ? ? ? ? ? b b b b b b b b +// ? = undefined +// e = encoding +// o = offset (bitfields?) 
+// b = bits (bitfields) type btfInt struct { Raw uint32 } @@ -227,6 +313,17 @@ const ( btfIntBitsShift = 0 ) +var btfIntLen = int(unsafe.Sizeof(btfInt{})) + +func unmarshalBtfInt(bi *btfInt, b []byte, bo binary.ByteOrder) (int, error) { + if len(b) < btfIntLen { + return 0, fmt.Errorf("not enough bytes to unmarshal btfInt") + } + + bi.Raw = bo.Uint32(b[0:]) + return btfIntLen, nil +} + func (bi btfInt) Encoding() IntEncoding { return IntEncoding(readBits(bi.Raw, btfIntEncodingLen, btfIntEncodingShift)) } @@ -257,25 +354,130 @@ type btfArray struct { Nelems uint32 } +var btfArrayLen = int(unsafe.Sizeof(btfArray{})) + +func unmarshalBtfArray(ba *btfArray, b []byte, bo binary.ByteOrder) (int, error) { + if len(b) < btfArrayLen { + return 0, fmt.Errorf("not enough bytes to unmarshal btfArray") + } + + ba.Type = TypeID(bo.Uint32(b[0:])) + ba.IndexType = TypeID(bo.Uint32(b[4:])) + ba.Nelems = bo.Uint32(b[8:]) + return btfArrayLen, nil +} + type btfMember struct { NameOff uint32 Type TypeID Offset uint32 } +var btfMemberLen = int(unsafe.Sizeof(btfMember{})) + +func unmarshalBtfMembers(members []btfMember, b []byte, bo binary.ByteOrder) (int, error) { + off := 0 + for i := range members { + if off+btfMemberLen > len(b) { + return 0, fmt.Errorf("not enough bytes to unmarshal btfMember %d", i) + } + + members[i].NameOff = bo.Uint32(b[off+0:]) + members[i].Type = TypeID(bo.Uint32(b[off+4:])) + members[i].Offset = bo.Uint32(b[off+8:]) + + off += btfMemberLen + } + + return off, nil +} + type btfVarSecinfo struct { Type TypeID Offset uint32 Size uint32 } +var btfVarSecinfoLen = int(unsafe.Sizeof(btfVarSecinfo{})) + +func unmarshalBtfVarSecInfos(secinfos []btfVarSecinfo, b []byte, bo binary.ByteOrder) (int, error) { + off := 0 + for i := range secinfos { + if off+btfVarSecinfoLen > len(b) { + return 0, fmt.Errorf("not enough bytes to unmarshal btfVarSecinfo %d", i) + } + + secinfos[i].Type = TypeID(bo.Uint32(b[off+0:])) + secinfos[i].Offset = bo.Uint32(b[off+4:]) + secinfos[i].Size = bo.Uint32(b[off+8:]) + + off += btfVarSecinfoLen + } + + return off, nil +} + type btfVariable struct { Linkage uint32 } +var btfVariableLen = int(unsafe.Sizeof(btfVariable{})) + +func unmarshalBtfVariable(bv *btfVariable, b []byte, bo binary.ByteOrder) (int, error) { + if len(b) < btfVariableLen { + return 0, fmt.Errorf("not enough bytes to unmarshal btfVariable") + } + + bv.Linkage = bo.Uint32(b[0:]) + return btfVariableLen, nil +} + type btfEnum struct { NameOff uint32 - Val int32 + Val uint32 +} + +var btfEnumLen = int(unsafe.Sizeof(btfEnum{})) + +func unmarshalBtfEnums(enums []btfEnum, b []byte, bo binary.ByteOrder) (int, error) { + off := 0 + for i := range enums { + if off+btfEnumLen > len(b) { + return 0, fmt.Errorf("not enough bytes to unmarshal btfEnum %d", i) + } + + enums[i].NameOff = bo.Uint32(b[off+0:]) + enums[i].Val = bo.Uint32(b[off+4:]) + + off += btfEnumLen + } + + return off, nil +} + +type btfEnum64 struct { + NameOff uint32 + ValLo32 uint32 + ValHi32 uint32 +} + +var btfEnum64Len = int(unsafe.Sizeof(btfEnum64{})) + +func unmarshalBtfEnums64(enums []btfEnum64, b []byte, bo binary.ByteOrder) (int, error) { + off := 0 + for i := range enums { + if off+btfEnum64Len > len(b) { + return 0, fmt.Errorf("not enough bytes to unmarshal btfEnum64 %d", i) + } + + enums[i].NameOff = bo.Uint32(b[off+0:]) + enums[i].ValLo32 = bo.Uint32(b[off+4:]) + enums[i].ValHi32 = bo.Uint32(b[off+8:]) + + off += btfEnum64Len + } + + return off, nil } type btfParam struct { @@ -283,61 +485,35 @@ type btfParam struct { Type TypeID } 
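The unmarshal helpers above all follow one convention: fixed-width records decoded with an explicit binary.ByteOrder and bounds-checked against the input slice. A self-contained sketch of the one non-obvious case — reassembling an Enum64 value from its two 32-bit halves — using fabricated bytes:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	bo := binary.LittleEndian

	// A fabricated 12-byte btfEnum64 record: NameOff=1, value 0x100000002.
	raw := make([]byte, 12)
	bo.PutUint32(raw[0:], 1)          // NameOff
	bo.PutUint32(raw[4:], 0x00000002) // ValLo32
	bo.PutUint32(raw[8:], 0x00000001) // ValHi32

	lo := uint64(bo.Uint32(raw[4:]))
	hi := uint64(bo.Uint32(raw[8:]))
	fmt.Printf("enum64 value: %#x\n", hi<<32|lo) // 0x100000002
}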
-func readTypes(r io.Reader, bo binary.ByteOrder, typeLen uint32) ([]rawType, error) { - var header btfType - // because of the interleaving between types and struct members it is difficult to - // precompute the numbers of raw types this will parse - // this "guess" is a good first estimation - sizeOfbtfType := uintptr(binary.Size(btfType{})) - tyMaxCount := uintptr(typeLen) / sizeOfbtfType / 2 - types := make([]rawType, 0, tyMaxCount) - - for id := TypeID(1); ; id++ { - if err := binary.Read(r, bo, &header); err == io.EOF { - return types, nil - } else if err != nil { - return nil, fmt.Errorf("can't read type info for id %v: %v", id, err) - } +var btfParamLen = int(unsafe.Sizeof(btfParam{})) - var data interface{} - switch header.Kind() { - case kindInt: - data = new(btfInt) - case kindPointer: - case kindArray: - data = new(btfArray) - case kindStruct: - fallthrough - case kindUnion: - data = make([]btfMember, header.Vlen()) - case kindEnum: - data = make([]btfEnum, header.Vlen()) - case kindForward: - case kindTypedef: - case kindVolatile: - case kindConst: - case kindRestrict: - case kindFunc: - case kindFuncProto: - data = make([]btfParam, header.Vlen()) - case kindVar: - data = new(btfVariable) - case kindDatasec: - data = make([]btfVarSecinfo, header.Vlen()) - case kindFloat: - default: - return nil, fmt.Errorf("type id %v: unknown kind: %v", id, header.Kind()) +func unmarshalBtfParams(params []btfParam, b []byte, bo binary.ByteOrder) (int, error) { + off := 0 + for i := range params { + if off+btfParamLen > len(b) { + return 0, fmt.Errorf("not enough bytes to unmarshal btfParam %d", i) } - if data == nil { - types = append(types, rawType{header, nil}) - continue - } + params[i].NameOff = bo.Uint32(b[off+0:]) + params[i].Type = TypeID(bo.Uint32(b[off+4:])) - if err := binary.Read(r, bo, data); err != nil { - return nil, fmt.Errorf("type id %d: kind %v: can't read %T: %v", id, header.Kind(), data, err) - } + off += btfParamLen + } - types = append(types, rawType{header, data}) + return off, nil +} + +type btfDeclTag struct { + ComponentIdx uint32 +} + +var btfDeclTagLen = int(unsafe.Sizeof(btfDeclTag{})) + +func unmarshalBtfDeclTag(bdt *btfDeclTag, b []byte, bo binary.ByteOrder) (int, error) { + if len(b) < btfDeclTagLen { + return 0, fmt.Errorf("not enough bytes to unmarshal btfDeclTag") } + + bdt.ComponentIdx = bo.Uint32(b[0:]) + return btfDeclTagLen, nil } diff --git a/vendor/github.com/cilium/ebpf/btf/btf_types_string.go b/vendor/github.com/cilium/ebpf/btf/btf_types_string.go index 0e0c17d68b..b7a1b80d15 100644 --- a/vendor/github.com/cilium/ebpf/btf/btf_types_string.go +++ b/vendor/github.com/cilium/ebpf/btf/btf_types_string.go @@ -1,4 +1,4 @@ -// Code generated by "stringer -linecomment -output=btf_types_string.go -type=FuncLinkage,VarLinkage"; DO NOT EDIT. +// Code generated by "stringer -linecomment -output=btf_types_string.go -type=FuncLinkage,VarLinkage,btfKind"; DO NOT EDIT. package btf @@ -42,3 +42,39 @@ func (i VarLinkage) String() string { } return _VarLinkage_name[_VarLinkage_index[i]:_VarLinkage_index[i+1]] } +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[kindUnknown-0] + _ = x[kindInt-1] + _ = x[kindPointer-2] + _ = x[kindArray-3] + _ = x[kindStruct-4] + _ = x[kindUnion-5] + _ = x[kindEnum-6] + _ = x[kindForward-7] + _ = x[kindTypedef-8] + _ = x[kindVolatile-9] + _ = x[kindConst-10] + _ = x[kindRestrict-11] + _ = x[kindFunc-12] + _ = x[kindFuncProto-13] + _ = x[kindVar-14] + _ = x[kindDatasec-15] + _ = x[kindFloat-16] + _ = x[kindDeclTag-17] + _ = x[kindTypeTag-18] + _ = x[kindEnum64-19] +} + +const _btfKind_name = "UnknownIntPointerArrayStructUnionEnumForwardTypedefVolatileConstRestrictFuncFuncProtoVarDatasecFloatDeclTagTypeTagEnum64" + +var _btfKind_index = [...]uint8{0, 7, 10, 17, 22, 28, 33, 37, 44, 51, 59, 64, 72, 76, 85, 88, 95, 100, 107, 114, 120} + +func (i btfKind) String() string { + if i >= btfKind(len(_btfKind_index)-1) { + return "btfKind(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _btfKind_name[_btfKind_index[i]:_btfKind_index[i+1]] +} diff --git a/vendor/github.com/cilium/ebpf/btf/core.go b/vendor/github.com/cilium/ebpf/btf/core.go index c487548093..ee89f98331 100644 --- a/vendor/github.com/cilium/ebpf/btf/core.go +++ b/vendor/github.com/cilium/ebpf/btf/core.go @@ -6,6 +6,7 @@ import ( "fmt" "math" "reflect" + "slices" "strconv" "strings" @@ -15,11 +16,16 @@ import ( // Code in this file is derived from libbpf, which is available under a BSD // 2-Clause license. +// A constant used when CO-RE relocation has to remove instructions. +// +// Taken from libbpf. +const COREBadRelocationSentinel = 0xbad2310 + // COREFixup is the result of computing a CO-RE relocation for a target. type COREFixup struct { kind coreKind - local uint32 - target uint32 + local uint64 + target uint64 // True if there is no valid fixup. The instruction is replaced with an // invalid dummy. poison bool @@ -41,9 +47,22 @@ func (f *COREFixup) String() string { func (f *COREFixup) Apply(ins *asm.Instruction) error { if f.poison { - const badRelo = 0xbad2310 + // Relocation is poisoned, replace the instruction with an invalid one. + if ins.OpCode.IsDWordLoad() { + // Replace a dword load with a invalid dword load to preserve instruction size. + *ins = asm.LoadImm(asm.R10, COREBadRelocationSentinel, asm.DWord) + } else { + // Replace all single size instruction with a invalid call instruction. + *ins = asm.BuiltinFunc(COREBadRelocationSentinel).Call() + } + + // Add context to the kernel verifier output. + if source := ins.Source(); source != nil { + *ins = ins.WithSource(asm.Comment(fmt.Sprintf("instruction poisoned by CO-RE: %s", source))) + } else { + *ins = ins.WithSource(asm.Comment("instruction poisoned by CO-RE")) + } - *ins = asm.BuiltinFunc(badRelo).Call() return nil } @@ -119,10 +138,11 @@ const ( reloTypeSize /* type size in bytes */ reloEnumvalExists /* enum value existence in target kernel */ reloEnumvalValue /* enum value integer value */ + reloTypeMatches /* type matches kernel type */ ) func (k coreKind) checksForExistence() bool { - return k == reloEnumvalExists || k == reloTypeExists || k == reloFieldExists + return k == reloEnumvalExists || k == reloTypeExists || k == reloFieldExists || k == reloTypeMatches } func (k coreKind) String() string { @@ -151,21 +171,43 @@ func (k coreKind) String() string { return "enumval_exists" case reloEnumvalValue: return "enumval_value" + case reloTypeMatches: + return "type_matches" default: - return "unknown" + return fmt.Sprintf("unknown (%d)", k) } } -// CORERelocate calculates the difference in types between local and target. 
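Before CORERelocate's reworked contract below, a sketch of the poisoning strategy introduced in Apply above, using only calls that appear in this diff: dword loads keep their two-slot encoding, everything else becomes an invalid call, and a source comment makes the sentinel explicable in verifier logs:

package main

import (
	"fmt"

	"github.com/cilium/ebpf/asm"
)

const badRelo = 0xbad2310 // mirrors COREBadRelocationSentinel

func poison(ins asm.Instruction) asm.Instruction {
	if ins.OpCode.IsDWordLoad() {
		// Preserve instruction size: a dword load occupies two slots.
		ins = asm.LoadImm(asm.R10, badRelo, asm.DWord)
	} else {
		ins = asm.BuiltinFunc(badRelo).Call()
	}
	return ins.WithSource(asm.Comment("instruction poisoned by CO-RE"))
}

func main() {
	fmt.Println(poison(asm.LoadImm(asm.R1, 42, asm.DWord)))
	fmt.Println(poison(asm.Mov.Reg(asm.R1, asm.R2)))
}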
+// CORERelocate calculates changes needed to adjust eBPF instructions for differences +// in types. +// +// targets forms the set of types to relocate against. The first element has to be +// BTF for vmlinux, the following must be types for kernel modules. +// +// resolveLocalTypeID is called for each local type which requires a stable TypeID. +// Calling the function with the same type multiple times must produce the same +// result. It is the callers responsibility to ensure that the relocated instructions +// are loaded with matching BTF. // // Returns a list of fixups which can be applied to instructions to make them // match the target type(s). // // Fixups are returned in the order of relos, e.g. fixup[i] is the solution // for relos[i]. -func CORERelocate(local, target *Spec, relos []*CORERelocation) ([]COREFixup, error) { - if local.byteOrder != target.byteOrder { - return nil, fmt.Errorf("can't relocate %s against %s", local.byteOrder, target.byteOrder) +func CORERelocate(relos []*CORERelocation, targets []*Spec, bo binary.ByteOrder, resolveLocalTypeID func(Type) (TypeID, error)) ([]COREFixup, error) { + if len(targets) == 0 { + // Explicitly check for nil here since the argument used to be optional. + return nil, fmt.Errorf("targets must be provided") + } + + // We can't encode type IDs that aren't for vmlinux into instructions at the + // moment. + resolveTargetTypeID := targets[0].TypeID + + for _, target := range targets { + if bo != target.imm.byteOrder { + return nil, fmt.Errorf("can't relocate %s against %s", bo, target.imm.byteOrder) + } } type reloGroup struct { @@ -185,15 +227,15 @@ func CORERelocate(local, target *Spec, relos []*CORERelocation) ([]COREFixup, er return nil, fmt.Errorf("%s: unexpected accessor %v", relo.kind, relo.accessor) } - id, err := local.TypeID(relo.typ) + id, err := resolveLocalTypeID(relo.typ) if err != nil { - return nil, fmt.Errorf("%s: %w", relo.kind, err) + return nil, fmt.Errorf("%s: get type id: %w", relo.kind, err) } result[i] = COREFixup{ kind: relo.kind, - local: uint32(id), - target: uint32(id), + local: uint64(relo.id), + target: uint64(id), } continue } @@ -213,8 +255,23 @@ func CORERelocate(local, target *Spec, relos []*CORERelocation) ([]COREFixup, er return nil, fmt.Errorf("relocate unnamed or anonymous type %s: %w", localType, ErrNotSupported) } - targets := target.namedTypes[newEssentialName(localTypeName)] - fixups, err := coreCalculateFixups(local, target, localType, targets, group.relos) + essentialName := newEssentialName(localTypeName) + + var targetTypes []Type + for _, target := range targets { + namedTypeIDs := target.imm.namedTypes[essentialName] + targetTypes = slices.Grow(targetTypes, len(namedTypeIDs)) + for _, id := range namedTypeIDs { + typ, err := target.TypeByID(id) + if err != nil { + return nil, err + } + + targetTypes = append(targetTypes, typ) + } + } + + fixups, err := coreCalculateFixups(group.relos, targetTypes, bo, resolveTargetTypeID) if err != nil { return nil, fmt.Errorf("relocate %s: %w", localType, err) } @@ -229,34 +286,24 @@ func CORERelocate(local, target *Spec, relos []*CORERelocation) ([]COREFixup, er var errAmbiguousRelocation = errors.New("ambiguous relocation") var errImpossibleRelocation = errors.New("impossible relocation") +var errIncompatibleTypes = errors.New("incompatible types") -// coreCalculateFixups calculates the fixups for the given relocations using -// the "best" target. +// coreCalculateFixups finds the target type that best matches all relocations. 
+// +// All relos must target the same type. // // The best target is determined by scoring: the less poisoning we have to do // the better the target is. -func coreCalculateFixups(localSpec, targetSpec *Spec, local Type, targets []Type, relos []*CORERelocation) ([]COREFixup, error) { - localID, err := localSpec.TypeID(local) - if err != nil { - return nil, fmt.Errorf("local type ID: %w", err) - } - local = Copy(local, UnderlyingType) - +func coreCalculateFixups(relos []*CORERelocation, targets []Type, bo binary.ByteOrder, resolveTargetTypeID func(Type) (TypeID, error)) ([]COREFixup, error) { bestScore := len(relos) var bestFixups []COREFixup - for i := range targets { - targetID, err := targetSpec.TypeID(targets[i]) - if err != nil { - return nil, fmt.Errorf("target type ID: %w", err) - } - target := Copy(targets[i], UnderlyingType) - + for _, target := range targets { score := 0 // lower is better fixups := make([]COREFixup, 0, len(relos)) for _, relo := range relos { - fixup, err := coreCalculateFixup(localSpec.byteOrder, local, localID, target, targetID, relo) + fixup, err := coreCalculateFixup(relo, target, bo, resolveTargetTypeID) if err != nil { - return nil, fmt.Errorf("target %s: %w", target, err) + return nil, fmt.Errorf("target %s: %s: %w", target, relo.kind, err) } if fixup.poison || fixup.isNonExistant() { score++ @@ -303,13 +350,14 @@ func coreCalculateFixups(localSpec, targetSpec *Spec, local Type, targets []Type return bestFixups, nil } -// coreCalculateFixup calculates the fixup for a single local type, target type -// and relocation. -func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID, target Type, targetID TypeID, relo *CORERelocation) (COREFixup, error) { - fixup := func(local, target uint32) (COREFixup, error) { +var errNoSignedness = errors.New("no signedness") + +// coreCalculateFixup calculates the fixup given a relocation and a target type. 
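The compatibility check backing this scoring is exported later in this file as CheckTypeCompatibility; a minimal sketch of the rules it applies (kinds must match, root-type names and sizes are ignored), before coreCalculateFixup below:

package main

import (
	"fmt"

	"github.com/cilium/ebpf/btf"
)

func main() {
	local := &btf.Int{Name: "int", Size: 4, Encoding: btf.Signed}
	target := &btf.Int{Name: "long", Size: 8}

	// Compatible: both integers; name and size differences are tolerated.
	fmt.Println(btf.CheckTypeCompatibility(local, target)) // <nil>

	// Incompatible: kinds differ, so the relocation would be poisoned.
	fmt.Println(btf.CheckTypeCompatibility(local, &btf.Pointer{Target: target}))
}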
+func coreCalculateFixup(relo *CORERelocation, target Type, bo binary.ByteOrder, resolveTargetTypeID func(Type) (TypeID, error)) (COREFixup, error) { + fixup := func(local, target uint64) (COREFixup, error) { return COREFixup{kind: relo.kind, local: local, target: target}, nil } - fixupWithoutValidation := func(local, target uint32) (COREFixup, error) { + fixupWithoutValidation := func(local, target uint64) (COREFixup, error) { return COREFixup{kind: relo.kind, local: local, target: target, skipLocalValidation: true}, nil } poison := func() (COREFixup, error) { @@ -320,18 +368,35 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID, } zero := COREFixup{} + local := relo.typ + switch relo.kind { + case reloTypeMatches: + if len(relo.accessor) > 1 || relo.accessor[0] != 0 { + return zero, fmt.Errorf("unexpected accessor %v", relo.accessor) + } + + err := coreTypesMatch(local, target, nil) + if errors.Is(err, errIncompatibleTypes) { + return poison() + } + if err != nil { + return zero, err + } + + return fixup(1, 1) + case reloTypeIDTarget, reloTypeSize, reloTypeExists: if len(relo.accessor) > 1 || relo.accessor[0] != 0 { - return zero, fmt.Errorf("%s: unexpected accessor %v", relo.kind, relo.accessor) + return zero, fmt.Errorf("unexpected accessor %v", relo.accessor) } - err := coreAreTypesCompatible(local, target) - if errors.Is(err, errImpossibleRelocation) { + err := CheckTypeCompatibility(local, target) + if errors.Is(err, errIncompatibleTypes) { return poison() } if err != nil { - return zero, fmt.Errorf("relocation %s: %w", relo.kind, err) + return zero, err } switch relo.kind { @@ -339,7 +404,16 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID, return fixup(1, 1) case reloTypeIDTarget: - return fixup(uint32(localID), uint32(targetID)) + targetID, err := resolveTargetTypeID(target) + if errors.Is(err, ErrNotFound) { + // Probably a relocation trying to get the ID + // of a type from a kmod. + return poison() + } + if err != nil { + return zero, err + } + return fixup(uint64(relo.id), uint64(targetID)) case reloTypeSize: localSize, err := Sizeof(local) @@ -352,7 +426,7 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID, return zero, err } - return fixup(uint32(localSize), uint32(targetSize)) + return fixup(uint64(localSize), uint64(targetSize)) } case reloEnumvalValue, reloEnumvalExists: @@ -361,7 +435,7 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID, return poison() } if err != nil { - return zero, fmt.Errorf("relocation %s: %w", relo.kind, err) + return zero, err } switch relo.kind { @@ -369,24 +443,11 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID, return fixup(1, 1) case reloEnumvalValue: - return fixup(uint32(localValue.Value), uint32(targetValue.Value)) - } - - case reloFieldSigned: - switch local.(type) { - case *Enum: - return fixup(1, 1) - case *Int: - return fixup( - uint32(local.(*Int).Encoding&Signed), - uint32(target.(*Int).Encoding&Signed), - ) - default: - return fixupWithoutValidation(0, 0) + return fixup(localValue.Value, targetValue.Value) } - case reloFieldByteOffset, reloFieldByteSize, reloFieldExists, reloFieldLShiftU64, reloFieldRShiftU64: - if _, ok := target.(*Fwd); ok { + case reloFieldByteOffset, reloFieldByteSize, reloFieldExists, reloFieldLShiftU64, reloFieldRShiftU64, reloFieldSigned: + if _, ok := As[*Fwd](target); ok { // We can't relocate fields using a forward declaration, so // skip it. 
If a non-forward declaration is present in the BTF // we'll find it in one of the other iterations. @@ -398,7 +459,7 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID, return poison() } if err != nil { - return zero, fmt.Errorf("target %s: %w", target, err) + return zero, err } maybeSkipValidation := func(f COREFixup, err error) (COREFixup, error) { @@ -411,7 +472,7 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID, return fixup(1, 1) case reloFieldByteOffset: - return maybeSkipValidation(fixup(localField.offset, targetField.offset)) + return maybeSkipValidation(fixup(uint64(localField.offset), uint64(targetField.offset))) case reloFieldByteSize: localSize, err := Sizeof(localField.Type) @@ -423,24 +484,24 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID, if err != nil { return zero, err } - return maybeSkipValidation(fixup(uint32(localSize), uint32(targetSize))) + return maybeSkipValidation(fixup(uint64(localSize), uint64(targetSize))) case reloFieldLShiftU64: - var target uint32 - if byteOrder == binary.LittleEndian { + var target uint64 + if bo == binary.LittleEndian { targetSize, err := targetField.sizeBits() if err != nil { return zero, err } - target = uint32(64 - targetField.bitfieldOffset - targetSize) + target = uint64(64 - targetField.bitfieldOffset - targetSize) } else { loadWidth, err := Sizeof(targetField.Type) if err != nil { return zero, err } - target = uint32(64 - Bits(loadWidth*8) + targetField.bitfieldOffset) + target = uint64(64 - Bits(loadWidth*8) + targetField.bitfieldOffset) } return fixupWithoutValidation(0, target) @@ -450,11 +511,41 @@ func coreCalculateFixup(byteOrder binary.ByteOrder, local Type, localID TypeID, return zero, err } - return fixupWithoutValidation(0, uint32(64-targetSize)) + return fixupWithoutValidation(0, uint64(64-targetSize)) + + case reloFieldSigned: + switch local := UnderlyingType(localField.Type).(type) { + case *Enum: + target, ok := As[*Enum](targetField.Type) + if !ok { + return zero, fmt.Errorf("target isn't *Enum but %T", targetField.Type) + } + + return fixup(boolToUint64(local.Signed), boolToUint64(target.Signed)) + case *Int: + target, ok := As[*Int](targetField.Type) + if !ok { + return zero, fmt.Errorf("target isn't *Int but %T", targetField.Type) + } + + return fixup( + uint64(local.Encoding&Signed), + uint64(target.Encoding&Signed), + ) + default: + return zero, fmt.Errorf("type %T: %w", local, errNoSignedness) + } } } - return zero, fmt.Errorf("relocation %s: %w", relo.kind, ErrNotSupported) + return zero, ErrNotSupported +} + +func boolToUint64(val bool) uint64 { + if val { + return 1 + } + return 0 } /* coreAccessor contains a path through a struct. It contains at least one index. @@ -516,7 +607,7 @@ func (ca coreAccessor) String() string { } func (ca coreAccessor) enumValue(t Type) (*EnumValue, error) { - e, ok := t.(*Enum) + e, ok := As[*Enum](t) if !ok { return nil, fmt.Errorf("not an enum: %s", t) } @@ -536,9 +627,9 @@ func (ca coreAccessor) enumValue(t Type) (*EnumValue, error) { // coreField represents the position of a "child" of a composite type from the // start of that type. // -// /- start of composite -// | offset * 8 | bitfieldOffset | bitfieldSize | ... | -// \- start of field end of field -/ +// /- start of composite +// | offset * 8 | bitfieldOffset | bitfieldSize | ... 
| +// \- start of field end of field -/ type coreField struct { Type Type @@ -555,6 +646,10 @@ type coreField struct { } func (cf *coreField) adjustOffsetToNthElement(n int) error { + if n == 0 { + return nil + } + size, err := Sizeof(cf.Type) if err != nil { return err @@ -597,7 +692,7 @@ func (cf *coreField) sizeBits() (Bits, error) { // between kernel versions. Synthesise the size to make the shifts work. size, err := Sizeof(cf.Type) if err != nil { - return 0, nil + return 0, err } return Bits(size * 8), nil } @@ -611,6 +706,10 @@ func coreFindField(localT Type, localAcc coreAccessor, targetT Type) (coreField, local := coreField{Type: localT} target := coreField{Type: targetT} + if err := coreAreMembersCompatible(local.Type, target.Type); err != nil { + return coreField{}, coreField{}, fmt.Errorf("fields: %w", err) + } + // The first index is used to offset a pointer of the base type like // when accessing an array. if err := local.adjustOffsetToNthElement(localAcc[0]); err != nil { @@ -621,13 +720,9 @@ func coreFindField(localT Type, localAcc coreAccessor, targetT Type) (coreField, return coreField{}, coreField{}, err } - if err := coreAreMembersCompatible(local.Type, target.Type); err != nil { - return coreField{}, coreField{}, fmt.Errorf("fields: %w", err) - } - var localMaybeFlex, targetMaybeFlex bool for i, acc := range localAcc[1:] { - switch localType := local.Type.(type) { + switch localType := UnderlyingType(local.Type).(type) { case composite: // For composite types acc is used to find the field in the local type, // and then we try to find a field in target with the same name. @@ -638,21 +733,21 @@ func coreFindField(localT Type, localAcc coreAccessor, targetT Type) (coreField, localMember := localMembers[acc] if localMember.Name == "" { - _, ok := localMember.Type.(composite) + localMemberType, ok := As[composite](localMember.Type) if !ok { return coreField{}, coreField{}, fmt.Errorf("unnamed field with type %s: %s", localMember.Type, ErrNotSupported) } // This is an anonymous struct or union, ignore it. local = coreField{ - Type: localMember.Type, + Type: localMemberType, offset: local.offset + localMember.Offset.Bytes(), } localMaybeFlex = false continue } - targetType, ok := target.Type.(composite) + targetType, ok := As[composite](target.Type) if !ok { return coreField{}, coreField{}, fmt.Errorf("target not composite: %w", errImpossibleRelocation) } @@ -698,7 +793,7 @@ func coreFindField(localT Type, localAcc coreAccessor, targetT Type) (coreField, case *Array: // For arrays, acc is the index in the target. - targetType, ok := target.Type.(*Array) + targetType, ok := As[*Array](target.Type) if !ok { return coreField{}, coreField{}, fmt.Errorf("target not array: %w", errImpossibleRelocation) } @@ -771,7 +866,7 @@ func coreFindMember(typ composite, name string) (Member, bool, error) { if visited[target] { continue } - if len(visited) >= maxTypeDepth { + if len(visited) >= maxResolveDepth { // This check is different than libbpf, which restricts the entire // path to BPF_CORE_SPEC_MAX_LEN items. 
return Member{}, false, fmt.Errorf("type is nested too deep") @@ -792,7 +887,7 @@ func coreFindMember(typ composite, name string) (Member, bool, error) { continue } - comp, ok := member.Type.(composite) + comp, ok := As[composite](member.Type) if !ok { return Member{}, false, fmt.Errorf("anonymous non-composite type %T not allowed", member.Type) } @@ -811,7 +906,7 @@ func coreFindEnumValue(local Type, localAcc coreAccessor, target Type) (localVal return nil, nil, err } - targetEnum, ok := target.(*Enum) + targetEnum, ok := As[*Enum](target) if !ok { return nil, nil, errImpossibleRelocation } @@ -828,6 +923,17 @@ func coreFindEnumValue(local Type, localAcc coreAccessor, target Type) (localVal return nil, nil, errImpossibleRelocation } +// CheckTypeCompatibility checks local and target types for Compatibility according to CO-RE rules. +// +// Only layout compatibility is checked, ignoring names of the root type. +func CheckTypeCompatibility(localType Type, targetType Type) error { + return coreAreTypesCompatible(localType, targetType, nil) +} + +type pair struct { + A, B Type +} + /* The comment below is from bpf_core_types_are_compat in libbpf.c: * * Check local and target types for compatibility. This check is used for @@ -849,60 +955,62 @@ func coreFindEnumValue(local Type, localAcc coreAccessor, target Type) (localVal * These rules are not set in stone and probably will be adjusted as we get * more experience with using BPF CO-RE relocations. * - * Returns errImpossibleRelocation if types are not compatible. + * Returns errIncompatibleTypes if types are not compatible. */ -func coreAreTypesCompatible(localType Type, targetType Type) error { - var ( - localTs, targetTs typeDeque - l, t = &localType, &targetType - depth = 0 - ) +func coreAreTypesCompatible(localType Type, targetType Type, visited map[pair]struct{}) error { + localType = UnderlyingType(localType) + targetType = UnderlyingType(targetType) - for ; l != nil && t != nil; l, t = localTs.shift(), targetTs.shift() { - if depth >= maxTypeDepth { - return errors.New("types are nested too deep") - } + if reflect.TypeOf(localType) != reflect.TypeOf(targetType) { + return fmt.Errorf("type mismatch between %v and %v: %w", localType, targetType, errIncompatibleTypes) + } - localType = *l - targetType = *t + if _, ok := visited[pair{localType, targetType}]; ok { + return nil + } + if visited == nil { + visited = make(map[pair]struct{}) + } + visited[pair{localType, targetType}] = struct{}{} - if reflect.TypeOf(localType) != reflect.TypeOf(targetType) { - return fmt.Errorf("type mismatch: %w", errImpossibleRelocation) - } + switch lv := localType.(type) { + case *Void, *Struct, *Union, *Enum, *Fwd, *Int: + return nil - switch lv := (localType).(type) { - case *Void, *Struct, *Union, *Enum, *Fwd, *Int: - // Nothing to do here + case *Pointer: + tv := targetType.(*Pointer) + return coreAreTypesCompatible(lv.Target, tv.Target, visited) - case *Pointer, *Array: - depth++ - localType.walk(&localTs) - targetType.walk(&targetTs) + case *Array: + tv := targetType.(*Array) + if err := coreAreTypesCompatible(lv.Index, tv.Index, visited); err != nil { + return err + } - case *FuncProto: - tv := targetType.(*FuncProto) - if len(lv.Params) != len(tv.Params) { - return fmt.Errorf("function param mismatch: %w", errImpossibleRelocation) - } + return coreAreTypesCompatible(lv.Type, tv.Type, visited) - depth++ - localType.walk(&localTs) - targetType.walk(&targetTs) + case *FuncProto: + tv := targetType.(*FuncProto) + if err := 
coreAreTypesCompatible(lv.Return, tv.Return, visited); err != nil { + return err + } - default: - return fmt.Errorf("unsupported type %T", localType) + if len(lv.Params) != len(tv.Params) { + return fmt.Errorf("function param mismatch: %w", errIncompatibleTypes) } - } - if l != nil { - return fmt.Errorf("dangling local type %T", *l) - } + for i, localParam := range lv.Params { + targetParam := tv.Params[i] + if err := coreAreTypesCompatible(localParam.Type, targetParam.Type, visited); err != nil { + return err + } + } - if t != nil { - return fmt.Errorf("dangling target type %T", *t) - } + return nil - return nil + default: + return fmt.Errorf("unsupported type %T", localType) + } } /* coreAreMembersCompatible checks two types for field-based relocation compatibility. @@ -931,18 +1039,8 @@ func coreAreTypesCompatible(localType Type, targetType Type) error { * Returns errImpossibleRelocation if the members are not compatible. */ func coreAreMembersCompatible(localType Type, targetType Type) error { - doNamesMatch := func(a, b string) error { - if a == "" || b == "" { - // allow anonymous and named type to match - return nil - } - - if newEssentialName(a) == newEssentialName(b) { - return nil - } - - return fmt.Errorf("names don't match: %w", errImpossibleRelocation) - } + localType = UnderlyingType(localType) + targetType = UnderlyingType(targetType) _, lok := localType.(composite) _, tok := targetType.(composite) @@ -960,13 +1058,204 @@ func coreAreMembersCompatible(localType Type, targetType Type) error { case *Enum: tv := targetType.(*Enum) - return doNamesMatch(lv.Name, tv.Name) + if !coreEssentialNamesMatch(lv.Name, tv.Name) { + return fmt.Errorf("names %q and %q don't match: %w", lv.Name, tv.Name, errImpossibleRelocation) + } + + return nil case *Fwd: tv := targetType.(*Fwd) - return doNamesMatch(lv.Name, tv.Name) + if !coreEssentialNamesMatch(lv.Name, tv.Name) { + return fmt.Errorf("names %q and %q don't match: %w", lv.Name, tv.Name, errImpossibleRelocation) + } + + return nil default: return fmt.Errorf("type %s: %w", localType, ErrNotSupported) } } + +// coreEssentialNamesMatch compares two names while ignoring their flavour suffix. +// +// This should only be used on names which are in the global scope, like struct +// names, typedefs or enum values. +func coreEssentialNamesMatch(a, b string) bool { + if a == "" || b == "" { + // allow anonymous and named type to match + return true + } + + return newEssentialName(a) == newEssentialName(b) +} + +/* The comment below is from __bpf_core_types_match in relo_core.c: + * + * Check that two types "match". This function assumes that root types were + * already checked for name match. + * + * The matching relation is defined as follows: + * - modifiers and typedefs are stripped (and, hence, effectively ignored) + * - generally speaking types need to be of same kind (struct vs. struct, union + * vs. union, etc.) + * - exceptions are struct/union behind a pointer which could also match a + * forward declaration of a struct or union, respectively, and enum vs. 
+ * enum64 (see below) + * Then, depending on type: + * - integers: + * - match if size and signedness match + * - arrays & pointers: + * - target types are recursively matched + * - structs & unions: + * - local members need to exist in target with the same name + * - for each member we recursively check match unless it is already behind a + * pointer, in which case we only check matching names and compatible kind + * - enums: + * - local variants have to have a match in target by symbolic name (but not + * numeric value) + * - size has to match (but enum may match enum64 and vice versa) + * - function pointers: + * - number and position of arguments in local type has to match target + * - for each argument and the return value we recursively check match + */ +func coreTypesMatch(localType Type, targetType Type, visited map[pair]struct{}) error { + localType = UnderlyingType(localType) + targetType = UnderlyingType(targetType) + + if !coreEssentialNamesMatch(localType.TypeName(), targetType.TypeName()) { + return fmt.Errorf("type names %q and %q don't match: %w", localType.TypeName(), targetType.TypeName(), errIncompatibleTypes) + } + + if reflect.TypeOf(localType) != reflect.TypeOf(targetType) { + return fmt.Errorf("type mismatch between %v and %v: %w", localType, targetType, errIncompatibleTypes) + } + + if _, ok := visited[pair{localType, targetType}]; ok { + return nil + } + if visited == nil { + visited = make(map[pair]struct{}) + } + visited[pair{localType, targetType}] = struct{}{} + + switch lv := (localType).(type) { + case *Void: + + case *Fwd: + if targetType.(*Fwd).Kind != lv.Kind { + return fmt.Errorf("fwd kind mismatch between %v and %v: %w", localType, targetType, errIncompatibleTypes) + } + + case *Enum: + return coreEnumsMatch(lv, targetType.(*Enum)) + + case composite: + tv := targetType.(composite) + + if len(lv.members()) > len(tv.members()) { + return errIncompatibleTypes + } + + localMembers := lv.members() + targetMembers := map[string]Member{} + for _, member := range tv.members() { + targetMembers[member.Name] = member + } + + for _, localMember := range localMembers { + targetMember, found := targetMembers[localMember.Name] + if !found { + return fmt.Errorf("no field %q in %v: %w", localMember.Name, targetType, errIncompatibleTypes) + } + + err := coreTypesMatch(localMember.Type, targetMember.Type, visited) + if err != nil { + return err + } + } + + case *Int: + if !coreEncodingMatches(lv, targetType.(*Int)) { + return fmt.Errorf("int mismatch between %v and %v: %w", localType, targetType, errIncompatibleTypes) + } + + case *Pointer: + tv := targetType.(*Pointer) + + // Allow a pointer to a forward declaration to match a struct + // or union. 
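+	// For example, a local `struct foo *` may match a target where
+	// `struct foo` is only a forward declaration, and vice versa.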
+ if fwd, ok := As[*Fwd](lv.Target); ok && fwd.matches(tv.Target) { + return nil + } + + if fwd, ok := As[*Fwd](tv.Target); ok && fwd.matches(lv.Target) { + return nil + } + + return coreTypesMatch(lv.Target, tv.Target, visited) + + case *Array: + tv := targetType.(*Array) + + if lv.Nelems != tv.Nelems { + return fmt.Errorf("array mismatch between %v and %v: %w", localType, targetType, errIncompatibleTypes) + } + + return coreTypesMatch(lv.Type, tv.Type, visited) + + case *FuncProto: + tv := targetType.(*FuncProto) + + if len(lv.Params) != len(tv.Params) { + return fmt.Errorf("function param mismatch: %w", errIncompatibleTypes) + } + + for i, lparam := range lv.Params { + if err := coreTypesMatch(lparam.Type, tv.Params[i].Type, visited); err != nil { + return err + } + } + + return coreTypesMatch(lv.Return, tv.Return, visited) + + default: + return fmt.Errorf("unsupported type %T", localType) + } + + return nil +} + +// coreEncodingMatches returns true if both ints have the same size and signedness. +// All encodings other than `Signed` are considered unsigned. +func coreEncodingMatches(local, target *Int) bool { + return local.Size == target.Size && (local.Encoding == Signed) == (target.Encoding == Signed) +} + +// coreEnumsMatch checks that two enums match, which is considered to be the case if the following is true: +// - size has to match (but enum may match enum64 and vice versa) +// - local variants have to have a match in target by symbolic name (but not numeric value) +func coreEnumsMatch(local *Enum, target *Enum) error { + if local.Size != target.Size { + return fmt.Errorf("size mismatch between %v and %v: %w", local, target, errIncompatibleTypes) + } + + // If there are more values in the local than the target, there must be at least one value in the local + // that isn't in the target, and therefore the types are incompatible. + if len(local.Values) > len(target.Values) { + return fmt.Errorf("local has more values than target: %w", errIncompatibleTypes) + } + +outer: + for _, lv := range local.Values { + for _, rv := range target.Values { + if coreEssentialNamesMatch(lv.Name, rv.Name) { + continue outer + } + } + + return fmt.Errorf("no match for %v in %v: %w", lv, target, errIncompatibleTypes) + } + + return nil +} diff --git a/vendor/github.com/cilium/ebpf/btf/ext_info.go b/vendor/github.com/cilium/ebpf/btf/ext_info.go index 2c0e1afe29..eb9044badf 100644 --- a/vendor/github.com/cilium/ebpf/btf/ext_info.go +++ b/vendor/github.com/cilium/ebpf/btf/ext_info.go @@ -16,15 +16,15 @@ import ( // ExtInfos contains ELF section metadata. type ExtInfos struct { // The slices are sorted by offset in ascending order. - funcInfos map[string][]funcInfo - lineInfos map[string][]lineInfo - relocationInfos map[string][]coreRelocationInfo + funcInfos map[string]FuncInfos + lineInfos map[string]LineInfos + relocationInfos map[string]CORERelocationInfos } // loadExtInfosFromELF parses ext infos from the .BTF.ext section in an ELF. // // Returns an error wrapping ErrNotFound if no ext infos are present. 
-func loadExtInfosFromELF(file *internal.SafeELFFile, ts types, strings *stringTable) (*ExtInfos, error) { +func loadExtInfosFromELF(file *internal.SafeELFFile, spec *Spec) (*ExtInfos, error) { section := file.Section(".BTF.ext") if section == nil { return nil, fmt.Errorf("btf ext infos: %w", ErrNotFound) @@ -34,11 +34,11 @@ func loadExtInfosFromELF(file *internal.SafeELFFile, ts types, strings *stringTa return nil, fmt.Errorf("compressed ext_info is not supported") } - return loadExtInfos(section.ReaderAt, file.ByteOrder, ts, strings) + return loadExtInfos(section.ReaderAt, file.ByteOrder, spec) } // loadExtInfos parses bare ext infos. -func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, ts types, strings *stringTable) (*ExtInfos, error) { +func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, spec *Spec) (*ExtInfos, error) { // Open unbuffered section reader. binary.Read() calls io.ReadFull on // the header structs, resulting in one syscall per header. headerRd := io.NewSectionReader(r, 0, math.MaxInt64) @@ -53,28 +53,28 @@ func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, ts types, strings *stringT } buf := internal.NewBufferedSectionReader(r, extHeader.funcInfoStart(), int64(extHeader.FuncInfoLen)) - btfFuncInfos, err := parseFuncInfos(buf, bo, strings) + btfFuncInfos, err := parseFuncInfos(buf, bo, spec.strings) if err != nil { return nil, fmt.Errorf("parsing BTF function info: %w", err) } - funcInfos := make(map[string][]funcInfo, len(btfFuncInfos)) + funcInfos := make(map[string]FuncInfos, len(btfFuncInfos)) for section, bfis := range btfFuncInfos { - funcInfos[section], err = newFuncInfos(bfis, ts) + funcInfos[section], err = newFuncInfos(bfis, spec) if err != nil { return nil, fmt.Errorf("section %s: func infos: %w", section, err) } } buf = internal.NewBufferedSectionReader(r, extHeader.lineInfoStart(), int64(extHeader.LineInfoLen)) - btfLineInfos, err := parseLineInfos(buf, bo, strings) + btfLineInfos, err := parseLineInfos(buf, bo, spec.strings) if err != nil { return nil, fmt.Errorf("parsing BTF line info: %w", err) } - lineInfos := make(map[string][]lineInfo, len(btfLineInfos)) + lineInfos := make(map[string]LineInfos, len(btfLineInfos)) for section, blis := range btfLineInfos { - lineInfos[section], err = newLineInfos(blis, strings) + lineInfos[section], err = newLineInfos(blis, spec.strings) if err != nil { return nil, fmt.Errorf("section %s: line infos: %w", section, err) } @@ -86,14 +86,14 @@ func loadExtInfos(r io.ReaderAt, bo binary.ByteOrder, ts types, strings *stringT var btfCORERelos map[string][]bpfCORERelo buf = internal.NewBufferedSectionReader(r, extHeader.coreReloStart(coreHeader), int64(coreHeader.COREReloLen)) - btfCORERelos, err = parseCORERelos(buf, bo, strings) + btfCORERelos, err = parseCORERelos(buf, bo, spec.strings) if err != nil { return nil, fmt.Errorf("parsing CO-RE relocation info: %w", err) } - coreRelos := make(map[string][]coreRelocationInfo, len(btfCORERelos)) + coreRelos := make(map[string]CORERelocationInfos, len(btfCORERelos)) for section, brs := range btfCORERelos { - coreRelos[section], err = newRelocationInfos(brs, ts, strings) + coreRelos[section], err = newRelocationInfos(brs, spec, spec.strings) if err != nil { return nil, fmt.Errorf("section %s: CO-RE relocations: %w", section, err) } @@ -111,51 +111,87 @@ func (ei *ExtInfos) Assign(insns asm.Instructions, section string) { lineInfos := ei.lineInfos[section] reloInfos := ei.relocationInfos[section] + AssignMetadataToInstructions(insns, funcInfos, lineInfos, reloInfos) +} + +// 
Assign per-instruction metadata to the instructions in insns. +func AssignMetadataToInstructions( + insns asm.Instructions, + funcInfos FuncInfos, + lineInfos LineInfos, + reloInfos CORERelocationInfos, +) { iter := insns.Iterate() for iter.Next() { - if len(funcInfos) > 0 && funcInfos[0].offset == iter.Offset { - iter.Ins.Metadata.Set(funcInfoMeta{}, funcInfos[0].fn) - funcInfos = funcInfos[1:] + if len(funcInfos.infos) > 0 && funcInfos.infos[0].offset == iter.Offset { + *iter.Ins = WithFuncMetadata(*iter.Ins, funcInfos.infos[0].fn) + funcInfos.infos = funcInfos.infos[1:] } - if len(lineInfos) > 0 && lineInfos[0].offset == iter.Offset { - *iter.Ins = iter.Ins.WithSource(lineInfos[0].line) - lineInfos = lineInfos[1:] + if len(lineInfos.infos) > 0 && lineInfos.infos[0].offset == iter.Offset { + *iter.Ins = iter.Ins.WithSource(lineInfos.infos[0].line) + lineInfos.infos = lineInfos.infos[1:] } - if len(reloInfos) > 0 && reloInfos[0].offset == iter.Offset { - iter.Ins.Metadata.Set(coreRelocationMeta{}, reloInfos[0].relo) - reloInfos = reloInfos[1:] + if len(reloInfos.infos) > 0 && reloInfos.infos[0].offset == iter.Offset { + iter.Ins.Metadata.Set(coreRelocationMeta{}, reloInfos.infos[0].relo) + reloInfos.infos = reloInfos.infos[1:] } } } // MarshalExtInfos encodes function and line info embedded in insns into kernel // wire format. -func MarshalExtInfos(insns asm.Instructions, typeID func(Type) (TypeID, error)) (funcInfos, lineInfos []byte, _ error) { +// +// If an instruction has an [asm.Comment], it will be synthesized into a mostly +// empty line info. +func MarshalExtInfos(insns asm.Instructions, b *Builder) (funcInfos, lineInfos []byte, _ error) { iter := insns.Iterate() - var fiBuf, liBuf bytes.Buffer for iter.Next() { + if iter.Ins.Source() != nil || FuncMetadata(iter.Ins) != nil { + goto marshal + } + } + + return nil, nil, nil + +marshal: + var fiBuf, liBuf bytes.Buffer + for { if fn := FuncMetadata(iter.Ins); fn != nil { fi := &funcInfo{ fn: fn, offset: iter.Offset, } - if err := fi.marshal(&fiBuf, typeID); err != nil { + if err := fi.marshal(&fiBuf, b); err != nil { return nil, nil, fmt.Errorf("write func info: %w", err) } } - if line, ok := iter.Ins.Source().(*Line); ok { + if source := iter.Ins.Source(); source != nil { + var line *Line + if l, ok := source.(*Line); ok { + line = l + } else { + line = &Line{ + line: source.String(), + } + } + li := &lineInfo{ line: line, offset: iter.Offset, } - if err := li.marshal(&liBuf); err != nil { + if err := li.marshal(&liBuf, b); err != nil { return nil, nil, fmt.Errorf("write line info: %w", err) } } + + if !iter.Next() { + break + } } + return fiBuf.Bytes(), liBuf.Bytes(), nil } @@ -297,6 +333,11 @@ func parseExtInfoRecordSize(r io.Reader, bo binary.ByteOrder) (uint32, error) { return recordSize, nil } +// FuncInfos contains a sorted list of func infos. +type FuncInfos struct { + infos []funcInfo +} + // The size of a FuncInfo in BTF wire format. 
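+// It consists of two uint32 fields, the instruction offset and the type ID
+// of the function, eight bytes in total.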
var FuncInfoSize = uint32(binary.Size(bpfFuncInfo{})) @@ -311,8 +352,8 @@ type bpfFuncInfo struct { TypeID TypeID } -func newFuncInfo(fi bpfFuncInfo, ts types) (*funcInfo, error) { - typ, err := ts.ByID(fi.TypeID) +func newFuncInfo(fi bpfFuncInfo, spec *Spec) (*funcInfo, error) { + typ, err := spec.TypeByID(fi.TypeID) if err != nil { return nil, err } @@ -333,24 +374,42 @@ }, nil } -func newFuncInfos(bfis []bpfFuncInfo, ts types) ([]funcInfo, error) { - fis := make([]funcInfo, 0, len(bfis)) +func newFuncInfos(bfis []bpfFuncInfo, spec *Spec) (FuncInfos, error) { + fis := FuncInfos{ + infos: make([]funcInfo, 0, len(bfis)), + } for _, bfi := range bfis { - fi, err := newFuncInfo(bfi, ts) + fi, err := newFuncInfo(bfi, spec) if err != nil { - return nil, fmt.Errorf("offset %d: %w", bfi.InsnOff, err) + return FuncInfos{}, fmt.Errorf("offset %d: %w", bfi.InsnOff, err) } - fis = append(fis, *fi) + fis.infos = append(fis.infos, *fi) } - sort.Slice(fis, func(i, j int) bool { - return fis[i].offset <= fis[j].offset + sort.Slice(fis.infos, func(i, j int) bool { + return fis.infos[i].offset <= fis.infos[j].offset }) return fis, nil } +// LoadFuncInfos parses BTF func info in kernel wire format. +func LoadFuncInfos(reader io.Reader, bo binary.ByteOrder, recordNum uint32, spec *Spec) (FuncInfos, error) { + fis, err := parseFuncInfoRecords( + reader, + bo, + FuncInfoSize, + recordNum, + false, + ) + if err != nil { + return FuncInfos{}, fmt.Errorf("parsing BTF func info: %w", err) + } + + return newFuncInfos(fis, spec) +} + // marshal into the BTF wire format. -func (fi *funcInfo) marshal(w io.Writer, typeID func(Type) (TypeID, error)) error { - id, err := typeID(fi.fn) +func (fi *funcInfo) marshal(w *bytes.Buffer, b *Builder) error { + id, err := b.Add(fi.fn) if err != nil { return err } @@ -358,10 +417,14 @@ InsnOff: uint32(fi.offset), TypeID: id, } - return binary.Write(w, internal.NativeEndian, &bfi) + buf := make([]byte, FuncInfoSize) + internal.NativeEndian.PutUint32(buf, bfi.InsnOff) + internal.NativeEndian.PutUint32(buf[4:], uint32(bfi.TypeID)) + _, err = w.Write(buf) + return err } -// parseLineInfos parses a func_info sub-section within .BTF.ext ito a map of +// parseFuncInfos parses a func_info sub-section within .BTF.ext into a map of // func infos indexed by section name. func parseFuncInfos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map[string][]bpfFuncInfo, error) { recordSize, err := parseExtInfoRecordSize(r, bo) if err != nil { return nil, err } @@ -379,7 +442,7 @@ return nil, err } - records, err := parseFuncInfoRecords(r, bo, recordSize, infoHeader.NumInfo) + records, err := parseFuncInfoRecords(r, bo, recordSize, infoHeader.NumInfo, true) if err != nil { return nil, fmt.Errorf("section %v: %w", secName, err) } @@ -391,7 +454,7 @@ // parseFuncInfoRecords parses a stream of func_infos into a funcInfos. // These records appear after a btf_ext_info_sec header in the func_info // sub-section of .BTF.ext. 
-func parseFuncInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32) ([]bpfFuncInfo, error) { +func parseFuncInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32, offsetInBytes bool) ([]bpfFuncInfo, error) { var out []bpfFuncInfo var fi bpfFuncInfo @@ -405,13 +468,15 @@ func parseFuncInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, r return nil, fmt.Errorf("can't read function info: %v", err) } - if fi.InsnOff%asm.InstructionSize != 0 { - return nil, fmt.Errorf("offset %v is not aligned with instruction size", fi.InsnOff) - } + if offsetInBytes { + if fi.InsnOff%asm.InstructionSize != 0 { + return nil, fmt.Errorf("offset %v is not aligned with instruction size", fi.InsnOff) + } - // ELF tracks offset in bytes, the kernel expects raw BPF instructions. - // Convert as early as possible. - fi.InsnOff /= asm.InstructionSize + // ELF tracks offset in bytes, the kernel expects raw BPF instructions. + // Convert as early as possible. + fi.InsnOff /= asm.InstructionSize + } out = append(out, fi) } @@ -428,12 +493,6 @@ type Line struct { line string lineNumber uint32 lineColumn uint32 - - // TODO: We should get rid of the fields below, but for that we need to be - // able to write BTF. - - fileNameOff uint32 - lineOff uint32 } func (li *Line) FileName() string { @@ -456,6 +515,11 @@ func (li *Line) String() string { return li.line } +// LineInfos contains a sorted list of line infos. +type LineInfos struct { + infos []lineInfo +} + type lineInfo struct { line *Line offset asm.RawInstructionOffset @@ -476,50 +540,66 @@ type bpfLineInfo struct { LineCol uint32 } -func newLineInfo(li bpfLineInfo, strings *stringTable) (*lineInfo, error) { +// LoadLineInfos parses BTF line info in kernel wire format. 
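+//
+// A minimal usage sketch; raw, n and spec are hypothetical stand-ins for a
+// line_info blob, its record count and the BTF it refers to:
+//
+//	lis, err := LoadLineInfos(bytes.NewReader(raw), binary.LittleEndian, n, spec)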
+func LoadLineInfos(reader io.Reader, bo binary.ByteOrder, recordNum uint32, spec *Spec) (LineInfos, error) { + lis, err := parseLineInfoRecords( + reader, + bo, + LineInfoSize, + recordNum, + false, + ) + if err != nil { + return LineInfos{}, fmt.Errorf("parsing BTF line info: %w", err) + } + + return newLineInfos(lis, spec.strings) +} + +func newLineInfo(li bpfLineInfo, strings *stringTable) (lineInfo, error) { line, err := strings.Lookup(li.LineOff) if err != nil { - return nil, fmt.Errorf("lookup of line: %w", err) + return lineInfo{}, fmt.Errorf("lookup of line: %w", err) } fileName, err := strings.Lookup(li.FileNameOff) if err != nil { - return nil, fmt.Errorf("lookup of filename: %w", err) + return lineInfo{}, fmt.Errorf("lookup of filename: %w", err) } lineNumber := li.LineCol >> bpfLineShift lineColumn := li.LineCol & bpfColumnMax - return &lineInfo{ + return lineInfo{ &Line{ fileName, line, lineNumber, lineColumn, - li.FileNameOff, - li.LineOff, }, asm.RawInstructionOffset(li.InsnOff), }, nil } -func newLineInfos(blis []bpfLineInfo, strings *stringTable) ([]lineInfo, error) { - lis := make([]lineInfo, 0, len(blis)) +func newLineInfos(blis []bpfLineInfo, strings *stringTable) (LineInfos, error) { + lis := LineInfos{ + infos: make([]lineInfo, 0, len(blis)), + } for _, bli := range blis { li, err := newLineInfo(bli, strings) if err != nil { - return nil, fmt.Errorf("offset %d: %w", bli.InsnOff, err) + return LineInfos{}, fmt.Errorf("offset %d: %w", bli.InsnOff, err) } - lis = append(lis, *li) + lis.infos = append(lis.infos, li) } - sort.Slice(lis, func(i, j int) bool { - return lis[i].offset <= lis[j].offset + sort.Slice(lis.infos, func(i, j int) bool { + return lis.infos[i].offset <= lis.infos[j].offset }) return lis, nil } // marshal writes the binary representation of the LineInfo to w. 
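+// The wire format is four native-endian uint32s: instruction offset, file
+// name string offset, line string offset, and the packed line/column pair.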
-func (li *lineInfo) marshal(w io.Writer) error { +func (li *lineInfo) marshal(w *bytes.Buffer, b *Builder) error { line := li.line if line.lineNumber > bpfLineMax { return fmt.Errorf("line %d exceeds %d", line.lineNumber, bpfLineMax) @@ -529,13 +609,30 @@ func (li *lineInfo) marshal(w io.Writer) error { return fmt.Errorf("column %d exceeds %d", line.lineColumn, bpfColumnMax) } + fileNameOff, err := b.addString(line.fileName) + if err != nil { + return fmt.Errorf("file name %q: %w", line.fileName, err) + } + + lineOff, err := b.addString(line.line) + if err != nil { + return fmt.Errorf("line %q: %w", line.line, err) + } + bli := bpfLineInfo{ uint32(li.offset), - line.fileNameOff, - line.lineOff, + fileNameOff, + lineOff, (line.lineNumber << bpfLineShift) | line.lineColumn, } - return binary.Write(w, internal.NativeEndian, &bli) + + buf := make([]byte, LineInfoSize) + internal.NativeEndian.PutUint32(buf, bli.InsnOff) + internal.NativeEndian.PutUint32(buf[4:], bli.FileNameOff) + internal.NativeEndian.PutUint32(buf[8:], bli.LineOff) + internal.NativeEndian.PutUint32(buf[12:], bli.LineCol) + _, err = w.Write(buf) + return err } // parseLineInfos parses a line_info sub-section within .BTF.ext ito a map of @@ -556,7 +653,7 @@ func parseLineInfos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map return nil, err } - records, err := parseLineInfoRecords(r, bo, recordSize, infoHeader.NumInfo) + records, err := parseLineInfoRecords(r, bo, recordSize, infoHeader.NumInfo, true) if err != nil { return nil, fmt.Errorf("section %v: %w", secName, err) } @@ -568,8 +665,7 @@ func parseLineInfos(r io.Reader, bo binary.ByteOrder, strings *stringTable) (map // parseLineInfoRecords parses a stream of line_infos into a lineInfos. // These records appear after a btf_ext_info_sec header in the line_info // sub-section of .BTF.ext. -func parseLineInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32) ([]bpfLineInfo, error) { - var out []bpfLineInfo +func parseLineInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, recordNum uint32, offsetInBytes bool) ([]bpfLineInfo, error) { var li bpfLineInfo if exp, got := uint32(binary.Size(li)), recordSize; exp != got { @@ -577,18 +673,21 @@ func parseLineInfoRecords(r io.Reader, bo binary.ByteOrder, recordSize uint32, r return nil, fmt.Errorf("expected LineInfo record size %d, but BTF blob contains %d", exp, got) } + out := make([]bpfLineInfo, 0, recordNum) for i := uint32(0); i < recordNum; i++ { if err := binary.Read(r, bo, &li); err != nil { return nil, fmt.Errorf("can't read line info: %v", err) } - if li.InsnOff%asm.InstructionSize != 0 { - return nil, fmt.Errorf("offset %v is not aligned with instruction size", li.InsnOff) - } + if offsetInBytes { + if li.InsnOff%asm.InstructionSize != 0 { + return nil, fmt.Errorf("offset %v is not aligned with instruction size", li.InsnOff) + } - // ELF tracks offset in bytes, the kernel expects raw BPF instructions. - // Convert as early as possible. - li.InsnOff /= asm.InstructionSize + // ELF tracks offset in bytes, the kernel expects raw BPF instructions. + // Convert as early as possible. + li.InsnOff /= asm.InstructionSize + } out = append(out, li) } @@ -605,9 +704,16 @@ type bpfCORERelo struct { } type CORERelocation struct { + // The local type of the relocation, stripped of typedefs and qualifiers. typ Type accessor coreAccessor kind coreKind + // The ID of the local type in the source BTF. 
+ id TypeID +} + +func (cr *CORERelocation) String() string { + return fmt.Sprintf("CORERelocation(%s, %s[%s], local_id=%d)", cr.kind, cr.typ, cr.accessor, cr.id) } func CORERelocationMetadata(ins *asm.Instruction) *CORERelocation { @@ -615,13 +721,18 @@ func CORERelocationMetadata(ins *asm.Instruction) *CORERelocation { return relo } +// CORERelocationInfos contains a sorted list of co:re relocation infos. +type CORERelocationInfos struct { + infos []coreRelocationInfo +} + type coreRelocationInfo struct { relo *CORERelocation offset asm.RawInstructionOffset } -func newRelocationInfo(relo bpfCORERelo, ts types, strings *stringTable) (*coreRelocationInfo, error) { - typ, err := ts.ByID(relo.TypeID) +func newRelocationInfo(relo bpfCORERelo, spec *Spec, strings *stringTable) (*coreRelocationInfo, error) { + typ, err := spec.TypeByID(relo.TypeID) if err != nil { return nil, err } @@ -641,22 +752,25 @@ func newRelocationInfo(relo bpfCORERelo, ts types, strings *stringTable) (*coreR typ, accessor, relo.Kind, + relo.TypeID, }, asm.RawInstructionOffset(relo.InsnOff), }, nil } -func newRelocationInfos(brs []bpfCORERelo, ts types, strings *stringTable) ([]coreRelocationInfo, error) { - rs := make([]coreRelocationInfo, 0, len(brs)) +func newRelocationInfos(brs []bpfCORERelo, spec *Spec, strings *stringTable) (CORERelocationInfos, error) { + rs := CORERelocationInfos{ + infos: make([]coreRelocationInfo, 0, len(brs)), + } for _, br := range brs { - relo, err := newRelocationInfo(br, ts, strings) + relo, err := newRelocationInfo(br, spec, strings) if err != nil { - return nil, fmt.Errorf("offset %d: %w", br.InsnOff, err) + return CORERelocationInfos{}, fmt.Errorf("offset %d: %w", br.InsnOff, err) } - rs = append(rs, *relo) + rs.infos = append(rs.infos, *relo) } - sort.Slice(rs, func(i, j int) bool { - return rs[i].offset < rs[j].offset + sort.Slice(rs.infos, func(i, j int) bool { + return rs.infos[i].offset < rs.infos[j].offset }) return rs, nil } diff --git a/vendor/github.com/cilium/ebpf/btf/feature.go b/vendor/github.com/cilium/ebpf/btf/feature.go new file mode 100644 index 0000000000..6feb08dfbb --- /dev/null +++ b/vendor/github.com/cilium/ebpf/btf/feature.go @@ -0,0 +1,123 @@ +package btf + +import ( + "errors" + "math" + + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/unix" +) + +// haveBTF attempts to load a BTF blob containing an Int. It should pass on any +// kernel that supports BPF_BTF_LOAD. +var haveBTF = internal.NewFeatureTest("BTF", "4.18", func() error { + // 0-length anonymous integer + err := probeBTF(&Int{}) + if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) { + return internal.ErrNotSupported + } + return err +}) + +// haveMapBTF attempts to load a minimal BTF blob containing a Var. It is +// used as a proxy for .bss, .data and .rodata map support, which generally +// come with a Var and Datasec. These were introduced in Linux 5.2. +var haveMapBTF = internal.NewFeatureTest("Map BTF (Var/Datasec)", "5.2", func() error { + if err := haveBTF(); err != nil { + return err + } + + v := &Var{ + Name: "a", + Type: &Pointer{(*Void)(nil)}, + } + + err := probeBTF(v) + if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) { + // Treat both EINVAL and EPERM as not supported: creating the map may still + // succeed without Btf* attrs. + return internal.ErrNotSupported + } + return err +}) + +// haveProgBTF attempts to load a BTF blob containing a Func and FuncProto. 
It +// is used as a proxy for ext_info (func_info) support, which depends on +// Func(Proto) by definition. +var haveProgBTF = internal.NewFeatureTest("Program BTF (func/line_info)", "5.0", func() error { + if err := haveBTF(); err != nil { + return err + } + + fn := &Func{ + Name: "a", + Type: &FuncProto{Return: (*Void)(nil)}, + } + + err := probeBTF(fn) + if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) { + return internal.ErrNotSupported + } + return err +}) + +var haveFuncLinkage = internal.NewFeatureTest("BTF func linkage", "5.6", func() error { + if err := haveProgBTF(); err != nil { + return err + } + + fn := &Func{ + Name: "a", + Type: &FuncProto{Return: (*Void)(nil)}, + Linkage: GlobalFunc, + } + + err := probeBTF(fn) + if errors.Is(err, unix.EINVAL) { + return internal.ErrNotSupported + } + return err +}) + +var haveEnum64 = internal.NewFeatureTest("ENUM64", "6.0", func() error { + if err := haveBTF(); err != nil { + return err + } + + enum := &Enum{ + Size: 8, + Values: []EnumValue{ + {"TEST", math.MaxUint32 + 1}, + }, + } + + err := probeBTF(enum) + if errors.Is(err, unix.EINVAL) { + return internal.ErrNotSupported + } + return err +}) + +func probeBTF(typ Type) error { + b, err := NewBuilder([]Type{typ}) + if err != nil { + return err + } + + buf, err := b.Marshal(nil, nil) + if err != nil { + return err + } + + fd, err := sys.BtfLoad(&sys.BtfLoadAttr{ + Btf: sys.NewSlicePointer(buf), + BtfSize: uint32(len(buf)), + }) + + if err == nil { + fd.Close() + } + + return err +} diff --git a/vendor/github.com/cilium/ebpf/btf/format.go b/vendor/github.com/cilium/ebpf/btf/format.go index e7688a2a6e..5e581b4a85 100644 --- a/vendor/github.com/cilium/ebpf/btf/format.go +++ b/vendor/github.com/cilium/ebpf/btf/format.go @@ -56,54 +56,46 @@ func (gf *GoFormatter) enumIdentifier(name, element string) string { // // It encodes https://golang.org/ref/spec#Type_declarations: // -// type foo struct { bar uint32; } -// type bar int32 +// type foo struct { bar uint32; } +// type bar int32 func (gf *GoFormatter) writeTypeDecl(name string, typ Type) error { if name == "" { return fmt.Errorf("need a name for type %s", typ) } - switch v := skipQualifiers(typ).(type) { - case *Enum: - fmt.Fprintf(&gf.w, "type %s ", name) - switch v.Size { - case 1: - gf.w.WriteString("int8") - case 2: - gf.w.WriteString("int16") - case 4: - gf.w.WriteString("int32") - case 8: - gf.w.WriteString("int64") - default: - return fmt.Errorf("%s: invalid enum size %d", typ, v.Size) - } - - if len(v.Values) == 0 { - return nil - } - - gf.w.WriteString("; const ( ") - for _, ev := range v.Values { - id := gf.enumIdentifier(name, ev.Name) - fmt.Fprintf(&gf.w, "%s %s = %d; ", id, name, ev.Value) - } - gf.w.WriteString(")") + typ = skipQualifiers(typ) + fmt.Fprintf(&gf.w, "type %s ", name) + if err := gf.writeTypeLit(typ, 0); err != nil { + return err + } + e, ok := typ.(*Enum) + if !ok || len(e.Values) == 0 { return nil + } - default: - fmt.Fprintf(&gf.w, "type %s ", name) - return gf.writeTypeLit(v, 0) + gf.w.WriteString("; const ( ") + for _, ev := range e.Values { + id := gf.enumIdentifier(name, ev.Name) + var value any + if e.Signed { + value = int64(ev.Value) + } else { + value = ev.Value + } + fmt.Fprintf(&gf.w, "%s %s = %d; ", id, name, value) } + gf.w.WriteString(")") + + return nil } // writeType outputs the name of a named type or a literal describing the type. // // It encodes https://golang.org/ref/spec#Types. 
// -// foo (if foo is a named type) -// uint32 +// foo (if foo is a named type) +// uint32 func (gf *GoFormatter) writeType(typ Type, depth int) error { typ = skipQualifiers(typ) @@ -122,21 +114,35 @@ func (gf *GoFormatter) writeType(typ Type, depth int) error { // // It encodes https://golang.org/ref/spec#TypeLit. // -// struct { bar uint32; } -// uint32 +// struct { bar uint32; } +// uint32 func (gf *GoFormatter) writeTypeLit(typ Type, depth int) error { depth++ - if depth > maxTypeDepth { + if depth > maxResolveDepth { return errNestedTooDeep } var err error switch v := skipQualifiers(typ).(type) { case *Int: - gf.writeIntLit(v) + err = gf.writeIntLit(v) case *Enum: - gf.w.WriteString("int32") + if !v.Signed { + gf.w.WriteRune('u') + } + switch v.Size { + case 1: + gf.w.WriteString("int8") + case 2: + gf.w.WriteString("int16") + case 4: + gf.w.WriteString("int32") + case 8: + gf.w.WriteString("int64") + default: + err = fmt.Errorf("invalid enum size %d", v.Size) + } case *Typedef: err = gf.writeType(v.Type, depth) @@ -166,19 +172,36 @@ func (gf *GoFormatter) writeTypeLit(typ Type, depth int) error { return nil } -func (gf *GoFormatter) writeIntLit(i *Int) { - // NB: Encoding.IsChar is ignored. - if i.Encoding.IsBool() && i.Size == 1 { - gf.w.WriteString("bool") - return - } - +func (gf *GoFormatter) writeIntLit(i *Int) error { bits := i.Size * 8 - if i.Encoding.IsSigned() { - fmt.Fprintf(&gf.w, "int%d", bits) - } else { - fmt.Fprintf(&gf.w, "uint%d", bits) + switch i.Encoding { + case Bool: + if i.Size != 1 { + return fmt.Errorf("bool with size %d", i.Size) + } + gf.w.WriteString("bool") + case Char: + if i.Size != 1 { + return fmt.Errorf("char with size %d", i.Size) + } + // BTF doesn't have a way to specify the signedness of a char. Assume + // we are dealing with unsigned, since this works nicely with []byte + // in Go code. + fallthrough + case Unsigned, Signed: + stem := "uint" + if i.Encoding == Signed { + stem = "int" + } + if i.Size > 8 { + fmt.Fprintf(&gf.w, "[%d]byte /* %s%d */", i.Size, stem, i.Size*8) + } else { + fmt.Fprintf(&gf.w, "%s%d", stem, bits) + } + default: + return fmt.Errorf("can't encode %s", i.Encoding) } + return nil } func (gf *GoFormatter) writeStructLit(size uint32, members []Member, depth int) error { @@ -199,11 +222,15 @@ func (gf *GoFormatter) writeStructLit(size uint32, members []Member, depth int) gf.writePadding(n) } - size, err := Sizeof(m.Type) + fieldSize, err := Sizeof(m.Type) if err != nil { return fmt.Errorf("field %d: %w", i, err) } - prevOffset = offset + uint32(size) + + prevOffset = offset + uint32(fieldSize) + if prevOffset > size { + return fmt.Errorf("field %d of size %d exceeds type size %d", i, fieldSize, size) + } if err := gf.writeStructField(m, depth); err != nil { return fmt.Errorf("field %d: %w", i, err) @@ -238,7 +265,7 @@ func (gf *GoFormatter) writeStructField(m Member, depth int) error { } depth++ - if depth > maxTypeDepth { + if depth > maxResolveDepth { return errNestedTooDeep } @@ -272,7 +299,11 @@ func (gf *GoFormatter) writeDatasecLit(ds *Datasec, depth int) error { prevOffset := uint32(0) for i, vsi := range ds.Vars { - v := vsi.Type.(*Var) + v, ok := vsi.Type.(*Var) + if !ok { + return fmt.Errorf("can't format %s as part of data section", vsi.Type) + } + if v.Linkage != GlobalVar { // Ignore static, extern, etc. for now. 
continue @@ -307,7 +338,7 @@ func (gf *GoFormatter) writePadding(bytes uint32) { func skipQualifiers(typ Type) Type { result := typ - for depth := 0; depth <= maxTypeDepth; depth++ { + for depth := 0; depth <= maxResolveDepth; depth++ { switch v := (result).(type) { case qualifier: result = v.qualify() diff --git a/vendor/github.com/cilium/ebpf/btf/handle.go b/vendor/github.com/cilium/ebpf/btf/handle.go index 128e9b35cf..b6b3e87f50 100644 --- a/vendor/github.com/cilium/ebpf/btf/handle.go +++ b/vendor/github.com/cilium/ebpf/btf/handle.go @@ -1,14 +1,142 @@ package btf import ( + "bytes" "errors" "fmt" + "math" "os" + "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/unix" ) +// Handle is a reference to BTF loaded into the kernel. +type Handle struct { + fd *sys.FD + + // Size of the raw BTF in bytes. + size uint32 + + needsKernelBase bool +} + +// NewHandle loads the contents of a [Builder] into the kernel. +// +// Returns an error wrapping ErrNotSupported if the kernel doesn't support BTF. +func NewHandle(b *Builder) (*Handle, error) { + small := getByteSlice() + defer putByteSlice(small) + + buf, err := b.Marshal(*small, KernelMarshalOptions()) + if err != nil { + return nil, fmt.Errorf("marshal BTF: %w", err) + } + + return NewHandleFromRawBTF(buf) +} + +// NewHandleFromRawBTF loads raw BTF into the kernel. +// +// Returns an error wrapping ErrNotSupported if the kernel doesn't support BTF. +func NewHandleFromRawBTF(btf []byte) (*Handle, error) { + if uint64(len(btf)) > math.MaxUint32 { + return nil, errors.New("BTF exceeds the maximum size") + } + + attr := &sys.BtfLoadAttr{ + Btf: sys.NewSlicePointer(btf), + BtfSize: uint32(len(btf)), + } + + fd, err := sys.BtfLoad(attr) + if err == nil { + return &Handle{fd, attr.BtfSize, false}, nil + } + + if err := haveBTF(); err != nil { + return nil, err + } + + logBuf := make([]byte, 64*1024) + attr.BtfLogBuf = sys.NewSlicePointer(logBuf) + attr.BtfLogSize = uint32(len(logBuf)) + attr.BtfLogLevel = 1 + + // Up until at least kernel 6.0, the BTF verifier does not return ENOSPC + // if there are other verification errors. ENOSPC is only returned when + // the BTF blob is correct, a log was requested, and the provided buffer + // is too small. + _, ve := sys.BtfLoad(attr) + return nil, internal.ErrorWithLog("load btf", err, logBuf, errors.Is(ve, unix.ENOSPC)) +} + +// NewHandleFromID returns the BTF handle for a given id. +// +// Prefer calling [ebpf.Program.Handle] or [ebpf.Map.Handle] if possible. +// +// Returns ErrNotExist, if there is no BTF with the given id. +// +// Requires CAP_SYS_ADMIN. +func NewHandleFromID(id ID) (*Handle, error) { + fd, err := sys.BtfGetFdById(&sys.BtfGetFdByIdAttr{ + Id: uint32(id), + }) + if err != nil { + return nil, fmt.Errorf("get FD for ID %d: %w", id, err) + } + + info, err := newHandleInfoFromFD(fd) + if err != nil { + _ = fd.Close() + return nil, err + } + + return &Handle{fd, info.size, info.IsModule()}, nil +} + +// Spec parses the kernel BTF into Go types. +// +// base must contain type information for vmlinux if the handle is for +// a kernel module. It may be nil otherwise. 
+func (h *Handle) Spec(base *Spec) (*Spec, error) { + var btfInfo sys.BtfInfo + btfBuffer := make([]byte, h.size) + btfInfo.Btf, btfInfo.BtfSize = sys.NewSlicePointerLen(btfBuffer) + + if err := sys.ObjInfo(h.fd, &btfInfo); err != nil { + return nil, err + } + + if h.needsKernelBase && base == nil { + return nil, fmt.Errorf("missing base types") + } + + return loadRawSpec(bytes.NewReader(btfBuffer), internal.NativeEndian, base) +} + +// Close destroys the handle. +// +// Subsequent calls to FD will return an invalid value. +func (h *Handle) Close() error { + if h == nil { + return nil + } + + return h.fd.Close() +} + +// FD returns the file descriptor for the handle. +func (h *Handle) FD() int { + return h.fd.Int() +} + +// Info returns metadata about the handle. +func (h *Handle) Info() (*HandleInfo, error) { + return newHandleInfoFromFD(h.fd) +} + // HandleInfo describes a Handle. type HandleInfo struct { // ID of this handle in the kernel. The ID is only valid as long as the @@ -59,7 +187,7 @@ }, nil } -// IsModule returns true if the BTF is for the kernel itself. +// IsVmlinux returns true if the BTF is for the kernel itself. func (i *HandleInfo) IsVmlinux() bool { return i.IsKernel && i.Name == "vmlinux" } @@ -71,51 +199,89 @@ // HandleIterator allows enumerating BTF blobs loaded into the kernel. type HandleIterator struct { - // The ID of the last retrieved handle. Only valid after a call to Next. - ID ID - err error + // The ID of the current handle. Only valid after a call to Next. + ID ID + // The current Handle. Only valid until a call to Next. + // See Take if you want to retain the handle. + Handle *Handle + err error } -// Next retrieves a handle for the next BTF blob. +// Next retrieves a handle for the next BTF object. // -// [Handle.Close] is called if *handle is non-nil to avoid leaking fds. -// -// Returns true if another BTF blob was found. Call [HandleIterator.Err] after +// Returns true if another BTF object was found. Call [HandleIterator.Err] after // the function returns false. -func (it *HandleIterator) Next(handle **Handle) bool { - if *handle != nil { - (*handle).Close() - *handle = nil - } - +func (it *HandleIterator) Next() bool { id := it.ID for { attr := &sys.BtfGetNextIdAttr{Id: id} err := sys.BtfGetNextId(attr) if errors.Is(err, os.ErrNotExist) { // There are no more BTF objects. - return false + break } else if err != nil { it.err = fmt.Errorf("get next BTF ID: %w", err) - return false + break } id = attr.NextId - *handle, err = NewHandleFromID(id) + handle, err := NewHandleFromID(id) if errors.Is(err, os.ErrNotExist) { // Try again with the next ID. continue } else if err != nil { it.err = fmt.Errorf("retrieve handle for ID %d: %w", id, err) - return false + break } - it.ID = id + it.Handle.Close() + it.ID, it.Handle = id, handle return true } + + // No more handles or we encountered an error. + it.Handle.Close() + it.Handle = nil + return false +} + +// Take ownership of the current handle. +// +// It's the caller's responsibility to close the handle. +func (it *HandleIterator) Take() *Handle { + handle := it.Handle + it.Handle = nil + return handle } // Err returns an error if iteration failed for some reason. func (it *HandleIterator) Err() error { return it.err } + +// FindHandle returns the first handle for which predicate returns true. +// +// Requires CAP_SYS_ADMIN. 
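+//
+// For example, to find the handle for vmlinux (a sketch):
+//
+//	h, err := FindHandle(func(info *HandleInfo) bool {
+//		return info.IsVmlinux()
+//	})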
+// +// Returns an error wrapping ErrNotFound if predicate never returns true or if +// there is no BTF loaded into the kernel. +func FindHandle(predicate func(info *HandleInfo) bool) (*Handle, error) { + it := new(HandleIterator) + defer it.Handle.Close() + + for it.Next() { + info, err := it.Handle.Info() + if err != nil { + return nil, fmt.Errorf("info for ID %d: %w", it.ID, err) + } + + if predicate(info) { + return it.Take(), nil + } + } + if err := it.Err(); err != nil { + return nil, fmt.Errorf("iterate handles: %w", err) + } + + return nil, fmt.Errorf("find handle: %w", ErrNotFound) +} diff --git a/vendor/github.com/cilium/ebpf/btf/kernel.go b/vendor/github.com/cilium/ebpf/btf/kernel.go new file mode 100644 index 0000000000..8584ebcb93 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/btf/kernel.go @@ -0,0 +1,159 @@ +package btf + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "sync" + + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/kallsyms" +) + +var kernelBTF = struct { + sync.RWMutex + kernel *Spec + modules map[string]*Spec +}{ + modules: make(map[string]*Spec), +} + +// FlushKernelSpec removes any cached kernel type information. +func FlushKernelSpec() { + kallsyms.FlushKernelModuleCache() + + kernelBTF.Lock() + defer kernelBTF.Unlock() + + kernelBTF.kernel = nil + kernelBTF.modules = make(map[string]*Spec) +} + +// LoadKernelSpec returns the current kernel's BTF information. +// +// Defaults to /sys/kernel/btf/vmlinux and falls back to scanning the file system +// for vmlinux ELFs. Returns an error wrapping ErrNotSupported if BTF is not enabled. +func LoadKernelSpec() (*Spec, error) { + kernelBTF.RLock() + spec := kernelBTF.kernel + kernelBTF.RUnlock() + + if spec == nil { + kernelBTF.Lock() + defer kernelBTF.Unlock() + + spec = kernelBTF.kernel + } + + if spec != nil { + return spec.Copy(), nil + } + + spec, _, err := loadKernelSpec() + if err != nil { + return nil, err + } + + kernelBTF.kernel = spec + return spec.Copy(), nil +} + +// LoadKernelModuleSpec returns the BTF information for the named kernel module. +// +// Defaults to /sys/kernel/btf/<module>. +// Returns an error wrapping ErrNotSupported if BTF is not enabled. +// Returns an error wrapping fs.ErrNotExist if BTF for the specific module doesn't exist. 
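+//
+// A usage sketch; "nf_conntrack" is only an example module name:
+//
+//	spec, err := LoadKernelModuleSpec("nf_conntrack")
+//	if err != nil {
+//		// the module is not loaded or was built without BTF
+//	}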
+func LoadKernelModuleSpec(module string) (*Spec, error) { + kernelBTF.RLock() + spec := kernelBTF.modules[module] + kernelBTF.RUnlock() + + if spec != nil { + return spec.Copy(), nil + } + + base, err := LoadKernelSpec() + if err != nil { + return nil, fmt.Errorf("load kernel spec: %w", err) + } + + kernelBTF.Lock() + defer kernelBTF.Unlock() + + if spec = kernelBTF.modules[module]; spec != nil { + return spec.Copy(), nil + } + + spec, err = loadKernelModuleSpec(module, base) + if err != nil { + return nil, err + } + + kernelBTF.modules[module] = spec + return spec.Copy(), nil +} + +func loadKernelSpec() (_ *Spec, fallback bool, _ error) { + fh, err := os.Open("/sys/kernel/btf/vmlinux") + if err == nil { + defer fh.Close() + + spec, err := loadRawSpec(fh, internal.NativeEndian, nil) + return spec, false, err + } + + file, err := findVMLinux() + if err != nil { + return nil, false, err + } + defer file.Close() + + spec, err := LoadSpecFromReader(file) + return spec, true, err +} + +func loadKernelModuleSpec(module string, base *Spec) (*Spec, error) { + dir, file := filepath.Split(module) + if dir != "" || filepath.Ext(file) != "" { + return nil, fmt.Errorf("invalid module name %q", module) + } + + fh, err := os.Open(filepath.Join("/sys/kernel/btf", module)) + if err != nil { + return nil, err + } + defer fh.Close() + + return loadRawSpec(fh, internal.NativeEndian, base) +} + +// findVMLinux scans multiple well-known paths for vmlinux kernel images. +func findVMLinux() (*os.File, error) { + release, err := internal.KernelRelease() + if err != nil { + return nil, err + } + + // use same list of locations as libbpf + // https://github.com/libbpf/libbpf/blob/9a3a42608dbe3731256a5682a125ac1e23bced8f/src/btf.c#L3114-L3122 + locations := []string{ + "/boot/vmlinux-%s", + "/lib/modules/%s/vmlinux-%[1]s", + "/lib/modules/%s/build/vmlinux", + "/usr/lib/modules/%s/kernel/vmlinux", + "/usr/lib/debug/boot/vmlinux-%s", + "/usr/lib/debug/boot/vmlinux-%s.debug", + "/usr/lib/debug/lib/modules/%s/vmlinux", + } + + for _, loc := range locations { + file, err := os.Open(fmt.Sprintf(loc, release)) + if errors.Is(err, os.ErrNotExist) { + continue + } + return file, err + } + + return nil, fmt.Errorf("no BTF found for kernel version %s: %w", release, internal.ErrNotSupported) +} diff --git a/vendor/github.com/cilium/ebpf/btf/marshal.go b/vendor/github.com/cilium/ebpf/btf/marshal.go new file mode 100644 index 0000000000..f14cfa6e97 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/btf/marshal.go @@ -0,0 +1,611 @@ +package btf + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "maps" + "math" + "slices" + "sync" + + "github.com/cilium/ebpf/internal" +) + +type MarshalOptions struct { + // Target byte order. Defaults to the system's native endianness. + Order binary.ByteOrder + // Remove function linkage information for compatibility with <5.6 kernels. + StripFuncLinkage bool + // Replace Enum64 with a placeholder for compatibility with <6.0 kernels. + ReplaceEnum64 bool + // Prevent the "No type found" error when loading BTF without any types. + PreventNoTypeFound bool +} + +// KernelMarshalOptions will generate BTF suitable for the current kernel. +func KernelMarshalOptions() *MarshalOptions { + return &MarshalOptions{ + Order: internal.NativeEndian, + StripFuncLinkage: haveFuncLinkage() != nil, + ReplaceEnum64: haveEnum64() != nil, + PreventNoTypeFound: true, // All current kernels require this. + } +} + +// encoder turns Types into raw BTF. 
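+// It drains the pending deque in FIFO order, assigning each type an ID and
+// appending its wire representation to buf, while deduplicating strings
+// through the string table builder.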
+type encoder struct { + MarshalOptions + + pending internal.Deque[Type] + buf *bytes.Buffer + strings *stringTableBuilder + ids map[Type]TypeID + visited map[Type]struct{} + lastID TypeID +} + +var bufferPool = sync.Pool{ + New: func() any { + buf := make([]byte, btfHeaderLen+128) + return &buf + }, +} + +func getByteSlice() *[]byte { + return bufferPool.Get().(*[]byte) +} + +func putByteSlice(buf *[]byte) { + *buf = (*buf)[:0] + bufferPool.Put(buf) +} + +// Builder turns Types into raw BTF. +// +// The default value may be used and represents an empty BTF blob. Void is +// added implicitly if necessary. +type Builder struct { + // Explicitly added types. + types []Type + // IDs for all added types which the user knows about. + stableIDs map[Type]TypeID + // Explicitly added strings. + strings *stringTableBuilder +} + +// NewBuilder creates a Builder from a list of types. +// +// It is more efficient than calling [Add] individually. +// +// Returns an error if adding any of the types fails. +func NewBuilder(types []Type) (*Builder, error) { + b := &Builder{ + make([]Type, 0, len(types)), + make(map[Type]TypeID, len(types)), + nil, + } + + for _, typ := range types { + _, err := b.Add(typ) + if err != nil { + return nil, fmt.Errorf("add %s: %w", typ, err) + } + } + + return b, nil +} + +// Empty returns true if neither types nor strings have been added. +func (b *Builder) Empty() bool { + return len(b.types) == 0 && (b.strings == nil || b.strings.Length() == 0) +} + +// Add a Type and allocate a stable ID for it. +// +// Adding the identical Type multiple times is valid and will return the same ID. +// +// See [Type] for details on identity. +func (b *Builder) Add(typ Type) (TypeID, error) { + if b.stableIDs == nil { + b.stableIDs = make(map[Type]TypeID) + } + + if _, ok := typ.(*Void); ok { + // Equality is weird for void, since it is a zero sized type. + return 0, nil + } + + if ds, ok := typ.(*Datasec); ok { + if err := datasecResolveWorkaround(b, ds); err != nil { + return 0, err + } + } + + id, ok := b.stableIDs[typ] + if ok { + return id, nil + } + + b.types = append(b.types, typ) + + id = TypeID(len(b.types)) + if int(id) != len(b.types) { + return 0, fmt.Errorf("no more type IDs") + } + + b.stableIDs[typ] = id + return id, nil +} + +// Marshal encodes all types in the Marshaler into BTF wire format. +// +// opts may be nil. +func (b *Builder) Marshal(buf []byte, opts *MarshalOptions) ([]byte, error) { + stb := b.strings + if stb == nil { + // Assume that most types are named. This makes encoding large BTF like + // vmlinux a lot cheaper. + stb = newStringTableBuilder(len(b.types)) + } else { + // Avoid modifying the Builder's string table. + stb = b.strings.Copy() + } + + if opts == nil { + opts = &MarshalOptions{Order: internal.NativeEndian} + } + + // Reserve space for the BTF header. + buf = slices.Grow(buf, btfHeaderLen)[:btfHeaderLen] + + w := internal.NewBuffer(buf) + defer internal.PutBuffer(w) + + e := encoder{ + MarshalOptions: *opts, + buf: w, + strings: stb, + lastID: TypeID(len(b.types)), + visited: make(map[Type]struct{}, len(b.types)), + ids: maps.Clone(b.stableIDs), + } + + if e.ids == nil { + e.ids = make(map[Type]TypeID) + } + + types := b.types + if len(types) == 0 && stb.Length() > 0 && opts.PreventNoTypeFound { + // We have strings that need to be written out, + // but no types (besides the implicit Void). + // Kernels as recent as v6.7 refuse to load such BTF + // with a "No type found" error in the log. + // Fix this by adding a dummy type. 
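+		// The placeholder is an unnamed, zero-sized Int.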
+ types = []Type{&Int{Size: 0}} + } + + // Ensure that types are marshaled in the exact order they were Add()ed. + // Otherwise the ID returned from Add() won't match. + e.pending.Grow(len(types)) + for _, typ := range types { + e.pending.Push(typ) + } + + if err := e.deflatePending(); err != nil { + return nil, err + } + + length := e.buf.Len() + typeLen := uint32(length - btfHeaderLen) + + stringLen := e.strings.Length() + buf = e.strings.AppendEncoded(e.buf.Bytes()) + + // Fill out the header, and write it out. + header := &btfHeader{ + Magic: btfMagic, + Version: 1, + Flags: 0, + HdrLen: uint32(btfHeaderLen), + TypeOff: 0, + TypeLen: typeLen, + StringOff: typeLen, + StringLen: uint32(stringLen), + } + + err := binary.Write(sliceWriter(buf[:btfHeaderLen]), e.Order, header) + if err != nil { + return nil, fmt.Errorf("write header: %v", err) + } + + return buf, nil +} + +// addString adds a string to the resulting BTF. +// +// Adding the same string multiple times will return the same result. +// +// Returns an identifier into the string table or an error if the string +// contains invalid characters. +func (b *Builder) addString(str string) (uint32, error) { + if b.strings == nil { + b.strings = newStringTableBuilder(0) + } + + return b.strings.Add(str) +} + +func (e *encoder) allocateIDs(root Type) (err error) { + visitInPostorder(root, e.visited, func(typ Type) bool { + if _, ok := typ.(*Void); ok { + return true + } + + if _, ok := e.ids[typ]; ok { + return true + } + + id := e.lastID + 1 + if id < e.lastID { + err = errors.New("type ID overflow") + return false + } + + e.pending.Push(typ) + e.ids[typ] = id + e.lastID = id + return true + }) + return +} + +// id returns the ID for the given type or panics with an error. +func (e *encoder) id(typ Type) TypeID { + if _, ok := typ.(*Void); ok { + return 0 + } + + id, ok := e.ids[typ] + if !ok { + panic(fmt.Errorf("no ID for type %v", typ)) + } + + return id +} + +func (e *encoder) deflatePending() error { + // Declare root outside of the loop to avoid repeated heap allocations. + var root Type + + for !e.pending.Empty() { + root = e.pending.Shift() + + // Allocate IDs for all children of typ, including transitive dependencies. + if err := e.allocateIDs(root); err != nil { + return err + } + + if err := e.deflateType(root); err != nil { + id := e.ids[root] + return fmt.Errorf("deflate %v with ID %d: %w", root, id, err) + } + } + + return nil +} + +func (e *encoder) deflateType(typ Type) (err error) { + defer func() { + if r := recover(); r != nil { + var ok bool + err, ok = r.(error) + if !ok { + panic(r) + } + } + }() + + var raw rawType + raw.NameOff, err = e.strings.Add(typ.TypeName()) + if err != nil { + return err + } + + switch v := typ.(type) { + case *Void: + return errors.New("Void is implicit in BTF wire format") + + case *Int: + raw.SetKind(kindInt) + raw.SetSize(v.Size) + + var bi btfInt + bi.SetEncoding(v.Encoding) + // We need to set bits in addition to size, since btf_type_int_is_regular + // otherwise flags this as a bitfield. 
+ bi.SetBits(byte(v.Size) * 8) + raw.data = bi + + case *Pointer: + raw.SetKind(kindPointer) + raw.SetType(e.id(v.Target)) + + case *Array: + raw.SetKind(kindArray) + raw.data = &btfArray{ + e.id(v.Type), + e.id(v.Index), + v.Nelems, + } + + case *Struct: + raw.SetKind(kindStruct) + raw.SetSize(v.Size) + raw.data, err = e.convertMembers(&raw.btfType, v.Members) + + case *Union: + err = e.deflateUnion(&raw, v) + + case *Enum: + if v.Size == 8 { + err = e.deflateEnum64(&raw, v) + } else { + err = e.deflateEnum(&raw, v) + } + + case *Fwd: + raw.SetKind(kindForward) + raw.SetFwdKind(v.Kind) + + case *Typedef: + raw.SetKind(kindTypedef) + raw.SetType(e.id(v.Type)) + + case *Volatile: + raw.SetKind(kindVolatile) + raw.SetType(e.id(v.Type)) + + case *Const: + raw.SetKind(kindConst) + raw.SetType(e.id(v.Type)) + + case *Restrict: + raw.SetKind(kindRestrict) + raw.SetType(e.id(v.Type)) + + case *Func: + raw.SetKind(kindFunc) + raw.SetType(e.id(v.Type)) + if !e.StripFuncLinkage { + raw.SetLinkage(v.Linkage) + } + + case *FuncProto: + raw.SetKind(kindFuncProto) + raw.SetType(e.id(v.Return)) + raw.SetVlen(len(v.Params)) + raw.data, err = e.deflateFuncParams(v.Params) + + case *Var: + raw.SetKind(kindVar) + raw.SetType(e.id(v.Type)) + raw.data = btfVariable{uint32(v.Linkage)} + + case *Datasec: + raw.SetKind(kindDatasec) + raw.SetSize(v.Size) + raw.SetVlen(len(v.Vars)) + raw.data = e.deflateVarSecinfos(v.Vars) + + case *Float: + raw.SetKind(kindFloat) + raw.SetSize(v.Size) + + case *declTag: + raw.SetKind(kindDeclTag) + raw.SetType(e.id(v.Type)) + raw.data = &btfDeclTag{uint32(v.Index)} + raw.NameOff, err = e.strings.Add(v.Value) + + case *typeTag: + raw.SetKind(kindTypeTag) + raw.SetType(e.id(v.Type)) + raw.NameOff, err = e.strings.Add(v.Value) + + default: + return fmt.Errorf("don't know how to deflate %T", v) + } + + if err != nil { + return err + } + + return raw.Marshal(e.buf, e.Order) +} + +func (e *encoder) deflateUnion(raw *rawType, union *Union) (err error) { + raw.SetKind(kindUnion) + raw.SetSize(union.Size) + raw.data, err = e.convertMembers(&raw.btfType, union.Members) + return +} + +func (e *encoder) convertMembers(header *btfType, members []Member) ([]btfMember, error) { + bms := make([]btfMember, 0, len(members)) + isBitfield := false + for _, member := range members { + isBitfield = isBitfield || member.BitfieldSize > 0 + + offset := member.Offset + if isBitfield { + offset = member.BitfieldSize<<24 | (member.Offset & 0xffffff) + } + + nameOff, err := e.strings.Add(member.Name) + if err != nil { + return nil, err + } + + bms = append(bms, btfMember{ + nameOff, + e.id(member.Type), + uint32(offset), + }) + } + + header.SetVlen(len(members)) + header.SetBitfield(isBitfield) + return bms, nil +} + +func (e *encoder) deflateEnum(raw *rawType, enum *Enum) (err error) { + raw.SetKind(kindEnum) + raw.SetSize(enum.Size) + raw.SetVlen(len(enum.Values)) + // Signedness appeared together with ENUM64 support. 
+ raw.SetSigned(enum.Signed && !e.ReplaceEnum64) + raw.data, err = e.deflateEnumValues(enum) + return +} + +func (e *encoder) deflateEnumValues(enum *Enum) ([]btfEnum, error) { + bes := make([]btfEnum, 0, len(enum.Values)) + for _, value := range enum.Values { + nameOff, err := e.strings.Add(value.Name) + if err != nil { + return nil, err + } + + if enum.Signed { + if signedValue := int64(value.Value); signedValue < math.MinInt32 || signedValue > math.MaxInt32 { + return nil, fmt.Errorf("value %d of enum %q exceeds 32 bits", signedValue, value.Name) + } + } else { + if value.Value > math.MaxUint32 { + return nil, fmt.Errorf("value %d of enum %q exceeds 32 bits", value.Value, value.Name) + } + } + + bes = append(bes, btfEnum{ + nameOff, + uint32(value.Value), + }) + } + + return bes, nil +} + +func (e *encoder) deflateEnum64(raw *rawType, enum *Enum) (err error) { + if e.ReplaceEnum64 { + // Replace the ENUM64 with a union of fields with the correct size. + // This matches libbpf behaviour on purpose. + placeholder := &Int{ + "enum64_placeholder", + enum.Size, + Unsigned, + } + if enum.Signed { + placeholder.Encoding = Signed + } + if err := e.allocateIDs(placeholder); err != nil { + return fmt.Errorf("add enum64 placeholder: %w", err) + } + + members := make([]Member, 0, len(enum.Values)) + for _, v := range enum.Values { + members = append(members, Member{ + Name: v.Name, + Type: placeholder, + }) + } + + return e.deflateUnion(raw, &Union{enum.Name, enum.Size, members}) + } + + raw.SetKind(kindEnum64) + raw.SetSize(enum.Size) + raw.SetVlen(len(enum.Values)) + raw.SetSigned(enum.Signed) + raw.data, err = e.deflateEnum64Values(enum.Values) + return +} + +func (e *encoder) deflateEnum64Values(values []EnumValue) ([]btfEnum64, error) { + bes := make([]btfEnum64, 0, len(values)) + for _, value := range values { + nameOff, err := e.strings.Add(value.Name) + if err != nil { + return nil, err + } + + bes = append(bes, btfEnum64{ + nameOff, + uint32(value.Value), + uint32(value.Value >> 32), + }) + } + + return bes, nil +} + +func (e *encoder) deflateFuncParams(params []FuncParam) ([]btfParam, error) { + bps := make([]btfParam, 0, len(params)) + for _, param := range params { + nameOff, err := e.strings.Add(param.Name) + if err != nil { + return nil, err + } + + bps = append(bps, btfParam{ + nameOff, + e.id(param.Type), + }) + } + return bps, nil +} + +func (e *encoder) deflateVarSecinfos(vars []VarSecinfo) []btfVarSecinfo { + vsis := make([]btfVarSecinfo, 0, len(vars)) + for _, v := range vars { + vsis = append(vsis, btfVarSecinfo{ + e.id(v.Type), + v.Offset, + v.Size, + }) + } + return vsis +} + +// MarshalMapKV creates a BTF object containing a map key and value. +// +// The function is intended for the use of the ebpf package and may be removed +// at any point in time. +func MarshalMapKV(key, value Type) (_ *Handle, keyID, valueID TypeID, err error) { + var b Builder + + if key != nil { + keyID, err = b.Add(key) + if err != nil { + return nil, 0, 0, fmt.Errorf("add key type: %w", err) + } + } + + if value != nil { + valueID, err = b.Add(value) + if err != nil { + return nil, 0, 0, fmt.Errorf("add value type: %w", err) + } + } + + handle, err := NewHandle(&b) + if err != nil { + // Check for 'full' map BTF support, since kernels between 4.18 and 5.2 + // already support BTF blobs for maps without Var or Datasec just fine. 
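+		// Prefer the probe's error if map BTF is unsupported altogether;
+		// otherwise fall through and return the original error.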
+		if err := haveMapBTF(); err != nil {
+			return nil, 0, 0, err
+		}
+	}
+	return handle, keyID, valueID, err
+}
diff --git a/vendor/github.com/cilium/ebpf/btf/strings.go b/vendor/github.com/cilium/ebpf/btf/strings.go
index 67626e0dd1..7c31461c30 100644
--- a/vendor/github.com/cilium/ebpf/btf/strings.go
+++ b/vendor/github.com/cilium/ebpf/btf/strings.go
@@ -6,11 +6,15 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"maps"
+	"slices"
+	"strings"
 )
 
 type stringTable struct {
 	base    *stringTable
 	offsets []uint32
+	prevIdx int
 	strings []string
 }
 
@@ -57,7 +61,7 @@ func readStringTable(r sizedReader, base *stringTable) (*stringTable, error) {
 		return nil, errors.New("first item in string table is non-empty")
 	}
 
-	return &stringTable{base, offsets, strings}, nil
+	return &stringTable{base, offsets, 0, strings}, nil
 }
 
 func splitNull(data []byte, atEOF bool) (advance int, token []byte, err error) {
@@ -80,49 +84,115 @@ func (st *stringTable) Lookup(offset uint32) (string, error) {
 }
 
 func (st *stringTable) lookup(offset uint32) (string, error) {
-	i := search(st.offsets, offset)
-	if i == len(st.offsets) || st.offsets[i] != offset {
+	// Fast path: zero offset is the empty string, looked up frequently.
+	if offset == 0 && st.base == nil {
+		return "", nil
+	}
+
+	// Accesses tend to be globally increasing, so check if the next string is
+	// the one we want. This skips the binary search in about 50% of cases.
+	if st.prevIdx+1 < len(st.offsets) && st.offsets[st.prevIdx+1] == offset {
+		st.prevIdx++
+		return st.strings[st.prevIdx], nil
+	}
+
+	i, found := slices.BinarySearch(st.offsets, offset)
+	if !found {
 		return "", fmt.Errorf("offset %d isn't start of a string", offset)
 	}
 
+	// Set the new increment index, but only if it's greater than the current.
+	if i > st.prevIdx+1 {
+		st.prevIdx = i
+	}
+
 	return st.strings[i], nil
 }
 
-func (st *stringTable) Length() int {
-	last := len(st.offsets) - 1
-	return int(st.offsets[last]) + len(st.strings[last]) + 1
+// Num returns the number of strings in the table.
+func (st *stringTable) Num() int {
+	return len(st.strings)
 }
 
-func (st *stringTable) Marshal(w io.Writer) error {
-	for _, str := range st.strings {
-		_, err := io.WriteString(w, str)
-		if err != nil {
-			return err
-		}
-		_, err = w.Write([]byte{0})
-		if err != nil {
-			return err
-		}
+// stringTableBuilder builds BTF string tables.
+type stringTableBuilder struct {
+	length  uint32
+	strings map[string]uint32
+}
+
+// newStringTableBuilder creates a builder with the given capacity.
+//
+// capacity may be zero.
+func newStringTableBuilder(capacity int) *stringTableBuilder {
+	var stb stringTableBuilder
+
+	if capacity == 0 {
+		// Use the runtime's small default size.
+		stb.strings = make(map[string]uint32)
+	} else {
+		stb.strings = make(map[string]uint32, capacity)
 	}
-	return nil
+
+	// Ensure that the empty string is at index 0.
+	stb.append("")
+	return &stb
 }
 
-// search is a copy of sort.Search specialised for uint32.
+// Add a string to the table.
 //
-// Licensed under https://go.dev/LICENSE
-func search(ints []uint32, needle uint32) int {
-	// Define f(-1) == false and f(n) == true.
-	// Invariant: f(i-1) == false, f(j) == true.
-	i, j := 0, len(ints)
-	for i < j {
-		h := int(uint(i+j) >> 1) // avoid overflow when computing h
-		// i ≤ h < j
-		if !(ints[h] >= needle) {
-			i = h + 1 // preserves f(i-1) == false
-		} else {
-			j = h // preserves f(j) == true
-		}
+// Adding the same string multiple times will only store it once.
+func (stb *stringTableBuilder) Add(str string) (uint32, error) { + if strings.IndexByte(str, 0) != -1 { + return 0, fmt.Errorf("string contains null: %q", str) + } + + offset, ok := stb.strings[str] + if ok { + return offset, nil + } + + return stb.append(str), nil +} + +func (stb *stringTableBuilder) append(str string) uint32 { + offset := stb.length + stb.length += uint32(len(str)) + 1 + stb.strings[str] = offset + return offset +} + +// Lookup finds the offset of a string in the table. +// +// Returns an error if str hasn't been added yet. +func (stb *stringTableBuilder) Lookup(str string) (uint32, error) { + offset, ok := stb.strings[str] + if !ok { + return 0, fmt.Errorf("string %q is not in table", str) + } + + return offset, nil +} + +// Length returns the length in bytes. +func (stb *stringTableBuilder) Length() int { + return int(stb.length) +} + +// AppendEncoded appends the string table to the end of the provided buffer. +func (stb *stringTableBuilder) AppendEncoded(buf []byte) []byte { + n := len(buf) + buf = append(buf, make([]byte, stb.Length())...) + strings := buf[n:] + for str, offset := range stb.strings { + copy(strings[offset:], str) + } + return buf +} + +// Copy the string table builder. +func (stb *stringTableBuilder) Copy() *stringTableBuilder { + return &stringTableBuilder{ + stb.length, + maps.Clone(stb.strings), } - // i == j, f(i-1) == false, and f(j) (= f(i)) == true => answer is i. - return i } diff --git a/vendor/github.com/cilium/ebpf/btf/traversal.go b/vendor/github.com/cilium/ebpf/btf/traversal.go new file mode 100644 index 0000000000..c39dc66e46 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/btf/traversal.go @@ -0,0 +1,123 @@ +package btf + +import ( + "fmt" +) + +// Functions to traverse a cyclic graph of types. The below was very useful: +// https://eli.thegreenplace.net/2015/directed-graph-traversal-orderings-and-applications-to-data-flow-analysis/#post-order-and-reverse-post-order + +// Visit all types reachable from root in postorder. +// +// Traversal stops if yield returns false. +// +// Returns false if traversal was aborted. +func visitInPostorder(root Type, visited map[Type]struct{}, yield func(typ Type) bool) bool { + if _, ok := visited[root]; ok { + return true + } + if visited == nil { + visited = make(map[Type]struct{}) + } + visited[root] = struct{}{} + + cont := children(root, func(child *Type) bool { + return visitInPostorder(*child, visited, yield) + }) + if !cont { + return false + } + + return yield(root) +} + +// children calls yield on each child of typ. +// +// Traversal stops if yield returns false. +// +// Returns false if traversal was aborted. +func children(typ Type, yield func(child *Type) bool) bool { + // Explicitly type switch on the most common types to allow the inliner to + // do its work. This avoids allocating intermediate slices from walk() on + // the heap. + switch v := typ.(type) { + case *Void, *Int, *Enum, *Fwd, *Float: + // No children to traverse. 
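+		// These kinds are leaves of the type graph.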
+ case *Pointer: + if !yield(&v.Target) { + return false + } + case *Array: + if !yield(&v.Index) { + return false + } + if !yield(&v.Type) { + return false + } + case *Struct: + for i := range v.Members { + if !yield(&v.Members[i].Type) { + return false + } + } + case *Union: + for i := range v.Members { + if !yield(&v.Members[i].Type) { + return false + } + } + case *Typedef: + if !yield(&v.Type) { + return false + } + case *Volatile: + if !yield(&v.Type) { + return false + } + case *Const: + if !yield(&v.Type) { + return false + } + case *Restrict: + if !yield(&v.Type) { + return false + } + case *Func: + if !yield(&v.Type) { + return false + } + case *FuncProto: + if !yield(&v.Return) { + return false + } + for i := range v.Params { + if !yield(&v.Params[i].Type) { + return false + } + } + case *Var: + if !yield(&v.Type) { + return false + } + case *Datasec: + for i := range v.Vars { + if !yield(&v.Vars[i].Type) { + return false + } + } + case *declTag: + if !yield(&v.Type) { + return false + } + case *typeTag: + if !yield(&v.Type) { + return false + } + case *cycle: + // cycle has children, but we ignore them deliberately. + default: + panic(fmt.Sprintf("don't know how to walk Type %T", v)) + } + + return true +} diff --git a/vendor/github.com/cilium/ebpf/btf/types.go b/vendor/github.com/cilium/ebpf/btf/types.go index 402a363c28..3cb9184f00 100644 --- a/vendor/github.com/cilium/ebpf/btf/types.go +++ b/vendor/github.com/cilium/ebpf/btf/types.go @@ -1,21 +1,38 @@ package btf import ( + "encoding/binary" + "errors" "fmt" "io" "math" - "reflect" + "slices" "strings" "github.com/cilium/ebpf/asm" + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/sys" ) -const maxTypeDepth = 32 +// Mirrors MAX_RESOLVE_DEPTH in libbpf. +// https://github.com/libbpf/libbpf/blob/e26b84dc330c9644c07428c271ab491b0f01f4e1/src/btf.c#L761 +const maxResolveDepth = 32 // TypeID identifies a type in a BTF section. -type TypeID uint32 +type TypeID = sys.TypeID // Type represents a type described by BTF. +// +// Identity of Type follows the [Go specification]: two Types are considered +// equal if they have the same concrete type and the same dynamic value, aka +// they point at the same location in memory. This means that the following +// Types are considered distinct even though they have the same "shape". +// +// a := &Int{Size: 1} +// b := &Int{Size: 1} +// a != b +// +// [Go specification]: https://go.dev/ref/spec#Comparison_operators type Type interface { // Type can be formatted using the %s and %v verbs. %s outputs only the // identity of the type, without any detail. %v outputs additional detail. @@ -35,9 +52,7 @@ type Type interface { // Make a copy of the type, without copying Type members. copy() Type - // Enumerate all nested Types. Repeated calls must visit nested - // types in the same order. - walk(*typeDeque) + // New implementations must update walkType. } var ( @@ -51,20 +66,11 @@ var ( _ Type = (*Var)(nil) _ Type = (*Datasec)(nil) _ Type = (*Float)(nil) + _ Type = (*declTag)(nil) + _ Type = (*typeTag)(nil) + _ Type = (*cycle)(nil) ) -// types is a list of Type. -// -// The order determines the ID of a type. -type types []Type - -func (ts types) ByID(id TypeID) (Type, error) { - if int(id) > len(ts) { - return nil, fmt.Errorf("type ID %d: %w", id, ErrNotFound) - } - return ts[id], nil -} - // Void is the unit type of BTF. 
type Void struct{} @@ -72,40 +78,32 @@ func (v *Void) Format(fs fmt.State, verb rune) { formatType(fs, verb, v) } func (v *Void) TypeName() string { return "" } func (v *Void) size() uint32 { return 0 } func (v *Void) copy() Type { return (*Void)(nil) } -func (v *Void) walk(*typeDeque) {} type IntEncoding byte +// Valid IntEncodings. +// +// These may look like they are flags, but they aren't. const ( - Signed IntEncoding = 1 << iota - Char - Bool + Unsigned IntEncoding = 0 + Signed IntEncoding = 1 + Char IntEncoding = 2 + Bool IntEncoding = 4 ) -func (ie IntEncoding) IsSigned() bool { - return ie&Signed != 0 -} - -func (ie IntEncoding) IsChar() bool { - return ie&Char != 0 -} - -func (ie IntEncoding) IsBool() bool { - return ie&Bool != 0 -} - func (ie IntEncoding) String() string { - switch { - case ie.IsChar() && ie.IsSigned(): + switch ie { + case Char: + // NB: There is no way to determine signedness for char. return "char" - case ie.IsChar() && !ie.IsSigned(): - return "uchar" - case ie.IsBool(): + case Bool: return "bool" - case ie.IsSigned(): + case Signed: return "signed" - default: + case Unsigned: return "unsigned" + default: + return fmt.Sprintf("IntEncoding(%d)", byte(ie)) } } @@ -121,12 +119,11 @@ type Int struct { } func (i *Int) Format(fs fmt.State, verb rune) { - formatType(fs, verb, i, i.Encoding, "size=", i.Size*8) + formatType(fs, verb, i, i.Encoding, "size=", i.Size) } func (i *Int) TypeName() string { return i.Name } func (i *Int) size() uint32 { return i.Size } -func (i *Int) walk(*typeDeque) {} func (i *Int) copy() Type { cpy := *i return &cpy @@ -141,9 +138,8 @@ func (p *Pointer) Format(fs fmt.State, verb rune) { formatType(fs, verb, p, "target=", p.Target) } -func (p *Pointer) TypeName() string { return "" } -func (p *Pointer) size() uint32 { return 8 } -func (p *Pointer) walk(tdq *typeDeque) { tdq.push(&p.Target) } +func (p *Pointer) TypeName() string { return "" } +func (p *Pointer) size() uint32 { return 8 } func (p *Pointer) copy() Type { cpy := *p return &cpy @@ -162,11 +158,6 @@ func (arr *Array) Format(fs fmt.State, verb rune) { func (arr *Array) TypeName() string { return "" } -func (arr *Array) walk(tdq *typeDeque) { - tdq.push(&arr.Index) - tdq.push(&arr.Type) -} - func (arr *Array) copy() Type { cpy := *arr return &cpy @@ -188,12 +179,6 @@ func (s *Struct) TypeName() string { return s.Name } func (s *Struct) size() uint32 { return s.Size } -func (s *Struct) walk(tdq *typeDeque) { - for i := range s.Members { - tdq.push(&s.Members[i].Type) - } -} - func (s *Struct) copy() Type { cpy := *s cpy.Members = copyMembers(s.Members) @@ -220,12 +205,6 @@ func (u *Union) TypeName() string { return u.Name } func (u *Union) size() uint32 { return u.Size } -func (u *Union) walk(tdq *typeDeque) { - for i := range u.Members { - tdq.push(&u.Members[i].Type) - } -} - func (u *Union) copy() Type { cpy := *u cpy.Members = copyMembers(u.Members) @@ -243,6 +222,7 @@ func copyMembers(orig []Member) []Member { } type composite interface { + Type members() []Member } @@ -273,7 +253,9 @@ type Member struct { type Enum struct { Name string // Size of the enum value in bytes. - Size uint32 + Size uint32 + // True if the values should be interpreted as signed integers. 
+	Signed bool
 	Values []EnumValue
 }
 
@@ -288,11 +270,10 @@ func (e *Enum) TypeName() string { return e.Name }
 // It is not a valid Type
 type EnumValue struct {
 	Name string
-	Value int32
+	Value uint64
 }
 
-func (e *Enum) size() uint32 { return e.Size }
-func (e *Enum) walk(*typeDeque) {}
+func (e *Enum) size() uint32 { return e.Size }
 func (e *Enum) copy() Type {
 	cpy := *e
 	cpy.Values = make([]EnumValue, len(e.Values))
@@ -332,12 +313,23 @@ func (f *Fwd) Format(fs fmt.State, verb rune) {
 
 func (f *Fwd) TypeName() string { return f.Name }
 
-func (f *Fwd) walk(*typeDeque) {}
 func (f *Fwd) copy() Type {
 	cpy := *f
 	return &cpy
 }
 
+func (f *Fwd) matches(typ Type) bool {
+	if _, ok := As[*Struct](typ); ok && f.Kind == FwdStruct {
+		return true
+	}
+
+	if _, ok := As[*Union](typ); ok && f.Kind == FwdUnion {
+		return true
+	}
+
+	return false
+}
+
 // Typedef is an alias of a Type.
 type Typedef struct {
 	Name string
@@ -350,7 +342,6 @@ func (td *Typedef) Format(fs fmt.State, verb rune) {
 
 func (td *Typedef) TypeName() string { return td.Name }
 
-func (td *Typedef) walk(tdq *typeDeque) { tdq.push(&td.Type) }
 func (td *Typedef) copy() Type {
 	cpy := *td
 	return &cpy
@@ -367,8 +358,7 @@ func (v *Volatile) Format(fs fmt.State, verb rune) {
 
 func (v *Volatile) TypeName() string { return "" }
 
-func (v *Volatile) qualify() Type { return v.Type }
-func (v *Volatile) walk(tdq *typeDeque) { tdq.push(&v.Type) }
+func (v *Volatile) qualify() Type { return v.Type }
 func (v *Volatile) copy() Type {
 	cpy := *v
 	return &cpy
@@ -385,8 +375,7 @@ func (c *Const) Format(fs fmt.State, verb rune) {
 
 func (c *Const) TypeName() string { return "" }
 
-func (c *Const) qualify() Type { return c.Type }
-func (c *Const) walk(tdq *typeDeque) { tdq.push(&c.Type) }
+func (c *Const) qualify() Type { return c.Type }
 func (c *Const) copy() Type {
 	cpy := *c
 	return &cpy
@@ -403,8 +392,7 @@ func (r *Restrict) Format(fs fmt.State, verb rune) {
 
 func (r *Restrict) TypeName() string { return "" }
 
-func (r *Restrict) qualify() Type { return r.Type }
-func (r *Restrict) walk(tdq *typeDeque) { tdq.push(&r.Type) }
+func (r *Restrict) qualify() Type { return r.Type }
 func (r *Restrict) copy() Type {
 	cpy := *r
 	return &cpy
@@ -422,13 +410,18 @@ func FuncMetadata(ins *asm.Instruction) *Func {
 	return fn
 }
 
+// WithFuncMetadata adds a btf.Func to the Metadata of asm.Instruction.
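+//
+// The instruction is passed and returned by value, so the caller's
+// original is left unmodified.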
+func WithFuncMetadata(ins asm.Instruction, fn *Func) asm.Instruction { + ins.Metadata.Set(funcInfoMeta{}, fn) + return ins +} + func (f *Func) Format(fs fmt.State, verb rune) { formatType(fs, verb, f, f.Linkage, "proto=", f.Type) } func (f *Func) TypeName() string { return f.Name } -func (f *Func) walk(tdq *typeDeque) { tdq.push(&f.Type) } func (f *Func) copy() Type { cpy := *f return &cpy @@ -446,13 +439,6 @@ func (fp *FuncProto) Format(fs fmt.State, verb rune) { func (fp *FuncProto) TypeName() string { return "" } -func (fp *FuncProto) walk(tdq *typeDeque) { - tdq.push(&fp.Return) - for i := range fp.Params { - tdq.push(&fp.Params[i].Type) - } -} - func (fp *FuncProto) copy() Type { cpy := *fp cpy.Params = make([]FuncParam, len(fp.Params)) @@ -478,7 +464,6 @@ func (v *Var) Format(fs fmt.State, verb rune) { func (v *Var) TypeName() string { return v.Name } -func (v *Var) walk(tdq *typeDeque) { tdq.push(&v.Type) } func (v *Var) copy() Type { cpy := *v return &cpy @@ -499,12 +484,6 @@ func (ds *Datasec) TypeName() string { return ds.Name } func (ds *Datasec) size() uint32 { return ds.Size } -func (ds *Datasec) walk(tdq *typeDeque) { - for i := range ds.Vars { - tdq.push(&ds.Vars[i].Type) - } -} - func (ds *Datasec) copy() Type { cpy := *ds cpy.Vars = make([]VarSecinfo, len(ds.Vars)) @@ -516,6 +495,7 @@ func (ds *Datasec) copy() Type { // // It is not a valid Type. type VarSecinfo struct { + // Var or Func. Type Type Offset uint32 Size uint32 @@ -535,12 +515,48 @@ func (f *Float) Format(fs fmt.State, verb rune) { func (f *Float) TypeName() string { return f.Name } func (f *Float) size() uint32 { return f.Size } -func (f *Float) walk(*typeDeque) {} func (f *Float) copy() Type { cpy := *f return &cpy } +// declTag associates metadata with a declaration. +type declTag struct { + Type Type + Value string + // The index this tag refers to in the target type. For composite types, + // a value of -1 indicates that the tag refers to the whole type. Otherwise + // it indicates which member or argument the tag applies to. + Index int +} + +func (dt *declTag) Format(fs fmt.State, verb rune) { + formatType(fs, verb, dt, "type=", dt.Type, "value=", dt.Value, "index=", dt.Index) +} + +func (dt *declTag) TypeName() string { return "" } +func (dt *declTag) copy() Type { + cpy := *dt + return &cpy +} + +// typeTag associates metadata with a type. +type typeTag struct { + Type Type + Value string +} + +func (tt *typeTag) Format(fs fmt.State, verb rune) { + formatType(fs, verb, tt, "type=", tt.Type, "value=", tt.Value) +} + +func (tt *typeTag) TypeName() string { return "" } +func (tt *typeTag) qualify() Type { return tt.Type } +func (tt *typeTag) copy() Type { + cpy := *tt + return &cpy +} + // cycle is a type which had to be elided since it exceeded maxTypeDepth. type cycle struct { root Type @@ -549,7 +565,6 @@ type cycle struct { func (c *cycle) ID() TypeID { return math.MaxUint32 } func (c *cycle) Format(fs fmt.State, verb rune) { formatType(fs, verb, c, "root=", c.root) } func (c *cycle) TypeName() string { return "" } -func (c *cycle) walk(*typeDeque) {} func (c *cycle) copy() Type { cpy := *c return &cpy @@ -576,8 +591,11 @@ var ( _ qualifier = (*Const)(nil) _ qualifier = (*Restrict)(nil) _ qualifier = (*Volatile)(nil) + _ qualifier = (*typeTag)(nil) ) +var errUnsizedType = errors.New("type is unsized") + // Sizeof returns the size of a type in bytes. // // Returns an error if the size can't be computed. 
@@ -587,7 +605,7 @@ func Sizeof(typ Type) (int, error) { elem int64 ) - for i := 0; i < maxTypeDepth; i++ { + for i := 0; i < maxResolveDepth; i++ { switch v := typ.(type) { case *Array: if n > 0 && int64(v.Nelems) > math.MaxInt64/n { @@ -612,7 +630,7 @@ func Sizeof(typ Type) (int, error) { continue default: - return 0, fmt.Errorf("unsized type %T", typ) + return 0, fmt.Errorf("type %T: %w", typ, errUnsizedType) } if n > 0 && elem > math.MaxInt64/n { @@ -632,169 +650,91 @@ func Sizeof(typ Type) (int, error) { // alignof returns the alignment of a type. // -// Currently only supports the subset of types necessary for bitfield relocations. +// Returns an error if the Type can't be aligned, like an integer with an uneven +// size. Currently only supports the subset of types necessary for bitfield +// relocations. func alignof(typ Type) (int, error) { + var n int + switch t := UnderlyingType(typ).(type) { case *Enum: - return int(t.size()), nil + n = int(t.size()) case *Int: - return int(t.Size), nil + n = int(t.Size) + case *Array: + return alignof(t.Type) default: return 0, fmt.Errorf("can't calculate alignment of %T", t) } -} -// Transformer modifies a given Type and returns the result. -// -// For example, UnderlyingType removes any qualifiers or typedefs from a type. -// See the example on Copy for how to use a transform. -type Transformer func(Type) Type + if !internal.IsPow(n) { + return 0, fmt.Errorf("alignment value %d is not a power of two", n) + } -// Copy a Type recursively. -// -// typ may form a cycle. If transform is not nil, it is called with the -// to be copied type, and the returned value is copied instead. -func Copy(typ Type, transform Transformer) Type { - copies := make(copier) - copies.copy(&typ, transform) - return typ + return n, nil } -// copy a slice of Types recursively. +// Copy a Type recursively. // -// See Copy for the semantics. -func copyTypes(types []Type, transform Transformer) []Type { - result := make([]Type, len(types)) - copy(result, types) - - copies := make(copier) - for i := range result { - copies.copy(&result[i], transform) - } - - return result +// typ may form a cycle. +func Copy(typ Type) Type { + return copyType(typ, nil, make(map[Type]Type), nil) } -type copier map[Type]Type - -func (c copier) copy(typ *Type, transform Transformer) { - var work typeDeque - for t := typ; t != nil; t = work.pop() { - // *t is the identity of the type. - if cpy := c[*t]; cpy != nil { - *t = cpy - continue - } - - var cpy Type - if transform != nil { - cpy = transform(*t).copy() - } else { - cpy = (*t).copy() - } - - c[*t] = cpy - *t = cpy - - // Mark any nested types for copying. - cpy.walk(&work) +func copyType(typ Type, ids map[Type]TypeID, copies map[Type]Type, copiedIDs map[Type]TypeID) Type { + cpy, ok := copies[typ] + if ok { + // This has been copied previously, no need to continue. + return cpy } -} -// typeDeque keeps track of pointers to types which still -// need to be visited. -type typeDeque struct { - types []*Type - read, write uint64 - mask uint64 -} - -func (dq *typeDeque) empty() bool { - return dq.read == dq.write -} - -// push adds a type to the stack. 
-func (dq *typeDeque) push(t *Type) {
-	if dq.write-dq.read < uint64(len(dq.types)) {
-		dq.types[dq.write&dq.mask] = t
-		dq.write++
-		return
-	}
+	cpy = typ.copy()
+	copies[typ] = cpy
 
-	new := len(dq.types) * 2
-	if new == 0 {
-		new = 8
+	if id, ok := ids[typ]; ok {
+		copiedIDs[cpy] = id
 	}
 
-	types := make([]*Type, new)
-	pivot := dq.read & dq.mask
-	n := copy(types, dq.types[pivot:])
-	n += copy(types[n:], dq.types[:pivot])
-	types[n] = t
+	children(cpy, func(child *Type) bool {
+		*child = copyType(*child, ids, copies, copiedIDs)
+		return true
+	})
 
-	dq.types = types
-	dq.mask = uint64(new) - 1
-	dq.read, dq.write = 0, uint64(n+1)
-}
-
-// shift returns the first element or null.
-func (dq *typeDeque) shift() *Type {
-	if dq.empty() {
-		return nil
-	}
-
-	index := dq.read & dq.mask
-	t := dq.types[index]
-	dq.types[index] = nil
-	dq.read++
-	return t
+	return cpy
 }
 
-// pop returns the last element or null.
-func (dq *typeDeque) pop() *Type {
-	if dq.empty() {
-		return nil
-	}
+type typeDeque = internal.Deque[*Type]
 
-	dq.write--
-	index := dq.write & dq.mask
-	t := dq.types[index]
-	dq.types[index] = nil
-	return t
-}
-
-// all returns all elements.
+// readAndInflateTypes reads the raw btf type info and turns it into a graph
+// of Types connected via pointers.
 //
-// The deque is empty after calling this method.
-func (dq *typeDeque) all() []*Type {
-	length := dq.write - dq.read
-	types := make([]*Type, 0, length)
-	for t := dq.shift(); t != nil; t = dq.shift() {
-		types = append(types, t)
-	}
-	return types
-}
-
-// inflateRawTypes takes a list of raw btf types linked via type IDs, and turns
-// it into a graph of Types connected via pointers.
+// If base is provided, then the types are considered to be of a split BTF
+// (e.g., a kernel module).
 //
-// If baseTypes are provided, then the raw types are
-// considered to be of a split BTF (e.g., a kernel module).
-//
-// Returns a slice of types indexed by TypeID. Since BTF ignores compilation
+// Returns a slice of types indexed by TypeID. Since BTF ignores compilation
 // units, multiple types may share the same name. A Type may form a cyclic graph
 // by pointing at itself.
-func inflateRawTypes(rawTypes []rawType, baseTypes types, rawStrings *stringTable) ([]Type, error) {
-	types := make([]Type, 0, len(rawTypes)+1) // +1 for Void added to base types
-
-	typeIDOffset := TypeID(1) // Void is TypeID(0), so the rest starts from TypeID(1)
+func readAndInflateTypes(r io.Reader, bo binary.ByteOrder, typeLen uint32, rawStrings *stringTable, base *Spec) ([]Type, error) {
+	// Because of the interleaving between types and struct members it is
+	// difficult to precompute the number of raw types this will parse, so
+	// this "guess" is a good first estimate.
+	sizeOfbtfType := uintptr(btfTypeLen)
+	tyMaxCount := uintptr(typeLen) / sizeOfbtfType / 2
+	types := make([]Type, 0, tyMaxCount)
+
+	// Void is defined to always be type ID 0, and is thus omitted from BTF.
+	types = append(types, (*Void)(nil))
+
+	firstTypeID := TypeID(0)
+	if base != nil {
+		var err error
+		firstTypeID, err = base.nextTypeID()
+		if err != nil {
+			return nil, err
+		}
 
-	if baseTypes == nil {
-		// Void is defined to always be type ID 0, and is thus omitted from BTF.
-		types = append(types, (*Void)(nil))
-	} else {
-		// For split BTF, the next ID is max base BTF type ID + 1
-		typeIDOffset = TypeID(len(baseTypes))
+		// Split BTF doesn't contain Void.
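+		// Drop the implicit Void appended above: split type IDs start at
+		// firstTypeID, and the base BTF already provides Void.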
+ types = types[:0] } type fixupDef struct { @@ -804,39 +744,21 @@ func inflateRawTypes(rawTypes []rawType, baseTypes types, rawStrings *stringTabl var fixups []fixupDef fixup := func(id TypeID, typ *Type) { - if id < TypeID(len(baseTypes)) { - *typ = baseTypes[id] - return + if id < firstTypeID { + if baseType, err := base.TypeByID(id); err == nil { + *typ = baseType + return + } } - idx := id - if baseTypes != nil { - idx = id - TypeID(len(baseTypes)) - } - if idx < TypeID(len(types)) { + idx := int(id - firstTypeID) + if idx < len(types) { // We've already inflated this type, fix it up immediately. *typ = types[idx] return } - fixups = append(fixups, fixupDef{id, typ}) - } - type assertion struct { - typ *Type - want reflect.Type - } - - var assertions []assertion - assert := func(typ *Type, want reflect.Type) error { - if *typ != nil { - // The type has already been fixed up, check the type immediately. - if reflect.TypeOf(*typ) != want { - return fmt.Errorf("expected %s, got %T", want, *typ) - } - return nil - } - assertions = append(assertions, assertion{typ, want}) - return nil + fixups = append(fixups, fixupDef{id, typ}) } type bitfieldFixupDef struct { @@ -903,106 +825,182 @@ func inflateRawTypes(rawTypes []rawType, baseTypes types, rawStrings *stringTabl return members, nil } - for i, raw := range rawTypes { + var ( + buf = make([]byte, 1024) + header btfType + bInt btfInt + bArr btfArray + bMembers []btfMember + bEnums []btfEnum + bParams []btfParam + bVariable btfVariable + bSecInfos []btfVarSecinfo + bDeclTag btfDeclTag + bEnums64 []btfEnum64 + ) + + var declTags []*declTag + for { var ( - id = typeIDOffset + TypeID(i) + id = firstTypeID + TypeID(len(types)) typ Type ) - name, err := rawStrings.Lookup(raw.NameOff) + if _, err := io.ReadFull(r, buf[:btfTypeLen]); err == io.EOF { + break + } else if err != nil { + return nil, fmt.Errorf("can't read type info for id %v: %v", id, err) + } + + if _, err := unmarshalBtfType(&header, buf[:btfTypeLen], bo); err != nil { + return nil, fmt.Errorf("can't unmarshal type info for id %v: %v", id, err) + } + + if id < firstTypeID { + return nil, fmt.Errorf("no more type IDs") + } + + name, err := rawStrings.Lookup(header.NameOff) if err != nil { return nil, fmt.Errorf("get name for type id %d: %w", id, err) } - switch raw.Kind() { + switch header.Kind() { case kindInt: - size := raw.Size() - bi := raw.data.(*btfInt) - if bi.Offset() > 0 || bi.Bits().Bytes() != size { - legacyBitfields[id] = [2]Bits{bi.Offset(), bi.Bits()} + size := header.Size() + buf = buf[:btfIntLen] + if _, err := io.ReadFull(r, buf); err != nil { + return nil, fmt.Errorf("can't read btfInt, id: %d: %w", id, err) } - typ = &Int{name, raw.Size(), bi.Encoding()} + if _, err := unmarshalBtfInt(&bInt, buf, bo); err != nil { + return nil, fmt.Errorf("can't unmarshal btfInt, id: %d: %w", id, err) + } + if bInt.Offset() > 0 || bInt.Bits().Bytes() != size { + legacyBitfields[id] = [2]Bits{bInt.Offset(), bInt.Bits()} + } + typ = &Int{name, header.Size(), bInt.Encoding()} case kindPointer: ptr := &Pointer{nil} - fixup(raw.Type(), &ptr.Target) + fixup(header.Type(), &ptr.Target) typ = ptr case kindArray: - btfArr := raw.data.(*btfArray) - arr := &Array{nil, nil, btfArr.Nelems} - fixup(btfArr.IndexType, &arr.Index) - fixup(btfArr.Type, &arr.Type) + buf = buf[:btfArrayLen] + if _, err := io.ReadFull(r, buf); err != nil { + return nil, fmt.Errorf("can't read btfArray, id: %d: %w", id, err) + } + if _, err := unmarshalBtfArray(&bArr, buf, bo); err != nil { + return nil, 
fmt.Errorf("can't unmarshal btfArray, id: %d: %w", id, err) + } + + arr := &Array{nil, nil, bArr.Nelems} + fixup(bArr.IndexType, &arr.Index) + fixup(bArr.Type, &arr.Type) typ = arr case kindStruct: - members, err := convertMembers(raw.data.([]btfMember), raw.KindFlag()) + vlen := header.Vlen() + bMembers = slices.Grow(bMembers[:0], vlen)[:vlen] + buf = slices.Grow(buf[:0], vlen*btfMemberLen)[:vlen*btfMemberLen] + if _, err := io.ReadFull(r, buf); err != nil { + return nil, fmt.Errorf("can't read btfMembers, id: %d: %w", id, err) + } + if _, err := unmarshalBtfMembers(bMembers, buf, bo); err != nil { + return nil, fmt.Errorf("can't unmarshal btfMembers, id: %d: %w", id, err) + } + + members, err := convertMembers(bMembers, header.Bitfield()) if err != nil { return nil, fmt.Errorf("struct %s (id %d): %w", name, id, err) } - typ = &Struct{name, raw.Size(), members} + typ = &Struct{name, header.Size(), members} case kindUnion: - members, err := convertMembers(raw.data.([]btfMember), raw.KindFlag()) + vlen := header.Vlen() + bMembers = slices.Grow(bMembers[:0], vlen)[:vlen] + buf = slices.Grow(buf[:0], vlen*btfMemberLen)[:vlen*btfMemberLen] + if _, err := io.ReadFull(r, buf); err != nil { + return nil, fmt.Errorf("can't read btfMembers, id: %d: %w", id, err) + } + if _, err := unmarshalBtfMembers(bMembers, buf, bo); err != nil { + return nil, fmt.Errorf("can't unmarshal btfMembers, id: %d: %w", id, err) + } + + members, err := convertMembers(bMembers, header.Bitfield()) if err != nil { return nil, fmt.Errorf("union %s (id %d): %w", name, id, err) } - typ = &Union{name, raw.Size(), members} + typ = &Union{name, header.Size(), members} case kindEnum: - rawvals := raw.data.([]btfEnum) - vals := make([]EnumValue, 0, len(rawvals)) - for i, btfVal := range rawvals { + vlen := header.Vlen() + bEnums = slices.Grow(bEnums[:0], vlen)[:vlen] + buf = slices.Grow(buf[:0], vlen*btfEnumLen)[:vlen*btfEnumLen] + if _, err := io.ReadFull(r, buf); err != nil { + return nil, fmt.Errorf("can't read btfEnums, id: %d: %w", id, err) + } + if _, err := unmarshalBtfEnums(bEnums, buf, bo); err != nil { + return nil, fmt.Errorf("can't unmarshal btfEnums, id: %d: %w", id, err) + } + + vals := make([]EnumValue, 0, vlen) + signed := header.Signed() + for i, btfVal := range bEnums { name, err := rawStrings.Lookup(btfVal.NameOff) if err != nil { return nil, fmt.Errorf("get name for enum value %d: %s", i, err) } - vals = append(vals, EnumValue{ - Name: name, - Value: btfVal.Val, - }) + value := uint64(btfVal.Val) + if signed { + // Sign extend values to 64 bit. 
+ value = uint64(int32(btfVal.Val)) + } + vals = append(vals, EnumValue{name, value}) } - typ = &Enum{name, raw.Size(), vals} + typ = &Enum{name, header.Size(), signed, vals} case kindForward: - if raw.KindFlag() { - typ = &Fwd{name, FwdUnion} - } else { - typ = &Fwd{name, FwdStruct} - } + typ = &Fwd{name, header.FwdKind()} case kindTypedef: typedef := &Typedef{name, nil} - fixup(raw.Type(), &typedef.Type) + fixup(header.Type(), &typedef.Type) typ = typedef case kindVolatile: volatile := &Volatile{nil} - fixup(raw.Type(), &volatile.Type) + fixup(header.Type(), &volatile.Type) typ = volatile case kindConst: cnst := &Const{nil} - fixup(raw.Type(), &cnst.Type) + fixup(header.Type(), &cnst.Type) typ = cnst case kindRestrict: restrict := &Restrict{nil} - fixup(raw.Type(), &restrict.Type) + fixup(header.Type(), &restrict.Type) typ = restrict case kindFunc: - fn := &Func{name, nil, raw.Linkage()} - fixup(raw.Type(), &fn.Type) - if err := assert(&fn.Type, reflect.TypeOf((*FuncProto)(nil))); err != nil { - return nil, err - } + fn := &Func{name, nil, header.Linkage()} + fixup(header.Type(), &fn.Type) typ = fn case kindFuncProto: - rawparams := raw.data.([]btfParam) - params := make([]FuncParam, 0, len(rawparams)) - for i, param := range rawparams { + vlen := header.Vlen() + bParams = slices.Grow(bParams[:0], vlen)[:vlen] + buf = slices.Grow(buf[:0], vlen*btfParamLen)[:vlen*btfParamLen] + if _, err := io.ReadFull(r, buf); err != nil { + return nil, fmt.Errorf("can't read btfParams, id: %d: %w", id, err) + } + if _, err := unmarshalBtfParams(bParams, buf, bo); err != nil { + return nil, fmt.Errorf("can't unmarshal btfParams, id: %d: %w", id, err) + } + + params := make([]FuncParam, 0, vlen) + for i, param := range bParams { name, err := rawStrings.Lookup(param.NameOff) if err != nil { return nil, fmt.Errorf("get name for func proto parameter %d: %s", i, err) @@ -1012,60 +1010,121 @@ func inflateRawTypes(rawTypes []rawType, baseTypes types, rawStrings *stringTabl }) } for i := range params { - fixup(rawparams[i].Type, ¶ms[i].Type) + fixup(bParams[i].Type, ¶ms[i].Type) } fp := &FuncProto{nil, params} - fixup(raw.Type(), &fp.Return) + fixup(header.Type(), &fp.Return) typ = fp case kindVar: - variable := raw.data.(*btfVariable) - v := &Var{name, nil, VarLinkage(variable.Linkage)} - fixup(raw.Type(), &v.Type) + buf = buf[:btfVariableLen] + if _, err := io.ReadFull(r, buf); err != nil { + return nil, fmt.Errorf("can't read btfVariable, id: %d: %w", id, err) + } + if _, err := unmarshalBtfVariable(&bVariable, buf, bo); err != nil { + return nil, fmt.Errorf("can't read btfVariable, id: %d: %w", id, err) + } + + v := &Var{name, nil, VarLinkage(bVariable.Linkage)} + fixup(header.Type(), &v.Type) typ = v case kindDatasec: - btfVars := raw.data.([]btfVarSecinfo) - vars := make([]VarSecinfo, 0, len(btfVars)) - for _, btfVar := range btfVars { + vlen := header.Vlen() + bSecInfos = slices.Grow(bSecInfos[:0], vlen)[:vlen] + buf = slices.Grow(buf[:0], vlen*btfVarSecinfoLen)[:vlen*btfVarSecinfoLen] + if _, err := io.ReadFull(r, buf); err != nil { + return nil, fmt.Errorf("can't read btfVarSecInfos, id: %d: %w", id, err) + } + if _, err := unmarshalBtfVarSecInfos(bSecInfos, buf, bo); err != nil { + return nil, fmt.Errorf("can't unmarshal btfVarSecInfos, id: %d: %w", id, err) + } + + vars := make([]VarSecinfo, 0, vlen) + for _, btfVar := range bSecInfos { vars = append(vars, VarSecinfo{ Offset: btfVar.Offset, Size: btfVar.Size, }) } for i := range vars { - fixup(btfVars[i].Type, &vars[i].Type) - if err := 
assert(&vars[i].Type, reflect.TypeOf((*Var)(nil))); err != nil { - return nil, err - } + fixup(bSecInfos[i].Type, &vars[i].Type) } - typ = &Datasec{name, raw.SizeType, vars} + typ = &Datasec{name, header.Size(), vars} case kindFloat: - typ = &Float{name, raw.Size()} + typ = &Float{name, header.Size()} + + case kindDeclTag: + buf = buf[:btfDeclTagLen] + if _, err := io.ReadFull(r, buf); err != nil { + return nil, fmt.Errorf("can't read btfDeclTag, id: %d: %w", id, err) + } + if _, err := unmarshalBtfDeclTag(&bDeclTag, buf, bo); err != nil { + return nil, fmt.Errorf("can't read btfDeclTag, id: %d: %w", id, err) + } + + btfIndex := bDeclTag.ComponentIdx + if uint64(btfIndex) > math.MaxInt { + return nil, fmt.Errorf("type id %d: index exceeds int", id) + } + + dt := &declTag{nil, name, int(int32(btfIndex))} + fixup(header.Type(), &dt.Type) + typ = dt + + declTags = append(declTags, dt) + + case kindTypeTag: + tt := &typeTag{nil, name} + fixup(header.Type(), &tt.Type) + typ = tt + + case kindEnum64: + vlen := header.Vlen() + bEnums64 = slices.Grow(bEnums64[:0], vlen)[:vlen] + buf = slices.Grow(buf[:0], vlen*btfEnum64Len)[:vlen*btfEnum64Len] + if _, err := io.ReadFull(r, buf); err != nil { + return nil, fmt.Errorf("can't read btfEnum64s, id: %d: %w", id, err) + } + if _, err := unmarshalBtfEnums64(bEnums64, buf, bo); err != nil { + return nil, fmt.Errorf("can't unmarshal btfEnum64s, id: %d: %w", id, err) + } + + vals := make([]EnumValue, 0, vlen) + for i, btfVal := range bEnums64 { + name, err := rawStrings.Lookup(btfVal.NameOff) + if err != nil { + return nil, fmt.Errorf("get name for enum64 value %d: %s", i, err) + } + value := (uint64(btfVal.ValHi32) << 32) | uint64(btfVal.ValLo32) + vals = append(vals, EnumValue{name, value}) + } + typ = &Enum{name, header.Size(), header.Signed(), vals} default: - return nil, fmt.Errorf("type id %d: unknown kind: %v", id, raw.Kind()) + return nil, fmt.Errorf("type id %d: unknown kind: %v", id, header.Kind()) } types = append(types, typ) } for _, fixup := range fixups { - i := int(fixup.id) - if i >= len(types)+len(baseTypes) { - return nil, fmt.Errorf("reference to invalid type id: %d", fixup.id) + if fixup.id < firstTypeID { + return nil, fmt.Errorf("fixup for base type id %d is not expected", fixup.id) } - if i < len(baseTypes) { - return nil, fmt.Errorf("fixup for base type id %d is not expected", i) + + idx := int(fixup.id - firstTypeID) + if idx >= len(types) { + return nil, fmt.Errorf("reference to invalid type id: %d", fixup.id) } - *fixup.typ = types[i-len(baseTypes)] + *fixup.typ = types[idx] } for _, bitfieldFixup := range bitfieldFixups { - if bitfieldFixup.id < TypeID(len(baseTypes)) { + if bitfieldFixup.id < firstTypeID { return nil, fmt.Errorf("bitfield fixup from split to base types is not expected") } @@ -1077,9 +1136,30 @@ func inflateRawTypes(rawTypes []rawType, baseTypes types, rawStrings *stringTabl } } - for _, assertion := range assertions { - if reflect.TypeOf(*assertion.typ) != assertion.want { - return nil, fmt.Errorf("expected %s, got %T", assertion.want, *assertion.typ) + for _, dt := range declTags { + switch t := dt.Type.(type) { + case *Var, *Typedef: + if dt.Index != -1 { + return nil, fmt.Errorf("type %s: index %d is not -1", dt, dt.Index) + } + + case composite: + if dt.Index >= len(t.members()) { + return nil, fmt.Errorf("type %s: index %d exceeds members of %s", dt, dt.Index, t) + } + + case *Func: + fp, ok := t.Type.(*FuncProto) + if !ok { + return nil, fmt.Errorf("type %s: %s is not a FuncProto", dt, t.Type) + } + + if 
dt.Index >= len(fp.Params) {
+				return nil, fmt.Errorf("type %s: index %d exceeds params of %s", dt, dt.Index, t)
+			}
+
+		default:
+			return nil, fmt.Errorf("type %s: decl tag for type %s is not supported", dt, t)
 		}
 	}
 
@@ -1110,7 +1190,7 @@ func newEssentialName(name string) essentialName {
 // UnderlyingType skips qualifiers and Typedefs.
 func UnderlyingType(typ Type) Type {
 	result := typ
-	for depth := 0; depth <= maxTypeDepth; depth++ {
+	for depth := 0; depth <= maxResolveDepth; depth++ {
 		switch v := (result).(type) {
 		case qualifier:
 			result = v.qualify()
@@ -1123,6 +1203,32 @@ func UnderlyingType(typ Type) Type {
 	return &cycle{typ}
 }
 
+// As returns typ if it is of type T. Otherwise it peels qualifiers and
+// Typedefs until it finds a T.
+//
+// Returns the zero value and false if there is no T or if the type is nested
+// too deeply.
+func As[T Type](typ Type) (T, bool) {
+	// NB: We can't make this function return (*T) since then
+	// we can't assert that a type matches an interface which
+	// embeds Type: as[composite](T).
+	for depth := 0; depth <= maxResolveDepth; depth++ {
+		switch v := (typ).(type) {
+		case T:
+			return v, true
+		case qualifier:
+			typ = v.qualify()
+		case *Typedef:
+			typ = v.Type
+		default:
+			goto notFound
+		}
+	}
+notFound:
+	var zero T
+	return zero, false
+}
+
 type formatState struct {
 	fmt.State
 	depth int
@@ -1145,10 +1251,7 @@ func formatType(f fmt.State, verb rune, t formattableType, extra ...interface{})
 		return
 	}
 
-	// This is the same as %T, but elides the package name. Assumes that
-	// formattableType is implemented by a pointer receiver.
-	goTypeName := reflect.TypeOf(t).Elem().Name()
-	_, _ = io.WriteString(f, goTypeName)
+	_, _ = io.WriteString(f, internal.GoTypeName(t))
 
 	if name := t.TypeName(); name != "" {
 		// Output BTF type name if present.
diff --git a/vendor/github.com/cilium/ebpf/btf/workarounds.go b/vendor/github.com/cilium/ebpf/btf/workarounds.go
new file mode 100644
index 0000000000..12a89b87ee
--- /dev/null
+++ b/vendor/github.com/cilium/ebpf/btf/workarounds.go
@@ -0,0 +1,26 @@
+package btf
+
+// datasecResolveWorkaround ensures that certain vars in a Datasec are added
+// to a Spec before the Datasec. This avoids a bug in kernel BTF validation.
+//
+// See https://lore.kernel.org/bpf/20230302123440.1193507-1-lmb@isovalent.com/
+func datasecResolveWorkaround(b *Builder, ds *Datasec) error {
+	for _, vsi := range ds.Vars {
+		v, ok := vsi.Type.(*Var)
+		if !ok {
+			continue
+		}
+
+		switch v.Type.(type) {
+		case *Typedef, *Volatile, *Const, *Restrict, *typeTag:
+			// NB: We must never call Add on a Datasec, otherwise we risk
+			// infinite recursion.
+			_, err := b.Add(v.Type)
+			if err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
diff --git a/vendor/github.com/cilium/ebpf/collection.go b/vendor/github.com/cilium/ebpf/collection.go
index 8c2ddc3802..a5532220fd 100644
--- a/vendor/github.com/cilium/ebpf/collection.go
+++ b/vendor/github.com/cilium/ebpf/collection.go
@@ -9,6 +9,9 @@ import (
 
 	"github.com/cilium/ebpf/asm"
 	"github.com/cilium/ebpf/btf"
+	"github.com/cilium/ebpf/internal"
+	"github.com/cilium/ebpf/internal/kconfig"
+	"github.com/cilium/ebpf/internal/sysenc"
 )
 
 // CollectionOptions control loading a collection into the kernel.
@@ -107,12 +110,22 @@ func (cs *CollectionSpec) RewriteMaps(maps map[string]*Map) error {
 	return nil
 }
 
+// MissingConstantsError is returned by [CollectionSpec.RewriteConstants].
+type MissingConstantsError struct {
+	// The constants missing from .rodata.
+ Constants []string +} + +func (m *MissingConstantsError) Error() string { + return fmt.Sprintf("some constants are missing from .rodata: %s", strings.Join(m.Constants, ", ")) +} + // RewriteConstants replaces the value of multiple constants. // // The constant must be defined like so in the C program: // -// volatile const type foobar; -// volatile const type foobar = default; +// volatile const type foobar; +// volatile const type foobar = default; // // Replacement values must be of the same length as the C sizeof(type). // If necessary, they are marshalled according to the same rules as @@ -120,7 +133,7 @@ func (cs *CollectionSpec) RewriteMaps(maps map[string]*Map) error { // // From Linux 5.5 the verifier will use constants to eliminate dead code. // -// Returns an error if a constant doesn't exist. +// Returns an error wrapping [MissingConstantsError] if a constant doesn't exist. func (cs *CollectionSpec) RewriteConstants(consts map[string]interface{}) error { replaced := make(map[string]bool) @@ -151,6 +164,10 @@ func (cs *CollectionSpec) RewriteConstants(consts map[string]interface{}) error continue } + if _, ok := v.Type.(*btf.Var); !ok { + return fmt.Errorf("section %s: unexpected type %T for variable %s", name, v.Type, vname) + } + if replaced[vname] { return fmt.Errorf("section %s: duplicate variable %s", name, vname) } @@ -159,12 +176,12 @@ func (cs *CollectionSpec) RewriteConstants(consts map[string]interface{}) error return fmt.Errorf("section %s: offset %d(+%d) for variable %s is out of bounds", name, v.Offset, v.Size, vname) } - b, err := marshalBytes(replacement, int(v.Size)) + b, err := sysenc.Marshal(replacement, int(v.Size)) if err != nil { return fmt.Errorf("marshaling constant replacement %s: %w", vname, err) } - copy(cpy[v.Offset:v.Offset+v.Size], b) + b.CopyTo(cpy[v.Offset : v.Offset+v.Size]) replaced[vname] = true } @@ -180,7 +197,7 @@ func (cs *CollectionSpec) RewriteConstants(consts map[string]interface{}) error } if len(missing) != 0 { - return fmt.Errorf("spec is missing one or more constants: %s", strings.Join(missing, ",")) + return fmt.Errorf("rewrite constants: %w", &MissingConstantsError{Constants: missing}) } return nil @@ -198,11 +215,11 @@ func (cs *CollectionSpec) RewriteConstants(consts map[string]interface{}) error // The tag's value specifies the name of the program or map as // found in the CollectionSpec. // -// struct { -// Foo *ebpf.ProgramSpec `ebpf:"xdp_foo"` -// Bar *ebpf.MapSpec `ebpf:"bar_map"` -// Ignored int -// } +// struct { +// Foo *ebpf.ProgramSpec `ebpf:"xdp_foo"` +// Bar *ebpf.MapSpec `ebpf:"bar_map"` +// Ignored int +// } // // Returns an error if any of the eBPF objects can't be found, or // if the same MapSpec or ProgramSpec is assigned multiple times. @@ -249,11 +266,11 @@ func (cs *CollectionSpec) Assign(to interface{}) error { // dependent resources are loaded into the kernel and populated with values if // specified. // -// struct { -// Foo *ebpf.Program `ebpf:"xdp_foo"` -// Bar *ebpf.Map `ebpf:"bar_map"` -// Ignored int -// } +// struct { +// Foo *ebpf.Program `ebpf:"xdp_foo"` +// Bar *ebpf.Map `ebpf:"bar_map"` +// Ignored int +// } // // opts may be nil. // @@ -292,7 +309,7 @@ func (cs *CollectionSpec) LoadAndAssign(to interface{}, opts *CollectionOptions) } // Populate the requested maps. Has a chance of lazy-loading other dependent maps. 
- if err := loader.populateMaps(); err != nil { + if err := loader.populateDeferredMaps(); err != nil { return err } @@ -372,7 +389,7 @@ func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (*Co // Maps can contain Program and Map stubs, so populate them after // all Maps and Programs have been successfully loaded. - if err := loader.populateMaps(); err != nil { + if err := loader.populateDeferredMaps(); err != nil { return nil, err } @@ -386,42 +403,11 @@ func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (*Co }, nil } -type handleCache struct { - btfHandles map[*btf.Spec]*btf.Handle -} - -func newHandleCache() *handleCache { - return &handleCache{ - btfHandles: make(map[*btf.Spec]*btf.Handle), - } -} - -func (hc handleCache) btfHandle(spec *btf.Spec) (*btf.Handle, error) { - if hc.btfHandles[spec] != nil { - return hc.btfHandles[spec], nil - } - - handle, err := btf.NewHandle(spec) - if err != nil { - return nil, err - } - - hc.btfHandles[spec] = handle - return handle, nil -} - -func (hc handleCache) close() { - for _, handle := range hc.btfHandles { - handle.Close() - } -} - type collectionLoader struct { coll *CollectionSpec opts *CollectionOptions maps map[string]*Map programs map[string]*Program - handles *handleCache } func newCollectionLoader(coll *CollectionSpec, opts *CollectionOptions) (*collectionLoader, error) { @@ -436,7 +422,7 @@ func newCollectionLoader(coll *CollectionSpec, opts *CollectionOptions) (*collec return nil, fmt.Errorf("replacement map %s not found in CollectionSpec", name) } - if err := spec.checkCompatibility(m); err != nil { + if err := spec.Compatible(m); err != nil { return nil, fmt.Errorf("using replacement map %s: %w", spec.Name, err) } } @@ -446,13 +432,11 @@ func newCollectionLoader(coll *CollectionSpec, opts *CollectionOptions) (*collec opts, make(map[string]*Map), make(map[string]*Program), - newHandleCache(), }, nil } // close all resources left over in the collectionLoader. func (cl *collectionLoader) close() { - cl.handles.close() for _, m := range cl.maps { m.Close() } @@ -471,10 +455,6 @@ func (cl *collectionLoader) loadMap(mapName string) (*Map, error) { return nil, fmt.Errorf("missing map %s", mapName) } - if mapSpec.BTF != nil && cl.coll.Types != mapSpec.BTF { - return nil, fmt.Errorf("map %s: BTF doesn't match collection", mapName) - } - if replaceMap, ok := cl.opts.MapReplacements[mapName]; ok { // Clone the map to avoid closing user's map later on. m, err := replaceMap.Clone() @@ -486,11 +466,20 @@ func (cl *collectionLoader) loadMap(mapName string) (*Map, error) { return m, nil } - m, err := newMapWithOptions(mapSpec, cl.opts.Maps, cl.handles) + m, err := newMapWithOptions(mapSpec, cl.opts.Maps) if err != nil { return nil, fmt.Errorf("map %s: %w", mapName, err) } + // Finalize 'scalar' maps that don't refer to any other eBPF resources + // potentially pending creation. This is needed for frozen maps like .rodata + // that need to be finalized before invoking the verifier. 
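+	// Maps that can hold other maps or programs are finalized later by
+	// populateDeferredMaps, once their contents exist.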
+ if !mapSpec.Type.canStoreMapOrProgram() { + if err := m.finalize(mapSpec); err != nil { + return nil, fmt.Errorf("finalizing map %s: %w", mapName, err) + } + } + cl.maps[mapName] = m return m, nil } @@ -511,10 +500,6 @@ func (cl *collectionLoader) loadProgram(progName string) (*Program, error) { return nil, fmt.Errorf("cannot load program %s: program type is unspecified", progName) } - if progSpec.BTF != nil && cl.coll.Types != progSpec.BTF { - return nil, fmt.Errorf("program %s: BTF doesn't match collection", progName) - } - progSpec = progSpec.Copy() // Rewrite any reference to a valid map in the program's instructions, @@ -543,7 +528,7 @@ func (cl *collectionLoader) loadProgram(progName string) (*Program, error) { } } - prog, err := newProgramWithOptions(progSpec, cl.opts.Programs, cl.handles) + prog, err := newProgramWithOptions(progSpec, cl.opts.Programs) if err != nil { return nil, fmt.Errorf("program %s: %w", progName, err) } @@ -552,13 +537,21 @@ func (cl *collectionLoader) loadProgram(progName string) (*Program, error) { return prog, nil } -func (cl *collectionLoader) populateMaps() error { +// populateDeferredMaps iterates maps holding programs or other maps and loads +// any dependencies. Populates all maps in cl and freezes them if specified. +func (cl *collectionLoader) populateDeferredMaps() error { for mapName, m := range cl.maps { mapSpec, ok := cl.coll.Maps[mapName] if !ok { return fmt.Errorf("missing map spec %s", mapName) } + // Scalar maps without Map or Program references are finalized during + // creation. Don't finalize them again. + if !mapSpec.Type.canStoreMapOrProgram() { + continue + } + mapSpec = mapSpec.Copy() // MapSpecs that refer to inner maps or programs within the same @@ -567,24 +560,27 @@ func (cl *collectionLoader) populateMaps() error { // Resolve those references to actual Map or Program resources that // have been loaded into the kernel. for i, kv := range mapSpec.Contents { - if objName, ok := kv.Value.(string); ok { - switch mapSpec.Type { - case ProgramArray: - // loadProgram is idempotent and could return an existing Program. - prog, err := cl.loadProgram(objName) - if err != nil { - return fmt.Errorf("loading program %s, for map %s: %w", objName, mapName, err) - } - mapSpec.Contents[i] = MapKV{kv.Key, prog} - - case ArrayOfMaps, HashOfMaps: - // loadMap is idempotent and could return an existing Map. - innerMap, err := cl.loadMap(objName) - if err != nil { - return fmt.Errorf("loading inner map %s, for map %s: %w", objName, mapName, err) - } - mapSpec.Contents[i] = MapKV{kv.Key, innerMap} + objName, ok := kv.Value.(string) + if !ok { + continue + } + + switch t := mapSpec.Type; { + case t.canStoreProgram(): + // loadProgram is idempotent and could return an existing Program. + prog, err := cl.loadProgram(objName) + if err != nil { + return fmt.Errorf("loading program %s, for map %s: %w", objName, mapName, err) + } + mapSpec.Contents[i] = MapKV{kv.Key, prog} + + case t.canStoreMap(): + // loadMap is idempotent and could return an existing Map. + innerMap, err := cl.loadMap(objName) + if err != nil { + return fmt.Errorf("loading inner map %s, for map %s: %w", objName, mapName, err) } + mapSpec.Contents[i] = MapKV{kv.Key, innerMap} } } @@ -597,6 +593,98 @@ func (cl *collectionLoader) populateMaps() error { return nil } +// resolveKconfig resolves all variables declared in .kconfig and populates +// m.Contents. Does nothing if the given m.Contents is non-empty. 
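+//
+// Values are derived from the running kernel's version, feature probes and
+// its kconfig file (typically /proc/config.gz or /boot/config-*).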
+func resolveKconfig(m *MapSpec) error {
+	ds, ok := m.Value.(*btf.Datasec)
+	if !ok {
+		return errors.New("map value is not a Datasec")
+	}
+
+	type configInfo struct {
+		offset uint32
+		typ    btf.Type
+	}
+
+	configs := make(map[string]configInfo)
+
+	data := make([]byte, ds.Size)
+	for _, vsi := range ds.Vars {
+		v := vsi.Type.(*btf.Var)
+		n := v.TypeName()
+
+		switch n {
+		case "LINUX_KERNEL_VERSION":
+			if integer, ok := v.Type.(*btf.Int); !ok || integer.Size != 4 {
+				return fmt.Errorf("variable %s must be a 32-bit integer, got %s", n, v.Type)
+			}
+
+			kv, err := internal.KernelVersion()
+			if err != nil {
+				return fmt.Errorf("getting kernel version: %w", err)
+			}
+			internal.NativeEndian.PutUint32(data[vsi.Offset:], kv.Kernel())
+
+		case "LINUX_HAS_SYSCALL_WRAPPER":
+			integer, ok := v.Type.(*btf.Int)
+			if !ok {
+				return fmt.Errorf("variable %s must be an integer, got %s", n, v.Type)
+			}
+			var value uint64 = 1
+			if err := haveSyscallWrapper(); errors.Is(err, ErrNotSupported) {
+				value = 0
+			} else if err != nil {
+				return fmt.Errorf("unable to derive a value for LINUX_HAS_SYSCALL_WRAPPER: %w", err)
+			}
+
+			if err := kconfig.PutInteger(data[vsi.Offset:], integer, value); err != nil {
+				return fmt.Errorf("set LINUX_HAS_SYSCALL_WRAPPER: %w", err)
+			}
+
+		default: // Catch CONFIG_*.
+			configs[n] = configInfo{
+				offset: vsi.Offset,
+				typ:    v.Type,
+			}
+		}
+	}
+
+	// We only parse the kconfig file if a CONFIG_* variable was found.
+	if len(configs) > 0 {
+		f, err := kconfig.Find()
+		if err != nil {
+			return fmt.Errorf("cannot find a kconfig file: %w", err)
+		}
+		defer f.Close()
+
+		filter := make(map[string]struct{}, len(configs))
+		for config := range configs {
+			filter[config] = struct{}{}
+		}
+
+		kernelConfig, err := kconfig.Parse(f, filter)
+		if err != nil {
+			return fmt.Errorf("cannot parse kconfig file: %w", err)
+		}
+
+		for n, info := range configs {
+			value, ok := kernelConfig[n]
+			if !ok {
+				return fmt.Errorf("config option %q does not exist for this kernel", n)
+			}
+
+			err := kconfig.PutValue(data[info.offset:], info.typ, value)
+			if err != nil {
+				return fmt.Errorf("problem adding value for %s: %w", n, err)
+			}
+		}
+	}
+
+	m.Contents = []MapKV{{uint32(0), data}}
+
+	return nil
+}
+
 // LoadCollection reads an object file and creates and loads its declared
 // resources into the kernel.
 //
@@ -610,6 +698,71 @@ func LoadCollection(file string) (*Collection, error) {
 	return NewCollection(spec)
 }
 
+// Assign the contents of a Collection to a struct.
+//
+// This function bridges functionality between bpf2go generated
+// code and any functionality better implemented in Collection.
+//
+// 'to' must be a pointer to a struct. A field of the
+// struct is updated with values from Programs or Maps if it
+// has an `ebpf` tag and its type is *Program or *Map.
+// The tag's value specifies the name of the program or map as
+// found in the CollectionSpec.
+//
+//	struct {
+//		Foo *ebpf.Program `ebpf:"xdp_foo"`
+//		Bar *ebpf.Map     `ebpf:"bar_map"`
+//		Ignored int
+//	}
+//
+// Returns an error if any of the eBPF objects can't be found, or
+// if the same Map or Program is assigned multiple times.
+//
+// Ownership and Close()ing responsibility are transferred to `to`
+// for any successful assigns. On error `to` is left in an undefined state.
+func (coll *Collection) Assign(to interface{}) error {
+	assignedMaps := make(map[string]bool)
+	assignedProgs := make(map[string]bool)
+
+	// Assign() only transfers already-loaded Maps and Programs. No extra
+	// loading is done.
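+	//
+	// A minimal usage sketch (illustrative, reusing the xdp_foo name from
+	// the doc comment above):
+	//
+	//	var objs struct {
+	//		Foo *ebpf.Program `ebpf:"xdp_foo"`
+	//	}
+	//	err := coll.Assign(&objs)
+	//	// On success, coll.Programs no longer owns "xdp_foo".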
+ getValue := func(typ reflect.Type, name string) (interface{}, error) { + switch typ { + + case reflect.TypeOf((*Program)(nil)): + if p := coll.Programs[name]; p != nil { + assignedProgs[name] = true + return p, nil + } + return nil, fmt.Errorf("missing program %q", name) + + case reflect.TypeOf((*Map)(nil)): + if m := coll.Maps[name]; m != nil { + assignedMaps[name] = true + return m, nil + } + return nil, fmt.Errorf("missing map %q", name) + + default: + return nil, fmt.Errorf("unsupported type %s", typ) + } + } + + if err := assignValues(to, getValue); err != nil { + return err + } + + // Finalize ownership transfer + for p := range assignedProgs { + delete(coll.Programs, p) + } + for m := range assignedMaps { + delete(coll.Maps, m) + } + + return nil +} + // Close frees all maps and programs associated with the collection. // // The collection mustn't be used afterwards. diff --git a/vendor/github.com/cilium/ebpf/internal/cpu.go b/vendor/github.com/cilium/ebpf/cpu.go similarity index 69% rename from vendor/github.com/cilium/ebpf/internal/cpu.go rename to vendor/github.com/cilium/ebpf/cpu.go index 3affa1efb9..07e959efdc 100644 --- a/vendor/github.com/cilium/ebpf/internal/cpu.go +++ b/vendor/github.com/cilium/ebpf/cpu.go @@ -1,4 +1,4 @@ -package internal +package ebpf import ( "fmt" @@ -7,20 +7,24 @@ import ( "sync" ) -var sysCPU struct { - once sync.Once - err error - num int -} +var possibleCPU = sync.OnceValues(func() (int, error) { + return parseCPUsFromFile("/sys/devices/system/cpu/possible") +}) -// PossibleCPUs returns the max number of CPUs a system may possibly have +// PossibleCPU returns the max number of CPUs a system may possibly have // Logical CPU numbers must be of the form 0-n -func PossibleCPUs() (int, error) { - sysCPU.once.Do(func() { - sysCPU.num, sysCPU.err = parseCPUsFromFile("/sys/devices/system/cpu/possible") - }) +func PossibleCPU() (int, error) { + return possibleCPU() +} - return sysCPU.num, sysCPU.err +// MustPossibleCPU is a helper that wraps a call to PossibleCPU and panics if +// the error is non-nil. +func MustPossibleCPU() int { + cpus, err := PossibleCPU() + if err != nil { + panic(err) + } + return cpus } func parseCPUsFromFile(path string) (int, error) { diff --git a/vendor/github.com/cilium/ebpf/elf_reader.go b/vendor/github.com/cilium/ebpf/elf_reader.go index df278895c6..d55ab88928 100644 --- a/vendor/github.com/cilium/ebpf/elf_reader.go +++ b/vendor/github.com/cilium/ebpf/elf_reader.go @@ -15,9 +15,24 @@ import ( "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/unix" ) +type kconfigMetaKey struct{} + +type kconfigMeta struct { + Map *MapSpec + Offset uint32 +} + +type kfuncMetaKey struct{} + +type kfuncMeta struct { + Binding elf.SymBind + Func *btf.Func +} + // elfCode is a convenience to reduce the amount of arguments that have to // be passed around explicitly. You should treat its contents as immutable. type elfCode struct { @@ -27,6 +42,9 @@ type elfCode struct { version uint32 btf *btf.Spec extInfo *btf.ExtInfos + maps map[string]*MapSpec + kfuncs map[string]*btf.Func + kconfig *MapSpec } // LoadCollectionSpec parses an ELF file into a CollectionSpec. @@ -51,6 +69,12 @@ func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) { return nil, err } + // Checks if the ELF file is for BPF data. + // Old LLVM versions set e_machine to EM_NONE. 
+ if f.File.Machine != unix.EM_NONE && f.File.Machine != elf.EM_BPF { + return nil, fmt.Errorf("unexpected machine type for BPF ELF: %s", f.File.Machine) + } + var ( licenseSection *elf.Section versionSection *elf.Section @@ -63,6 +87,8 @@ func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) { // Collect all the sections we're interested in. This includes relocations // which we parse later. + // + // Keep the documentation at docs/ebpf/loading/elf-sections.md up-to-date. for i, sec := range f.Sections { idx := elf.SectionIndex(i) @@ -107,6 +133,8 @@ func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) { version: version, btf: btfSpec, extInfo: btfExtInfo, + maps: make(map[string]*MapSpec), + kfuncs: make(map[string]*btf.Func), } symbols, err := f.Symbols() @@ -120,27 +148,33 @@ func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) { return nil, fmt.Errorf("load relocations: %w", err) } - // Collect all the various ways to define maps. - maps := make(map[string]*MapSpec) - if err := ec.loadMaps(maps); err != nil { + if err := ec.loadMaps(); err != nil { return nil, fmt.Errorf("load maps: %w", err) } - if err := ec.loadBTFMaps(maps); err != nil { + if err := ec.loadBTFMaps(); err != nil { return nil, fmt.Errorf("load BTF maps: %w", err) } - if err := ec.loadDataSections(maps); err != nil { + if err := ec.loadDataSections(); err != nil { return nil, fmt.Errorf("load data sections: %w", err) } + if err := ec.loadKconfigSection(); err != nil { + return nil, fmt.Errorf("load virtual .kconfig section: %w", err) + } + + if err := ec.loadKsymsSection(); err != nil { + return nil, fmt.Errorf("load virtual .ksyms section: %w", err) + } + // Finally, collect programs and link them. progs, err := ec.loadProgramSections() if err != nil { return nil, fmt.Errorf("load programs: %w", err) } - return &CollectionSpec{maps, progs, btfSpec, ec.ByteOrder}, nil + return &CollectionSpec{ec.maps, progs, btfSpec, ec.ByteOrder}, nil } func loadLicense(sec *elf.Section) (string, error) { @@ -261,10 +295,6 @@ func (ec *elfCode) loadRelocations(relSections map[elf.SectionIndex]*elf.Section return fmt.Errorf("section %q: reference to %q in section %s: %w", section.Name, rel.Name, rel.Section, ErrNotSupported) } - if target.Flags&elf.SHF_STRINGS > 0 { - return fmt.Errorf("section %q: string is not stack allocated: %w", section.Name, ErrNotSupported) - } - target.references++ } @@ -312,7 +342,6 @@ func (ec *elfCode) loadProgramSections() (map[string]*ProgramSpec, error) { KernelVersion: ec.version, Instructions: insns, ByteOrder: ec.ByteOrder, - BTF: ec.btf, } // Function names must be unique within a single ELF blob. @@ -350,7 +379,7 @@ func (ec *elfCode) loadFunctions(section *elfSection) (map[string]asm.Instructio r := bufio.NewReader(section.Open()) // Decode the section's instruction stream. 
- var insns asm.Instructions + insns := make(asm.Instructions, 0, section.Size/asm.InstructionSize) if err := insns.Unmarshal(r, ec.ByteOrder); err != nil { return nil, fmt.Errorf("decoding instructions for section %s: %w", section.Name, err) } @@ -433,6 +462,8 @@ func jumpTarget(offset uint64, ins asm.Instruction) uint64 { return uint64(dest) } +var errUnsupportedBinding = errors.New("unsupported binding") + func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) error { var ( typ = elf.ST_TYPE(rel.Info) @@ -444,10 +475,14 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err switch target.kind { case mapSection, btfMapSection: - if bind != elf.STB_GLOBAL { + if bind == elf.STB_LOCAL { return fmt.Errorf("possible erroneous static qualifier on map definition: found reference to %q", name) } + if bind != elf.STB_GLOBAL { + return fmt.Errorf("map %q: %w: %s", name, errUnsupportedBinding, bind) + } + if typ != elf.STT_OBJECT && typ != elf.STT_NOTYPE { // STT_NOTYPE is generated on clang < 8 which doesn't tag // relocations appropriately. @@ -461,7 +496,7 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err switch typ { case elf.STT_SECTION: if bind != elf.STB_LOCAL { - return fmt.Errorf("direct load: %s: unsupported section relocation %s", name, bind) + return fmt.Errorf("direct load: %s: %w: %s", name, errUnsupportedBinding, bind) } // This is really a reference to a static symbol, which clang doesn't @@ -472,7 +507,7 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err case elf.STT_OBJECT: // LLVM 9 emits OBJECT-LOCAL symbols for anonymous constants. if bind != elf.STB_GLOBAL && bind != elf.STB_LOCAL { - return fmt.Errorf("direct load: %s: unsupported object relocation %s", name, bind) + return fmt.Errorf("direct load: %s: %w: %s", name, errUnsupportedBinding, bind) } offset = uint32(rel.Value) @@ -480,7 +515,7 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err case elf.STT_NOTYPE: // LLVM 7 emits NOTYPE-LOCAL symbols for anonymous constants. 
 		if bind != elf.STB_LOCAL {
-			return fmt.Errorf("direct load: %s: unsupported untyped relocation %s", name, bind)
+			return fmt.Errorf("direct load: %s: %w: %s", name, errUnsupportedBinding, bind)
 		}
 
 		offset = uint32(rel.Value)
 
@@ -508,12 +543,12 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err
 		switch typ {
 		case elf.STT_NOTYPE, elf.STT_FUNC:
 			if bind != elf.STB_GLOBAL {
-				return fmt.Errorf("call: %s: unsupported binding: %s", name, bind)
+				return fmt.Errorf("call: %s: %w: %s", name, errUnsupportedBinding, bind)
 			}
 
 		case elf.STT_SECTION:
 			if bind != elf.STB_LOCAL {
-				return fmt.Errorf("call: %s: unsupported binding: %s", name, bind)
+				return fmt.Errorf("call: %s: %w: %s", name, errUnsupportedBinding, bind)
 			}
 
 			// The function we want to call is in the indicated section,
@@ -536,12 +571,12 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err
 		switch typ {
 		case elf.STT_FUNC:
 			if bind != elf.STB_GLOBAL {
-				return fmt.Errorf("load: %s: unsupported binding: %s", name, bind)
+				return fmt.Errorf("load: %s: %w: %s", name, errUnsupportedBinding, bind)
 			}
 
 		case elf.STT_SECTION:
 			if bind != elf.STB_LOCAL {
-				return fmt.Errorf("load: %s: unsupported binding: %s", name, bind)
+				return fmt.Errorf("load: %s: %w: %s", name, errUnsupportedBinding, bind)
 			}
 
 			// ins.Constant already contains the offset in bytes from the
@@ -565,16 +600,65 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err
 		return fmt.Errorf("neither a call nor a load instruction: %v", ins)
 	}
 
+	// The Undefined section is used for 'virtual' symbols that aren't backed by
+	// an ELF section. This includes symbol references from inline asm, forward
+	// function declarations, as well as extern kfunc declarations using __ksym
+	// and extern kconfig variables declared using __kconfig.
 	case undefSection:
-		if bind != elf.STB_GLOBAL {
-			return fmt.Errorf("asm relocation: %s: unsupported binding: %s", name, bind)
+		if bind != elf.STB_GLOBAL && bind != elf.STB_WEAK {
+			return fmt.Errorf("asm relocation: %s: %w: %s", name, errUnsupportedBinding, bind)
 		}
 
 		if typ != elf.STT_NOTYPE {
 			return fmt.Errorf("asm relocation: %s: unsupported type %s", name, typ)
 		}
 
-		// There is nothing to do here but set ins.Reference.
+		kf := ec.kfuncs[name]
+		switch {
+		// If a Call / DWordLoad instruction is found and the datasec has a btf.Func with a Name
+		// that matches the symbol name, we mark the instruction as referencing a kfunc.
+		case kf != nil && ins.OpCode.JumpOp() == asm.Call:
+			ins.Metadata.Set(kfuncMetaKey{}, &kfuncMeta{
+				Func:    kf,
+				Binding: bind,
+			})
+
+			ins.Src = asm.PseudoKfuncCall
+			ins.Constant = -1
+
+		case kf != nil && ins.OpCode.IsDWordLoad():
+			ins.Metadata.Set(kfuncMetaKey{}, &kfuncMeta{
+				Func:    kf,
+				Binding: bind,
+			})
+
+			ins.Constant = 0
+
+		// If no kconfig map is found, this must be a symbol reference from inline
+		// asm (see testdata/loader.c:asm_relocation()) or a call to a forward
+		// function declaration (see testdata/fwd_decl.c). Don't interfere; these
+		// remain standard symbol references.
+		// extern __kconfig reads are represented as dword loads that need to be
+		// rewritten to pseudo map loads from .kconfig. If the map is present,
+		// require it to contain the symbol to disambiguate between inline asm
+		// relos and kconfigs.
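+		// For example (illustrative): a BPF object declaring
+		//
+		//	extern int LINUX_KERNEL_VERSION __kconfig;
+		//
+		// emits a dword load against the undefined symbol
+		// LINUX_KERNEL_VERSION; the loop below rewrites it into a
+		// PseudoMapValue load from the .kconfig Array at the variable's
+		// offset within the Datasec.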
+ case ec.kconfig != nil && ins.OpCode.IsDWordLoad(): + if bind != elf.STB_GLOBAL { + return fmt.Errorf("asm relocation: %s: %w: %s", name, errUnsupportedBinding, bind) + } + + for _, vsi := range ec.kconfig.Value.(*btf.Datasec).Vars { + if vsi.Type.(*btf.Var).Name != rel.Name { + continue + } + + ins.Src = asm.PseudoMapValue + ins.Metadata.Set(kconfigMetaKey{}, &kconfigMeta{ec.kconfig, vsi.Offset}) + return nil + } + + return fmt.Errorf("kconfig %s not found in .kconfig", rel.Name) + } default: return fmt.Errorf("relocation to %q: %w", target.Name, ErrNotSupported) @@ -584,7 +668,7 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err return nil } -func (ec *elfCode) loadMaps(maps map[string]*MapSpec) error { +func (ec *elfCode) loadMaps() error { for _, sec := range ec.sections { if sec.kind != mapSection { continue @@ -610,7 +694,7 @@ func (ec *elfCode) loadMaps(maps map[string]*MapSpec) error { } mapName := mapSym.Name - if maps[mapName] != nil { + if ec.maps[mapName] != nil { return fmt.Errorf("section %v: map %v already exists", sec.Name, mapSym) } @@ -640,11 +724,7 @@ func (ec *elfCode) loadMaps(maps map[string]*MapSpec) error { spec.Extra = bytes.NewReader(extra) } - if err := spec.clampPerfEventArraySize(); err != nil { - return fmt.Errorf("map %s: %w", mapName, err) - } - - maps[mapName] = &spec + ec.maps[mapName] = &spec } } @@ -654,7 +734,7 @@ func (ec *elfCode) loadMaps(maps map[string]*MapSpec) error { // loadBTFMaps iterates over all ELF sections marked as BTF map sections // (like .maps) and parses them into MapSpecs. Dump the .maps section and // any relocations with `readelf -x .maps -r `. -func (ec *elfCode) loadBTFMaps(maps map[string]*MapSpec) error { +func (ec *elfCode) loadBTFMaps() error { for _, sec := range ec.sections { if sec.kind != btfMapSection { continue @@ -693,12 +773,12 @@ func (ec *elfCode) loadBTFMaps(maps map[string]*MapSpec) error { return fmt.Errorf("section %v: map %s: initializing BTF map definitions: %w", sec.Name, name, internal.ErrNotSupported) } - if maps[name] != nil { + if ec.maps[name] != nil { return fmt.Errorf("section %v: map %s already exists", sec.Name, name) } // Each Var representing a BTF map definition contains a Struct. - mapStruct, ok := v.Type.(*btf.Struct) + mapStruct, ok := btf.UnderlyingType(v.Type).(*btf.Struct) if !ok { return fmt.Errorf("expected struct, got %s", v.Type) } @@ -708,11 +788,7 @@ func (ec *elfCode) loadBTFMaps(maps map[string]*MapSpec) error { return fmt.Errorf("map %v: %w", name, err) } - if err := mapSpec.clampPerfEventArraySize(); err != nil { - return fmt.Errorf("map %v: %w", name, err) - } - - maps[name] = mapSpec + ec.maps[name] = mapSpec } // Drain the ELF section reader to make sure all bytes are accounted for @@ -731,7 +807,7 @@ func (ec *elfCode) loadBTFMaps(maps map[string]*MapSpec) error { // mapSpecFromBTF produces a MapSpec based on a btf.Struct def representing // a BTF map definition. The name and spec arguments will be copied to the -// resulting MapSpec, and inner must be true on any resursive invocations. +// resulting MapSpec, and inner must be true on any recursive invocations. 
func mapSpecFromBTF(es *elfSection, vs *btf.VarSecinfo, def *btf.Struct, spec *btf.Spec, name string, inner bool) (*MapSpec, error) { var ( key, value btf.Type @@ -901,13 +977,6 @@ func mapSpecFromBTF(es *elfSection, vs *btf.VarSecinfo, def *btf.Struct, spec *b } } - if key == nil { - key = &btf.Void{} - } - if value == nil { - value = &btf.Void{} - } - return &MapSpec{ Name: SanitizeName(name, -1), Type: MapType(mapType), @@ -917,7 +986,6 @@ func mapSpecFromBTF(es *elfSection, vs *btf.VarSecinfo, def *btf.Struct, spec *b Flags: flags, Key: key, Value: value, - BTF: spec, Pinning: pinType, InnerMap: innerMapSpec, Contents: contents, @@ -1008,14 +1076,14 @@ func resolveBTFValuesContents(es *elfSection, vs *btf.VarSecinfo, member btf.Mem case elf.STT_OBJECT: contents = append(contents, MapKV{uint32(k), r.Name}) default: - return nil, fmt.Errorf("unknown relocation type %v", t) + return nil, fmt.Errorf("unknown relocation type %v for symbol %s", t, r.Name) } } return contents, nil } -func (ec *elfCode) loadDataSections(maps map[string]*MapSpec) error { +func (ec *elfCode) loadDataSections() error { for _, sec := range ec.sections { if sec.kind != dataSection { continue @@ -1027,22 +1095,33 @@ func (ec *elfCode) loadDataSections(maps map[string]*MapSpec) error { continue } - data, err := sec.Data() - if err != nil { - return fmt.Errorf("data section %s: can't get contents: %w", sec.Name, err) - } - - if uint64(len(data)) > math.MaxUint32 { - return fmt.Errorf("data section %s: contents exceed maximum size", sec.Name) - } - mapSpec := &MapSpec{ Name: SanitizeName(sec.Name, -1), Type: Array, KeySize: 4, - ValueSize: uint32(len(data)), + ValueSize: uint32(sec.Size), MaxEntries: 1, - Contents: []MapKV{{uint32(0), data}}, + } + + switch sec.Type { + // Only open the section if we know there's actual data to be read. + case elf.SHT_PROGBITS: + data, err := sec.Data() + if err != nil { + return fmt.Errorf("data section %s: can't get contents: %w", sec.Name, err) + } + + if uint64(len(data)) > math.MaxUint32 { + return fmt.Errorf("data section %s: contents exceed maximum size", sec.Name) + } + mapSpec.Contents = []MapKV{{uint32(0), data}} + + case elf.SHT_NOBITS: + // NOBITS sections like .bss contain only zeroes, and since data sections + // are Arrays, the kernel already preallocates them. Skip reading zeroes + // from the ELF. + default: + return fmt.Errorf("data section %s: unknown section type %s", sec.Name, sec.Type) } // It is possible for a data section to exist without a corresponding BTF Datasec @@ -1051,123 +1130,181 @@ func (ec *elfCode) loadDataSections(maps map[string]*MapSpec) error { var ds *btf.Datasec if ec.btf.TypeByName(sec.Name, &ds) == nil { // Assign the spec's key and BTF only if the Datasec lookup was successful. - mapSpec.BTF = ec.btf mapSpec.Key = &btf.Void{} mapSpec.Value = ds } } - switch n := sec.Name; { - case strings.HasPrefix(n, ".rodata"): + if strings.HasPrefix(sec.Name, ".rodata") { mapSpec.Flags = unix.BPF_F_RDONLY_PROG mapSpec.Freeze = true - case n == ".bss": - // The kernel already zero-initializes the map - mapSpec.Contents = nil } - maps[sec.Name] = mapSpec + ec.maps[sec.Name] = mapSpec } + return nil } -func getProgType(sectionName string) (ProgramType, AttachType, uint32, string) { - types := []struct { - prefix string - progType ProgramType - attachType AttachType - progFlags uint32 - }{ - // Please update the types from libbpf.c and follow the order of it. 
- // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/libbpf.c - {"socket", SocketFilter, AttachNone, 0}, - {"sk_reuseport/migrate", SkReuseport, AttachSkReuseportSelectOrMigrate, 0}, - {"sk_reuseport", SkReuseport, AttachSkReuseportSelect, 0}, - {"kprobe/", Kprobe, AttachNone, 0}, - {"uprobe/", Kprobe, AttachNone, 0}, - {"kretprobe/", Kprobe, AttachNone, 0}, - {"uretprobe/", Kprobe, AttachNone, 0}, - {"tc", SchedCLS, AttachNone, 0}, - {"classifier", SchedCLS, AttachNone, 0}, - {"action", SchedACT, AttachNone, 0}, - {"tracepoint/", TracePoint, AttachNone, 0}, - {"tp/", TracePoint, AttachNone, 0}, - {"raw_tracepoint/", RawTracepoint, AttachNone, 0}, - {"raw_tp/", RawTracepoint, AttachNone, 0}, - {"raw_tracepoint.w/", RawTracepointWritable, AttachNone, 0}, - {"raw_tp.w/", RawTracepointWritable, AttachNone, 0}, - {"tp_btf/", Tracing, AttachTraceRawTp, 0}, - {"fentry/", Tracing, AttachTraceFEntry, 0}, - {"fmod_ret/", Tracing, AttachModifyReturn, 0}, - {"fexit/", Tracing, AttachTraceFExit, 0}, - {"fentry.s/", Tracing, AttachTraceFEntry, unix.BPF_F_SLEEPABLE}, - {"fmod_ret.s/", Tracing, AttachModifyReturn, unix.BPF_F_SLEEPABLE}, - {"fexit.s/", Tracing, AttachTraceFExit, unix.BPF_F_SLEEPABLE}, - {"freplace/", Extension, AttachNone, 0}, - {"lsm/", LSM, AttachLSMMac, 0}, - {"lsm.s/", LSM, AttachLSMMac, unix.BPF_F_SLEEPABLE}, - {"iter/", Tracing, AttachTraceIter, 0}, - {"syscall", Syscall, AttachNone, 0}, - {"xdp_devmap/", XDP, AttachXDPDevMap, 0}, - {"xdp_cpumap/", XDP, AttachXDPCPUMap, 0}, - {"xdp", XDP, AttachNone, 0}, - {"perf_event", PerfEvent, AttachNone, 0}, - {"lwt_in", LWTIn, AttachNone, 0}, - {"lwt_out", LWTOut, AttachNone, 0}, - {"lwt_xmit", LWTXmit, AttachNone, 0}, - {"lwt_seg6local", LWTSeg6Local, AttachNone, 0}, - {"cgroup_skb/ingress", CGroupSKB, AttachCGroupInetIngress, 0}, - {"cgroup_skb/egress", CGroupSKB, AttachCGroupInetEgress, 0}, - {"cgroup/skb", CGroupSKB, AttachNone, 0}, - {"cgroup/sock_create", CGroupSock, AttachCGroupInetSockCreate, 0}, - {"cgroup/sock_release", CGroupSock, AttachCgroupInetSockRelease, 0}, - {"cgroup/sock", CGroupSock, AttachCGroupInetSockCreate, 0}, - {"cgroup/post_bind4", CGroupSock, AttachCGroupInet4PostBind, 0}, - {"cgroup/post_bind6", CGroupSock, AttachCGroupInet6PostBind, 0}, - {"cgroup/dev", CGroupDevice, AttachCGroupDevice, 0}, - {"sockops", SockOps, AttachCGroupSockOps, 0}, - {"sk_skb/stream_parser", SkSKB, AttachSkSKBStreamParser, 0}, - {"sk_skb/stream_verdict", SkSKB, AttachSkSKBStreamVerdict, 0}, - {"sk_skb", SkSKB, AttachNone, 0}, - {"sk_msg", SkMsg, AttachSkMsgVerdict, 0}, - {"lirc_mode2", LircMode2, AttachLircMode2, 0}, - {"flow_dissector", FlowDissector, AttachFlowDissector, 0}, - {"cgroup/bind4", CGroupSockAddr, AttachCGroupInet4Bind, 0}, - {"cgroup/bind6", CGroupSockAddr, AttachCGroupInet6Bind, 0}, - {"cgroup/connect4", CGroupSockAddr, AttachCGroupInet4Connect, 0}, - {"cgroup/connect6", CGroupSockAddr, AttachCGroupInet6Connect, 0}, - {"cgroup/sendmsg4", CGroupSockAddr, AttachCGroupUDP4Sendmsg, 0}, - {"cgroup/sendmsg6", CGroupSockAddr, AttachCGroupUDP6Sendmsg, 0}, - {"cgroup/recvmsg4", CGroupSockAddr, AttachCGroupUDP4Recvmsg, 0}, - {"cgroup/recvmsg6", CGroupSockAddr, AttachCGroupUDP6Recvmsg, 0}, - {"cgroup/getpeername4", CGroupSockAddr, AttachCgroupInet4GetPeername, 0}, - {"cgroup/getpeername6", CGroupSockAddr, AttachCgroupInet6GetPeername, 0}, - {"cgroup/getsockname4", CGroupSockAddr, AttachCgroupInet4GetSockname, 0}, - {"cgroup/getsockname6", CGroupSockAddr, AttachCgroupInet6GetSockname, 0}, - 
{"cgroup/sysctl", CGroupSysctl, AttachCGroupSysctl, 0}, - {"cgroup/getsockopt", CGroupSockopt, AttachCGroupGetsockopt, 0}, - {"cgroup/setsockopt", CGroupSockopt, AttachCGroupSetsockopt, 0}, - {"struct_ops+", StructOps, AttachNone, 0}, - {"sk_lookup/", SkLookup, AttachSkLookup, 0}, - - {"seccomp", SocketFilter, AttachNone, 0}, +// loadKconfigSection handles the 'virtual' Datasec .kconfig that doesn't +// have a corresponding ELF section and exist purely in BTF. +func (ec *elfCode) loadKconfigSection() error { + if ec.btf == nil { + return nil + } + + var ds *btf.Datasec + err := ec.btf.TypeByName(".kconfig", &ds) + if errors.Is(err, btf.ErrNotFound) { + return nil + } + if err != nil { + return err + } + + if ds.Size == 0 { + return errors.New("zero-length .kconfig") + } + + ec.kconfig = &MapSpec{ + Name: ".kconfig", + Type: Array, + KeySize: uint32(4), + ValueSize: ds.Size, + MaxEntries: 1, + Flags: unix.BPF_F_RDONLY_PROG, + Freeze: true, + Key: &btf.Int{Size: 4}, + Value: ds, + } + + return nil +} + +// loadKsymsSection handles the 'virtual' Datasec .ksyms that doesn't +// have a corresponding ELF section and exist purely in BTF. +func (ec *elfCode) loadKsymsSection() error { + if ec.btf == nil { + return nil } - for _, t := range types { - if !strings.HasPrefix(sectionName, t.prefix) { + var ds *btf.Datasec + err := ec.btf.TypeByName(".ksyms", &ds) + if errors.Is(err, btf.ErrNotFound) { + return nil + } + if err != nil { + return err + } + + for _, v := range ds.Vars { + // we have already checked the .ksyms Datasec to only contain Func Vars. + ec.kfuncs[v.Type.TypeName()] = v.Type.(*btf.Func) + } + + return nil +} + +type libbpfElfSectionDef struct { + pattern string + programType sys.ProgType + attachType sys.AttachType + flags libbpfElfSectionFlag +} + +type libbpfElfSectionFlag uint32 + +// The values correspond to enum sec_def_flags in libbpf. +const ( + _SEC_NONE libbpfElfSectionFlag = 0 + + _SEC_EXP_ATTACH_OPT libbpfElfSectionFlag = 1 << (iota - 1) + _SEC_ATTACHABLE + _SEC_ATTACH_BTF + _SEC_SLEEPABLE + _SEC_XDP_FRAGS + _SEC_USDT + + // Ignore any present extra in order to preserve backwards compatibility + // with earlier versions of the library. + ignoreExtra + + _SEC_ATTACHABLE_OPT = _SEC_ATTACHABLE | _SEC_EXP_ATTACH_OPT +) + +func init() { + // Compatibility with older versions of the library. + // We prepend libbpf definitions since they contain a prefix match + // for "xdp". + elfSectionDefs = append([]libbpfElfSectionDef{ + {"xdp.frags/", sys.BPF_PROG_TYPE_XDP, sys.BPF_XDP, _SEC_XDP_FRAGS | ignoreExtra}, + {"xdp.frags_devmap/", sys.BPF_PROG_TYPE_XDP, sys.BPF_XDP_DEVMAP, _SEC_XDP_FRAGS}, + {"xdp_devmap/", sys.BPF_PROG_TYPE_XDP, sys.BPF_XDP_DEVMAP, 0}, + {"xdp.frags_cpumap/", sys.BPF_PROG_TYPE_XDP, sys.BPF_XDP_CPUMAP, _SEC_XDP_FRAGS}, + {"xdp_cpumap/", sys.BPF_PROG_TYPE_XDP, sys.BPF_XDP_CPUMAP, 0}, + // This has been in the library since the beginning of time. Not sure + // where it came from. + {"seccomp", sys.BPF_PROG_TYPE_SOCKET_FILTER, 0, _SEC_NONE}, + }, elfSectionDefs...) +} + +func getProgType(sectionName string) (ProgramType, AttachType, uint32, string) { + // Skip optional program marking for now. 
+	sectionName = strings.TrimPrefix(sectionName, "?")
+
+	for _, t := range elfSectionDefs {
+		extra, ok := matchSectionName(sectionName, t.pattern)
+		if !ok {
 			continue
 		}
 
-		if !strings.HasSuffix(t.prefix, "/") {
-			return t.progType, t.attachType, t.progFlags, ""
+		programType := ProgramType(t.programType)
+		attachType := AttachType(t.attachType)
+
+		var flags uint32
+		if t.flags&_SEC_SLEEPABLE > 0 {
+			flags |= unix.BPF_F_SLEEPABLE
+		}
+		if t.flags&_SEC_XDP_FRAGS > 0 {
+			flags |= unix.BPF_F_XDP_HAS_FRAGS
+		}
+		if t.flags&_SEC_EXP_ATTACH_OPT > 0 {
+			if programType == XDP {
+				// The library doesn't yet have code to fall back to not
+				// specifying attach type. Only do this for XDP since we've
+				// enforced correct attach type for all other program types.
+				attachType = AttachNone
+			}
+		}
+		if t.flags&ignoreExtra > 0 {
+			extra = ""
 		}
 
-		return t.progType, t.attachType, t.progFlags, sectionName[len(t.prefix):]
+		return programType, attachType, flags, extra
 	}
 
 	return UnspecifiedProgram, AttachNone, 0, ""
 }
 
+// matchSectionName checks a section name against a pattern.
+//
+// Its behaviour mirrors that of libbpf's sec_def_matches.
+func matchSectionName(sectionName, pattern string) (extra string, found bool) {
+	have, extra, found := strings.Cut(sectionName, "/")
+	want := strings.TrimRight(pattern, "+/")
+
+	if strings.HasSuffix(pattern, "/") {
+		// Section name must have a slash and extra may be empty.
+		return extra, have == want && found
+	} else if strings.HasSuffix(pattern, "+") {
+		// Section name may have a slash and extra may be empty.
+		return extra, have == want
+	}
+
+	// Section name must have a prefix. extra is ignored.
+	return "", strings.HasPrefix(sectionName, pattern)
+}
+
 func (ec *elfCode) loadSectionRelocations(sec *elf.Section, symbols []elf.Symbol) (map[uint64]elf.Symbol, error) {
 	rels := make(map[uint64]elf.Symbol)
 
diff --git a/vendor/github.com/cilium/ebpf/elf_sections.go b/vendor/github.com/cilium/ebpf/elf_sections.go
new file mode 100644
index 0000000000..4b58251d9a
--- /dev/null
+++ b/vendor/github.com/cilium/ebpf/elf_sections.go
@@ -0,0 +1,109 @@
+// Code generated by internal/cmd/gensections.awk; DO NOT EDIT.
+
+package ebpf
+
+// Code in this file is derived from libbpf, available under BSD-2-Clause.
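+//
+// Pattern semantics, as implemented by matchSectionName in elf_reader.go
+// (examples illustrative): a trailing '/' requires an extra suffix
+// ("kprobe/" matches "kprobe/sys_open" but not "kprobe"), a trailing '+'
+// makes the suffix optional ("kprobe+" matches both), and a bare pattern
+// is a plain prefix match.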
+ +import "github.com/cilium/ebpf/internal/sys" + +var elfSectionDefs = []libbpfElfSectionDef{ + {"socket", sys.BPF_PROG_TYPE_SOCKET_FILTER, 0, _SEC_NONE}, + {"sk_reuseport/migrate", sys.BPF_PROG_TYPE_SK_REUSEPORT, sys.BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, _SEC_ATTACHABLE}, + {"sk_reuseport", sys.BPF_PROG_TYPE_SK_REUSEPORT, sys.BPF_SK_REUSEPORT_SELECT, _SEC_ATTACHABLE}, + {"kprobe+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_NONE}, + {"uprobe+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_NONE}, + {"uprobe.s+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_SLEEPABLE}, + {"kretprobe+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_NONE}, + {"uretprobe+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_NONE}, + {"uretprobe.s+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_SLEEPABLE}, + {"kprobe.multi+", sys.BPF_PROG_TYPE_KPROBE, sys.BPF_TRACE_KPROBE_MULTI, _SEC_NONE}, + {"kretprobe.multi+", sys.BPF_PROG_TYPE_KPROBE, sys.BPF_TRACE_KPROBE_MULTI, _SEC_NONE}, + {"uprobe.multi+", sys.BPF_PROG_TYPE_KPROBE, sys.BPF_TRACE_UPROBE_MULTI, _SEC_NONE}, + {"uretprobe.multi+", sys.BPF_PROG_TYPE_KPROBE, sys.BPF_TRACE_UPROBE_MULTI, _SEC_NONE}, + {"uprobe.multi.s+", sys.BPF_PROG_TYPE_KPROBE, sys.BPF_TRACE_UPROBE_MULTI, _SEC_SLEEPABLE}, + {"uretprobe.multi.s+", sys.BPF_PROG_TYPE_KPROBE, sys.BPF_TRACE_UPROBE_MULTI, _SEC_SLEEPABLE}, + {"ksyscall+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_NONE}, + {"kretsyscall+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_NONE}, + {"usdt+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_USDT}, + {"usdt.s+", sys.BPF_PROG_TYPE_KPROBE, 0, _SEC_USDT | _SEC_SLEEPABLE}, + {"tc/ingress", sys.BPF_PROG_TYPE_SCHED_CLS, sys.BPF_TCX_INGRESS, _SEC_NONE}, + {"tc/egress", sys.BPF_PROG_TYPE_SCHED_CLS, sys.BPF_TCX_EGRESS, _SEC_NONE}, + {"tcx/ingress", sys.BPF_PROG_TYPE_SCHED_CLS, sys.BPF_TCX_INGRESS, _SEC_NONE}, + {"tcx/egress", sys.BPF_PROG_TYPE_SCHED_CLS, sys.BPF_TCX_EGRESS, _SEC_NONE}, + {"tc", sys.BPF_PROG_TYPE_SCHED_CLS, 0, _SEC_NONE}, + {"classifier", sys.BPF_PROG_TYPE_SCHED_CLS, 0, _SEC_NONE}, + {"action", sys.BPF_PROG_TYPE_SCHED_ACT, 0, _SEC_NONE}, + {"netkit/primary", sys.BPF_PROG_TYPE_SCHED_CLS, sys.BPF_NETKIT_PRIMARY, _SEC_NONE}, + {"netkit/peer", sys.BPF_PROG_TYPE_SCHED_CLS, sys.BPF_NETKIT_PEER, _SEC_NONE}, + {"tracepoint+", sys.BPF_PROG_TYPE_TRACEPOINT, 0, _SEC_NONE}, + {"tp+", sys.BPF_PROG_TYPE_TRACEPOINT, 0, _SEC_NONE}, + {"raw_tracepoint+", sys.BPF_PROG_TYPE_RAW_TRACEPOINT, 0, _SEC_NONE}, + {"raw_tp+", sys.BPF_PROG_TYPE_RAW_TRACEPOINT, 0, _SEC_NONE}, + {"raw_tracepoint.w+", sys.BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, 0, _SEC_NONE}, + {"raw_tp.w+", sys.BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, 0, _SEC_NONE}, + {"tp_btf+", sys.BPF_PROG_TYPE_TRACING, sys.BPF_TRACE_RAW_TP, _SEC_ATTACH_BTF}, + {"fentry+", sys.BPF_PROG_TYPE_TRACING, sys.BPF_TRACE_FENTRY, _SEC_ATTACH_BTF}, + {"fmod_ret+", sys.BPF_PROG_TYPE_TRACING, sys.BPF_MODIFY_RETURN, _SEC_ATTACH_BTF}, + {"fexit+", sys.BPF_PROG_TYPE_TRACING, sys.BPF_TRACE_FEXIT, _SEC_ATTACH_BTF}, + {"fentry.s+", sys.BPF_PROG_TYPE_TRACING, sys.BPF_TRACE_FENTRY, _SEC_ATTACH_BTF | _SEC_SLEEPABLE}, + {"fmod_ret.s+", sys.BPF_PROG_TYPE_TRACING, sys.BPF_MODIFY_RETURN, _SEC_ATTACH_BTF | _SEC_SLEEPABLE}, + {"fexit.s+", sys.BPF_PROG_TYPE_TRACING, sys.BPF_TRACE_FEXIT, _SEC_ATTACH_BTF | _SEC_SLEEPABLE}, + {"freplace+", sys.BPF_PROG_TYPE_EXT, 0, _SEC_ATTACH_BTF}, + {"lsm+", sys.BPF_PROG_TYPE_LSM, sys.BPF_LSM_MAC, _SEC_ATTACH_BTF}, + {"lsm.s+", sys.BPF_PROG_TYPE_LSM, sys.BPF_LSM_MAC, _SEC_ATTACH_BTF | _SEC_SLEEPABLE}, + {"lsm_cgroup+", sys.BPF_PROG_TYPE_LSM, sys.BPF_LSM_CGROUP, _SEC_ATTACH_BTF}, + {"iter+", sys.BPF_PROG_TYPE_TRACING, sys.BPF_TRACE_ITER, 
_SEC_ATTACH_BTF}, + {"iter.s+", sys.BPF_PROG_TYPE_TRACING, sys.BPF_TRACE_ITER, _SEC_ATTACH_BTF | _SEC_SLEEPABLE}, + {"syscall", sys.BPF_PROG_TYPE_SYSCALL, 0, _SEC_SLEEPABLE}, + {"xdp.frags/devmap", sys.BPF_PROG_TYPE_XDP, sys.BPF_XDP_DEVMAP, _SEC_XDP_FRAGS}, + {"xdp/devmap", sys.BPF_PROG_TYPE_XDP, sys.BPF_XDP_DEVMAP, _SEC_ATTACHABLE}, + {"xdp.frags/cpumap", sys.BPF_PROG_TYPE_XDP, sys.BPF_XDP_CPUMAP, _SEC_XDP_FRAGS}, + {"xdp/cpumap", sys.BPF_PROG_TYPE_XDP, sys.BPF_XDP_CPUMAP, _SEC_ATTACHABLE}, + {"xdp.frags", sys.BPF_PROG_TYPE_XDP, sys.BPF_XDP, _SEC_XDP_FRAGS}, + {"xdp", sys.BPF_PROG_TYPE_XDP, sys.BPF_XDP, _SEC_ATTACHABLE_OPT}, + {"perf_event", sys.BPF_PROG_TYPE_PERF_EVENT, 0, _SEC_NONE}, + {"lwt_in", sys.BPF_PROG_TYPE_LWT_IN, 0, _SEC_NONE}, + {"lwt_out", sys.BPF_PROG_TYPE_LWT_OUT, 0, _SEC_NONE}, + {"lwt_xmit", sys.BPF_PROG_TYPE_LWT_XMIT, 0, _SEC_NONE}, + {"lwt_seg6local", sys.BPF_PROG_TYPE_LWT_SEG6LOCAL, 0, _SEC_NONE}, + {"sockops", sys.BPF_PROG_TYPE_SOCK_OPS, sys.BPF_CGROUP_SOCK_OPS, _SEC_ATTACHABLE_OPT}, + {"sk_skb/stream_parser", sys.BPF_PROG_TYPE_SK_SKB, sys.BPF_SK_SKB_STREAM_PARSER, _SEC_ATTACHABLE_OPT}, + {"sk_skb/stream_verdict", sys.BPF_PROG_TYPE_SK_SKB, sys.BPF_SK_SKB_STREAM_VERDICT, _SEC_ATTACHABLE_OPT}, + {"sk_skb", sys.BPF_PROG_TYPE_SK_SKB, 0, _SEC_NONE}, + {"sk_msg", sys.BPF_PROG_TYPE_SK_MSG, sys.BPF_SK_MSG_VERDICT, _SEC_ATTACHABLE_OPT}, + {"lirc_mode2", sys.BPF_PROG_TYPE_LIRC_MODE2, sys.BPF_LIRC_MODE2, _SEC_ATTACHABLE_OPT}, + {"flow_dissector", sys.BPF_PROG_TYPE_FLOW_DISSECTOR, sys.BPF_FLOW_DISSECTOR, _SEC_ATTACHABLE_OPT}, + {"cgroup_skb/ingress", sys.BPF_PROG_TYPE_CGROUP_SKB, sys.BPF_CGROUP_INET_INGRESS, _SEC_ATTACHABLE_OPT}, + {"cgroup_skb/egress", sys.BPF_PROG_TYPE_CGROUP_SKB, sys.BPF_CGROUP_INET_EGRESS, _SEC_ATTACHABLE_OPT}, + {"cgroup/skb", sys.BPF_PROG_TYPE_CGROUP_SKB, 0, _SEC_NONE}, + {"cgroup/sock_create", sys.BPF_PROG_TYPE_CGROUP_SOCK, sys.BPF_CGROUP_INET_SOCK_CREATE, _SEC_ATTACHABLE}, + {"cgroup/sock_release", sys.BPF_PROG_TYPE_CGROUP_SOCK, sys.BPF_CGROUP_INET_SOCK_RELEASE, _SEC_ATTACHABLE}, + {"cgroup/sock", sys.BPF_PROG_TYPE_CGROUP_SOCK, sys.BPF_CGROUP_INET_SOCK_CREATE, _SEC_ATTACHABLE_OPT}, + {"cgroup/post_bind4", sys.BPF_PROG_TYPE_CGROUP_SOCK, sys.BPF_CGROUP_INET4_POST_BIND, _SEC_ATTACHABLE}, + {"cgroup/post_bind6", sys.BPF_PROG_TYPE_CGROUP_SOCK, sys.BPF_CGROUP_INET6_POST_BIND, _SEC_ATTACHABLE}, + {"cgroup/bind4", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_INET4_BIND, _SEC_ATTACHABLE}, + {"cgroup/bind6", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_INET6_BIND, _SEC_ATTACHABLE}, + {"cgroup/connect4", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_INET4_CONNECT, _SEC_ATTACHABLE}, + {"cgroup/connect6", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_INET6_CONNECT, _SEC_ATTACHABLE}, + {"cgroup/connect_unix", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_UNIX_CONNECT, _SEC_ATTACHABLE}, + {"cgroup/sendmsg4", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_UDP4_SENDMSG, _SEC_ATTACHABLE}, + {"cgroup/sendmsg6", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_UDP6_SENDMSG, _SEC_ATTACHABLE}, + {"cgroup/sendmsg_unix", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_UNIX_SENDMSG, _SEC_ATTACHABLE}, + {"cgroup/recvmsg4", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_UDP4_RECVMSG, _SEC_ATTACHABLE}, + {"cgroup/recvmsg6", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_UDP6_RECVMSG, _SEC_ATTACHABLE}, + {"cgroup/recvmsg_unix", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_UNIX_RECVMSG, _SEC_ATTACHABLE}, + {"cgroup/getpeername4", 
sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_INET4_GETPEERNAME, _SEC_ATTACHABLE}, + {"cgroup/getpeername6", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_INET6_GETPEERNAME, _SEC_ATTACHABLE}, + {"cgroup/getpeername_unix", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_UNIX_GETPEERNAME, _SEC_ATTACHABLE}, + {"cgroup/getsockname4", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_INET4_GETSOCKNAME, _SEC_ATTACHABLE}, + {"cgroup/getsockname6", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_INET6_GETSOCKNAME, _SEC_ATTACHABLE}, + {"cgroup/getsockname_unix", sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR, sys.BPF_CGROUP_UNIX_GETSOCKNAME, _SEC_ATTACHABLE}, + {"cgroup/sysctl", sys.BPF_PROG_TYPE_CGROUP_SYSCTL, sys.BPF_CGROUP_SYSCTL, _SEC_ATTACHABLE}, + {"cgroup/getsockopt", sys.BPF_PROG_TYPE_CGROUP_SOCKOPT, sys.BPF_CGROUP_GETSOCKOPT, _SEC_ATTACHABLE}, + {"cgroup/setsockopt", sys.BPF_PROG_TYPE_CGROUP_SOCKOPT, sys.BPF_CGROUP_SETSOCKOPT, _SEC_ATTACHABLE}, + {"cgroup/dev", sys.BPF_PROG_TYPE_CGROUP_DEVICE, sys.BPF_CGROUP_DEVICE, _SEC_ATTACHABLE_OPT}, + {"struct_ops+", sys.BPF_PROG_TYPE_STRUCT_OPS, 0, _SEC_NONE}, + {"struct_ops.s+", sys.BPF_PROG_TYPE_STRUCT_OPS, 0, _SEC_SLEEPABLE}, + {"sk_lookup", sys.BPF_PROG_TYPE_SK_LOOKUP, sys.BPF_SK_LOOKUP, _SEC_ATTACHABLE}, + {"netfilter", sys.BPF_PROG_TYPE_NETFILTER, sys.BPF_NETFILTER, _SEC_NONE}, +} diff --git a/vendor/github.com/cilium/ebpf/info.go b/vendor/github.com/cilium/ebpf/info.go index ae77bc6197..79b11c951f 100644 --- a/vendor/github.com/cilium/ebpf/info.go +++ b/vendor/github.com/cilium/ebpf/info.go @@ -48,7 +48,7 @@ func newMapInfoFromFd(fd *sys.FD) (*MapInfo, error) { info.KeySize, info.ValueSize, info.MaxEntries, - info.MapFlags, + uint32(info.MapFlags), unix.ByteSliceToString(info.Name[:]), }, nil } @@ -94,11 +94,18 @@ type ProgramInfo struct { // Name as supplied by user space at load time. Available from 4.15. Name string - btf btf.ID - stats *programStats + createdByUID uint32 + haveCreatedByUID bool + btf btf.ID + stats *programStats maps []MapID insns []byte + + lineInfos []byte + numLineInfos uint32 + funcInfos []byte + numFuncInfos uint32 } func newProgramInfoFromFd(fd *sys.FD) (*ProgramInfo, error) { @@ -126,19 +133,53 @@ func newProgramInfoFromFd(fd *sys.FD) (*ProgramInfo, error) { // Start with a clean struct for the second call, otherwise we may get EFAULT. var info2 sys.ProgInfo + makeSecondCall := false + if info.NrMapIds > 0 { pi.maps = make([]MapID, info.NrMapIds) info2.NrMapIds = info.NrMapIds info2.MapIds = sys.NewPointer(unsafe.Pointer(&pi.maps[0])) + makeSecondCall = true + } else if haveProgramInfoMapIDs() == nil { + // This program really has no associated maps. + pi.maps = make([]MapID, 0) + } else { + // The kernel doesn't report associated maps. + pi.maps = nil + } + + // createdByUID and NrMapIds were introduced in the same kernel version. 
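+	// A non-nil maps slice therefore doubles as the availability signal
+	// for the CreatedByUid read below.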
+ if pi.maps != nil { + pi.createdByUID = info.CreatedByUid + pi.haveCreatedByUID = true } if info.XlatedProgLen > 0 { pi.insns = make([]byte, info.XlatedProgLen) info2.XlatedProgLen = info.XlatedProgLen info2.XlatedProgInsns = sys.NewSlicePointer(pi.insns) + makeSecondCall = true + } + + if info.NrLineInfo > 0 { + pi.lineInfos = make([]byte, btf.LineInfoSize*info.NrLineInfo) + info2.LineInfo = sys.NewSlicePointer(pi.lineInfos) + info2.LineInfoRecSize = btf.LineInfoSize + info2.NrLineInfo = info.NrLineInfo + pi.numLineInfos = info.NrLineInfo + makeSecondCall = true + } + + if info.NrFuncInfo > 0 { + pi.funcInfos = make([]byte, btf.FuncInfoSize*info.NrFuncInfo) + info2.FuncInfo = sys.NewSlicePointer(pi.funcInfos) + info2.FuncInfoRecSize = btf.FuncInfoSize + info2.NrFuncInfo = info.NrFuncInfo + pi.numFuncInfos = info.NrFuncInfo + makeSecondCall = true } - if info.NrMapIds > 0 || info.XlatedProgLen > 0 { + if makeSecondCall { if err := sys.ObjInfo(fd, &info2); err != nil { return nil, err } @@ -175,6 +216,15 @@ func (pi *ProgramInfo) ID() (ProgramID, bool) { return pi.id, pi.id > 0 } +// CreatedByUID returns the Uid that created the program. +// +// Available from 4.15. +// +// The bool return value indicates whether this optional field is available. +func (pi *ProgramInfo) CreatedByUID() (uint32, bool) { + return pi.createdByUID, pi.haveCreatedByUID +} + // BTFID returns the BTF ID associated with the program. // // The ID is only valid as long as the associated program is kept alive. @@ -222,7 +272,13 @@ func (pi *ProgramInfo) Runtime() (time.Duration, bool) { // // The first instruction is marked as a symbol using the Program's name. // -// Available from 4.13. Requires CAP_BPF or equivalent. +// If available, the instructions will be annotated with metadata from the +// BTF. This includes line information and function information. Reading +// this metadata requires CAP_SYS_ADMIN or equivalent. If capability is +// unavailable, the instructions will be returned without metadata. +// +// Available from 4.13. Requires CAP_BPF or equivalent for plain instructions. +// Requires CAP_SYS_ADMIN for instructions with metadata. func (pi *ProgramInfo) Instructions() (asm.Instructions, error) { // If the calling process is not BPF-capable or if the kernel doesn't // support getting xlated instructions, the field will be zero. @@ -236,8 +292,55 @@ func (pi *ProgramInfo) Instructions() (asm.Instructions, error) { return nil, fmt.Errorf("unmarshaling instructions: %w", err) } - // Tag the first instruction with the name of the program, if available. - insns[0] = insns[0].WithSymbol(pi.Name) + if pi.btf != 0 { + btfh, err := btf.NewHandleFromID(pi.btf) + if err != nil { + // Getting a BTF handle requires CAP_SYS_ADMIN, if not available we get an -EPERM. + // Ignore it and fall back to instructions without metadata. + if !errors.Is(err, unix.EPERM) { + return nil, fmt.Errorf("unable to get BTF handle: %w", err) + } + } + + // If we have a BTF handle, we can use it to assign metadata to the instructions. 
+ if btfh != nil { + defer btfh.Close() + + spec, err := btfh.Spec(nil) + if err != nil { + return nil, fmt.Errorf("unable to get BTF spec: %w", err) + } + + lineInfos, err := btf.LoadLineInfos( + bytes.NewReader(pi.lineInfos), + internal.NativeEndian, + pi.numLineInfos, + spec, + ) + if err != nil { + return nil, fmt.Errorf("parse line info: %w", err) + } + + funcInfos, err := btf.LoadFuncInfos( + bytes.NewReader(pi.funcInfos), + internal.NativeEndian, + pi.numFuncInfos, + spec, + ) + if err != nil { + return nil, fmt.Errorf("parse func info: %w", err) + } + + btf.AssignMetadataToInstructions(insns, funcInfos, lineInfos, btf.CORERelocationInfos{}) + } + } + + fn := btf.FuncMetadata(&insns[0]) + name := pi.Name + if fn != nil { + name = fn.Name + } + insns[0] = insns[0].WithSymbol(name) return insns, nil } @@ -321,3 +424,30 @@ func EnableStats(which uint32) (io.Closer, error) { } return fd, nil } + +var haveProgramInfoMapIDs = internal.NewFeatureTest("map IDs in program info", "4.15", func() error { + prog, err := progLoad(asm.Instructions{ + asm.LoadImm(asm.R0, 0, asm.DWord), + asm.Return(), + }, SocketFilter, "MIT") + if err != nil { + return err + } + defer prog.Close() + + err = sys.ObjInfo(prog, &sys.ProgInfo{ + // NB: Don't need to allocate MapIds since the program isn't using + // any maps. + NrMapIds: 1, + }) + if errors.Is(err, unix.EINVAL) { + // Most likely the syscall doesn't exist. + return internal.ErrNotSupported + } + if errors.Is(err, unix.E2BIG) { + // We've hit check_uarg_tail_zero on older kernels. + return internal.ErrNotSupported + } + + return err +}) diff --git a/vendor/github.com/cilium/ebpf/internal/align.go b/vendor/github.com/cilium/ebpf/internal/align.go deleted file mode 100644 index 8b4f2658ea..0000000000 --- a/vendor/github.com/cilium/ebpf/internal/align.go +++ /dev/null @@ -1,6 +0,0 @@ -package internal - -// Align returns 'n' updated to 'alignment' boundary. 
-func Align(n, alignment int) int { - return (int(n) + alignment - 1) / alignment * alignment -} diff --git a/vendor/github.com/cilium/ebpf/internal/auxv.go b/vendor/github.com/cilium/ebpf/internal/auxv.go new file mode 100644 index 0000000000..45fd0d37f1 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/auxv.go @@ -0,0 +1,60 @@ +package internal + +import ( + "errors" + "io" + _ "unsafe" +) + +type auxvPairReader interface { + Close() error + ReadAuxvPair() (uint64, uint64, error) +} + +// See https://elixir.bootlin.com/linux/v6.5.5/source/include/uapi/linux/auxvec.h +const ( + _AT_NULL = 0 // End of vector + _AT_SYSINFO_EHDR = 33 // Offset to vDSO blob in process image +) + +//go:linkname runtime_getAuxv runtime.getAuxv +func runtime_getAuxv() []uintptr + +type auxvRuntimeReader struct { + data []uintptr + index int +} + +func (r *auxvRuntimeReader) Close() error { + return nil +} + +func (r *auxvRuntimeReader) ReadAuxvPair() (uint64, uint64, error) { + if r.index >= len(r.data)+2 { + return 0, 0, io.EOF + } + + // we manually add the (_AT_NULL, _AT_NULL) pair at the end + // that is not provided by the go runtime + var tag, value uintptr + if r.index+1 < len(r.data) { + tag, value = r.data[r.index], r.data[r.index+1] + } else { + tag, value = _AT_NULL, _AT_NULL + } + r.index += 2 + return uint64(tag), uint64(value), nil +} + +func newAuxvRuntimeReader() (auxvPairReader, error) { + data := runtime_getAuxv() + + if len(data)%2 != 0 { + return nil, errors.New("malformed auxv passed from runtime") + } + + return &auxvRuntimeReader{ + data: data, + index: 0, + }, nil +} diff --git a/vendor/github.com/cilium/ebpf/internal/buffer.go b/vendor/github.com/cilium/ebpf/internal/buffer.go new file mode 100644 index 0000000000..81c6544330 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/buffer.go @@ -0,0 +1,31 @@ +package internal + +import ( + "bytes" + "sync" +) + +var bytesBufferPool = sync.Pool{ + New: func() interface{} { + return new(bytes.Buffer) + }, +} + +// NewBuffer retrieves a [bytes.Buffer] from a pool an re-initialises it. +// +// The returned buffer should be passed to [PutBuffer]. +func NewBuffer(buf []byte) *bytes.Buffer { + wr := bytesBufferPool.Get().(*bytes.Buffer) + // Reinitialize the Buffer with a new backing slice since it is returned to + // the caller by wr.Bytes() below. Pooling is faster despite calling + // NewBuffer. The pooled alloc is still reused, it only needs to be zeroed. + *wr = *bytes.NewBuffer(buf) + return wr +} + +// PutBuffer releases a buffer to the pool. +func PutBuffer(buf *bytes.Buffer) { + // Release reference to the backing buffer. + *buf = *bytes.NewBuffer(nil) + bytesBufferPool.Put(buf) +} diff --git a/vendor/github.com/cilium/ebpf/internal/deque.go b/vendor/github.com/cilium/ebpf/internal/deque.go new file mode 100644 index 0000000000..e3a3050215 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/deque.go @@ -0,0 +1,91 @@ +package internal + +import "math/bits" + +// Deque implements a double ended queue. +type Deque[T any] struct { + elems []T + read, write uint64 + mask uint64 +} + +// Reset clears the contents of the deque while retaining the backing buffer. +func (dq *Deque[T]) Reset() { + var zero T + + for i := dq.read; i < dq.write; i++ { + dq.elems[i&dq.mask] = zero + } + + dq.read, dq.write = 0, 0 +} + +func (dq *Deque[T]) Empty() bool { + return dq.read == dq.write +} + +// Push adds an element to the end. 
+func (dq *Deque[T]) Push(e T) { + dq.Grow(1) + dq.elems[dq.write&dq.mask] = e + dq.write++ +} + +// Shift returns the first element or the zero value. +func (dq *Deque[T]) Shift() T { + var zero T + + if dq.Empty() { + return zero + } + + index := dq.read & dq.mask + t := dq.elems[index] + dq.elems[index] = zero + dq.read++ + return t +} + +// Pop returns the last element or the zero value. +func (dq *Deque[T]) Pop() T { + var zero T + + if dq.Empty() { + return zero + } + + dq.write-- + index := dq.write & dq.mask + t := dq.elems[index] + dq.elems[index] = zero + return t +} + +// Grow the deque's capacity, if necessary, to guarantee space for another n +// elements. +func (dq *Deque[T]) Grow(n int) { + have := dq.write - dq.read + need := have + uint64(n) + if need < have { + panic("overflow") + } + if uint64(len(dq.elems)) >= need { + return + } + + // Round up to the new power of two which is at least 8. + // See https://jameshfisher.com/2018/03/30/round-up-power-2/ + capacity := 1 << (64 - bits.LeadingZeros64(need-1)) + if capacity < 8 { + capacity = 8 + } + + elems := make([]T, have, capacity) + pivot := dq.read & dq.mask + copied := copy(elems, dq.elems[pivot:]) + copy(elems[copied:], dq.elems[:pivot]) + + dq.elems = elems[:capacity] + dq.mask = uint64(capacity) - 1 + dq.read, dq.write = 0, have +} diff --git a/vendor/github.com/cilium/ebpf/internal/endian_be.go b/vendor/github.com/cilium/ebpf/internal/endian_be.go index ad33cda851..a37777f21f 100644 --- a/vendor/github.com/cilium/ebpf/internal/endian_be.go +++ b/vendor/github.com/cilium/ebpf/internal/endian_be.go @@ -1,5 +1,4 @@ //go:build armbe || arm64be || mips || mips64 || mips64p32 || ppc64 || s390 || s390x || sparc || sparc64 -// +build armbe arm64be mips mips64 mips64p32 ppc64 s390 s390x sparc sparc64 package internal @@ -7,7 +6,4 @@ import "encoding/binary" // NativeEndian is set to either binary.BigEndian or binary.LittleEndian, // depending on the host's endianness. -var NativeEndian binary.ByteOrder = binary.BigEndian - -// ClangEndian is set to either "el" or "eb" depending on the host's endianness. -const ClangEndian = "eb" +var NativeEndian = binary.BigEndian diff --git a/vendor/github.com/cilium/ebpf/internal/endian_le.go b/vendor/github.com/cilium/ebpf/internal/endian_le.go index 41a68224c8..6dcd916d5d 100644 --- a/vendor/github.com/cilium/ebpf/internal/endian_le.go +++ b/vendor/github.com/cilium/ebpf/internal/endian_le.go @@ -1,5 +1,4 @@ -//go:build 386 || amd64 || amd64p32 || arm || arm64 || mipsle || mips64le || mips64p32le || ppc64le || riscv64 -// +build 386 amd64 amd64p32 arm arm64 mipsle mips64le mips64p32le ppc64le riscv64 +//go:build 386 || amd64 || amd64p32 || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || ppc64le || riscv64 package internal @@ -7,7 +6,4 @@ import "encoding/binary" // NativeEndian is set to either binary.BigEndian or binary.LittleEndian, // depending on the host's endianness. -var NativeEndian binary.ByteOrder = binary.LittleEndian - -// ClangEndian is set to either "el" or "eb" depending on the host's endianness. -const ClangEndian = "el" +var NativeEndian = binary.LittleEndian diff --git a/vendor/github.com/cilium/ebpf/internal/errors.go b/vendor/github.com/cilium/ebpf/internal/errors.go index b5ccdd7d05..bda01e2fde 100644 --- a/vendor/github.com/cilium/ebpf/internal/errors.go +++ b/vendor/github.com/cilium/ebpf/internal/errors.go @@ -7,32 +7,25 @@ import ( "strings" ) -// ErrorWithLog returns an error which includes logs from the kernel verifier. 
+// ErrorWithLog wraps err in a VerifierError that includes the parsed verifier +// log buffer. // // The default error output is a summary of the full log. The latter can be // accessed via VerifierError.Log or by formatting the error, see Format. -// -// A set of heuristics is used to determine whether the log has been truncated. -func ErrorWithLog(err error, log []byte) *VerifierError { +func ErrorWithLog(source string, err error, log []byte, truncated bool) *VerifierError { const whitespace = "\t\r\v\n " // Convert verifier log C string by truncating it on the first 0 byte // and trimming trailing whitespace before interpreting as a Go string. - truncated := false if i := bytes.IndexByte(log, 0); i != -1 { - if i == len(log)-1 && !bytes.HasSuffix(log[:i], []byte{'\n'}) { - // The null byte is at the end of the buffer and it's not preceded - // by a newline character. Most likely the buffer was too short. - truncated = true - } - log = log[:i] - } else if len(log) > 0 { - // No null byte? Dodgy! - truncated = true } log = bytes.Trim(log, whitespace) + if len(log) == 0 { + return &VerifierError{source, err, nil, truncated} + } + logLines := bytes.Split(log, []byte{'\n'}) lines := make([]string, 0, len(logLines)) for _, line := range logLines { @@ -41,13 +34,14 @@ func ErrorWithLog(err error, log []byte) *VerifierError { lines = append(lines, string(bytes.TrimRight(line, whitespace))) } - return &VerifierError{err, lines, truncated} + return &VerifierError{source, err, lines, truncated} } // VerifierError includes information from the eBPF verifier. // // It summarises the log output, see Format if you want to output the full contents. type VerifierError struct { + source string // The error which caused this error. Cause error // The verifier output split into lines. @@ -67,9 +61,12 @@ func (le *VerifierError) Error() string { log = log[:n-1] } + var b strings.Builder + fmt.Fprintf(&b, "%s: %s", le.source, le.Cause.Error()) + n := len(log) if n == 0 { - return le.Cause.Error() + return b.String() } lines := log[n-1:] @@ -78,14 +75,9 @@ func (le *VerifierError) Error() string { lines = log[n-2:] } - var b strings.Builder - fmt.Fprintf(&b, "%s: ", le.Cause.Error()) - - for i, line := range lines { + for _, line := range lines { + b.WriteString(": ") b.WriteString(strings.TrimSpace(line)) - if i != len(lines)-1 { - b.WriteString(": ") - } } omitted := len(le.Log) - len(lines) @@ -143,8 +135,8 @@ func includePreviousLine(line string) bool { // Understood verbs are %s and %v, which are equivalent to calling Error(). %v // allows outputting additional information using the following flags: // -// + Output the first lines, or all lines if no width is given. -// - Output the last lines, or all lines if no width is given. +// %+v: Output the first lines, or all lines if no width is given. +// %-v: Output the last lines, or all lines if no width is given. // // Use width to specify how many lines to output. Use the '-' flag to output // lines from the end of the log instead of the beginning. 
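+//
+// For example (illustrative, assuming verr is a *VerifierError):
+//
+//	fmt.Printf("%+2v", verr) // the first two lines of the log
+//	fmt.Printf("%-2v", verr) // the last two lines of the log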
@@ -174,7 +166,7 @@ func (le *VerifierError) Format(f fmt.State, verb rune) { return } - fmt.Fprintf(f, "%s:", le.Cause.Error()) + fmt.Fprintf(f, "%s: %s:", le.source, le.Cause.Error()) omitted := len(le.Log) - n lines := le.Log[:n] diff --git a/vendor/github.com/cilium/ebpf/internal/feature.go b/vendor/github.com/cilium/ebpf/internal/feature.go index 0a6c2d1d52..2b856c735e 100644 --- a/vendor/github.com/cilium/ebpf/internal/feature.go +++ b/vendor/github.com/cilium/ebpf/internal/feature.go @@ -31,10 +31,20 @@ func (ufe *UnsupportedFeatureError) Is(target error) bool { return target == ErrNotSupported } -type featureTest struct { - sync.RWMutex - successful bool - result error +// FeatureTest caches the result of a [FeatureTestFn]. +// +// Fields should not be modified after creation. +type FeatureTest struct { + // The name of the feature being detected. + Name string + // Version in the form Major.Minor[.Patch]. + Version string + // The feature test itself. + Fn FeatureTestFn + + mu sync.RWMutex + done bool + result error } // FeatureTestFn is used to determine whether the kernel supports @@ -42,59 +52,133 @@ type featureTest struct { // // The return values have the following semantics: // -// err == ErrNotSupported: the feature is not available -// err == nil: the feature is available -// err != nil: the test couldn't be executed +// err == ErrNotSupported: the feature is not available +// err == nil: the feature is available +// err != nil: the test couldn't be executed type FeatureTestFn func() error -// FeatureTest wraps a function so that it is run at most once. +// NewFeatureTest is a convenient way to create a single [FeatureTest]. +func NewFeatureTest(name, version string, fn FeatureTestFn) func() error { + ft := &FeatureTest{ + Name: name, + Version: version, + Fn: fn, + } + + return ft.execute +} + +// execute the feature test. // -// name should identify the tested feature, while version must be in the -// form Major.Minor[.Patch]. +// The result is cached if the test is conclusive. // -// Returns an error wrapping ErrNotSupported if the feature is not supported. -func FeatureTest(name, version string, fn FeatureTestFn) func() error { - ft := new(featureTest) - return func() error { - ft.RLock() - if ft.successful { - defer ft.RUnlock() - return ft.result - } - ft.RUnlock() - ft.Lock() - defer ft.Unlock() - // check one more time on the off - // chance that two go routines - // were able to call into the write - // lock - if ft.successful { - return ft.result - } - err := fn() - switch { - case errors.Is(err, ErrNotSupported): - v, err := NewVersion(version) - if err != nil { - return err - } +// See [FeatureTestFn] for the meaning of the returned error. +func (ft *FeatureTest) execute() error { + ft.mu.RLock() + result, done := ft.result, ft.done + ft.mu.RUnlock() - ft.result = &UnsupportedFeatureError{ - MinimumVersion: v, - Name: name, - } - fallthrough + if done { + return result + } + + ft.mu.Lock() + defer ft.mu.Unlock() + + // The test may have been executed by another caller while we were + // waiting to acquire ft.mu. + if ft.done { + return ft.result + } + + err := ft.Fn() + if err == nil { + ft.done = true + return nil + } - case err == nil: - ft.successful = true + if errors.Is(err, ErrNotSupported) { + var v Version + if ft.Version != "" { + v, err = NewVersion(ft.Version) + if err != nil { + return fmt.Errorf("feature %s: %w", ft.Name, err) + } + } - default: - // We couldn't execute the feature test to a point - // where it could make a determination. 
- // Don't cache the result, just return it. - return fmt.Errorf("detect support for %s: %w", name, err) + ft.done = true + ft.result = &UnsupportedFeatureError{ + MinimumVersion: v, + Name: ft.Name, } return ft.result } + + // We couldn't execute the feature test to a point + // where it could make a determination. + // Don't cache the result, just return it. + return fmt.Errorf("detect support for %s: %w", ft.Name, err) +} + +// FeatureMatrix groups multiple related feature tests into a map. +// +// Useful when there is a small number of discrete features which are known +// at compile time. +// +// It must not be modified concurrently with calling [FeatureMatrix.Result]. +type FeatureMatrix[K comparable] map[K]*FeatureTest + +// Result returns the outcome of the feature test for the given key. +// +// It's safe to call this function concurrently. +func (fm FeatureMatrix[K]) Result(key K) error { + ft, ok := fm[key] + if !ok { + return fmt.Errorf("no feature probe for %v", key) + } + + return ft.execute() +} + +// FeatureCache caches a potentially unlimited number of feature probes. +// +// Useful when there is a high cardinality for a feature test. +type FeatureCache[K comparable] struct { + mu sync.RWMutex + newTest func(K) *FeatureTest + features map[K]*FeatureTest +} + +func NewFeatureCache[K comparable](newTest func(K) *FeatureTest) *FeatureCache[K] { + return &FeatureCache[K]{ + newTest: newTest, + features: make(map[K]*FeatureTest), + } +} + +func (fc *FeatureCache[K]) Result(key K) error { + // NB: Executing the feature test happens without fc.mu taken. + return fc.retrieve(key).execute() +} + +func (fc *FeatureCache[K]) retrieve(key K) *FeatureTest { + fc.mu.RLock() + ft := fc.features[key] + fc.mu.RUnlock() + + if ft != nil { + return ft + } + + fc.mu.Lock() + defer fc.mu.Unlock() + + if ft := fc.features[key]; ft != nil { + return ft + } + + ft = fc.newTest(key) + fc.features[key] = ft + return ft } diff --git a/vendor/github.com/cilium/ebpf/internal/io.go b/vendor/github.com/cilium/ebpf/internal/io.go index 30b6641f07..1eaf4775ad 100644 --- a/vendor/github.com/cilium/ebpf/internal/io.go +++ b/vendor/github.com/cilium/ebpf/internal/io.go @@ -2,10 +2,14 @@ package internal import ( "bufio" + "bytes" "compress/gzip" "errors" + "fmt" "io" "os" + "path/filepath" + "sync" ) // NewBufferedSectionReader wraps an io.ReaderAt in an appropriately-sized @@ -60,3 +64,65 @@ func ReadAllCompressed(file string) ([]byte, error) { return io.ReadAll(gz) } + +// ReadUint64FromFile reads a uint64 from a file. +// +// format specifies the contents of the file in fmt.Scanf syntax. +func ReadUint64FromFile(format string, path ...string) (uint64, error) { + filename := filepath.Join(path...) + data, err := os.ReadFile(filename) + if err != nil { + return 0, fmt.Errorf("reading file %q: %w", filename, err) + } + + var value uint64 + n, err := fmt.Fscanf(bytes.NewReader(data), format, &value) + if err != nil { + return 0, fmt.Errorf("parsing file %q: %w", filename, err) + } + if n != 1 { + return 0, fmt.Errorf("parsing file %q: expected 1 item, got %d", filename, n) + } + + return value, nil +} + +type uint64FromFileKey struct { + format, path string +} + +var uint64FromFileCache = struct { + sync.RWMutex + values map[uint64FromFileKey]uint64 +}{ + values: map[uint64FromFileKey]uint64{}, +} + +// ReadUint64FromFileOnce is like readUint64FromFile but memoizes the result. +func ReadUint64FromFileOnce(format string, path ...string) (uint64, error) { + filename := filepath.Join(path...) 
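+	// The cache key pairs the scanf format with the joined path, so the same
+	// file read with two different formats is memoized independently.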
+	key := uint64FromFileKey{format, filename}
+
+	uint64FromFileCache.RLock()
+	if value, ok := uint64FromFileCache.values[key]; ok {
+		uint64FromFileCache.RUnlock()
+		return value, nil
+	}
+	uint64FromFileCache.RUnlock()
+
+	value, err := ReadUint64FromFile(format, filename)
+	if err != nil {
+		return 0, err
+	}
+
+	uint64FromFileCache.Lock()
+	defer uint64FromFileCache.Unlock()
+
+	if value, ok := uint64FromFileCache.values[key]; ok {
+		// Someone else got here before us, use what is cached.
+		return value, nil
+	}
+
+	uint64FromFileCache.values[key] = value
+	return value, nil
+}
diff --git a/vendor/github.com/cilium/ebpf/internal/kallsyms/kallsyms.go b/vendor/github.com/cilium/ebpf/internal/kallsyms/kallsyms.go
new file mode 100644
index 0000000000..776c7a10a2
--- /dev/null
+++ b/vendor/github.com/cilium/ebpf/internal/kallsyms/kallsyms.go
@@ -0,0 +1,74 @@
+package kallsyms
+
+import (
+	"bufio"
+	"bytes"
+	"io"
+	"os"
+	"sync"
+)
+
+var kernelModules struct {
+	sync.RWMutex
+	// function to kernel module mapping
+	kmods map[string]string
+}
+
+// KernelModule returns the kernel module, if any, a probe-able function is contained in.
+func KernelModule(fn string) (string, error) {
+	kernelModules.RLock()
+	kmods := kernelModules.kmods
+	kernelModules.RUnlock()
+
+	if kmods == nil {
+		kernelModules.Lock()
+		defer kernelModules.Unlock()
+		kmods = kernelModules.kmods
+	}
+
+	if kmods != nil {
+		return kmods[fn], nil
+	}
+
+	f, err := os.Open("/proc/kallsyms")
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+	kmods, err = loadKernelModuleMapping(f)
+	if err != nil {
+		return "", err
+	}
+
+	kernelModules.kmods = kmods
+	return kmods[fn], nil
+}
+
+// FlushKernelModuleCache removes any cached information about function to kernel module mapping.
+func FlushKernelModuleCache() {
+	kernelModules.Lock()
+	defer kernelModules.Unlock()
+
+	kernelModules.kmods = nil
+}
+
+func loadKernelModuleMapping(f io.Reader) (map[string]string, error) {
+	mods := make(map[string]string)
+	scanner := bufio.NewScanner(f)
+	for scanner.Scan() {
+		fields := bytes.Fields(scanner.Bytes())
+		if len(fields) < 4 {
+			continue
+		}
+		switch string(fields[1]) {
+		case "t", "T":
+			mods[string(fields[2])] = string(bytes.Trim(fields[3], "[]"))
+		default:
+			continue
+		}
+	}
+	if scanner.Err() != nil {
+		return nil, scanner.Err()
+	}
+	return mods, nil
+}
diff --git a/vendor/github.com/cilium/ebpf/internal/kconfig/kconfig.go b/vendor/github.com/cilium/ebpf/internal/kconfig/kconfig.go
new file mode 100644
index 0000000000..1921e4f15a
--- /dev/null
+++ b/vendor/github.com/cilium/ebpf/internal/kconfig/kconfig.go
@@ -0,0 +1,293 @@
+package kconfig
+
+import (
+	"bufio"
+	"bytes"
+	"compress/gzip"
+	"fmt"
+	"io"
+	"math"
+	"os"
+	"strconv"
+	"strings"
+
+	"github.com/cilium/ebpf/btf"
+	"github.com/cilium/ebpf/internal"
+)
+
+// Find finds a kconfig file on the host.
+// It first reads from /boot/config- of the currently running kernel and tries
+// /proc/config.gz if nothing was found in /boot.
+// If neither file provides a kconfig, it returns an error.
+func Find() (*os.File, error) {
+	kernelRelease, err := internal.KernelRelease()
+	if err != nil {
+		return nil, fmt.Errorf("cannot get kernel release: %w", err)
+	}
+
+	path := "/boot/config-" + kernelRelease
+	f, err := os.Open(path)
+	if err == nil {
+		return f, nil
+	}
+
+	f, err = os.Open("/proc/config.gz")
+	if err == nil {
+		return f, nil
+	}
+
+	return nil, fmt.Errorf("neither %s nor /proc/config.gz provide a kconfig", path)
+}
+
+// Parse parses the kconfig file provided by the given reader.
+// All the CONFIG_* entries which are in filter and which are set will be
+// put in the returned map as keys with their corresponding values.
+// If filter is nil, no filtering will occur.
+// If the kconfig file is not valid, an error will be returned.
+func Parse(source io.ReaderAt, filter map[string]struct{}) (map[string]string, error) {
+	var r io.Reader
+	zr, err := gzip.NewReader(io.NewSectionReader(source, 0, math.MaxInt64))
+	if err != nil {
+		r = io.NewSectionReader(source, 0, math.MaxInt64)
+	} else {
+		// Source is gzip compressed, transparently decompress.
+		r = zr
+	}
+
+	ret := make(map[string]string, len(filter))
+
+	s := bufio.NewScanner(r)
+
+	for s.Scan() {
+		line := s.Bytes()
+		err = processKconfigLine(line, ret, filter)
+		if err != nil {
+			return nil, fmt.Errorf("cannot parse line: %w", err)
+		}
+
+		if filter != nil && len(ret) == len(filter) {
+			break
+		}
+	}
+
+	if err := s.Err(); err != nil {
+		return nil, fmt.Errorf("cannot parse: %w", err)
+	}
+
+	if zr != nil {
+		return ret, zr.Close()
+	}
+
+	return ret, nil
+}
+
+// Golang translation of libbpf bpf_object__process_kconfig_line():
+// https://github.com/libbpf/libbpf/blob/fbd60dbff51c870f5e80a17c4f2fd639eb80af90/src/libbpf.c#L1874
+// It does the same checks but does not put the data inside the BPF map.
+func processKconfigLine(line []byte, m map[string]string, filter map[string]struct{}) error {
+	// Ignore empty lines and "# CONFIG_* is not set".
+	if !bytes.HasPrefix(line, []byte("CONFIG_")) {
+		return nil
+	}
+
+	key, value, found := bytes.Cut(line, []byte{'='})
+	if !found {
+		return fmt.Errorf("line %q does not contain separator '='", line)
+	}
+
+	if len(value) == 0 {
+		return fmt.Errorf("line %q has no value", line)
+	}
+
+	if filter != nil {
+		// NB: map[string(key)] gets special optimisation help from the compiler
+		// and doesn't allocate. Don't turn this into a variable.
+		_, ok := filter[string(key)]
+		if !ok {
+			return nil
+		}
+	}
+
+	// This can seem odd, but libbpf only sets the value the first time the key
+	// is encountered:
+	// https://github.com/torvalds/linux/blob/0d85b27b0cc6/tools/lib/bpf/libbpf.c#L1906-L1908
+	_, ok := m[string(key)]
+	if !ok {
+		m[string(key)] = string(value)
+	}
+
+	return nil
+}
+
+// PutValue translates the value given as parameter depending on the BTF
+// type; the translated value is then written into data.
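+//
+// Values follow kconfig conventions: y, n and m select boolean/tristate
+// handling, values starting with '"' are written as strings, and anything
+// else is parsed as a number in Go literal syntax (strconv with base 0).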
+func PutValue(data []byte, typ btf.Type, value string) error {
+	typ = btf.UnderlyingType(typ)
+
+	switch value {
+	case "y", "n", "m":
+		return putValueTri(data, typ, value)
+	default:
+		if strings.HasPrefix(value, `"`) {
+			return putValueString(data, typ, value)
+		}
+		return putValueNumber(data, typ, value)
+	}
+}
+
+// Golang translation of libbpf_tristate enum:
+// https://github.com/libbpf/libbpf/blob/fbd60dbff51c870f5e80a17c4f2fd639eb80af90/src/bpf_helpers.h#L169
+type triState int
+
+const (
+	TriNo     triState = 0
+	TriYes    triState = 1
+	TriModule triState = 2
+)
+
+func putValueTri(data []byte, typ btf.Type, value string) error {
+	switch v := typ.(type) {
+	case *btf.Int:
+		if v.Encoding != btf.Bool {
+			return fmt.Errorf("cannot add tri value, expected btf.Bool, got: %v", v.Encoding)
+		}
+
+		if v.Size != 1 {
+			return fmt.Errorf("cannot add tri value, expected size of 1 byte, got: %d", v.Size)
+		}
+
+		switch value {
+		case "y":
+			data[0] = 1
+		case "n":
+			data[0] = 0
+		default:
+			return fmt.Errorf("cannot use %q for btf.Bool", value)
+		}
+	case *btf.Enum:
+		if v.Name != "libbpf_tristate" {
+			return fmt.Errorf("cannot use enum %q, only libbpf_tristate is supported", v.Name)
+		}
+
+		var tri triState
+		switch value {
+		case "y":
+			tri = TriYes
+		case "m":
+			tri = TriModule
+		case "n":
+			tri = TriNo
+		default:
+			return fmt.Errorf("value %q is not supported for libbpf_tristate", value)
+		}
+
+		internal.NativeEndian.PutUint64(data, uint64(tri))
+	default:
+		return fmt.Errorf("cannot add number value, expected btf.Int or btf.Enum, got: %T", v)
+	}
+
+	return nil
+}
+
+func putValueString(data []byte, typ btf.Type, value string) error {
+	array, ok := typ.(*btf.Array)
+	if !ok {
+		return fmt.Errorf("cannot add string value, expected btf.Array, got %T", array)
+	}
+
+	contentType, ok := btf.UnderlyingType(array.Type).(*btf.Int)
+	if !ok {
+		return fmt.Errorf("cannot add string value, expected array of btf.Int, got %T", contentType)
+	}
+
+	// Any Int, which is not bool, of one byte could be used to store char:
+	// https://github.com/torvalds/linux/blob/1a5304fecee5/tools/lib/bpf/libbpf.c#L3637-L3638
+	if contentType.Size != 1 && contentType.Encoding != btf.Bool {
+		return fmt.Errorf("cannot add string value, expected array of btf.Int of size 1, got array of btf.Int of size: %v", contentType.Size)
+	}
+
+	if !strings.HasPrefix(value, `"`) || !strings.HasSuffix(value, `"`) {
+		return fmt.Errorf(`value %q must start and finish with '"'`, value)
+	}
+
+	str := strings.Trim(value, `"`)
+
+	// We need to trim the string if the bpf array is smaller.
+	if uint32(len(str)) >= array.Nelems {
+		str = str[:array.Nelems]
+	}
+
+	// Write the string content to .kconfig.
+	copy(data, str)
+
+	return nil
+}
+
+func putValueNumber(data []byte, typ btf.Type, value string) error {
+	integer, ok := typ.(*btf.Int)
+	if !ok {
+		return fmt.Errorf("cannot add number value, expected *btf.Int, got: %T", integer)
+	}
+
+	size := integer.Size
+	sizeInBits := size * 8
+
+	var n uint64
+	var err error
+	if integer.Encoding == btf.Signed {
+		parsed, e := strconv.ParseInt(value, 0, int(sizeInBits))
+
+		n = uint64(parsed)
+		err = e
+	} else {
+		parsed, e := strconv.ParseUint(value, 0, int(sizeInBits))
+
+		n = uint64(parsed)
+		err = e
+	}
+
+	if err != nil {
+		return fmt.Errorf("cannot parse value: %w", err)
+	}
+
+	return PutInteger(data, integer, n)
+}
+
+// PutInteger writes n into data.
+//
+// integer determines how much is written into data and what the valid values
+// are.
+func PutInteger(data []byte, integer *btf.Int, n uint64) error { + // This function should match set_kcfg_value_num in libbpf. + if integer.Encoding == btf.Bool && n > 1 { + return fmt.Errorf("invalid boolean value: %d", n) + } + + if len(data) < int(integer.Size) { + return fmt.Errorf("can't fit an integer of size %d into a byte slice of length %d", integer.Size, len(data)) + } + + switch integer.Size { + case 1: + if integer.Encoding == btf.Signed && (int64(n) > math.MaxInt8 || int64(n) < math.MinInt8) { + return fmt.Errorf("can't represent %d as a signed integer of size %d", int64(n), integer.Size) + } + data[0] = byte(n) + case 2: + if integer.Encoding == btf.Signed && (int64(n) > math.MaxInt16 || int64(n) < math.MinInt16) { + return fmt.Errorf("can't represent %d as a signed integer of size %d", int64(n), integer.Size) + } + internal.NativeEndian.PutUint16(data, uint16(n)) + case 4: + if integer.Encoding == btf.Signed && (int64(n) > math.MaxInt32 || int64(n) < math.MinInt32) { + return fmt.Errorf("can't represent %d as a signed integer of size %d", int64(n), integer.Size) + } + internal.NativeEndian.PutUint32(data, uint32(n)) + case 8: + internal.NativeEndian.PutUint64(data, uint64(n)) + default: + return fmt.Errorf("size (%d) is not valid, expected: 1, 2, 4 or 8", integer.Size) + } + + return nil +} diff --git a/vendor/github.com/cilium/ebpf/internal/math.go b/vendor/github.com/cilium/ebpf/internal/math.go new file mode 100644 index 0000000000..e95c8efde5 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/math.go @@ -0,0 +1,13 @@ +package internal + +import "golang.org/x/exp/constraints" + +// Align returns 'n' updated to 'alignment' boundary. +func Align[I constraints.Integer](n, alignment I) I { + return (n + alignment - 1) / alignment * alignment +} + +// IsPow returns true if n is a power of two. +func IsPow[I constraints.Integer](n I) bool { + return n != 0 && (n&(n-1)) == 0 +} diff --git a/vendor/github.com/cilium/ebpf/internal/output.go b/vendor/github.com/cilium/ebpf/internal/output.go index aeab37fcfa..dd6e6cbafe 100644 --- a/vendor/github.com/cilium/ebpf/internal/output.go +++ b/vendor/github.com/cilium/ebpf/internal/output.go @@ -6,6 +6,7 @@ import ( "go/format" "go/scanner" "io" + "reflect" "strings" "unicode" ) @@ -82,3 +83,15 @@ func WriteFormatted(src []byte, out io.Writer) error { return nel } + +// GoTypeName is like %T, but elides the package name. +// +// Pointers to a type are peeled off. 
+func GoTypeName(t any) string { + rT := reflect.TypeOf(t) + for rT.Kind() == reflect.Pointer { + rT = rT.Elem() + } + // Doesn't return the correct Name for generic types due to https://github.com/golang/go/issues/55924 + return rT.Name() +} diff --git a/vendor/github.com/cilium/ebpf/internal/pinning.go b/vendor/github.com/cilium/ebpf/internal/pinning.go index c711353c3e..01d892f934 100644 --- a/vendor/github.com/cilium/ebpf/internal/pinning.go +++ b/vendor/github.com/cilium/ebpf/internal/pinning.go @@ -6,15 +6,12 @@ import ( "os" "path/filepath" "runtime" - "unsafe" "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/unix" ) func Pin(currentPath, newPath string, fd *sys.FD) error { - const bpfFSType = 0xcafe4a11 - if newPath == "" { return errors.New("given pinning path cannot be empty") } @@ -22,20 +19,11 @@ func Pin(currentPath, newPath string, fd *sys.FD) error { return nil } - var statfs unix.Statfs_t - if err := unix.Statfs(filepath.Dir(newPath), &statfs); err != nil { + fsType, err := FSType(filepath.Dir(newPath)) + if err != nil { return err } - - fsType := int64(statfs.Type) - if unsafe.Sizeof(statfs.Type) == 4 { - // We're on a 32 bit arch, where statfs.Type is int32. bpfFSType is a - // negative number when interpreted as int32 so we need to cast via - // uint32 to avoid sign extension. - fsType = int64(uint32(statfs.Type)) - } - - if fsType != bpfFSType { + if fsType != unix.BPF_FS_MAGIC { return fmt.Errorf("%s is not on a bpf filesystem", newPath) } @@ -50,7 +38,7 @@ func Pin(currentPath, newPath string, fd *sys.FD) error { // Renameat2 is used instead of os.Rename to disallow the new path replacing // an existing path. - err := unix.Renameat2(unix.AT_FDCWD, currentPath, unix.AT_FDCWD, newPath, unix.RENAME_NOREPLACE) + err = unix.Renameat2(unix.AT_FDCWD, currentPath, unix.AT_FDCWD, newPath, unix.RENAME_NOREPLACE) if err == nil { // Object is now moved to the new pinning path. return nil diff --git a/vendor/github.com/cilium/ebpf/internal/platform.go b/vendor/github.com/cilium/ebpf/internal/platform.go new file mode 100644 index 0000000000..6e90f2ef71 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/platform.go @@ -0,0 +1,43 @@ +package internal + +import ( + "runtime" +) + +// PlatformPrefix returns the platform-dependent syscall wrapper prefix used by +// the linux kernel. +// +// Based on https://github.com/golang/go/blob/master/src/go/build/syslist.go +// and https://github.com/libbpf/libbpf/blob/master/src/libbpf.c#L10047 +func PlatformPrefix() string { + switch runtime.GOARCH { + case "386": + return "__ia32_" + case "amd64", "amd64p32": + return "__x64_" + + case "arm", "armbe": + return "__arm_" + case "arm64", "arm64be": + return "__arm64_" + + case "mips", "mipsle", "mips64", "mips64le", "mips64p32", "mips64p32le": + return "__mips_" + + case "s390": + return "__s390_" + case "s390x": + return "__s390x_" + + case "riscv", "riscv64": + return "__riscv_" + + case "ppc": + return "__powerpc_" + case "ppc64", "ppc64le": + return "__powerpc64_" + + default: + return "" + } +} diff --git a/vendor/github.com/cilium/ebpf/internal/prog.go b/vendor/github.com/cilium/ebpf/internal/prog.go new file mode 100644 index 0000000000..d629145b62 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/prog.go @@ -0,0 +1,11 @@ +package internal + +// EmptyBPFContext is the smallest-possible BPF input context to be used for +// invoking `Program.{Run,Benchmark,Test}`. +// +// Programs require a context input buffer of at least 15 bytes. 
Looking in +// net/bpf/test_run.c, bpf_test_init() requires that the input is at least +// ETH_HLEN (14) bytes. As of Linux commit fd18942 ("bpf: Don't redirect packets +// with invalid pkt_len"), it also requires the skb to be non-empty after +// removing the Layer 2 header. +var EmptyBPFContext = make([]byte, 15) diff --git a/vendor/github.com/cilium/ebpf/internal/statfs.go b/vendor/github.com/cilium/ebpf/internal/statfs.go new file mode 100644 index 0000000000..44c02d676e --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/statfs.go @@ -0,0 +1,23 @@ +package internal + +import ( + "unsafe" + + "github.com/cilium/ebpf/internal/unix" +) + +func FSType(path string) (int64, error) { + var statfs unix.Statfs_t + if err := unix.Statfs(path, &statfs); err != nil { + return 0, err + } + + fsType := int64(statfs.Type) + if unsafe.Sizeof(statfs.Type) == 4 { + // We're on a 32 bit arch, where statfs.Type is int32. bpfFSType is a + // negative number when interpreted as int32 so we need to cast via + // uint32 to avoid sign extension. + fsType = int64(uint32(statfs.Type)) + } + return fsType, nil +} diff --git a/vendor/github.com/cilium/ebpf/internal/sys/fd.go b/vendor/github.com/cilium/ebpf/internal/sys/fd.go index 65517d45e2..941a56fb91 100644 --- a/vendor/github.com/cilium/ebpf/internal/sys/fd.go +++ b/vendor/github.com/cilium/ebpf/internal/sys/fd.go @@ -17,11 +17,39 @@ type FD struct { } func newFD(value int) *FD { + if onLeakFD != nil { + // Attempt to store the caller's stack for the given fd value. + // Panic if fds contains an existing stack for the fd. + old, exist := fds.LoadOrStore(value, callersFrames()) + if exist { + f := old.(*runtime.Frames) + panic(fmt.Sprintf("found existing stack for fd %d:\n%s", value, FormatFrames(f))) + } + } + fd := &FD{value} - runtime.SetFinalizer(fd, (*FD).Close) + runtime.SetFinalizer(fd, (*FD).finalize) return fd } +// finalize is set as the FD's runtime finalizer and +// sends a leak trace before calling FD.Close(). +func (fd *FD) finalize() { + if fd.raw < 0 { + return + } + + // Invoke the fd leak callback. Calls LoadAndDelete to guarantee the callback + // is invoked at most once for one sys.FD allocation, runtime.Frames can only + // be unwound once. + f, ok := fds.LoadAndDelete(fd.Int()) + if ok && onLeakFD != nil { + onLeakFD(f.(*runtime.Frames)) + } + + _ = fd.Close() +} + // NewFD wraps a raw fd with a finalizer. // // You must not use the raw fd after calling this function, since the underlying @@ -64,15 +92,16 @@ func (fd *FD) Close() error { return nil } + return unix.Close(fd.disown()) +} + +func (fd *FD) disown() int { value := int(fd.raw) + fds.Delete(int(value)) fd.raw = -1 - fd.Forget() - return unix.Close(value) -} - -func (fd *FD) Forget() { runtime.SetFinalizer(fd, nil) + return value } func (fd *FD) Dup() (*FD, error) { @@ -90,7 +119,15 @@ func (fd *FD) Dup() (*FD, error) { return newFD(dup), nil } +// File takes ownership of FD and turns it into an [*os.File]. +// +// You must not use the FD after the call returns. +// +// Returns nil if the FD is not valid. 
func (fd *FD) File(name string) *os.File { - fd.Forget() - return os.NewFile(uintptr(fd.raw), name) + if fd.raw < 0 { + return nil + } + + return os.NewFile(uintptr(fd.disown()), name) } diff --git a/vendor/github.com/cilium/ebpf/internal/sys/fd_trace.go b/vendor/github.com/cilium/ebpf/internal/sys/fd_trace.go new file mode 100644 index 0000000000..cd50dd1f64 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/sys/fd_trace.go @@ -0,0 +1,93 @@ +package sys + +import ( + "bytes" + "fmt" + "runtime" + "sync" +) + +// OnLeakFD controls tracing [FD] lifetime to detect resources that are not +// closed by Close(). +// +// If fn is not nil, tracing is enabled for all FDs created going forward. fn is +// invoked for all FDs that are closed by the garbage collector instead of an +// explicit Close() by a caller. Calling OnLeakFD twice with a non-nil fn +// (without disabling tracing in the meantime) will cause a panic. +// +// If fn is nil, tracing will be disabled. Any FDs that have not been closed are +// considered to be leaked, fn will be invoked for them, and the process will be +// terminated. +// +// fn will be invoked at most once for every unique sys.FD allocation since a +// runtime.Frames can only be unwound once. +func OnLeakFD(fn func(*runtime.Frames)) { + // Enable leak tracing if new fn is provided. + if fn != nil { + if onLeakFD != nil { + panic("OnLeakFD called twice with non-nil fn") + } + + onLeakFD = fn + return + } + + // fn is nil past this point. + + if onLeakFD == nil { + return + } + + // Call onLeakFD for all open fds. + if fs := flushFrames(); len(fs) != 0 { + for _, f := range fs { + onLeakFD(f) + } + } + + onLeakFD = nil +} + +var onLeakFD func(*runtime.Frames) + +// fds is a registry of all file descriptors wrapped into sys.fds that were +// created while an fd tracer was active. +var fds sync.Map // map[int]*runtime.Frames + +// flushFrames removes all elements from fds and returns them as a slice. This +// deals with the fact that a runtime.Frames can only be unwound once using +// Next(). +func flushFrames() []*runtime.Frames { + var frames []*runtime.Frames + fds.Range(func(key, value any) bool { + frames = append(frames, value.(*runtime.Frames)) + fds.Delete(key) + return true + }) + return frames +} + +func callersFrames() *runtime.Frames { + c := make([]uintptr, 32) + + // Skip runtime.Callers and this function. + i := runtime.Callers(2, c) + if i == 0 { + return nil + } + + return runtime.CallersFrames(c) +} + +// FormatFrames formats a runtime.Frames as a human-readable string. +func FormatFrames(fs *runtime.Frames) string { + var b bytes.Buffer + for { + f, more := fs.Next() + b.WriteString(fmt.Sprintf("\t%s+%#x\n\t\t%s:%d\n", f.Function, f.PC-f.Entry, f.File, f.Line)) + if !more { + break + } + } + return b.String() +} diff --git a/vendor/github.com/cilium/ebpf/internal/sys/mapflags_string.go b/vendor/github.com/cilium/ebpf/internal/sys/mapflags_string.go new file mode 100644 index 0000000000..d9fe217222 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/sys/mapflags_string.go @@ -0,0 +1,53 @@ +// Code generated by "stringer -type MapFlags"; DO NOT EDIT. + +package sys + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[BPF_F_NO_PREALLOC-1] + _ = x[BPF_F_NO_COMMON_LRU-2] + _ = x[BPF_F_NUMA_NODE-4] + _ = x[BPF_F_RDONLY-8] + _ = x[BPF_F_WRONLY-16] + _ = x[BPF_F_STACK_BUILD_ID-32] + _ = x[BPF_F_ZERO_SEED-64] + _ = x[BPF_F_RDONLY_PROG-128] + _ = x[BPF_F_WRONLY_PROG-256] + _ = x[BPF_F_CLONE-512] + _ = x[BPF_F_MMAPABLE-1024] + _ = x[BPF_F_PRESERVE_ELEMS-2048] + _ = x[BPF_F_INNER_MAP-4096] + _ = x[BPF_F_LINK-8192] + _ = x[BPF_F_PATH_FD-16384] +} + +const _MapFlags_name = "BPF_F_NO_PREALLOCBPF_F_NO_COMMON_LRUBPF_F_NUMA_NODEBPF_F_RDONLYBPF_F_WRONLYBPF_F_STACK_BUILD_IDBPF_F_ZERO_SEEDBPF_F_RDONLY_PROGBPF_F_WRONLY_PROGBPF_F_CLONEBPF_F_MMAPABLEBPF_F_PRESERVE_ELEMSBPF_F_INNER_MAPBPF_F_LINKBPF_F_PATH_FD" + +var _MapFlags_map = map[MapFlags]string{ + 1: _MapFlags_name[0:17], + 2: _MapFlags_name[17:36], + 4: _MapFlags_name[36:51], + 8: _MapFlags_name[51:63], + 16: _MapFlags_name[63:75], + 32: _MapFlags_name[75:95], + 64: _MapFlags_name[95:110], + 128: _MapFlags_name[110:127], + 256: _MapFlags_name[127:144], + 512: _MapFlags_name[144:155], + 1024: _MapFlags_name[155:169], + 2048: _MapFlags_name[169:189], + 4096: _MapFlags_name[189:204], + 8192: _MapFlags_name[204:214], + 16384: _MapFlags_name[214:227], +} + +func (i MapFlags) String() string { + if str, ok := _MapFlags_map[i]; ok { + return str + } + return "MapFlags(" + strconv.FormatInt(int64(i), 10) + ")" +} diff --git a/vendor/github.com/cilium/ebpf/internal/sys/ptr.go b/vendor/github.com/cilium/ebpf/internal/sys/ptr.go index a221006888..e9bb590597 100644 --- a/vendor/github.com/cilium/ebpf/internal/sys/ptr.go +++ b/vendor/github.com/cilium/ebpf/internal/sys/ptr.go @@ -20,7 +20,7 @@ func NewSlicePointer(buf []byte) Pointer { return Pointer{ptr: unsafe.Pointer(&buf[0])} } -// NewSlicePointer creates a 64-bit pointer from a byte slice. +// NewSlicePointerLen creates a 64-bit pointer from a byte slice. // // Useful to assign both the pointer and the length in one go. func NewSlicePointerLen(buf []byte) (Pointer, uint32) { @@ -36,3 +36,17 @@ func NewStringPointer(str string) Pointer { return Pointer{ptr: unsafe.Pointer(p)} } + +// NewStringSlicePointer allocates an array of Pointers to each string in the +// given slice of strings and returns a 64-bit pointer to the start of the +// resulting array. +// +// Use this function to pass arrays of strings as syscall arguments. 
+func NewStringSlicePointer(strings []string) Pointer { + sp := make([]Pointer, 0, len(strings)) + for _, s := range strings { + sp = append(sp, NewStringPointer(s)) + } + + return Pointer{ptr: unsafe.Pointer(&sp[0])} +} diff --git a/vendor/github.com/cilium/ebpf/internal/sys/ptr_32_be.go b/vendor/github.com/cilium/ebpf/internal/sys/ptr_32_be.go index df903d780b..6278c79c9e 100644 --- a/vendor/github.com/cilium/ebpf/internal/sys/ptr_32_be.go +++ b/vendor/github.com/cilium/ebpf/internal/sys/ptr_32_be.go @@ -1,5 +1,4 @@ //go:build armbe || mips || mips64p32 -// +build armbe mips mips64p32 package sys diff --git a/vendor/github.com/cilium/ebpf/internal/sys/ptr_32_le.go b/vendor/github.com/cilium/ebpf/internal/sys/ptr_32_le.go index a6a51edb6e..c27b537e8e 100644 --- a/vendor/github.com/cilium/ebpf/internal/sys/ptr_32_le.go +++ b/vendor/github.com/cilium/ebpf/internal/sys/ptr_32_le.go @@ -1,5 +1,4 @@ //go:build 386 || amd64p32 || arm || mipsle || mips64p32le -// +build 386 amd64p32 arm mipsle mips64p32le package sys diff --git a/vendor/github.com/cilium/ebpf/internal/sys/ptr_64.go b/vendor/github.com/cilium/ebpf/internal/sys/ptr_64.go index 7c0279e487..2d7828230a 100644 --- a/vendor/github.com/cilium/ebpf/internal/sys/ptr_64.go +++ b/vendor/github.com/cilium/ebpf/internal/sys/ptr_64.go @@ -1,5 +1,4 @@ //go:build !386 && !amd64p32 && !arm && !mipsle && !mips64p32le && !armbe && !mips && !mips64p32 -// +build !386,!amd64p32,!arm,!mipsle,!mips64p32le,!armbe,!mips,!mips64p32 package sys diff --git a/vendor/github.com/cilium/ebpf/internal/sys/signals.go b/vendor/github.com/cilium/ebpf/internal/sys/signals.go new file mode 100644 index 0000000000..e5337191d6 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/sys/signals.go @@ -0,0 +1,83 @@ +package sys + +import ( + "fmt" + "runtime" + "unsafe" + + "github.com/cilium/ebpf/internal/unix" +) + +// A sigset containing only SIGPROF. +var profSet unix.Sigset_t + +func init() { + // See sigsetAdd for details on the implementation. Open coded here so + // that the compiler will check the constant calculations for us. + profSet.Val[sigprofBit/wordBits] |= 1 << (sigprofBit % wordBits) +} + +// maskProfilerSignal locks the calling goroutine to its underlying OS thread +// and adds SIGPROF to the thread's signal mask. This prevents pprof from +// interrupting expensive syscalls like e.g. BPF_PROG_LOAD. +// +// The caller must defer unmaskProfilerSignal() to reverse the operation. +func maskProfilerSignal() { + runtime.LockOSThread() + + if err := unix.PthreadSigmask(unix.SIG_BLOCK, &profSet, nil); err != nil { + runtime.UnlockOSThread() + panic(fmt.Errorf("masking profiler signal: %w", err)) + } +} + +// unmaskProfilerSignal removes SIGPROF from the underlying thread's signal +// mask, allowing it to be interrupted for profiling once again. +// +// It also unlocks the current goroutine from its underlying OS thread. +func unmaskProfilerSignal() { + defer runtime.UnlockOSThread() + + if err := unix.PthreadSigmask(unix.SIG_UNBLOCK, &profSet, nil); err != nil { + panic(fmt.Errorf("unmasking profiler signal: %w", err)) + } +} + +const ( + // Signal is the nth bit in the bitfield. + sigprofBit = int(unix.SIGPROF - 1) + // The number of bits in one Sigset_t word. + wordBits = int(unsafe.Sizeof(unix.Sigset_t{}.Val[0])) * 8 +) + +// sigsetAdd adds signal to set. +// +// Note: Sigset_t.Val's value type is uint32 or uint64 depending on the arch. +// This function must be able to deal with both and so must avoid any direct +// references to u32 or u64 types. 
+func sigsetAdd(set *unix.Sigset_t, signal unix.Signal) error { + if signal < 1 { + return fmt.Errorf("signal %d must be larger than 0", signal) + } + + // For amd64, runtime.sigaddset() performs the following operation: + // set[(signal-1)/32] |= 1 << ((uint32(signal) - 1) & 31) + // + // This trick depends on sigset being two u32's, causing a signal in the + // bottom 31 bits to be written to the low word if bit 32 is low, or the high + // word if bit 32 is high. + + // Signal is the nth bit in the bitfield. + bit := int(signal - 1) + // Word within the sigset the bit needs to be written to. + word := bit / wordBits + + if word >= len(set.Val) { + return fmt.Errorf("signal %d does not fit within unix.Sigset_t", signal) + } + + // Write the signal bit into its corresponding word at the corrected offset. + set.Val[word] |= 1 << (bit % wordBits) + + return nil +} diff --git a/vendor/github.com/cilium/ebpf/internal/sys/syscall.go b/vendor/github.com/cilium/ebpf/internal/sys/syscall.go index 2a5935dc91..f6b6e93458 100644 --- a/vendor/github.com/cilium/ebpf/internal/sys/syscall.go +++ b/vendor/github.com/cilium/ebpf/internal/sys/syscall.go @@ -8,10 +8,22 @@ import ( "github.com/cilium/ebpf/internal/unix" ) +// ENOTSUPP is a Linux internal error code that has leaked into UAPI. +// +// It is not the same as ENOTSUP or EOPNOTSUPP. +const ENOTSUPP = syscall.Errno(524) + // BPF wraps SYS_BPF. // // Any pointers contained in attr must use the Pointer type from this package. func BPF(cmd Cmd, attr unsafe.Pointer, size uintptr) (uintptr, error) { + // Prevent the Go profiler from repeatedly interrupting the verifier, + // which could otherwise lead to a livelock due to receiving EAGAIN. + if cmd == BPF_PROG_LOAD || cmd == BPF_PROG_RUN { + maskProfilerSignal() + defer unmaskProfilerSignal() + } + for { r1, _, errNo := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size) runtime.KeepAlive(attr) @@ -33,10 +45,10 @@ func BPF(cmd Cmd, attr unsafe.Pointer, size uintptr) (uintptr, error) { // Info is implemented by all structs that can be passed to the ObjInfo syscall. 
// -// MapInfo -// ProgInfo -// LinkInfo -// BtfInfo +// MapInfo +// ProgInfo +// LinkInfo +// BtfInfo type Info interface { info() (unsafe.Pointer, uint32) } @@ -59,12 +71,52 @@ func (i *LinkInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } +func (i *TracingLinkInfo) info() (unsafe.Pointer, uint32) { + return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) +} + +func (i *CgroupLinkInfo) info() (unsafe.Pointer, uint32) { + return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) +} + +func (i *NetNsLinkInfo) info() (unsafe.Pointer, uint32) { + return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) +} + +func (i *XDPLinkInfo) info() (unsafe.Pointer, uint32) { + return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) +} + +func (i *TcxLinkInfo) info() (unsafe.Pointer, uint32) { + return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) +} + +func (i *NetfilterLinkInfo) info() (unsafe.Pointer, uint32) { + return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) +} + +func (i *NetkitLinkInfo) info() (unsafe.Pointer, uint32) { + return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) +} + +func (i *KprobeMultiLinkInfo) info() (unsafe.Pointer, uint32) { + return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) +} + +func (i *KprobeLinkInfo) info() (unsafe.Pointer, uint32) { + return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) +} + var _ Info = (*BtfInfo)(nil) func (i *BtfInfo) info() (unsafe.Pointer, uint32) { return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) } +func (i *PerfEventLinkInfo) info() (unsafe.Pointer, uint32) { + return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i)) +} + // ObjInfo retrieves information about a BPF Fd. // // info may be one of MapInfo, ProgInfo, LinkInfo and BtfInfo. @@ -90,12 +142,56 @@ func NewObjName(name string) ObjName { return result } +// LogLevel controls the verbosity of the kernel's eBPF program verifier. +type LogLevel uint32 + +const ( + BPF_LOG_LEVEL1 LogLevel = 1 << iota + BPF_LOG_LEVEL2 + BPF_LOG_STATS +) + // LinkID uniquely identifies a bpf_link. type LinkID uint32 // BTFID uniquely identifies a BTF blob loaded into the kernel. type BTFID uint32 +// TypeID identifies a type in a BTF blob. +type TypeID uint32 + +// MapFlags control map behaviour. +type MapFlags uint32 + +//go:generate go run golang.org/x/tools/cmd/stringer@latest -type MapFlags + +const ( + BPF_F_NO_PREALLOC MapFlags = 1 << iota + BPF_F_NO_COMMON_LRU + BPF_F_NUMA_NODE + BPF_F_RDONLY + BPF_F_WRONLY + BPF_F_STACK_BUILD_ID + BPF_F_ZERO_SEED + BPF_F_RDONLY_PROG + BPF_F_WRONLY_PROG + BPF_F_CLONE + BPF_F_MMAPABLE + BPF_F_PRESERVE_ELEMS + BPF_F_INNER_MAP + BPF_F_LINK + BPF_F_PATH_FD +) + +// Flags used by bpf_mprog. +const ( + BPF_F_REPLACE = 1 << (iota + 2) + BPF_F_BEFORE + BPF_F_AFTER + BPF_F_ID + BPF_F_LINK_MPROG = 1 << 13 // aka BPF_F_LINK +) + // wrappedErrno wraps syscall.Errno to prevent direct comparisons with // syscall.E* or unix.E* constants. 
// @@ -108,6 +204,13 @@ func (we wrappedErrno) Unwrap() error { return we.Errno } +func (we wrappedErrno) Error() string { + if we.Errno == ENOTSUPP { + return "operation not supported" + } + return we.Errno.Error() +} + type syscallError struct { error errno syscall.Errno diff --git a/vendor/github.com/cilium/ebpf/internal/sys/types.go b/vendor/github.com/cilium/ebpf/internal/sys/types.go index 291e3a6196..d2ae942668 100644 --- a/vendor/github.com/cilium/ebpf/internal/sys/types.go +++ b/vendor/github.com/cilium/ebpf/internal/sys/types.go @@ -6,14 +6,14 @@ import ( "unsafe" ) -type AdjRoomMode int32 +type AdjRoomMode uint32 const ( BPF_ADJ_ROOM_NET AdjRoomMode = 0 BPF_ADJ_ROOM_MAC AdjRoomMode = 1 ) -type AttachType int32 +type AttachType uint32 const ( BPF_CGROUP_INET_INGRESS AttachType = 0 @@ -59,10 +59,23 @@ const ( BPF_SK_REUSEPORT_SELECT_OR_MIGRATE AttachType = 40 BPF_PERF_EVENT AttachType = 41 BPF_TRACE_KPROBE_MULTI AttachType = 42 - __MAX_BPF_ATTACH_TYPE AttachType = 43 + BPF_LSM_CGROUP AttachType = 43 + BPF_STRUCT_OPS AttachType = 44 + BPF_NETFILTER AttachType = 45 + BPF_TCX_INGRESS AttachType = 46 + BPF_TCX_EGRESS AttachType = 47 + BPF_TRACE_UPROBE_MULTI AttachType = 48 + BPF_CGROUP_UNIX_CONNECT AttachType = 49 + BPF_CGROUP_UNIX_SENDMSG AttachType = 50 + BPF_CGROUP_UNIX_RECVMSG AttachType = 51 + BPF_CGROUP_UNIX_GETPEERNAME AttachType = 52 + BPF_CGROUP_UNIX_GETSOCKNAME AttachType = 53 + BPF_NETKIT_PRIMARY AttachType = 54 + BPF_NETKIT_PEER AttachType = 55 + __MAX_BPF_ATTACH_TYPE AttachType = 56 ) -type Cmd int32 +type Cmd uint32 const ( BPF_MAP_CREATE Cmd = 0 @@ -104,7 +117,7 @@ const ( BPF_PROG_BIND_MAP Cmd = 35 ) -type FunctionId int32 +type FunctionId uint32 const ( BPF_FUNC_unspec FunctionId = 0 @@ -301,17 +314,35 @@ const ( BPF_FUNC_copy_from_user_task FunctionId = 191 BPF_FUNC_skb_set_tstamp FunctionId = 192 BPF_FUNC_ima_file_hash FunctionId = 193 - __BPF_FUNC_MAX_ID FunctionId = 194 + BPF_FUNC_kptr_xchg FunctionId = 194 + BPF_FUNC_map_lookup_percpu_elem FunctionId = 195 + BPF_FUNC_skc_to_mptcp_sock FunctionId = 196 + BPF_FUNC_dynptr_from_mem FunctionId = 197 + BPF_FUNC_ringbuf_reserve_dynptr FunctionId = 198 + BPF_FUNC_ringbuf_submit_dynptr FunctionId = 199 + BPF_FUNC_ringbuf_discard_dynptr FunctionId = 200 + BPF_FUNC_dynptr_read FunctionId = 201 + BPF_FUNC_dynptr_write FunctionId = 202 + BPF_FUNC_dynptr_data FunctionId = 203 + BPF_FUNC_tcp_raw_gen_syncookie_ipv4 FunctionId = 204 + BPF_FUNC_tcp_raw_gen_syncookie_ipv6 FunctionId = 205 + BPF_FUNC_tcp_raw_check_syncookie_ipv4 FunctionId = 206 + BPF_FUNC_tcp_raw_check_syncookie_ipv6 FunctionId = 207 + BPF_FUNC_ktime_get_tai_ns FunctionId = 208 + BPF_FUNC_user_ringbuf_drain FunctionId = 209 + BPF_FUNC_cgrp_storage_get FunctionId = 210 + BPF_FUNC_cgrp_storage_delete FunctionId = 211 + __BPF_FUNC_MAX_ID FunctionId = 212 ) -type HdrStartOff int32 +type HdrStartOff uint32 const ( BPF_HDR_START_MAC HdrStartOff = 0 BPF_HDR_START_NET HdrStartOff = 1 ) -type LinkType int32 +type LinkType uint32 const ( BPF_LINK_TYPE_UNSPEC LinkType = 0 @@ -323,46 +354,67 @@ const ( BPF_LINK_TYPE_XDP LinkType = 6 BPF_LINK_TYPE_PERF_EVENT LinkType = 7 BPF_LINK_TYPE_KPROBE_MULTI LinkType = 8 - MAX_BPF_LINK_TYPE LinkType = 9 + BPF_LINK_TYPE_STRUCT_OPS LinkType = 9 + BPF_LINK_TYPE_NETFILTER LinkType = 10 + BPF_LINK_TYPE_TCX LinkType = 11 + BPF_LINK_TYPE_UPROBE_MULTI LinkType = 12 + BPF_LINK_TYPE_NETKIT LinkType = 13 + MAX_BPF_LINK_TYPE LinkType = 14 ) -type MapType int32 +type MapType uint32 const ( - BPF_MAP_TYPE_UNSPEC MapType = 0 - BPF_MAP_TYPE_HASH MapType 
= 1 - BPF_MAP_TYPE_ARRAY MapType = 2 - BPF_MAP_TYPE_PROG_ARRAY MapType = 3 - BPF_MAP_TYPE_PERF_EVENT_ARRAY MapType = 4 - BPF_MAP_TYPE_PERCPU_HASH MapType = 5 - BPF_MAP_TYPE_PERCPU_ARRAY MapType = 6 - BPF_MAP_TYPE_STACK_TRACE MapType = 7 - BPF_MAP_TYPE_CGROUP_ARRAY MapType = 8 - BPF_MAP_TYPE_LRU_HASH MapType = 9 - BPF_MAP_TYPE_LRU_PERCPU_HASH MapType = 10 - BPF_MAP_TYPE_LPM_TRIE MapType = 11 - BPF_MAP_TYPE_ARRAY_OF_MAPS MapType = 12 - BPF_MAP_TYPE_HASH_OF_MAPS MapType = 13 - BPF_MAP_TYPE_DEVMAP MapType = 14 - BPF_MAP_TYPE_SOCKMAP MapType = 15 - BPF_MAP_TYPE_CPUMAP MapType = 16 - BPF_MAP_TYPE_XSKMAP MapType = 17 - BPF_MAP_TYPE_SOCKHASH MapType = 18 - BPF_MAP_TYPE_CGROUP_STORAGE MapType = 19 - BPF_MAP_TYPE_REUSEPORT_SOCKARRAY MapType = 20 - BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE MapType = 21 - BPF_MAP_TYPE_QUEUE MapType = 22 - BPF_MAP_TYPE_STACK MapType = 23 - BPF_MAP_TYPE_SK_STORAGE MapType = 24 - BPF_MAP_TYPE_DEVMAP_HASH MapType = 25 - BPF_MAP_TYPE_STRUCT_OPS MapType = 26 - BPF_MAP_TYPE_RINGBUF MapType = 27 - BPF_MAP_TYPE_INODE_STORAGE MapType = 28 - BPF_MAP_TYPE_TASK_STORAGE MapType = 29 - BPF_MAP_TYPE_BLOOM_FILTER MapType = 30 + BPF_MAP_TYPE_UNSPEC MapType = 0 + BPF_MAP_TYPE_HASH MapType = 1 + BPF_MAP_TYPE_ARRAY MapType = 2 + BPF_MAP_TYPE_PROG_ARRAY MapType = 3 + BPF_MAP_TYPE_PERF_EVENT_ARRAY MapType = 4 + BPF_MAP_TYPE_PERCPU_HASH MapType = 5 + BPF_MAP_TYPE_PERCPU_ARRAY MapType = 6 + BPF_MAP_TYPE_STACK_TRACE MapType = 7 + BPF_MAP_TYPE_CGROUP_ARRAY MapType = 8 + BPF_MAP_TYPE_LRU_HASH MapType = 9 + BPF_MAP_TYPE_LRU_PERCPU_HASH MapType = 10 + BPF_MAP_TYPE_LPM_TRIE MapType = 11 + BPF_MAP_TYPE_ARRAY_OF_MAPS MapType = 12 + BPF_MAP_TYPE_HASH_OF_MAPS MapType = 13 + BPF_MAP_TYPE_DEVMAP MapType = 14 + BPF_MAP_TYPE_SOCKMAP MapType = 15 + BPF_MAP_TYPE_CPUMAP MapType = 16 + BPF_MAP_TYPE_XSKMAP MapType = 17 + BPF_MAP_TYPE_SOCKHASH MapType = 18 + BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED MapType = 19 + BPF_MAP_TYPE_CGROUP_STORAGE MapType = 19 + BPF_MAP_TYPE_REUSEPORT_SOCKARRAY MapType = 20 + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED MapType = 21 + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE MapType = 21 + BPF_MAP_TYPE_QUEUE MapType = 22 + BPF_MAP_TYPE_STACK MapType = 23 + BPF_MAP_TYPE_SK_STORAGE MapType = 24 + BPF_MAP_TYPE_DEVMAP_HASH MapType = 25 + BPF_MAP_TYPE_STRUCT_OPS MapType = 26 + BPF_MAP_TYPE_RINGBUF MapType = 27 + BPF_MAP_TYPE_INODE_STORAGE MapType = 28 + BPF_MAP_TYPE_TASK_STORAGE MapType = 29 + BPF_MAP_TYPE_BLOOM_FILTER MapType = 30 + BPF_MAP_TYPE_USER_RINGBUF MapType = 31 + BPF_MAP_TYPE_CGRP_STORAGE MapType = 32 ) -type ProgType int32 +type PerfEventType uint32 + +const ( + BPF_PERF_EVENT_UNSPEC PerfEventType = 0 + BPF_PERF_EVENT_UPROBE PerfEventType = 1 + BPF_PERF_EVENT_URETPROBE PerfEventType = 2 + BPF_PERF_EVENT_KPROBE PerfEventType = 3 + BPF_PERF_EVENT_KRETPROBE PerfEventType = 4 + BPF_PERF_EVENT_TRACEPOINT PerfEventType = 5 + BPF_PERF_EVENT_EVENT PerfEventType = 6 +) + +type ProgType uint32 const ( BPF_PROG_TYPE_UNSPEC ProgType = 0 @@ -397,25 +449,27 @@ const ( BPF_PROG_TYPE_LSM ProgType = 29 BPF_PROG_TYPE_SK_LOOKUP ProgType = 30 BPF_PROG_TYPE_SYSCALL ProgType = 31 + BPF_PROG_TYPE_NETFILTER ProgType = 32 ) -type RetCode int32 +type RetCode uint32 const ( - BPF_OK RetCode = 0 - BPF_DROP RetCode = 2 - BPF_REDIRECT RetCode = 7 - BPF_LWT_REROUTE RetCode = 128 + BPF_OK RetCode = 0 + BPF_DROP RetCode = 2 + BPF_REDIRECT RetCode = 7 + BPF_LWT_REROUTE RetCode = 128 + BPF_FLOW_DISSECTOR_CONTINUE RetCode = 129 ) -type SkAction int32 +type SkAction uint32 const ( SK_DROP SkAction = 0 SK_PASS SkAction = 1 ) 
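As a usage sketch (not part of this patch), the widened, typed flag and enum plumbing above surfaces through the public API roughly like this; creating the map needs CAP_BPF or root, and the map name and function are illustrative:

import (
	"fmt"

	"github.com/cilium/ebpf"
	"golang.org/x/sys/unix"
)

func newNoPreallocHash() error {
	m, err := ebpf.NewMap(&ebpf.MapSpec{
		Name:       "example_hash",
		Type:       ebpf.Hash,
		KeySize:    4,
		ValueSize:  8,
		MaxEntries: 128,
		// Ends up in sys.MapCreateAttr.MapFlags, now typed as MapFlags.
		Flags: unix.BPF_F_NO_PREALLOC,
	})
	if err != nil {
		return err
	}
	defer m.Close()

	info, err := m.Info()
	if err != nil {
		return err
	}
	fmt.Printf("map flags reported by the kernel: %d\n", info.Flags)
	return nil
}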
-type StackBuildIdStatus int32 +type StackBuildIdStatus uint32 const ( BPF_STACK_BUILD_ID_EMPTY StackBuildIdStatus = 0 @@ -423,13 +477,22 @@ const ( BPF_STACK_BUILD_ID_IP StackBuildIdStatus = 2 ) -type StatsType int32 +type StatsType uint32 const ( BPF_STATS_RUN_TIME StatsType = 0 ) -type XdpAction int32 +type TcxActionBase int32 + +const ( + TCX_NEXT TcxActionBase = -1 + TCX_PASS TcxActionBase = 0 + TCX_DROP TcxActionBase = 2 + TCX_REDIRECT TcxActionBase = 7 +) + +type XdpAction uint32 const ( XDP_ABORTED XdpAction = 0 @@ -465,7 +528,7 @@ type LinkInfo struct { Id LinkID ProgId uint32 _ [4]byte - Extra [16]uint8 + Extra [40]uint8 } type MapInfo struct { @@ -474,15 +537,15 @@ type MapInfo struct { KeySize uint32 ValueSize uint32 MaxEntries uint32 - MapFlags uint32 + MapFlags MapFlags Name ObjName Ifindex uint32 - BtfVmlinuxValueTypeId uint32 + BtfVmlinuxValueTypeId TypeID NetnsDev uint64 NetnsIno uint64 BtfId uint32 - BtfKeyTypeId uint32 - BtfValueTypeId uint32 + BtfKeyTypeId TypeID + BtfValueTypeId TypeID _ [4]byte MapExtra uint64 } @@ -508,12 +571,12 @@ type ProgInfo struct { NrJitedFuncLens uint32 JitedKsyms uint64 JitedFuncLens uint64 - BtfId uint32 + BtfId BTFID FuncInfoRecSize uint32 - FuncInfo uint64 + FuncInfo Pointer NrFuncInfo uint32 NrLineInfo uint32 - LineInfo uint64 + LineInfo Pointer JitedLineInfo uint64 NrJitedLineInfo uint32 LineInfoRecSize uint32 @@ -524,6 +587,8 @@ type ProgInfo struct { RunCnt uint64 RecursionMisses uint64 VerifiedInsns uint32 + AttachBtfObjId BTFID + AttachBtfId TypeID _ [4]byte } @@ -572,12 +637,12 @@ func BtfGetNextId(attr *BtfGetNextIdAttr) error { } type BtfLoadAttr struct { - Btf Pointer - BtfLogBuf Pointer - BtfSize uint32 - BtfLogSize uint32 - BtfLogLevel uint32 - _ [4]byte + Btf Pointer + BtfLogBuf Pointer + BtfSize uint32 + BtfLogSize uint32 + BtfLogLevel uint32 + BtfLogTrueSize uint32 } func BtfLoad(attr *BtfLoadAttr) (*FD, error) { @@ -616,8 +681,8 @@ type LinkCreateAttr struct { TargetFd uint32 AttachType AttachType Flags uint32 - TargetBtfId uint32 - _ [28]byte + TargetBtfId TypeID + _ [44]byte } func LinkCreate(attr *LinkCreateAttr) (*FD, error) { @@ -635,7 +700,7 @@ type LinkCreateIterAttr struct { Flags uint32 IterInfo Pointer IterInfoLen uint32 - _ [20]byte + _ [36]byte } func LinkCreateIter(attr *LinkCreateIterAttr) (*FD, error) { @@ -646,13 +711,73 @@ func LinkCreateIter(attr *LinkCreateIterAttr) (*FD, error) { return NewFD(int(fd)) } +type LinkCreateKprobeMultiAttr struct { + ProgFd uint32 + TargetFd uint32 + AttachType AttachType + Flags uint32 + KprobeMultiFlags uint32 + Count uint32 + Syms Pointer + Addrs Pointer + Cookies Pointer + _ [16]byte +} + +func LinkCreateKprobeMulti(attr *LinkCreateKprobeMultiAttr) (*FD, error) { + fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type LinkCreateNetfilterAttr struct { + ProgFd uint32 + TargetFd uint32 + AttachType AttachType + Flags uint32 + Pf uint32 + Hooknum uint32 + Priority int32 + NetfilterFlags uint32 + _ [32]byte +} + +func LinkCreateNetfilter(attr *LinkCreateNetfilterAttr) (*FD, error) { + fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type LinkCreateNetkitAttr struct { + ProgFd uint32 + TargetIfindex uint32 + AttachType AttachType + Flags uint32 + RelativeFdOrId uint32 + _ [4]byte + ExpectedRevision uint64 + _ [32]byte +} + +func LinkCreateNetkit(attr *LinkCreateNetkitAttr) (*FD, error) 
{ + fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + type LinkCreatePerfEventAttr struct { ProgFd uint32 TargetFd uint32 AttachType AttachType Flags uint32 BpfCookie uint64 - _ [24]byte + _ [40]byte } func LinkCreatePerfEvent(attr *LinkCreatePerfEventAttr) (*FD, error) { @@ -663,6 +788,87 @@ func LinkCreatePerfEvent(attr *LinkCreatePerfEventAttr) (*FD, error) { return NewFD(int(fd)) } +type LinkCreateTcxAttr struct { + ProgFd uint32 + TargetIfindex uint32 + AttachType AttachType + Flags uint32 + RelativeFdOrId uint32 + _ [4]byte + ExpectedRevision uint64 + _ [32]byte +} + +func LinkCreateTcx(attr *LinkCreateTcxAttr) (*FD, error) { + fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type LinkCreateTracingAttr struct { + ProgFd uint32 + TargetFd uint32 + AttachType AttachType + Flags uint32 + TargetBtfId BTFID + _ [4]byte + Cookie uint64 + _ [32]byte +} + +func LinkCreateTracing(attr *LinkCreateTracingAttr) (*FD, error) { + fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type LinkCreateUprobeMultiAttr struct { + ProgFd uint32 + TargetFd uint32 + AttachType AttachType + Flags uint32 + Path Pointer + Offsets Pointer + RefCtrOffsets Pointer + Cookies Pointer + Count uint32 + UprobeMultiFlags uint32 + Pid uint32 + _ [4]byte +} + +func LinkCreateUprobeMulti(attr *LinkCreateUprobeMultiAttr) (*FD, error) { + fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type LinkGetFdByIdAttr struct{ Id LinkID } + +func LinkGetFdById(attr *LinkGetFdByIdAttr) (*FD, error) { + fd, err := BPF(BPF_LINK_GET_FD_BY_ID, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + return NewFD(int(fd)) +} + +type LinkGetNextIdAttr struct { + Id LinkID + NextId LinkID +} + +func LinkGetNextId(attr *LinkGetNextIdAttr) error { + _, err := BPF(BPF_LINK_GET_NEXT_ID, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + type LinkUpdateAttr struct { LinkFd uint32 NewProgFd uint32 @@ -680,15 +886,15 @@ type MapCreateAttr struct { KeySize uint32 ValueSize uint32 MaxEntries uint32 - MapFlags uint32 + MapFlags MapFlags InnerMapFd uint32 NumaNode uint32 MapName ObjName MapIfindex uint32 BtfFd uint32 - BtfKeyTypeId uint32 - BtfValueTypeId uint32 - BtfVmlinuxValueTypeId uint32 + BtfKeyTypeId TypeID + BtfValueTypeId TypeID + BtfVmlinuxValueTypeId TypeID MapExtra uint64 } @@ -859,6 +1065,8 @@ type ObjGetAttr struct { Pathname Pointer BpfFd uint32 FileFlags uint32 + PathFd int32 + _ [4]byte } func ObjGet(attr *ObjGetAttr) (*FD, error) { @@ -884,6 +1092,8 @@ type ObjPinAttr struct { Pathname Pointer BpfFd uint32 FileFlags uint32 + PathFd int32 + _ [4]byte } func ObjPin(attr *ObjPinAttr) error { @@ -892,11 +1102,13 @@ func ObjPin(attr *ObjPinAttr) error { } type ProgAttachAttr struct { - TargetFd uint32 - AttachBpfFd uint32 - AttachType uint32 - AttachFlags uint32 - ReplaceBpfFd uint32 + TargetFdOrIfindex uint32 + AttachBpfFd uint32 + AttachType uint32 + AttachFlags uint32 + ReplaceBpfFd uint32 + RelativeFdOrId uint32 + ExpectedRevision uint64 } func ProgAttach(attr *ProgAttachAttr) error { @@ -916,9 +1128,13 @@ func ProgBindMap(attr *ProgBindMapAttr) error { } type ProgDetachAttr struct { - TargetFd uint32 - AttachBpfFd uint32 
- AttachType uint32 + TargetFdOrIfindex uint32 + AttachBpfFd uint32 + AttachType uint32 + AttachFlags uint32 + _ [4]byte + RelativeFdOrId uint32 + ExpectedRevision uint64 } func ProgDetach(attr *ProgDetachAttr) error { @@ -951,7 +1167,7 @@ type ProgLoadAttr struct { InsnCnt uint32 Insns Pointer License Pointer - LogLevel uint32 + LogLevel LogLevel LogSize uint32 LogBuf Pointer KernVersion uint32 @@ -966,13 +1182,13 @@ type ProgLoadAttr struct { LineInfoRecSize uint32 LineInfo Pointer LineInfoCnt uint32 - AttachBtfId uint32 - AttachProgFd uint32 + AttachBtfId TypeID + AttachBtfObjFd uint32 CoreReloCnt uint32 FdArray Pointer CoreRelos Pointer CoreReloRecSize uint32 - _ [4]byte + LogTrueSize uint32 } func ProgLoad(attr *ProgLoadAttr) (*FD, error) { @@ -983,6 +1199,25 @@ func ProgLoad(attr *ProgLoadAttr) (*FD, error) { return NewFD(int(fd)) } +type ProgQueryAttr struct { + TargetFdOrIfindex uint32 + AttachType AttachType + QueryFlags uint32 + AttachFlags uint32 + ProgIds Pointer + Count uint32 + _ [4]byte + ProgAttachFlags Pointer + LinkIds Pointer + LinkAttachFlags Pointer + Revision uint64 +} + +func ProgQuery(attr *ProgQueryAttr) error { + _, err := BPF(BPF_PROG_QUERY, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + type ProgRunAttr struct { ProgFd uint32 Retval uint32 @@ -1022,31 +1257,126 @@ func RawTracepointOpen(attr *RawTracepointOpenAttr) (*FD, error) { } type CgroupLinkInfo struct { + Type LinkType + Id LinkID + ProgId uint32 + _ [4]byte CgroupId uint64 AttachType AttachType - _ [4]byte + _ [28]byte } type IterLinkInfo struct { + Type LinkType + Id LinkID + ProgId uint32 + _ [4]byte TargetName Pointer TargetNameLen uint32 } +type KprobeLinkInfo struct { + Type LinkType + Id LinkID + ProgId uint32 + _ [4]byte + PerfEventType PerfEventType + _ [4]byte + FuncName Pointer + NameLen uint32 + Offset uint32 + Addr uint64 + Missed uint64 +} + +type KprobeMultiLinkInfo struct { + Type LinkType + Id LinkID + ProgId uint32 + _ [4]byte + Addrs Pointer + Count uint32 + Flags uint32 + Missed uint64 + _ [16]byte +} + type NetNsLinkInfo struct { + Type LinkType + Id LinkID + ProgId uint32 + _ [4]byte NetnsIno uint32 AttachType AttachType + _ [32]byte +} + +type NetfilterLinkInfo struct { + Type LinkType + Id LinkID + ProgId uint32 + _ [4]byte + Pf uint32 + Hooknum uint32 + Priority int32 + Flags uint32 + _ [24]byte +} + +type NetkitLinkInfo struct { + Type LinkType + Id LinkID + ProgId uint32 + _ [4]byte + Ifindex uint32 + AttachType AttachType + _ [32]byte +} + +type PerfEventLinkInfo struct { + Type LinkType + Id LinkID + ProgId uint32 + _ [4]byte + PerfEventType PerfEventType } type RawTracepointLinkInfo struct { + Type LinkType + Id LinkID + ProgId uint32 + _ [4]byte TpName Pointer TpNameLen uint32 - _ [4]byte + _ [28]byte +} + +type TcxLinkInfo struct { + Type LinkType + Id LinkID + ProgId uint32 + _ [4]byte + Ifindex uint32 + AttachType AttachType + _ [32]byte } type TracingLinkInfo struct { + Type LinkType + Id LinkID + ProgId uint32 + _ [4]byte AttachType AttachType TargetObjId uint32 - TargetBtfId uint32 + TargetBtfId TypeID + _ [28]byte } -type XDPLinkInfo struct{ Ifindex uint32 } +type XDPLinkInfo struct { + Type LinkType + Id LinkID + ProgId uint32 + _ [4]byte + Ifindex uint32 + _ [36]byte +} diff --git a/vendor/github.com/cilium/ebpf/internal/sysenc/buffer.go b/vendor/github.com/cilium/ebpf/internal/sysenc/buffer.go new file mode 100644 index 0000000000..d184ea196a --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/sysenc/buffer.go @@ -0,0 +1,83 @@ +package sysenc 
+ +import ( + "unsafe" + + "github.com/cilium/ebpf/internal/sys" +) + +type Buffer struct { + ptr unsafe.Pointer + // Size of the buffer. syscallPointerOnly if created from UnsafeBuffer or when using + // zero-copy unmarshaling. + size int +} + +const syscallPointerOnly = -1 + +func newBuffer(buf []byte) Buffer { + if len(buf) == 0 { + return Buffer{} + } + return Buffer{unsafe.Pointer(&buf[0]), len(buf)} +} + +// UnsafeBuffer constructs a Buffer for zero-copy unmarshaling. +// +// [Pointer] is the only valid method to call on such a Buffer. +// Use [SyscallBuffer] instead if possible. +func UnsafeBuffer(ptr unsafe.Pointer) Buffer { + return Buffer{ptr, syscallPointerOnly} +} + +// SyscallOutput prepares a Buffer for a syscall to write into. +// +// size is the length of the desired buffer in bytes. +// The buffer may point at the underlying memory of dst, in which case [Unmarshal] +// becomes a no-op. +// +// The contents of the buffer are undefined and may be non-zero. +func SyscallOutput(dst any, size int) Buffer { + if dstBuf := unsafeBackingMemory(dst); len(dstBuf) == size { + buf := newBuffer(dstBuf) + buf.size = syscallPointerOnly + return buf + } + + return newBuffer(make([]byte, size)) +} + +// CopyTo copies the buffer into dst. +// +// Returns the number of copied bytes. +func (b Buffer) CopyTo(dst []byte) int { + return copy(dst, b.unsafeBytes()) +} + +// AppendTo appends the buffer onto dst. +func (b Buffer) AppendTo(dst []byte) []byte { + return append(dst, b.unsafeBytes()...) +} + +// Pointer returns the location where a syscall should write. +func (b Buffer) Pointer() sys.Pointer { + // NB: This deliberately ignores b.length to support zero-copy + // marshaling / unmarshaling using unsafe.Pointer. + return sys.NewPointer(b.ptr) +} + +// Unmarshal the buffer into the provided value. +func (b Buffer) Unmarshal(data any) error { + if b.size == syscallPointerOnly { + return nil + } + + return Unmarshal(data, b.unsafeBytes()) +} + +func (b Buffer) unsafeBytes() []byte { + if b.size == syscallPointerOnly { + return nil + } + return unsafe.Slice((*byte)(b.ptr), b.size) +} diff --git a/vendor/github.com/cilium/ebpf/internal/sysenc/doc.go b/vendor/github.com/cilium/ebpf/internal/sysenc/doc.go new file mode 100644 index 0000000000..676ad98ba1 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/sysenc/doc.go @@ -0,0 +1,3 @@ +// Package sysenc provides efficient conversion of Go values to system +// call interfaces. +package sysenc diff --git a/vendor/github.com/cilium/ebpf/internal/sysenc/layout.go b/vendor/github.com/cilium/ebpf/internal/sysenc/layout.go new file mode 100644 index 0000000000..52d111e7af --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/sysenc/layout.go @@ -0,0 +1,41 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found at https://go.dev/LICENSE. + +package sysenc + +import ( + "reflect" + "sync" +) + +var hasUnexportedFieldsCache sync.Map // map[reflect.Type]bool + +func hasUnexportedFields(typ reflect.Type) bool { + switch typ.Kind() { + case reflect.Slice, reflect.Array, reflect.Pointer: + return hasUnexportedFields(typ.Elem()) + + case reflect.Struct: + if unexported, ok := hasUnexportedFieldsCache.Load(typ); ok { + return unexported.(bool) + } + + unexported := false + for i, n := 0, typ.NumField(); i < n; i++ { + field := typ.Field(i) + // Package binary allows _ fields but always writes zeroes into them. 
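+			// Any other unexported field, or a nested type that contains one,
+			// makes the struct unsafe for zero-copy access.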
+ if (!field.IsExported() && field.Name != "_") || hasUnexportedFields(field.Type) { + unexported = true + break + } + } + + hasUnexportedFieldsCache.Store(typ, unexported) + return unexported + + default: + // NB: It's not clear what this means for Chan and so on. + return false + } +} diff --git a/vendor/github.com/cilium/ebpf/internal/sysenc/marshal.go b/vendor/github.com/cilium/ebpf/internal/sysenc/marshal.go new file mode 100644 index 0000000000..0026af8f24 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/sysenc/marshal.go @@ -0,0 +1,177 @@ +package sysenc + +import ( + "bytes" + "encoding" + "encoding/binary" + "errors" + "fmt" + "reflect" + "slices" + "sync" + "unsafe" + + "github.com/cilium/ebpf/internal" +) + +// Marshal turns data into a byte slice using the system's native endianness. +// +// If possible, avoids allocations by directly using the backing memory +// of data. This means that the variable must not be modified for the lifetime +// of the returned [Buffer]. +// +// Returns an error if the data can't be turned into a byte slice according to +// the behaviour of [binary.Write]. +func Marshal(data any, size int) (Buffer, error) { + if data == nil { + return Buffer{}, errors.New("can't marshal a nil value") + } + + var buf []byte + var err error + switch value := data.(type) { + case encoding.BinaryMarshaler: + buf, err = value.MarshalBinary() + case string: + buf = unsafe.Slice(unsafe.StringData(value), len(value)) + case []byte: + buf = value + case int16: + buf = internal.NativeEndian.AppendUint16(make([]byte, 0, 2), uint16(value)) + case uint16: + buf = internal.NativeEndian.AppendUint16(make([]byte, 0, 2), value) + case int32: + buf = internal.NativeEndian.AppendUint32(make([]byte, 0, 4), uint32(value)) + case uint32: + buf = internal.NativeEndian.AppendUint32(make([]byte, 0, 4), value) + case int64: + buf = internal.NativeEndian.AppendUint64(make([]byte, 0, 8), uint64(value)) + case uint64: + buf = internal.NativeEndian.AppendUint64(make([]byte, 0, 8), value) + default: + if buf := unsafeBackingMemory(data); len(buf) == size { + return newBuffer(buf), nil + } + + wr := internal.NewBuffer(make([]byte, 0, size)) + defer internal.PutBuffer(wr) + + err = binary.Write(wr, internal.NativeEndian, value) + buf = wr.Bytes() + } + if err != nil { + return Buffer{}, err + } + + if len(buf) != size { + return Buffer{}, fmt.Errorf("%T doesn't marshal to %d bytes", data, size) + } + + return newBuffer(buf), nil +} + +var bytesReaderPool = sync.Pool{ + New: func() interface{} { + return new(bytes.Reader) + }, +} + +// Unmarshal a byte slice in the system's native endianness into data. +// +// Returns an error if buf can't be unmarshalled according to the behaviour +// of [binary.Read]. +func Unmarshal(data interface{}, buf []byte) error { + switch value := data.(type) { + case encoding.BinaryUnmarshaler: + return value.UnmarshalBinary(buf) + + case *string: + *value = string(buf) + return nil + + case *[]byte: + // Backwards compat: unmarshaling into a slice replaces the whole slice. 
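+		// For example, passing a *[]byte of length 0 for a 4-byte buf
+		// leaves it as a fresh 4-byte copy, independent of buf.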
+ *value = slices.Clone(buf) + return nil + + default: + if dataBuf := unsafeBackingMemory(data); len(dataBuf) == len(buf) { + copy(dataBuf, buf) + return nil + } + + rd := bytesReaderPool.Get().(*bytes.Reader) + defer bytesReaderPool.Put(rd) + + rd.Reset(buf) + + if err := binary.Read(rd, internal.NativeEndian, value); err != nil { + return err + } + + if rd.Len() != 0 { + return fmt.Errorf("unmarshaling %T doesn't consume all data", data) + } + + return nil + } +} + +// unsafeBackingMemory returns the backing memory of data if it can be used +// instead of calling into package binary. +// +// Returns nil if the value is not a pointer or a slice, or if it contains +// padding or unexported fields. +func unsafeBackingMemory(data any) []byte { + if data == nil { + return nil + } + + value := reflect.ValueOf(data) + var valueSize int + switch value.Kind() { + case reflect.Pointer: + if value.IsNil() { + return nil + } + + if elemType := value.Type().Elem(); elemType.Kind() != reflect.Slice { + valueSize = int(elemType.Size()) + break + } + + // We're dealing with a pointer to a slice. Dereference and + // handle it like a regular slice. + value = value.Elem() + fallthrough + + case reflect.Slice: + valueSize = int(value.Type().Elem().Size()) * value.Len() + + default: + // Prevent Value.UnsafePointer from panicking. + return nil + } + + // Some nil pointer types currently crash binary.Size. Call it after our own + // code so that the panic isn't reachable. + // See https://github.com/golang/go/issues/60892 + if size := binary.Size(data); size == -1 || size != valueSize { + // The type contains padding or unsupported types. + return nil + } + + if hasUnexportedFields(reflect.TypeOf(data)) { + return nil + } + + // Reinterpret the pointer as a byte slice. This violates the unsafe.Pointer + // rules because it's very unlikely that the source data has "an equivalent + // memory layout". However, we can make it safe-ish because of the + // following reasons: + // - There is no alignment mismatch since we cast to a type with an + // alignment of 1. + // - There are no pointers in the source type so we don't upset the GC. + // - The length is verified at runtime. + return unsafe.Slice((*byte)(value.UnsafePointer()), valueSize) +} diff --git a/vendor/github.com/cilium/ebpf/internal/tracefs/kprobe.go b/vendor/github.com/cilium/ebpf/internal/tracefs/kprobe.go new file mode 100644 index 0000000000..897740fec0 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/tracefs/kprobe.go @@ -0,0 +1,360 @@ +package tracefs + +import ( + "crypto/rand" + "errors" + "fmt" + "os" + "path/filepath" + "runtime" + "strings" + "sync" + "syscall" + + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/unix" +) + +var ( + ErrInvalidInput = errors.New("invalid input") + + ErrInvalidMaxActive = errors.New("can only set maxactive on kretprobes") +) + +//go:generate go run golang.org/x/tools/cmd/stringer@latest -type=ProbeType -linecomment + +type ProbeType uint8 + +const ( + Kprobe ProbeType = iota // kprobe + Uprobe // uprobe +) + +func (pt ProbeType) eventsFile() (*os.File, error) { + path, err := sanitizeTracefsPath(fmt.Sprintf("%s_events", pt.String())) + if err != nil { + return nil, err + } + + return os.OpenFile(path, os.O_APPEND|os.O_WRONLY, 0666) +} + +type ProbeArgs struct { + Type ProbeType + Symbol, Group, Path string + Offset, RefCtrOffset, Cookie uint64 + Pid, RetprobeMaxActive int + Ret bool +} + +// RandomGroup generates a pseudorandom string for use as a tracefs group name. 
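+// For example, RandomGroup("ebpf") may yield "ebpf_2f8a914bd0c6e375"
+// (illustrative output; the suffix is 8 random bytes in hex).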
+// Returns an error when the output string would exceed 63 characters (kernel
+// limitation), when rand.Read() fails, or when prefix contains characters not
+// allowed by validIdentifier.
+func RandomGroup(prefix string) (string, error) {
+	if !validIdentifier(prefix) {
+		return "", fmt.Errorf("prefix '%s' must be alphanumeric or underscore: %w", prefix, ErrInvalidInput)
+	}
+
+	b := make([]byte, 8)
+	if _, err := rand.Read(b); err != nil {
+		return "", fmt.Errorf("reading random bytes: %w", err)
+	}
+
+	group := fmt.Sprintf("%s_%x", prefix, b)
+	if len(group) > 63 {
+		return "", fmt.Errorf("group name '%s' cannot be longer than 63 characters: %w", group, ErrInvalidInput)
+	}
+
+	return group, nil
+}
+
+// validIdentifier implements the equivalent of a regex match
+// against "^[a-zA-Z_][0-9a-zA-Z_]*$".
+//
+// Trace event groups, names and kernel symbols must adhere to this set
+// of characters. Non-empty, first character must not be a number, all
+// characters must be alphanumeric or underscore.
+func validIdentifier(s string) bool {
+	if len(s) < 1 {
+		return false
+	}
+	for i, c := range []byte(s) {
+		switch {
+		case c >= 'a' && c <= 'z':
+		case c >= 'A' && c <= 'Z':
+		case c == '_':
+		case i > 0 && c >= '0' && c <= '9':
+
+		default:
+			return false
+		}
+	}
+
+	return true
+}
+
+func sanitizeTracefsPath(path ...string) (string, error) {
+	base, err := getTracefsPath()
+	if err != nil {
+		return "", err
+	}
+	l := filepath.Join(path...)
+	p := filepath.Join(base, l)
+	if !strings.HasPrefix(p, base) {
+		return "", fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, ErrInvalidInput)
+	}
+	return p, nil
+}
+
+// getTracefsPath returns the path to the tracefs mount point.
+// Since kernel 4.1 tracefs should be mounted by default at /sys/kernel/tracing,
+// but may also be available at /sys/kernel/debug/tracing if debugfs is mounted.
+// The available tracefs paths will depend on distribution choices.
+var getTracefsPath = sync.OnceValues(func() (string, error) {
+	for _, p := range []struct {
+		path   string
+		fsType int64
+	}{
+		{"/sys/kernel/tracing", unix.TRACEFS_MAGIC},
+		{"/sys/kernel/debug/tracing", unix.TRACEFS_MAGIC},
+		// RHEL/CentOS
+		{"/sys/kernel/debug/tracing", unix.DEBUGFS_MAGIC},
+	} {
+		if fsType, err := internal.FSType(p.path); err == nil && fsType == p.fsType {
+			return p.path, nil
+		}
+	}
+
+	return "", errors.New("neither debugfs nor tracefs are mounted")
+})
+
+// sanitizeIdentifier replaces every invalid character for the tracefs API with an underscore.
+//
+// It is equivalent to calling regexp.MustCompile("[^a-zA-Z0-9]+").ReplaceAllString(s, "_").
+func sanitizeIdentifier(s string) string {
+	var skip bool
+	return strings.Map(func(c rune) rune {
+		switch {
+		case c >= 'a' && c <= 'z',
+			c >= 'A' && c <= 'Z',
+			c >= '0' && c <= '9':
+			skip = false
+			return c
+
+		case skip:
+			return -1
+
+		default:
+			skip = true
+			return '_'
+		}
+	}, s)
+}
+
+// EventID reads a trace event's ID from tracefs given its group and name.
+// The kernel requires group and name to be alphanumeric or underscore.
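+// For example, EventID("ebpf_2f8a914bd0c6e375", "p_do_sys_open") reads
+// <tracefs>/events/ebpf_2f8a914bd0c6e375/p_do_sys_open/id (names illustrative).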
+func EventID(group, name string) (uint64, error) { + if !validIdentifier(group) { + return 0, fmt.Errorf("invalid tracefs group: %q", group) + } + + if !validIdentifier(name) { + return 0, fmt.Errorf("invalid tracefs name: %q", name) + } + + path, err := sanitizeTracefsPath("events", group, name, "id") + if err != nil { + return 0, err + } + tid, err := internal.ReadUint64FromFile("%d\n", path) + if errors.Is(err, os.ErrNotExist) { + return 0, err + } + if err != nil { + return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err) + } + + return tid, nil +} + +func probePrefix(ret bool, maxActive int) string { + if ret { + if maxActive > 0 { + return fmt.Sprintf("r%d", maxActive) + } + return "r" + } + return "p" +} + +// Event represents an entry in a tracefs probe events file. +type Event struct { + typ ProbeType + group, name string + // event id allocated by the kernel. 0 if the event has already been removed. + id uint64 +} + +// NewEvent creates a new ephemeral trace event. +// +// Returns os.ErrNotExist if symbol is not a valid +// kernel symbol, or if it is not traceable with kprobes. Returns os.ErrExist +// if a probe with the same group and symbol already exists. Returns an error if +// args.RetprobeMaxActive is used on non kprobe types. Returns ErrNotSupported if +// the kernel is too old to support kretprobe maxactive. +func NewEvent(args ProbeArgs) (*Event, error) { + // Before attempting to create a trace event through tracefs, + // check if an event with the same group and name already exists. + // Kernels 4.x and earlier don't return os.ErrExist on writing a duplicate + // entry, so we need to rely on reads for detecting uniqueness. + eventName := sanitizeIdentifier(args.Symbol) + _, err := EventID(args.Group, eventName) + if err == nil { + return nil, fmt.Errorf("trace event %s/%s: %w", args.Group, eventName, os.ErrExist) + } + if err != nil && !errors.Is(err, os.ErrNotExist) { + return nil, fmt.Errorf("checking trace event %s/%s: %w", args.Group, eventName, err) + } + + // Open the kprobe_events file in tracefs. + f, err := args.Type.eventsFile() + if err != nil { + return nil, err + } + defer f.Close() + + var pe, token string + switch args.Type { + case Kprobe: + // The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt): + // p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe + // r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe + // -:[GRP/]EVENT : Clear a probe + // + // Some examples: + // r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy + // p:ebpf_5678/p_my_kprobe __x64_sys_execve + // + // Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the + // kernel default to NR_CPUS. This is desired in most eBPF cases since + // subsampling or rate limiting logic can be more accurately implemented in + // the eBPF program itself. + // See Documentation/kprobes.txt for more details. 
+ if args.RetprobeMaxActive != 0 && !args.Ret { + return nil, ErrInvalidMaxActive + } + token = KprobeToken(args) + pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.Ret, args.RetprobeMaxActive), args.Group, eventName, token) + case Uprobe: + // The uprobe_events syntax is as follows: + // p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a probe + // r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return probe + // -:[GRP/]EVENT : Clear a probe + // + // Some examples: + // r:ebpf_1234/readline /bin/bash:0x12345 + // p:ebpf_5678/main_mySymbol /bin/mybin:0x12345(0x123) + // + // See Documentation/trace/uprobetracer.txt for more details. + if args.RetprobeMaxActive != 0 { + return nil, ErrInvalidMaxActive + } + token = UprobeToken(args) + pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.Ret, 0), args.Group, eventName, token) + } + _, err = f.WriteString(pe) + + // Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL + // when trying to create a retprobe for a missing symbol. + if errors.Is(err, os.ErrNotExist) { + return nil, fmt.Errorf("token %s: not found: %w", token, err) + } + // Since commit ab105a4fb894, EILSEQ is returned when a kprobe sym+offset is resolved + // to an invalid insn boundary. The exact conditions that trigger this error are + // arch specific however. + if errors.Is(err, syscall.EILSEQ) { + return nil, fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist) + } + // ERANGE is returned when the `SYM[+offs]` token is too big and cannot + // be resolved. + if errors.Is(err, syscall.ERANGE) { + return nil, fmt.Errorf("token %s: offset too big: %w", token, os.ErrNotExist) + } + + if err != nil { + return nil, fmt.Errorf("token %s: writing '%s': %w", token, pe, err) + } + + // Get the newly-created trace event's id. + tid, err := EventID(args.Group, eventName) + if args.RetprobeMaxActive != 0 && errors.Is(err, os.ErrNotExist) { + // Kernels < 4.12 don't support maxactive and therefore auto generate + // group and event names from the symbol and offset. The symbol is used + // without any sanitization. + // See https://elixir.bootlin.com/linux/v4.10/source/kernel/trace/trace_kprobe.c#L712 + event := fmt.Sprintf("kprobes/r_%s_%d", args.Symbol, args.Offset) + if err := removeEvent(args.Type, event); err != nil { + return nil, fmt.Errorf("failed to remove spurious maxactive event: %s", err) + } + return nil, fmt.Errorf("create trace event with non-default maxactive: %w", internal.ErrNotSupported) + } + if err != nil { + return nil, fmt.Errorf("get trace event id: %w", err) + } + + evt := &Event{args.Type, args.Group, eventName, tid} + runtime.SetFinalizer(evt, (*Event).Close) + return evt, nil +} + +// Close removes the event from tracefs. +// +// Returns os.ErrClosed if the event has already been closed before. +func (evt *Event) Close() error { + if evt.id == 0 { + return os.ErrClosed + } + + evt.id = 0 + runtime.SetFinalizer(evt, nil) + pe := fmt.Sprintf("%s/%s", evt.group, evt.name) + return removeEvent(evt.typ, pe) +} + +func removeEvent(typ ProbeType, pe string) error { + f, err := typ.eventsFile() + if err != nil { + return err + } + defer f.Close() + + // See [k,u]probe_events syntax above. The probe type does not need to be specified + // for removals. + if _, err = f.WriteString("-:" + pe); err != nil { + return fmt.Errorf("remove event %q from %s: %w", pe, f.Name(), err) + } + + return nil +} + +// ID returns the tracefs ID associated with the event. 
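+// Within this module the ID is fed to a tracepoint perf event, as in
+// link's tracefsProbe (sketch):
+//
+//	evt, err := tracefs.NewEvent(args)
+//	fd, err := openTracepointPerfEvent(evt.ID(), args.Pid)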
+func (evt *Event) ID() uint64 { + return evt.id +} + +// Group returns the tracefs group used by the event. +func (evt *Event) Group() string { + return evt.group +} + +// KprobeToken creates the SYM[+offs] token for the tracefs api. +func KprobeToken(args ProbeArgs) string { + po := args.Symbol + + if args.Offset != 0 { + po += fmt.Sprintf("+%#x", args.Offset) + } + + return po +} diff --git a/vendor/github.com/cilium/ebpf/internal/tracefs/probetype_string.go b/vendor/github.com/cilium/ebpf/internal/tracefs/probetype_string.go new file mode 100644 index 0000000000..87cb0a059b --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/tracefs/probetype_string.go @@ -0,0 +1,24 @@ +// Code generated by "stringer -type=ProbeType -linecomment"; DO NOT EDIT. + +package tracefs + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[Kprobe-0] + _ = x[Uprobe-1] +} + +const _ProbeType_name = "kprobeuprobe" + +var _ProbeType_index = [...]uint8{0, 6, 12} + +func (i ProbeType) String() string { + if i >= ProbeType(len(_ProbeType_index)-1) { + return "ProbeType(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _ProbeType_name[_ProbeType_index[i]:_ProbeType_index[i+1]] +} diff --git a/vendor/github.com/cilium/ebpf/internal/tracefs/uprobe.go b/vendor/github.com/cilium/ebpf/internal/tracefs/uprobe.go new file mode 100644 index 0000000000..994f31260d --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/tracefs/uprobe.go @@ -0,0 +1,16 @@ +package tracefs + +import "fmt" + +// UprobeToken creates the PATH:OFFSET(REF_CTR_OFFSET) token for the tracefs api. +func UprobeToken(args ProbeArgs) string { + po := fmt.Sprintf("%s:%#x", args.Path, args.Offset) + + if args.RefCtrOffset != 0 { + // This is not documented in Documentation/trace/uprobetracer.txt. + // elixir.bootlin.com/linux/v5.15-rc7/source/kernel/trace/trace.c#L5564 + po += fmt.Sprintf("(%#x)", args.RefCtrOffset) + } + + return po +} diff --git a/vendor/github.com/cilium/ebpf/internal/unix/doc.go b/vendor/github.com/cilium/ebpf/internal/unix/doc.go new file mode 100644 index 0000000000..d168d36f18 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/unix/doc.go @@ -0,0 +1,11 @@ +// Package unix re-exports Linux specific parts of golang.org/x/sys/unix. +// +// It avoids breaking compilation on other OS by providing stubs as follows: +// - Invoking a function always returns an error. +// - Errnos have distinct, non-zero values. +// - Constants have distinct but meaningless values. +// - Types use the same names for members, but may or may not follow the +// Linux layout. +package unix + +// Note: please don't add any custom API to this package. Use internal/sys instead. 
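+//
+// For example, the non-Linux stub of EpollWait (defined in
+// types_other.go) just reports the unsupported platform:
+//
+//	func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
+//		return 0, errNonLinux
+//	}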
diff --git a/vendor/github.com/cilium/ebpf/internal/unix/types_linux.go b/vendor/github.com/cilium/ebpf/internal/unix/types_linux.go index db4a1f5bf9..d725cfaa39 100644 --- a/vendor/github.com/cilium/ebpf/internal/unix/types_linux.go +++ b/vendor/github.com/cilium/ebpf/internal/unix/types_linux.go @@ -1,5 +1,4 @@ //go:build linux -// +build linux package unix @@ -10,189 +9,184 @@ import ( ) const ( - ENOENT = linux.ENOENT - EEXIST = linux.EEXIST - EAGAIN = linux.EAGAIN - ENOSPC = linux.ENOSPC - EINVAL = linux.EINVAL - EPOLLIN = linux.EPOLLIN - EINTR = linux.EINTR - EPERM = linux.EPERM - ESRCH = linux.ESRCH - ENODEV = linux.ENODEV - EBADF = linux.EBADF - E2BIG = linux.E2BIG - EFAULT = linux.EFAULT - EACCES = linux.EACCES - // ENOTSUPP is not the same as ENOTSUP or EOPNOTSUP - ENOTSUPP = syscall.Errno(0x20c) - - BPF_F_NO_PREALLOC = linux.BPF_F_NO_PREALLOC - BPF_F_NUMA_NODE = linux.BPF_F_NUMA_NODE - BPF_F_RDONLY = linux.BPF_F_RDONLY - BPF_F_WRONLY = linux.BPF_F_WRONLY - BPF_F_RDONLY_PROG = linux.BPF_F_RDONLY_PROG - BPF_F_WRONLY_PROG = linux.BPF_F_WRONLY_PROG - BPF_F_SLEEPABLE = linux.BPF_F_SLEEPABLE - BPF_F_MMAPABLE = linux.BPF_F_MMAPABLE - BPF_F_INNER_MAP = linux.BPF_F_INNER_MAP - BPF_OBJ_NAME_LEN = linux.BPF_OBJ_NAME_LEN - BPF_TAG_SIZE = linux.BPF_TAG_SIZE - BPF_RINGBUF_BUSY_BIT = linux.BPF_RINGBUF_BUSY_BIT - BPF_RINGBUF_DISCARD_BIT = linux.BPF_RINGBUF_DISCARD_BIT - BPF_RINGBUF_HDR_SZ = linux.BPF_RINGBUF_HDR_SZ - SYS_BPF = linux.SYS_BPF - F_DUPFD_CLOEXEC = linux.F_DUPFD_CLOEXEC - EPOLL_CTL_ADD = linux.EPOLL_CTL_ADD - EPOLL_CLOEXEC = linux.EPOLL_CLOEXEC - O_CLOEXEC = linux.O_CLOEXEC - O_NONBLOCK = linux.O_NONBLOCK - PROT_READ = linux.PROT_READ - PROT_WRITE = linux.PROT_WRITE - MAP_SHARED = linux.MAP_SHARED - PERF_ATTR_SIZE_VER1 = linux.PERF_ATTR_SIZE_VER1 - PERF_TYPE_SOFTWARE = linux.PERF_TYPE_SOFTWARE - PERF_TYPE_TRACEPOINT = linux.PERF_TYPE_TRACEPOINT - PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT - PERF_EVENT_IOC_DISABLE = linux.PERF_EVENT_IOC_DISABLE - PERF_EVENT_IOC_ENABLE = linux.PERF_EVENT_IOC_ENABLE - PERF_EVENT_IOC_SET_BPF = linux.PERF_EVENT_IOC_SET_BPF - PerfBitWatermark = linux.PerfBitWatermark - PERF_SAMPLE_RAW = linux.PERF_SAMPLE_RAW - PERF_FLAG_FD_CLOEXEC = linux.PERF_FLAG_FD_CLOEXEC - RLIM_INFINITY = linux.RLIM_INFINITY - RLIMIT_MEMLOCK = linux.RLIMIT_MEMLOCK - BPF_STATS_RUN_TIME = linux.BPF_STATS_RUN_TIME - PERF_RECORD_LOST = linux.PERF_RECORD_LOST - PERF_RECORD_SAMPLE = linux.PERF_RECORD_SAMPLE - AT_FDCWD = linux.AT_FDCWD - RENAME_NOREPLACE = linux.RENAME_NOREPLACE - SO_ATTACH_BPF = linux.SO_ATTACH_BPF - SO_DETACH_BPF = linux.SO_DETACH_BPF - SOL_SOCKET = linux.SOL_SOCKET + ENOENT = linux.ENOENT + EEXIST = linux.EEXIST + EAGAIN = linux.EAGAIN + ENOSPC = linux.ENOSPC + EINVAL = linux.EINVAL + EPOLLIN = linux.EPOLLIN + EINTR = linux.EINTR + EPERM = linux.EPERM + ESRCH = linux.ESRCH + ENODEV = linux.ENODEV + EBADF = linux.EBADF + E2BIG = linux.E2BIG + EFAULT = linux.EFAULT + EACCES = linux.EACCES + EILSEQ = linux.EILSEQ + EOPNOTSUPP = linux.EOPNOTSUPP + ESTALE = linux.ESTALE ) -// Statfs_t is a wrapper -type Statfs_t = linux.Statfs_t +const ( + BPF_F_NO_PREALLOC = linux.BPF_F_NO_PREALLOC + BPF_F_NUMA_NODE = linux.BPF_F_NUMA_NODE + BPF_F_RDONLY = linux.BPF_F_RDONLY + BPF_F_WRONLY = linux.BPF_F_WRONLY + BPF_F_RDONLY_PROG = linux.BPF_F_RDONLY_PROG + BPF_F_WRONLY_PROG = linux.BPF_F_WRONLY_PROG + BPF_F_SLEEPABLE = linux.BPF_F_SLEEPABLE + BPF_F_XDP_HAS_FRAGS = linux.BPF_F_XDP_HAS_FRAGS + BPF_F_MMAPABLE = linux.BPF_F_MMAPABLE + BPF_F_INNER_MAP = linux.BPF_F_INNER_MAP + 
BPF_F_KPROBE_MULTI_RETURN = linux.BPF_F_KPROBE_MULTI_RETURN + BPF_F_UPROBE_MULTI_RETURN = linux.BPF_F_UPROBE_MULTI_RETURN + BPF_F_LOCK = linux.BPF_F_LOCK + BPF_OBJ_NAME_LEN = linux.BPF_OBJ_NAME_LEN + BPF_TAG_SIZE = linux.BPF_TAG_SIZE + BPF_RINGBUF_BUSY_BIT = linux.BPF_RINGBUF_BUSY_BIT + BPF_RINGBUF_DISCARD_BIT = linux.BPF_RINGBUF_DISCARD_BIT + BPF_RINGBUF_HDR_SZ = linux.BPF_RINGBUF_HDR_SZ + SYS_BPF = linux.SYS_BPF + F_DUPFD_CLOEXEC = linux.F_DUPFD_CLOEXEC + EPOLL_CTL_ADD = linux.EPOLL_CTL_ADD + EPOLL_CLOEXEC = linux.EPOLL_CLOEXEC + O_CLOEXEC = linux.O_CLOEXEC + O_NONBLOCK = linux.O_NONBLOCK + PROT_NONE = linux.PROT_NONE + PROT_READ = linux.PROT_READ + PROT_WRITE = linux.PROT_WRITE + MAP_ANON = linux.MAP_ANON + MAP_SHARED = linux.MAP_SHARED + MAP_PRIVATE = linux.MAP_PRIVATE + PERF_ATTR_SIZE_VER1 = linux.PERF_ATTR_SIZE_VER1 + PERF_TYPE_SOFTWARE = linux.PERF_TYPE_SOFTWARE + PERF_TYPE_TRACEPOINT = linux.PERF_TYPE_TRACEPOINT + PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT + PERF_EVENT_IOC_DISABLE = linux.PERF_EVENT_IOC_DISABLE + PERF_EVENT_IOC_ENABLE = linux.PERF_EVENT_IOC_ENABLE + PERF_EVENT_IOC_SET_BPF = linux.PERF_EVENT_IOC_SET_BPF + PerfBitWatermark = linux.PerfBitWatermark + PerfBitWriteBackward = linux.PerfBitWriteBackward + PERF_SAMPLE_RAW = linux.PERF_SAMPLE_RAW + PERF_FLAG_FD_CLOEXEC = linux.PERF_FLAG_FD_CLOEXEC + RLIM_INFINITY = linux.RLIM_INFINITY + RLIMIT_MEMLOCK = linux.RLIMIT_MEMLOCK + BPF_STATS_RUN_TIME = linux.BPF_STATS_RUN_TIME + PERF_RECORD_LOST = linux.PERF_RECORD_LOST + PERF_RECORD_SAMPLE = linux.PERF_RECORD_SAMPLE + AT_FDCWD = linux.AT_FDCWD + RENAME_NOREPLACE = linux.RENAME_NOREPLACE + SO_ATTACH_BPF = linux.SO_ATTACH_BPF + SO_DETACH_BPF = linux.SO_DETACH_BPF + SOL_SOCKET = linux.SOL_SOCKET + SIGPROF = linux.SIGPROF + SIG_BLOCK = linux.SIG_BLOCK + SIG_UNBLOCK = linux.SIG_UNBLOCK + EM_NONE = linux.EM_NONE + EM_BPF = linux.EM_BPF + BPF_FS_MAGIC = linux.BPF_FS_MAGIC + TRACEFS_MAGIC = linux.TRACEFS_MAGIC + DEBUGFS_MAGIC = linux.DEBUGFS_MAGIC + BPF_RB_NO_WAKEUP = linux.BPF_RB_NO_WAKEUP + BPF_RB_FORCE_WAKEUP = linux.BPF_RB_FORCE_WAKEUP +) +type Statfs_t = linux.Statfs_t type Stat_t = linux.Stat_t - -// Rlimit is a wrapper type Rlimit = linux.Rlimit +type Signal = linux.Signal +type Sigset_t = linux.Sigset_t +type PerfEventMmapPage = linux.PerfEventMmapPage +type EpollEvent = linux.EpollEvent +type PerfEventAttr = linux.PerfEventAttr +type Utsname = linux.Utsname +type CPUSet = linux.CPUSet -// Syscall is a wrapper func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) { return linux.Syscall(trap, a1, a2, a3) } -// FcntlInt is a wrapper +func PthreadSigmask(how int, set, oldset *Sigset_t) error { + return linux.PthreadSigmask(how, set, oldset) +} + func FcntlInt(fd uintptr, cmd, arg int) (int, error) { return linux.FcntlInt(fd, cmd, arg) } -// IoctlSetInt is a wrapper func IoctlSetInt(fd int, req uint, value int) error { return linux.IoctlSetInt(fd, req, value) } -// Statfs is a wrapper func Statfs(path string, buf *Statfs_t) (err error) { return linux.Statfs(path, buf) } -// Close is a wrapper func Close(fd int) (err error) { return linux.Close(fd) } -// EpollEvent is a wrapper -type EpollEvent = linux.EpollEvent - -// EpollWait is a wrapper func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) { return linux.EpollWait(epfd, events, msec) } -// EpollCtl is a wrapper func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) { return linux.EpollCtl(epfd, op, fd, event) } -// Eventfd is a wrapper func Eventfd(initval 
uint, flags int) (fd int, err error) { return linux.Eventfd(initval, flags) } -// Write is a wrapper func Write(fd int, p []byte) (n int, err error) { return linux.Write(fd, p) } -// EpollCreate1 is a wrapper func EpollCreate1(flag int) (fd int, err error) { return linux.EpollCreate1(flag) } -// PerfEventMmapPage is a wrapper -type PerfEventMmapPage linux.PerfEventMmapPage - -// SetNonblock is a wrapper func SetNonblock(fd int, nonblocking bool) (err error) { return linux.SetNonblock(fd, nonblocking) } -// Mmap is a wrapper func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) { return linux.Mmap(fd, offset, length, prot, flags) } -// Munmap is a wrapper func Munmap(b []byte) (err error) { return linux.Munmap(b) } -// PerfEventAttr is a wrapper -type PerfEventAttr = linux.PerfEventAttr - -// PerfEventOpen is a wrapper func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) { return linux.PerfEventOpen(attr, pid, cpu, groupFd, flags) } -// Utsname is a wrapper -type Utsname = linux.Utsname - -// Uname is a wrapper func Uname(buf *Utsname) (err error) { return linux.Uname(buf) } -// Getpid is a wrapper func Getpid() int { return linux.Getpid() } -// Gettid is a wrapper func Gettid() int { return linux.Gettid() } -// Tgkill is a wrapper func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) { return linux.Tgkill(tgid, tid, sig) } -// BytePtrFromString is a wrapper func BytePtrFromString(s string) (*byte, error) { return linux.BytePtrFromString(s) } -// ByteSliceToString is a wrapper func ByteSliceToString(s []byte) string { return linux.ByteSliceToString(s) } -// Renameat2 is a wrapper func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) error { return linux.Renameat2(olddirfd, oldpath, newdirfd, newpath, flags) } @@ -208,3 +202,15 @@ func Open(path string, mode int, perm uint32) (int, error) { func Fstat(fd int, stat *Stat_t) error { return linux.Fstat(fd, stat) } + +func SetsockoptInt(fd, level, opt, value int) error { + return linux.SetsockoptInt(fd, level, opt, value) +} + +func SchedSetaffinity(pid int, set *CPUSet) error { + return linux.SchedSetaffinity(pid, set) +} + +func SchedGetaffinity(pid int, set *CPUSet) error { + return linux.SchedGetaffinity(pid, set) +} diff --git a/vendor/github.com/cilium/ebpf/internal/unix/types_other.go b/vendor/github.com/cilium/ebpf/internal/unix/types_other.go index 133c267dbc..3ff8962716 100644 --- a/vendor/github.com/cilium/ebpf/internal/unix/types_other.go +++ b/vendor/github.com/cilium/ebpf/internal/unix/types_other.go @@ -1,5 +1,4 @@ //go:build !linux -// +build !linux package unix @@ -11,70 +10,92 @@ import ( var errNonLinux = fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH) +// Errnos are distinct and non-zero. 
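+// This keeps comparisons such as errors.Is(err, unix.ENOENT) meaningful
+// on non-Linux builds, where the real errno values are unavailable.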
const ( - ENOENT = syscall.ENOENT - EEXIST = syscall.EEXIST - EAGAIN = syscall.EAGAIN - ENOSPC = syscall.ENOSPC - EINVAL = syscall.EINVAL - EINTR = syscall.EINTR - EPERM = syscall.EPERM - ESRCH = syscall.ESRCH - ENODEV = syscall.ENODEV - EBADF = syscall.Errno(0) - E2BIG = syscall.Errno(0) - EFAULT = syscall.EFAULT - EACCES = syscall.Errno(0) - // ENOTSUPP is not the same as ENOTSUP or EOPNOTSUP - ENOTSUPP = syscall.Errno(0x20c) - - BPF_F_NO_PREALLOC = 0 - BPF_F_NUMA_NODE = 0 - BPF_F_RDONLY = 0 - BPF_F_WRONLY = 0 - BPF_F_RDONLY_PROG = 0 - BPF_F_WRONLY_PROG = 0 - BPF_F_SLEEPABLE = 0 - BPF_F_MMAPABLE = 0 - BPF_F_INNER_MAP = 0 - BPF_OBJ_NAME_LEN = 0x10 - BPF_TAG_SIZE = 0x8 - BPF_RINGBUF_BUSY_BIT = 0 - BPF_RINGBUF_DISCARD_BIT = 0 - BPF_RINGBUF_HDR_SZ = 0 - SYS_BPF = 321 - F_DUPFD_CLOEXEC = 0x406 - EPOLLIN = 0x1 - EPOLL_CTL_ADD = 0x1 - EPOLL_CLOEXEC = 0x80000 - O_CLOEXEC = 0x80000 - O_NONBLOCK = 0x800 - PROT_READ = 0x1 - PROT_WRITE = 0x2 - MAP_SHARED = 0x1 - PERF_ATTR_SIZE_VER1 = 0 - PERF_TYPE_SOFTWARE = 0x1 - PERF_TYPE_TRACEPOINT = 0 - PERF_COUNT_SW_BPF_OUTPUT = 0xa - PERF_EVENT_IOC_DISABLE = 0 - PERF_EVENT_IOC_ENABLE = 0 - PERF_EVENT_IOC_SET_BPF = 0 - PerfBitWatermark = 0x4000 - PERF_SAMPLE_RAW = 0x400 - PERF_FLAG_FD_CLOEXEC = 0x8 - RLIM_INFINITY = 0x7fffffffffffffff - RLIMIT_MEMLOCK = 8 - BPF_STATS_RUN_TIME = 0 - PERF_RECORD_LOST = 2 - PERF_RECORD_SAMPLE = 9 - AT_FDCWD = -0x2 - RENAME_NOREPLACE = 0x1 - SO_ATTACH_BPF = 0x32 - SO_DETACH_BPF = 0x1b - SOL_SOCKET = 0x1 + ENOENT syscall.Errno = iota + 1 + EEXIST + EAGAIN + ENOSPC + EINVAL + EINTR + EPERM + ESRCH + ENODEV + EBADF + E2BIG + EFAULT + EACCES + EILSEQ + EOPNOTSUPP + ESTALE +) + +// Constants are distinct to avoid breaking switch statements. +const ( + BPF_F_NO_PREALLOC = iota + BPF_F_NUMA_NODE + BPF_F_RDONLY + BPF_F_WRONLY + BPF_F_RDONLY_PROG + BPF_F_WRONLY_PROG + BPF_F_SLEEPABLE + BPF_F_MMAPABLE + BPF_F_INNER_MAP + BPF_F_KPROBE_MULTI_RETURN + BPF_F_UPROBE_MULTI_RETURN + BPF_F_XDP_HAS_FRAGS + BPF_OBJ_NAME_LEN + BPF_TAG_SIZE + BPF_RINGBUF_BUSY_BIT + BPF_RINGBUF_DISCARD_BIT + BPF_RINGBUF_HDR_SZ + SYS_BPF + F_DUPFD_CLOEXEC + EPOLLIN + EPOLL_CTL_ADD + EPOLL_CLOEXEC + O_CLOEXEC + O_NONBLOCK + PROT_NONE + PROT_READ + PROT_WRITE + MAP_ANON + MAP_SHARED + MAP_PRIVATE + PERF_ATTR_SIZE_VER1 + PERF_TYPE_SOFTWARE + PERF_TYPE_TRACEPOINT + PERF_COUNT_SW_BPF_OUTPUT + PERF_EVENT_IOC_DISABLE + PERF_EVENT_IOC_ENABLE + PERF_EVENT_IOC_SET_BPF + PerfBitWatermark + PerfBitWriteBackward + PERF_SAMPLE_RAW + PERF_FLAG_FD_CLOEXEC + RLIM_INFINITY + RLIMIT_MEMLOCK + BPF_STATS_RUN_TIME + PERF_RECORD_LOST + PERF_RECORD_SAMPLE + AT_FDCWD + RENAME_NOREPLACE + SO_ATTACH_BPF + SO_DETACH_BPF + SOL_SOCKET + SIGPROF + SIG_BLOCK + SIG_UNBLOCK + EM_NONE + EM_BPF + BPF_FS_MAGIC + TRACEFS_MAGIC + DEBUGFS_MAGIC + BPF_RB_NO_WAKEUP + BPF_RB_FORCE_WAKEUP + BPF_F_LOCK ) -// Statfs_t is a wrapper type Statfs_t struct { Type int64 Bsize int64 @@ -90,72 +111,81 @@ type Statfs_t struct { Spare [4]int64 } -type Stat_t struct{} +type Stat_t struct { + Dev uint64 + Ino uint64 + Nlink uint64 + Mode uint32 + Uid uint32 + Gid uint32 + _ int32 + Rdev uint64 + Size int64 + Blksize int64 + Blocks int64 +} -// Rlimit is a wrapper type Rlimit struct { Cur uint64 Max uint64 } -// Syscall is a wrapper +type Signal int + +type Sigset_t struct { + Val [4]uint64 +} + func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) { - return 0, 0, syscall.Errno(1) + return 0, 0, syscall.ENOTSUP +} + +func PthreadSigmask(how int, set, oldset *Sigset_t) error { + return errNonLinux } -// 
FcntlInt is a wrapper func FcntlInt(fd uintptr, cmd, arg int) (int, error) { return -1, errNonLinux } -// IoctlSetInt is a wrapper func IoctlSetInt(fd int, req uint, value int) error { return errNonLinux } -// Statfs is a wrapper func Statfs(path string, buf *Statfs_t) error { return errNonLinux } -// Close is a wrapper func Close(fd int) (err error) { return errNonLinux } -// EpollEvent is a wrapper type EpollEvent struct { Events uint32 Fd int32 Pad int32 } -// EpollWait is a wrapper func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) { return 0, errNonLinux } -// EpollCtl is a wrapper func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) { return errNonLinux } -// Eventfd is a wrapper func Eventfd(initval uint, flags int) (fd int, err error) { return 0, errNonLinux } -// Write is a wrapper func Write(fd int, p []byte) (n int, err error) { return 0, errNonLinux } -// EpollCreate1 is a wrapper func EpollCreate1(flag int) (fd int, err error) { return 0, errNonLinux } -// PerfEventMmapPage is a wrapper type PerfEventMmapPage struct { Version uint32 Compat_version uint32 @@ -182,22 +212,18 @@ type PerfEventMmapPage struct { Aux_size uint64 } -// SetNonblock is a wrapper func SetNonblock(fd int, nonblocking bool) (err error) { return errNonLinux } -// Mmap is a wrapper func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) { return []byte{}, errNonLinux } -// Munmap is a wrapper func Munmap(b []byte) (err error) { return errNonLinux } -// PerfEventAttr is a wrapper type PerfEventAttr struct { Type uint32 Size uint32 @@ -219,48 +245,39 @@ type PerfEventAttr struct { Sample_max_stack uint16 } -// PerfEventOpen is a wrapper func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) { return 0, errNonLinux } -// Utsname is a wrapper type Utsname struct { Release [65]byte Version [65]byte } -// Uname is a wrapper func Uname(buf *Utsname) (err error) { return errNonLinux } -// Getpid is a wrapper func Getpid() int { return -1 } -// Gettid is a wrapper func Gettid() int { return -1 } -// Tgkill is a wrapper func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) { return errNonLinux } -// BytePtrFromString is a wrapper func BytePtrFromString(s string) (*byte, error) { return nil, errNonLinux } -// ByteSliceToString is a wrapper func ByteSliceToString(s []byte) string { return "" } -// Renameat2 is a wrapper func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) error { return errNonLinux } @@ -276,3 +293,19 @@ func Open(path string, mode int, perm uint32) (int, error) { func Fstat(fd int, stat *Stat_t) error { return errNonLinux } + +func SetsockoptInt(fd, level, opt, value int) error { + return errNonLinux +} + +type CPUSet struct{} + +func (*CPUSet) Set(int) {} + +func SchedSetaffinity(pid int, set *CPUSet) error { + return errNonLinux +} + +func SchedGetaffinity(pid int, set *CPUSet) error { + return errNonLinux +} diff --git a/vendor/github.com/cilium/ebpf/internal/vdso.go b/vendor/github.com/cilium/ebpf/internal/vdso.go index ae4821de20..1049278554 100644 --- a/vendor/github.com/cilium/ebpf/internal/vdso.go +++ b/vendor/github.com/cilium/ebpf/internal/vdso.go @@ -19,13 +19,11 @@ var ( // vdsoVersion returns the LINUX_VERSION_CODE embedded in the vDSO library // linked into the current process image. func vdsoVersion() (uint32, error) { - // Read data from the auxiliary vector, which is normally passed directly - // to the process. 
Go does not expose that data, so we must read it from procfs. - // https://man7.org/linux/man-pages/man3/getauxval.3.html - av, err := os.Open("/proc/self/auxv") + av, err := newAuxvRuntimeReader() if err != nil { - return 0, fmt.Errorf("opening auxv: %w", err) + return 0, err } + defer av.Close() vdsoAddr, err := vdsoMemoryAddress(av) @@ -51,24 +49,19 @@ func vdsoVersion() (uint32, error) { // vdsoMemoryAddress returns the memory address of the vDSO library // linked into the current process image. r is an io.Reader into an auxv blob. -func vdsoMemoryAddress(r io.Reader) (uint64, error) { - const ( - _AT_NULL = 0 // End of vector - _AT_SYSINFO_EHDR = 33 // Offset to vDSO blob in process image - ) - +func vdsoMemoryAddress(r auxvPairReader) (uintptr, error) { // Loop through all tag/value pairs in auxv until we find `AT_SYSINFO_EHDR`, // the address of a page containing the virtual Dynamic Shared Object (vDSO). - aux := struct{ Tag, Val uint64 }{} for { - if err := binary.Read(r, NativeEndian, &aux); err != nil { - return 0, fmt.Errorf("reading auxv entry: %w", err) + tag, value, err := r.ReadAuxvPair() + if err != nil { + return 0, err } - switch aux.Tag { + switch tag { case _AT_SYSINFO_EHDR: - if aux.Val != 0 { - return aux.Val, nil + if value != 0 { + return uintptr(value), nil } return 0, fmt.Errorf("invalid vDSO address in auxv") // _AT_NULL is always the last tag/val pair in the aux vector @@ -117,7 +110,7 @@ func vdsoLinuxVersionCode(r io.ReaderAt) (uint32, error) { var name string if n.NameSize > 0 { // Read the note name, aligned to 4 bytes. - buf := make([]byte, Align(int(n.NameSize), 4)) + buf := make([]byte, Align(n.NameSize, 4)) if err := binary.Read(sr, hdr.ByteOrder, &buf); err != nil { return 0, fmt.Errorf("reading note name: %w", err) } @@ -139,7 +132,7 @@ func vdsoLinuxVersionCode(r io.ReaderAt) (uint32, error) { } // Discard the note descriptor if it exists but we're not interested in it. - if _, err := io.CopyN(io.Discard, sr, int64(Align(int(n.DescSize), 4))); err != nil { + if _, err := io.CopyN(io.Discard, sr, int64(Align(n.DescSize, 4))); err != nil { return 0, err } } diff --git a/vendor/github.com/cilium/ebpf/internal/version.go b/vendor/github.com/cilium/ebpf/internal/version.go index 370e01e444..acd4650af7 100644 --- a/vendor/github.com/cilium/ebpf/internal/version.go +++ b/vendor/github.com/cilium/ebpf/internal/version.go @@ -15,14 +15,6 @@ const ( MagicKernelVersion = 0xFFFFFFFE ) -var ( - kernelVersion = struct { - once sync.Once - version Version - err error - }{} -) - // A Version in the form Major.Minor.Patch. type Version [3]uint16 @@ -88,16 +80,9 @@ func (v Version) Kernel() uint32 { } // KernelVersion returns the version of the currently running kernel. -func KernelVersion() (Version, error) { - kernelVersion.once.Do(func() { - kernelVersion.version, kernelVersion.err = detectKernelVersion() - }) - - if kernelVersion.err != nil { - return Version{}, kernelVersion.err - } - return kernelVersion.version, nil -} +var KernelVersion = sync.OnceValues(func() (Version, error) { + return detectKernelVersion() +}) // detectKernelVersion returns the version of the running kernel. 
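+// The result is memoized by the KernelVersion wrapper above, e.g.:
+//
+//	v, err := KernelVersion() // first call runs the detection
+//	v, err = KernelVersion()  // later calls return the cached value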
func detectKernelVersion() (Version, error) { diff --git a/vendor/github.com/cilium/ebpf/link/anchor.go b/vendor/github.com/cilium/ebpf/link/anchor.go new file mode 100644 index 0000000000..1a3b5f7681 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/anchor.go @@ -0,0 +1,137 @@ +package link + +import ( + "fmt" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/internal/sys" +) + +const anchorFlags = sys.BPF_F_REPLACE | + sys.BPF_F_BEFORE | + sys.BPF_F_AFTER | + sys.BPF_F_ID | + sys.BPF_F_LINK_MPROG + +// Anchor is a reference to a link or program. +// +// It is used to describe where an attachment or detachment should take place +// for link types which support multiple attachment. +type Anchor interface { + // anchor returns an fd or ID and a set of flags. + // + // By default fdOrID is taken to reference a program, but BPF_F_LINK_MPROG + // changes this to refer to a link instead. + // + // BPF_F_BEFORE, BPF_F_AFTER, BPF_F_REPLACE modify where a link or program + // is attached. The default behaviour if none of these flags is specified + // matches BPF_F_AFTER. + anchor() (fdOrID, flags uint32, _ error) +} + +type firstAnchor struct{} + +func (firstAnchor) anchor() (fdOrID, flags uint32, _ error) { + return 0, sys.BPF_F_BEFORE, nil +} + +// Head is the position before all other programs or links. +func Head() Anchor { + return firstAnchor{} +} + +type lastAnchor struct{} + +func (lastAnchor) anchor() (fdOrID, flags uint32, _ error) { + return 0, sys.BPF_F_AFTER, nil +} + +// Tail is the position after all other programs or links. +func Tail() Anchor { + return lastAnchor{} +} + +// Before is the position just in front of target. +func BeforeLink(target Link) Anchor { + return anchor{target, sys.BPF_F_BEFORE} +} + +// After is the position just after target. +func AfterLink(target Link) Anchor { + return anchor{target, sys.BPF_F_AFTER} +} + +// Before is the position just in front of target. +func BeforeLinkByID(target ID) Anchor { + return anchor{target, sys.BPF_F_BEFORE} +} + +// After is the position just after target. +func AfterLinkByID(target ID) Anchor { + return anchor{target, sys.BPF_F_AFTER} +} + +// Before is the position just in front of target. +func BeforeProgram(target *ebpf.Program) Anchor { + return anchor{target, sys.BPF_F_BEFORE} +} + +// After is the position just after target. +func AfterProgram(target *ebpf.Program) Anchor { + return anchor{target, sys.BPF_F_AFTER} +} + +// Replace the target itself. +func ReplaceProgram(target *ebpf.Program) Anchor { + return anchor{target, sys.BPF_F_REPLACE} +} + +// Before is the position just in front of target. +func BeforeProgramByID(target ebpf.ProgramID) Anchor { + return anchor{target, sys.BPF_F_BEFORE} +} + +// After is the position just after target. +func AfterProgramByID(target ebpf.ProgramID) Anchor { + return anchor{target, sys.BPF_F_AFTER} +} + +// Replace the target itself. 
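+// For example (sketch; assumes a TCX attachment that accepts an Anchor,
+// with ifindex, prog and oldID supplied by the caller):
+//
+//	l, err := link.AttachTCX(link.TCXOptions{
+//		Interface: ifindex,
+//		Program:   prog,
+//		Attach:    ebpf.AttachTCXIngress,
+//		Anchor:    link.ReplaceProgramByID(oldID),
+//	})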
+func ReplaceProgramByID(target ebpf.ProgramID) Anchor { + return anchor{target, sys.BPF_F_REPLACE} +} + +type anchor struct { + target any + position uint32 +} + +func (ap anchor) anchor() (fdOrID, flags uint32, _ error) { + var typeFlag uint32 + switch target := ap.target.(type) { + case *ebpf.Program: + fd := target.FD() + if fd < 0 { + return 0, 0, sys.ErrClosedFd + } + fdOrID = uint32(fd) + typeFlag = 0 + case ebpf.ProgramID: + fdOrID = uint32(target) + typeFlag = sys.BPF_F_ID + case interface{ FD() int }: + fd := target.FD() + if fd < 0 { + return 0, 0, sys.ErrClosedFd + } + fdOrID = uint32(fd) + typeFlag = sys.BPF_F_LINK_MPROG + case ID: + fdOrID = uint32(target) + typeFlag = sys.BPF_F_LINK_MPROG | sys.BPF_F_ID + default: + return 0, 0, fmt.Errorf("invalid target %T", ap.target) + } + + return fdOrID, ap.position | typeFlag, nil +} diff --git a/vendor/github.com/cilium/ebpf/link/cgroup.go b/vendor/github.com/cilium/ebpf/link/cgroup.go index 003b0638e8..79f3d2b7f4 100644 --- a/vendor/github.com/cilium/ebpf/link/cgroup.go +++ b/vendor/github.com/cilium/ebpf/link/cgroup.go @@ -10,10 +10,15 @@ import ( type cgroupAttachFlags uint32 -// cgroup attach flags const ( + // Allow programs attached to sub-cgroups to override the verdict of this + // program. flagAllowOverride cgroupAttachFlags = 1 << iota + // Allow attaching multiple programs to the cgroup. Only works if the cgroup + // has zero or more programs attached using the Multi flag. Implies override. flagAllowMulti + // Set automatically by progAttachCgroup.Update(). Used for updating a + // specific given program attached in multi-mode. flagReplace ) @@ -27,29 +32,39 @@ type CgroupOptions struct { } // AttachCgroup links a BPF program to a cgroup. -func AttachCgroup(opts CgroupOptions) (Link, error) { +// +// If the running kernel doesn't support bpf_link, attempts to emulate its +// semantics using the legacy PROG_ATTACH mechanism. If bpf_link is not +// available, the returned [Link] will not support pinning to bpffs. +// +// If you need more control over attachment flags or the attachment mechanism +// used, look at [RawAttachProgram] and [AttachRawLink] instead. +func AttachCgroup(opts CgroupOptions) (cg Link, err error) { cgroup, err := os.Open(opts.Path) if err != nil { return nil, fmt.Errorf("can't open cgroup: %s", err) } - - clone, err := opts.Program.Clone() - if err != nil { + defer func() { + if _, ok := cg.(*progAttachCgroup); ok { + // Skip closing the cgroup handle if we return a valid progAttachCgroup, + // where the handle is retained to implement Update(). + return + } cgroup.Close() - return nil, err + }() + + cg, err = newLinkCgroup(cgroup, opts.Attach, opts.Program) + if err == nil { + return cg, nil } - var cg Link - cg, err = newLinkCgroup(cgroup, opts.Attach, clone) if errors.Is(err, ErrNotSupported) { - cg, err = newProgAttachCgroup(cgroup, opts.Attach, clone, flagAllowMulti) + cg, err = newProgAttachCgroup(cgroup, opts.Attach, opts.Program, flagAllowMulti) } if errors.Is(err, ErrNotSupported) { - cg, err = newProgAttachCgroup(cgroup, opts.Attach, clone, flagAllowOverride) + cg, err = newProgAttachCgroup(cgroup, opts.Attach, opts.Program, flagAllowOverride) } if err != nil { - cgroup.Close() - clone.Close() return nil, err } @@ -67,6 +82,8 @@ var _ Link = (*progAttachCgroup)(nil) func (cg *progAttachCgroup) isLink() {} +// newProgAttachCgroup attaches prog to cgroup using BPF_PROG_ATTACH. +// cgroup and prog are retained by [progAttachCgroup]. 
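+// Callers normally reach this fallback via AttachCgroup, e.g. (sketch;
+// the path and attach type are illustrative):
+//
+//	lnk, err := link.AttachCgroup(link.CgroupOptions{
+//		Path:    "/sys/fs/cgroup/mygroup",
+//		Attach:  ebpf.AttachCGroupInetEgress,
+//		Program: prog,
+//	})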
func newProgAttachCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Program, flags cgroupAttachFlags) (*progAttachCgroup, error) { if flags&flagAllowMulti > 0 { if err := haveProgAttachReplace(); err != nil { @@ -74,17 +91,24 @@ func newProgAttachCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Pro } } - err := RawAttachProgram(RawAttachProgramOptions{ + // Use a program handle that cannot be closed by the caller. + clone, err := prog.Clone() + if err != nil { + return nil, err + } + + err = RawAttachProgram(RawAttachProgramOptions{ Target: int(cgroup.Fd()), - Program: prog, + Program: clone, Flags: uint32(flags), Attach: attach, }) if err != nil { + clone.Close() return nil, fmt.Errorf("cgroup: %w", err) } - return &progAttachCgroup{cgroup, prog, attach, flags}, nil + return &progAttachCgroup{cgroup, clone, attach, flags}, nil } func (cg *progAttachCgroup) Close() error { @@ -119,8 +143,7 @@ func (cg *progAttachCgroup) Update(prog *ebpf.Program) error { // Atomically replacing multiple programs requires at least // 5.5 (commit 7dd68b3279f17921 "bpf: Support replacing cgroup-bpf // program in MULTI mode") - args.Flags |= uint32(flagReplace) - args.Replace = cg.current + args.Anchor = ReplaceProgram(cg.current) } if err := RawAttachProgram(args); err != nil { @@ -138,7 +161,7 @@ func (cg *progAttachCgroup) Pin(string) error { } func (cg *progAttachCgroup) Unpin() error { - return fmt.Errorf("can't pin cgroup: %w", ErrNotSupported) + return fmt.Errorf("can't unpin cgroup: %w", ErrNotSupported) } func (cg *progAttachCgroup) Info() (*Info, error) { @@ -151,6 +174,7 @@ type linkCgroup struct { var _ Link = (*linkCgroup)(nil) +// newLinkCgroup attaches prog to cgroup using BPF_LINK_CREATE. func newLinkCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Program) (*linkCgroup, error) { link, err := AttachRawLink(RawLinkOptions{ Target: int(cgroup.Fd()), diff --git a/vendor/github.com/cilium/ebpf/link/iter.go b/vendor/github.com/cilium/ebpf/link/iter.go index d2b32ef331..0a39faef88 100644 --- a/vendor/github.com/cilium/ebpf/link/iter.go +++ b/vendor/github.com/cilium/ebpf/link/iter.go @@ -25,10 +25,6 @@ type IterOptions struct { // AttachIter attaches a BPF seq_file iterator. 
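+// For example (sketch; prog must be an Iter program):
+//
+//	it, err := link.AttachIter(link.IterOptions{Program: prog})
+//	rd, err := it.Open()
+//	defer rd.Close()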
func AttachIter(opts IterOptions) (*Iter, error) { - if err := haveBPFLink(); err != nil { - return nil, err - } - progFd := opts.Program.FD() if progFd < 0 { return nil, fmt.Errorf("invalid program: %s", sys.ErrClosedFd) @@ -52,6 +48,9 @@ func AttachIter(opts IterOptions) (*Iter, error) { fd, err := sys.LinkCreateIter(&attr) if err != nil { + if haveFeatErr := haveBPFLink(); haveFeatErr != nil { + return nil, haveFeatErr + } return nil, fmt.Errorf("can't link iterator: %w", err) } diff --git a/vendor/github.com/cilium/ebpf/link/kprobe.go b/vendor/github.com/cilium/ebpf/link/kprobe.go index fdf622a0c0..b54ca90853 100644 --- a/vendor/github.com/cilium/ebpf/link/kprobe.go +++ b/vendor/github.com/cilium/ebpf/link/kprobe.go @@ -1,42 +1,20 @@ package link import ( - "bytes" - "crypto/rand" "errors" "fmt" "os" - "path/filepath" "runtime" "strings" - "sync" - "syscall" "unsafe" "github.com/cilium/ebpf" + "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/tracefs" "github.com/cilium/ebpf/internal/unix" ) -var ( - kprobeEventsPath = filepath.Join(tracefsPath, "kprobe_events") - - kprobeRetprobeBit = struct { - once sync.Once - value uint64 - err error - }{} -) - -type probeType uint8 - -type probeArgs struct { - symbol, group, path string - offset, refCtrOffset, cookie uint64 - pid int - ret bool -} - // KprobeOptions defines additional parameters that will be used // when loading Kprobes. type KprobeOptions struct { @@ -49,45 +27,23 @@ type KprobeOptions struct { // Can be used to insert kprobes at arbitrary offsets in kernel functions, // e.g. in places where functions have been inlined. Offset uint64 + // Increase the maximum number of concurrent invocations of a kretprobe. + // Required when tracing some long running functions in the kernel. + // + // Deprecated: this setting forces the use of an outdated kernel API and is not portable + // across kernel versions. + RetprobeMaxActive int + // Prefix used for the event name if the kprobe must be attached using tracefs. + // The group name will be formatted as `_`. + // The default empty string is equivalent to "ebpf" as the prefix. + TraceFSPrefix string } -const ( - kprobeType probeType = iota - uprobeType -) - -func (pt probeType) String() string { - if pt == kprobeType { - return "kprobe" - } - return "uprobe" -} - -func (pt probeType) EventsPath() string { - if pt == kprobeType { - return kprobeEventsPath - } - return uprobeEventsPath -} - -func (pt probeType) PerfEventType(ret bool) perfEventType { - if pt == kprobeType { - if ret { - return kretprobeEvent - } - return kprobeEvent - } - if ret { - return uretprobeEvent - } - return uprobeEvent -} - -func (pt probeType) RetprobeBit() (uint64, error) { - if pt == kprobeType { - return kretprobeBit() +func (ko *KprobeOptions) cookie() uint64 { + if ko == nil { + return 0 } - return uretprobeBit() + return ko.Cookie } // Kprobe attaches the given eBPF program to a perf event that fires when the @@ -99,13 +55,17 @@ func (pt probeType) RetprobeBit() (uint64, error) { // Losing the reference to the resulting Link (kp) will close the Kprobe // and prevent further execution of prog. The Link must be Closed during // program shutdown to avoid leaking system resources. +// +// If attaching to symbol fails, automatically retries with the running +// platform's syscall prefix (e.g. __x64_) to support attaching to syscalls +// in a portable fashion. 
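+// For example (sketch; prog must be a loaded Kprobe-type program):
+//
+//	kp, err := link.Kprobe("sys_execve", prog, nil)
+//	defer kp.Close()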
func Kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) { k, err := kprobe(symbol, prog, opts, false) if err != nil { return nil, err } - lnk, err := attachPerfEvent(k, prog) + lnk, err := attachPerfEvent(k, prog, opts.cookie()) if err != nil { k.Close() return nil, err @@ -123,13 +83,20 @@ func Kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error // Losing the reference to the resulting Link (kp) will close the Kretprobe // and prevent further execution of prog. The Link must be Closed during // program shutdown to avoid leaking system resources. +// +// If attaching to symbol fails, automatically retries with the running +// platform's syscall prefix (e.g. __x64_) to support attaching to syscalls +// in a portable fashion. +// +// On kernels 5.10 and earlier, setting a kretprobe on a nonexistent symbol +// incorrectly returns unix.EINVAL instead of os.ErrNotExist. func Kretprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) { k, err := kprobe(symbol, prog, opts, true) if err != nil { return nil, err } - lnk, err := attachPerfEvent(k, prog) + lnk, err := attachPerfEvent(k, prog, opts.cookie()) if err != nil { k.Close() return nil, err @@ -181,50 +148,51 @@ func kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions, ret bool) (* return nil, fmt.Errorf("eBPF program type %s is not a Kprobe: %w", prog.Type(), errInvalidInput) } - args := probeArgs{ - pid: perfAllThreads, - symbol: symbol, - ret: ret, + args := tracefs.ProbeArgs{ + Type: tracefs.Kprobe, + Pid: perfAllThreads, + Symbol: symbol, + Ret: ret, } if opts != nil { - args.cookie = opts.Cookie - args.offset = opts.Offset + args.RetprobeMaxActive = opts.RetprobeMaxActive + args.Cookie = opts.Cookie + args.Offset = opts.Offset + args.Group = opts.TraceFSPrefix } // Use kprobe PMU if the kernel has it available. - tp, err := pmuKprobe(args) - if errors.Is(err, os.ErrNotExist) { - args.symbol = platformPrefix(symbol) - tp, err = pmuKprobe(args) + tp, err := pmuProbe(args) + if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { + if prefix := internal.PlatformPrefix(); prefix != "" { + args.Symbol = prefix + symbol + tp, err = pmuProbe(args) + } } if err == nil { return tp, nil } if err != nil && !errors.Is(err, ErrNotSupported) { - return nil, fmt.Errorf("creating perf_kprobe PMU: %w", err) + return nil, fmt.Errorf("creating perf_kprobe PMU (arch-specific fallback for %q): %w", symbol, err) } // Use tracefs if kprobe PMU is missing. - args.symbol = symbol - tp, err = tracefsKprobe(args) - if errors.Is(err, os.ErrNotExist) { - args.symbol = platformPrefix(symbol) - tp, err = tracefsKprobe(args) + args.Symbol = symbol + tp, err = tracefsProbe(args) + if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { + if prefix := internal.PlatformPrefix(); prefix != "" { + args.Symbol = prefix + symbol + tp, err = tracefsProbe(args) + } } if err != nil { - return nil, fmt.Errorf("creating trace event '%s' in tracefs: %w", symbol, err) + return nil, fmt.Errorf("creating tracefs event (arch-specific fallback for %q): %w", symbol, err) } return tp, nil } -// pmuKprobe opens a perf event based on the kprobe PMU. -// Returns os.ErrNotExist if the given symbol does not exist in the kernel. -func pmuKprobe(args probeArgs) (*perfEvent, error) { - return pmuProbe(kprobeType, args) -} - // pmuProbe opens a perf event based on a Performance Monitoring Unit. // // Requires at least a 4.17 kernel. 
@@ -232,17 +200,25 @@ func pmuKprobe(args probeArgs) (*perfEvent, error) { // 33ea4b24277b "perf/core: Implement the 'perf_uprobe' PMU" // // Returns ErrNotSupported if the kernel doesn't support perf_[k,u]probe PMU -func pmuProbe(typ probeType, args probeArgs) (*perfEvent, error) { +func pmuProbe(args tracefs.ProbeArgs) (*perfEvent, error) { // Getting the PMU type will fail if the kernel doesn't support // the perf_[k,u]probe PMU. - et, err := getPMUEventType(typ) + eventType, err := internal.ReadUint64FromFileOnce("%d\n", "/sys/bus/event_source/devices", args.Type.String(), "type") + if errors.Is(err, os.ErrNotExist) { + return nil, fmt.Errorf("%s: %w", args.Type, ErrNotSupported) + } if err != nil { return nil, err } + // Use tracefs if we want to set kretprobe's retprobeMaxActive. + if args.RetprobeMaxActive != 0 { + return nil, fmt.Errorf("pmu probe: non-zero retprobeMaxActive: %w", ErrNotSupported) + } + var config uint64 - if args.ret { - bit, err := typ.RetprobeBit() + if args.Ret { + bit, err := internal.ReadUint64FromFileOnce("config:%d\n", "/sys/bus/event_source/devices", args.Type.String(), "/format/retprobe") if err != nil { return nil, err } @@ -250,75 +226,81 @@ func pmuProbe(typ probeType, args probeArgs) (*perfEvent, error) { } var ( - attr unix.PerfEventAttr - sp unsafe.Pointer + attr unix.PerfEventAttr + sp unsafe.Pointer + token string ) - switch typ { - case kprobeType: + switch args.Type { + case tracefs.Kprobe: // Create a pointer to a NUL-terminated string for the kernel. - sp, err = unsafeStringPtr(args.symbol) + sp, err = unsafeStringPtr(args.Symbol) if err != nil { return nil, err } + token = tracefs.KprobeToken(args) + attr = unix.PerfEventAttr{ // The minimum size required for PMU kprobes is PERF_ATTR_SIZE_VER1, // since it added the config2 (Ext2) field. Use Ext2 as probe_offset. Size: unix.PERF_ATTR_SIZE_VER1, - Type: uint32(et), // PMU event type read from sysfs + Type: uint32(eventType), // PMU event type read from sysfs Ext1: uint64(uintptr(sp)), // Kernel symbol to trace - Ext2: args.offset, // Kernel symbol offset + Ext2: args.Offset, // Kernel symbol offset Config: config, // Retprobe flag } - case uprobeType: - sp, err = unsafeStringPtr(args.path) + case tracefs.Uprobe: + sp, err = unsafeStringPtr(args.Path) if err != nil { return nil, err } - if args.refCtrOffset != 0 { - config |= args.refCtrOffset << uprobeRefCtrOffsetShift + if args.RefCtrOffset != 0 { + config |= args.RefCtrOffset << uprobeRefCtrOffsetShift } + token = tracefs.UprobeToken(args) + attr = unix.PerfEventAttr{ // The minimum size required for PMU uprobes is PERF_ATTR_SIZE_VER1, // since it added the config2 (Ext2) field. The Size field controls the // size of the internal buffer the kernel allocates for reading the // perf_event_attr argument from userspace. Size: unix.PERF_ATTR_SIZE_VER1, - Type: uint32(et), // PMU event type read from sysfs + Type: uint32(eventType), // PMU event type read from sysfs Ext1: uint64(uintptr(sp)), // Uprobe path - Ext2: args.offset, // Uprobe offset + Ext2: args.Offset, // Uprobe offset Config: config, // RefCtrOffset, Retprobe flag } } - rawFd, err := unix.PerfEventOpen(&attr, args.pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC) + rawFd, err := unix.PerfEventOpen(&attr, args.Pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC) // On some old kernels, kprobe PMU doesn't allow `.` in symbol names and // return -EINVAL. Return ErrNotSupported to allow falling back to tracefs. 
// https://github.com/torvalds/linux/blob/94710cac0ef4/kernel/trace/trace_kprobe.c#L340-L343 - if errors.Is(err, unix.EINVAL) && strings.Contains(args.symbol, ".") { - return nil, fmt.Errorf("symbol '%s+%#x': older kernels don't accept dots: %w", args.symbol, args.offset, ErrNotSupported) + if errors.Is(err, unix.EINVAL) && strings.Contains(args.Symbol, ".") { + return nil, fmt.Errorf("token %s: older kernels don't accept dots: %w", token, ErrNotSupported) } // Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL - // when trying to create a kretprobe for a missing symbol. Make sure ENOENT - // is returned to the caller. - if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { - return nil, fmt.Errorf("symbol '%s+%#x' not found: %w", args.symbol, args.offset, os.ErrNotExist) + // when trying to create a retprobe for a missing symbol. + if errors.Is(err, os.ErrNotExist) { + return nil, fmt.Errorf("token %s: not found: %w", token, err) } - // Since commit ab105a4fb894, -EILSEQ is returned when a kprobe sym+offset is resolved - // to an invalid insn boundary. - if errors.Is(err, syscall.EILSEQ) { - return nil, fmt.Errorf("symbol '%s+%#x' not found (bad insn boundary): %w", args.symbol, args.offset, os.ErrNotExist) + // Since commit ab105a4fb894, EILSEQ is returned when a kprobe sym+offset is resolved + // to an invalid insn boundary. The exact conditions that trigger this error are + // arch specific however. + if errors.Is(err, unix.EILSEQ) { + return nil, fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist) } // Since at least commit cb9a19fe4aa51, ENOTSUPP is returned // when attempting to set a uprobe on a trap instruction. - if errors.Is(err, unix.ENOTSUPP) { - return nil, fmt.Errorf("failed setting uprobe on offset %#x (possible trap insn): %w", args.offset, err) + if errors.Is(err, sys.ENOTSUPP) { + return nil, fmt.Errorf("token %s: failed setting uprobe on offset %#x (possible trap insn): %w", token, args.Offset, err) } + if err != nil { - return nil, fmt.Errorf("opening perf event: %w", err) + return nil, fmt.Errorf("token %s: opening perf event: %w", token, err) } // Ensure the string pointer is not collected before PerfEventOpen returns. @@ -330,18 +312,7 @@ func pmuProbe(typ probeType, args probeArgs) (*perfEvent, error) { } // Kernel has perf_[k,u]probe PMU available, initialize perf event. - return &perfEvent{ - typ: typ.PerfEventType(args.ret), - name: args.symbol, - pmuID: et, - cookie: args.cookie, - fd: fd, - }, nil -} - -// tracefsKprobe creates a Kprobe tracefs entry. -func tracefsKprobe(args probeArgs) (*perfEvent, error) { - return tracefsProbe(kprobeType, args) + return newPerfEvent(fd, nil), nil } // tracefsProbe creates a trace event by writing an entry to /[k,u]probe_events. @@ -350,219 +321,37 @@ func tracefsKprobe(args probeArgs) (*perfEvent, error) { // Path and offset are only set in the case of uprobe(s) and are used to set // the executable/library path on the filesystem and the offset where the probe is inserted. // A perf event is then opened on the newly-created trace event and returned to the caller. -func tracefsProbe(typ probeType, args probeArgs) (_ *perfEvent, err error) { +func tracefsProbe(args tracefs.ProbeArgs) (*perfEvent, error) { + groupPrefix := "ebpf" + if args.Group != "" { + groupPrefix = args.Group + } + // Generate a random string for each trace event we attempt to create. 
// This value is used as the 'group' token in tracefs to allow creating // multiple kprobe trace events with the same name. - group, err := randomGroup("ebpf") + group, err := tracefs.RandomGroup(groupPrefix) if err != nil { return nil, fmt.Errorf("randomizing group name: %w", err) } - args.group = group - - // Before attempting to create a trace event through tracefs, - // check if an event with the same group and name already exists. - // Kernels 4.x and earlier don't return os.ErrExist on writing a duplicate - // entry, so we need to rely on reads for detecting uniqueness. - _, err = getTraceEventID(group, args.symbol) - if err == nil { - return nil, fmt.Errorf("trace event already exists: %s/%s", group, args.symbol) - } - if err != nil && !errors.Is(err, os.ErrNotExist) { - return nil, fmt.Errorf("checking trace event %s/%s: %w", group, args.symbol, err) - } + args.Group = group // Create the [k,u]probe trace event using tracefs. - if err := createTraceFSProbeEvent(typ, args); err != nil { - return nil, fmt.Errorf("creating probe entry on tracefs: %w", err) - } - defer func() { - if err != nil { - // Make sure we clean up the created tracefs event when we return error. - // If a livepatch handler is already active on the symbol, the write to - // tracefs will succeed, a trace event will show up, but creating the - // perf event will fail with EBUSY. - _ = closeTraceFSProbeEvent(typ, args.group, args.symbol) - } - }() - - // Get the newly-created trace event's id. - tid, err := getTraceEventID(group, args.symbol) + evt, err := tracefs.NewEvent(args) if err != nil { - return nil, fmt.Errorf("getting trace event id: %w", err) + return nil, fmt.Errorf("creating probe entry on tracefs: %w", err) } // Kprobes are ephemeral tracepoints and share the same perf event type. - fd, err := openTracepointPerfEvent(tid, args.pid) + fd, err := openTracepointPerfEvent(evt.ID(), args.Pid) if err != nil { + // Make sure we clean up the created tracefs event when we return error. + // If a livepatch handler is already active on the symbol, the write to + // tracefs will succeed, a trace event will show up, but creating the + // perf event will fail with EBUSY. + _ = evt.Close() return nil, err } - return &perfEvent{ - typ: typ.PerfEventType(args.ret), - group: group, - name: args.symbol, - tracefsID: tid, - cookie: args.cookie, - fd: fd, - }, nil -} - -// createTraceFSProbeEvent creates a new ephemeral trace event by writing to -// /[k,u]probe_events. Returns os.ErrNotExist if symbol is not a valid -// kernel symbol, or if it is not traceable with kprobes. Returns os.ErrExist -// if a probe with the same group and symbol already exists. -func createTraceFSProbeEvent(typ probeType, args probeArgs) error { - // Open the kprobe_events file in tracefs. - f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666) - if err != nil { - return fmt.Errorf("error opening '%s': %w", typ.EventsPath(), err) - } - defer f.Close() - - var pe, token string - switch typ { - case kprobeType: - // The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt): - // p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe - // r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe - // -:[GRP/]EVENT : Clear a probe - // - // Some examples: - // r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy - // p:ebpf_5678/p_my_kprobe __x64_sys_execve - // - // Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the - // kernel default to NR_CPUS. 
This is desired in most eBPF cases since - // subsampling or rate limiting logic can be more accurately implemented in - // the eBPF program itself. - // See Documentation/kprobes.txt for more details. - token = kprobeToken(args) - pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.ret), args.group, sanitizeSymbol(args.symbol), token) - case uprobeType: - // The uprobe_events syntax is as follows: - // p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a probe - // r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return probe - // -:[GRP/]EVENT : Clear a probe - // - // Some examples: - // r:ebpf_1234/readline /bin/bash:0x12345 - // p:ebpf_5678/main_mySymbol /bin/mybin:0x12345(0x123) - // - // See Documentation/trace/uprobetracer.txt for more details. - token = uprobeToken(args) - pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.ret), args.group, args.symbol, token) - } - _, err = f.WriteString(pe) - // Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL - // when trying to create a kretprobe for a missing symbol. Make sure ENOENT - // is returned to the caller. - // EINVAL is also returned on pre-5.2 kernels when the `SYM[+offs]` token - // is resolved to an invalid insn boundary. - if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { - return fmt.Errorf("token %s: %w", token, os.ErrNotExist) - } - // Since commit ab105a4fb894, -EILSEQ is returned when a kprobe sym+offset is resolved - // to an invalid insn boundary. - if errors.Is(err, syscall.EILSEQ) { - return fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist) - } - // ERANGE is returned when the `SYM[+offs]` token is too big and cannot - // be resolved. - if errors.Is(err, syscall.ERANGE) { - return fmt.Errorf("token %s: offset too big: %w", token, os.ErrNotExist) - } - if err != nil { - return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err) - } - - return nil -} - -// closeTraceFSProbeEvent removes the [k,u]probe with the given type, group and symbol -// from /[k,u]probe_events. -func closeTraceFSProbeEvent(typ probeType, group, symbol string) error { - f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666) - if err != nil { - return fmt.Errorf("error opening %s: %w", typ.EventsPath(), err) - } - defer f.Close() - - // See [k,u]probe_events syntax above. The probe type does not need to be specified - // for removals. - pe := fmt.Sprintf("-:%s/%s", group, sanitizeSymbol(symbol)) - if _, err = f.WriteString(pe); err != nil { - return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err) - } - - return nil -} - -// randomGroup generates a pseudorandom string for use as a tracefs group name. -// Returns an error when the output string would exceed 63 characters (kernel -// limitation), when rand.Read() fails or when prefix contains characters not -// allowed by isValidTraceID. 
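All of this removed bookkeeping now lives behind two calls in internal/tracefs. Condensed from the code earlier in this hunk (a sketch, error handling elided; the group value is illustrative):

// Equivalent of the removed randomGroup/createTraceFSProbeEvent/
// getTraceEventID sequence, using the new internal/tracefs API.
group, _ := tracefs.RandomGroup("ebpf") // e.g. "ebpf_3f9a1c20deadbeef"
args.Group = group

evt, _ := tracefs.NewEvent(args) // writes the [k,u]probe_events entry
fd, err := openTracepointPerfEvent(evt.ID(), args.Pid)
if err != nil {
	_ = evt.Close() // removes the tracefs entry again
}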
-func randomGroup(prefix string) (string, error) { - if !isValidTraceID(prefix) { - return "", fmt.Errorf("prefix '%s' must be alphanumeric or underscore: %w", prefix, errInvalidInput) - } - - b := make([]byte, 8) - if _, err := rand.Read(b); err != nil { - return "", fmt.Errorf("reading random bytes: %w", err) - } - - group := fmt.Sprintf("%s_%x", prefix, b) - if len(group) > 63 { - return "", fmt.Errorf("group name '%s' cannot be longer than 63 characters: %w", group, errInvalidInput) - } - - return group, nil -} - -func probePrefix(ret bool) string { - if ret { - return "r" - } - return "p" -} - -// determineRetprobeBit reads a Performance Monitoring Unit's retprobe bit -// from /sys/bus/event_source/devices//format/retprobe. -func determineRetprobeBit(typ probeType) (uint64, error) { - p := filepath.Join("/sys/bus/event_source/devices/", typ.String(), "/format/retprobe") - - data, err := os.ReadFile(p) - if err != nil { - return 0, err - } - - var rp uint64 - n, err := fmt.Sscanf(string(bytes.TrimSpace(data)), "config:%d", &rp) - if err != nil { - return 0, fmt.Errorf("parse retprobe bit: %w", err) - } - if n != 1 { - return 0, fmt.Errorf("parse retprobe bit: expected 1 item, got %d", n) - } - - return rp, nil -} - -func kretprobeBit() (uint64, error) { - kprobeRetprobeBit.once.Do(func() { - kprobeRetprobeBit.value, kprobeRetprobeBit.err = determineRetprobeBit(kprobeType) - }) - return kprobeRetprobeBit.value, kprobeRetprobeBit.err -} - -// kprobeToken creates the SYM[+offs] token for the tracefs api. -func kprobeToken(args probeArgs) string { - po := args.symbol - - if args.offset != 0 { - po += fmt.Sprintf("+%#x", args.offset) - } - - return po + return newPerfEvent(fd, evt), nil } diff --git a/vendor/github.com/cilium/ebpf/link/kprobe_multi.go b/vendor/github.com/cilium/ebpf/link/kprobe_multi.go new file mode 100644 index 0000000000..4d364d80eb --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/kprobe_multi.go @@ -0,0 +1,180 @@ +package link + +import ( + "errors" + "fmt" + "os" + "unsafe" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/asm" + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/unix" +) + +// KprobeMultiOptions defines additional parameters that will be used +// when opening a KprobeMulti Link. +type KprobeMultiOptions struct { + // Symbols takes a list of kernel symbol names to attach an ebpf program to. + // + // Mutually exclusive with Addresses. + Symbols []string + + // Addresses takes a list of kernel symbol addresses in case they can not + // be referred to by name. + // + // Note that only start addresses can be specified, since the fprobe API + // limits the attach point to the function entry or return. + // + // Mutually exclusive with Symbols. + Addresses []uintptr + + // Cookies specifies arbitrary values that can be fetched from an eBPF + // program via `bpf_get_attach_cookie()`. + // + // If set, its length should be equal to the length of Symbols or Addresses. + // Each Cookie is assigned to the Symbol or Address specified at the + // corresponding slice index. + Cookies []uint64 +} + +// KprobeMulti attaches the given eBPF program to the entry point of a given set +// of kernel symbols. +// +// The difference with Kprobe() is that multi-kprobe accomplishes this in a +// single system call, making it significantly faster than attaching many +// probes one at a time. +// +// Requires at least Linux 5.18. 
+func KprobeMulti(prog *ebpf.Program, opts KprobeMultiOptions) (Link, error) { + return kprobeMulti(prog, opts, 0) +} + +// KretprobeMulti attaches the given eBPF program to the return point of a given +// set of kernel symbols. +// +// The difference with Kretprobe() is that multi-kprobe accomplishes this in a +// single system call, making it significantly faster than attaching many +// probes one at a time. +// +// Requires at least Linux 5.18. +func KretprobeMulti(prog *ebpf.Program, opts KprobeMultiOptions) (Link, error) { + return kprobeMulti(prog, opts, unix.BPF_F_KPROBE_MULTI_RETURN) +} + +func kprobeMulti(prog *ebpf.Program, opts KprobeMultiOptions, flags uint32) (Link, error) { + if prog == nil { + return nil, errors.New("cannot attach a nil program") + } + + syms := uint32(len(opts.Symbols)) + addrs := uint32(len(opts.Addresses)) + cookies := uint32(len(opts.Cookies)) + + if syms == 0 && addrs == 0 { + return nil, fmt.Errorf("one of Symbols or Addresses is required: %w", errInvalidInput) + } + if syms != 0 && addrs != 0 { + return nil, fmt.Errorf("Symbols and Addresses are mutually exclusive: %w", errInvalidInput) + } + if cookies > 0 && cookies != syms && cookies != addrs { + return nil, fmt.Errorf("Cookies must be exactly Symbols or Addresses in length: %w", errInvalidInput) + } + + attr := &sys.LinkCreateKprobeMultiAttr{ + ProgFd: uint32(prog.FD()), + AttachType: sys.BPF_TRACE_KPROBE_MULTI, + KprobeMultiFlags: flags, + } + + switch { + case syms != 0: + attr.Count = syms + attr.Syms = sys.NewStringSlicePointer(opts.Symbols) + + case addrs != 0: + attr.Count = addrs + attr.Addrs = sys.NewPointer(unsafe.Pointer(&opts.Addresses[0])) + } + + if cookies != 0 { + attr.Cookies = sys.NewPointer(unsafe.Pointer(&opts.Cookies[0])) + } + + fd, err := sys.LinkCreateKprobeMulti(attr) + if errors.Is(err, unix.ESRCH) { + return nil, fmt.Errorf("couldn't find one or more symbols: %w", os.ErrNotExist) + } + if errors.Is(err, unix.EINVAL) { + return nil, fmt.Errorf("%w (missing kernel symbol or prog's AttachType not AttachTraceKprobeMulti?)", err) + } + + if err != nil { + if haveFeatErr := haveBPFLinkKprobeMulti(); haveFeatErr != nil { + return nil, haveFeatErr + } + return nil, err + } + + return &kprobeMultiLink{RawLink{fd, ""}}, nil +} + +type kprobeMultiLink struct { + RawLink +} + +var _ Link = (*kprobeMultiLink)(nil) + +func (kml *kprobeMultiLink) Update(prog *ebpf.Program) error { + return fmt.Errorf("update kprobe_multi: %w", ErrNotSupported) +} + +func (kml *kprobeMultiLink) Pin(string) error { + return fmt.Errorf("pin kprobe_multi: %w", ErrNotSupported) +} + +func (kml *kprobeMultiLink) Unpin() error { + return fmt.Errorf("unpin kprobe_multi: %w", ErrNotSupported) +} + +var haveBPFLinkKprobeMulti = internal.NewFeatureTest("bpf_link_kprobe_multi", "5.18", func() error { + prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ + Name: "probe_kpm_link", + Type: ebpf.Kprobe, + Instructions: asm.Instructions{ + asm.Mov.Imm(asm.R0, 0), + asm.Return(), + }, + AttachType: ebpf.AttachTraceKprobeMulti, + License: "MIT", + }) + if errors.Is(err, unix.E2BIG) { + // Kernel doesn't support AttachType field. 
+ return internal.ErrNotSupported + } + if err != nil { + return err + } + defer prog.Close() + + fd, err := sys.LinkCreateKprobeMulti(&sys.LinkCreateKprobeMultiAttr{ + ProgFd: uint32(prog.FD()), + AttachType: sys.BPF_TRACE_KPROBE_MULTI, + Count: 1, + Syms: sys.NewStringSlicePointer([]string{"vprintk"}), + }) + switch { + case errors.Is(err, unix.EINVAL): + return internal.ErrNotSupported + // If CONFIG_FPROBE isn't set. + case errors.Is(err, unix.EOPNOTSUPP): + return internal.ErrNotSupported + case err != nil: + return err + } + + fd.Close() + + return nil +}) diff --git a/vendor/github.com/cilium/ebpf/link/link.go b/vendor/github.com/cilium/ebpf/link/link.go index 067d0101aa..81428568f8 100644 --- a/vendor/github.com/cilium/ebpf/link/link.go +++ b/vendor/github.com/cilium/ebpf/link/link.go @@ -1,9 +1,9 @@ package link import ( - "bytes" - "encoding/binary" + "errors" "fmt" + "os" "github.com/cilium/ebpf" "github.com/cilium/ebpf/btf" @@ -46,6 +46,38 @@ type Link interface { isLink() } +// NewLinkFromFD creates a link from a raw fd. +// +// Deprecated: use [NewFromFD] instead. +func NewLinkFromFD(fd int) (Link, error) { + return NewFromFD(fd) +} + +// NewFromFD creates a link from a raw fd. +// +// You should not use fd after calling this function. +func NewFromFD(fd int) (Link, error) { + sysFD, err := sys.NewFD(fd) + if err != nil { + return nil, err + } + + return wrapRawLink(&RawLink{fd: sysFD}) +} + +// NewFromID returns the link associated with the given id. +// +// Returns ErrNotExist if there is no link with the given id. +func NewFromID(id ID) (Link, error) { + getFdAttr := &sys.LinkGetFdByIdAttr{Id: id} + fd, err := sys.LinkGetFdById(getFdAttr) + if err != nil { + return nil, fmt.Errorf("get link fd from ID %d: %w", id, err) + } + + return wrapRawLink(&RawLink{fd, ""}) +} + // LoadPinnedLink loads a link that was persisted into a bpffs. func LoadPinnedLink(fileName string, opts *ebpf.LoadPinOptions) (Link, error) { raw, err := loadPinnedRawLink(fileName, opts) @@ -59,10 +91,15 @@ func LoadPinnedLink(fileName string, opts *ebpf.LoadPinOptions) (Link, error) { // wrap a RawLink in a more specific type if possible. // // The function takes ownership of raw and closes it on error. 
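A sketch of how the multi-kprobe API and the new constructors above compose (symbol names are illustrative; assumes prog was loaded with attach type ebpf.AttachTraceKprobeMulti):

package sketch

import (
	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/link"
)

// multiAttach attaches one program to many symbols in a single syscall
// (Linux 5.18+).
func multiAttach(prog *ebpf.Program) (link.Link, error) {
	return link.KprobeMulti(prog, link.KprobeMultiOptions{
		Symbols: []string{"vfs_read", "vfs_write"},
		// One cookie per symbol, readable via bpf_get_attach_cookie().
		Cookies: []uint64{1, 2},
	})
}

// Elsewhere in the process, an existing link can be recovered by ID or
// raw fd using the constructors added above:
//
//	l, err := link.NewFromID(id)
//	l, err := link.NewFromFD(fd)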
-func wrapRawLink(raw *RawLink) (Link, error) { +func wrapRawLink(raw *RawLink) (_ Link, err error) { + defer func() { + if err != nil { + raw.Close() + } + }() + info, err := raw.Info() if err != nil { - raw.Close() return nil, err } @@ -77,6 +114,18 @@ func wrapRawLink(raw *RawLink) (Link, error) { return &Iter{*raw}, nil case NetNsType: return &NetNsLink{*raw}, nil + case KprobeMultiType: + return &kprobeMultiLink{*raw}, nil + case UprobeMultiType: + return &uprobeMultiLink{*raw}, nil + case PerfEventType: + return nil, fmt.Errorf("recovering perf event fd: %w", ErrNotSupported) + case TCXType: + return &tcxLink{*raw}, nil + case NetfilterType: + return &netfilterLink{*raw}, nil + case NetkitType: + return &netkitLink{*raw}, nil default: return raw, nil } @@ -107,10 +156,85 @@ type Info struct { extra interface{} } -type TracingInfo sys.TracingLinkInfo -type CgroupInfo sys.CgroupLinkInfo -type NetNsInfo sys.NetNsLinkInfo -type XDPInfo sys.XDPLinkInfo +type TracingInfo struct { + AttachType sys.AttachType + TargetObjId uint32 + TargetBtfId sys.TypeID +} + +type CgroupInfo struct { + CgroupId uint64 + AttachType sys.AttachType + _ [4]byte +} + +type NetNsInfo struct { + NetnsIno uint32 + AttachType sys.AttachType +} + +type TCXInfo struct { + Ifindex uint32 + AttachType sys.AttachType +} + +type XDPInfo struct { + Ifindex uint32 +} + +type NetfilterInfo struct { + Pf uint32 + Hooknum uint32 + Priority int32 + Flags uint32 +} + +type NetkitInfo struct { + Ifindex uint32 + AttachType sys.AttachType +} + +type KprobeMultiInfo struct { + count uint32 + flags uint32 + missed uint64 +} + +// AddressCount is the number of addresses hooked by the kprobe. +func (kpm *KprobeMultiInfo) AddressCount() (uint32, bool) { + return kpm.count, kpm.count > 0 +} + +func (kpm *KprobeMultiInfo) Flags() (uint32, bool) { + return kpm.flags, kpm.count > 0 +} + +func (kpm *KprobeMultiInfo) Missed() (uint64, bool) { + return kpm.missed, kpm.count > 0 +} + +type PerfEventInfo struct { + Type sys.PerfEventType + extra interface{} +} + +func (r *PerfEventInfo) Kprobe() *KprobeInfo { + e, _ := r.extra.(*KprobeInfo) + return e +} + +type KprobeInfo struct { + address uint64 + missed uint64 +} + +func (kp *KprobeInfo) Address() (uint64, bool) { + return kp.address, kp.address > 0 +} + +func (kp *KprobeInfo) Missed() (uint64, bool) { + return kp.missed, kp.address > 0 +} // Tracing returns tracing type-specific link info. // @@ -136,7 +260,7 @@ func (r Info) NetNs() *NetNsInfo { return e } -// ExtraNetNs returns XDP type-specific link info. +// XDP returns XDP type-specific link info. // // Returns nil if the type-specific link info isn't available. func (r Info) XDP() *XDPInfo { @@ -144,6 +268,46 @@ func (r Info) XDP() *XDPInfo { return e } +// TCX returns TCX type-specific link info. +// +// Returns nil if the type-specific link info isn't available. +func (r Info) TCX() *TCXInfo { + e, _ := r.extra.(*TCXInfo) + return e +} + +// Netfilter returns netfilter type-specific link info. +// +// Returns nil if the type-specific link info isn't available. +func (r Info) Netfilter() *NetfilterInfo { + e, _ := r.extra.(*NetfilterInfo) + return e +} + +// Netkit returns netkit type-specific link info. +// +// Returns nil if the type-specific link info isn't available. +func (r Info) Netkit() *NetkitInfo { + e, _ := r.extra.(*NetkitInfo) + return e +} + +// KprobeMulti returns kprobe-multi type-specific link info. +// +// Returns nil if the type-specific link info isn't available. 
+func (r Info) KprobeMulti() *KprobeMultiInfo { + e, _ := r.extra.(*KprobeMultiInfo) + return e +} + +// PerfEvent returns perf-event type-specific link info. +// +// Returns nil if the type-specific link info isn't available. +func (r Info) PerfEvent() *PerfEventInfo { + e, _ := r.extra.(*PerfEventInfo) + return e +} + // RawLink is the low-level API to bpf_link. // // You should consider using the higher level interfaces in this @@ -172,12 +336,12 @@ func AttachRawLink(opts RawLinkOptions) (*RawLink, error) { TargetFd: uint32(opts.Target), ProgFd: uint32(progFd), AttachType: sys.AttachType(opts.Attach), - TargetBtfId: uint32(opts.BTF), + TargetBtfId: opts.BTF, Flags: opts.Flags, } fd, err := sys.LinkCreate(&attr) if err != nil { - return nil, fmt.Errorf("can't create link: %s", err) + return nil, fmt.Errorf("create link: %w", err) } return &RawLink{fd, ""}, nil @@ -230,6 +394,11 @@ func (l *RawLink) Unpin() error { return nil } +// IsPinned returns true if the Link has a non-empty pinned path. +func (l *RawLink) IsPinned() bool { + return l.pinnedPath != "" +} + // Update implements the Link interface. func (l *RawLink) Update(new *ebpf.Program) error { return l.UpdateArgs(RawLinkUpdateOptions{ @@ -279,29 +448,107 @@ func (l *RawLink) Info() (*Info, error) { var extra interface{} switch info.Type { case CgroupType: - extra = &CgroupInfo{} - case IterType: - // not supported + var cgroupInfo sys.CgroupLinkInfo + if err := sys.ObjInfo(l.fd, &cgroupInfo); err != nil { + return nil, fmt.Errorf("cgroup link info: %s", err) + } + extra = &CgroupInfo{ + CgroupId: cgroupInfo.CgroupId, + AttachType: cgroupInfo.AttachType, + } case NetNsType: - extra = &NetNsInfo{} - case RawTracepointType: - // not supported + var netnsInfo sys.NetNsLinkInfo + if err := sys.ObjInfo(l.fd, &netnsInfo); err != nil { + return nil, fmt.Errorf("netns link info: %s", err) + } + extra = &NetNsInfo{ + NetnsIno: netnsInfo.NetnsIno, + AttachType: netnsInfo.AttachType, + } case TracingType: - extra = &TracingInfo{} + var tracingInfo sys.TracingLinkInfo + if err := sys.ObjInfo(l.fd, &tracingInfo); err != nil { + return nil, fmt.Errorf("tracing link info: %s", err) + } + extra = &TracingInfo{ + TargetObjId: tracingInfo.TargetObjId, + TargetBtfId: tracingInfo.TargetBtfId, + AttachType: tracingInfo.AttachType, + } case XDPType: - extra = &XDPInfo{} + var xdpInfo sys.XDPLinkInfo + if err := sys.ObjInfo(l.fd, &xdpInfo); err != nil { + return nil, fmt.Errorf("xdp link info: %s", err) + } + extra = &XDPInfo{ + Ifindex: xdpInfo.Ifindex, + } + case RawTracepointType, IterType, UprobeMultiType: + // Extra metadata not supported. 
+	case TCXType:
+		var tcxInfo sys.TcxLinkInfo
+		if err := sys.ObjInfo(l.fd, &tcxInfo); err != nil {
+			return nil, fmt.Errorf("tcx link info: %s", err)
+		}
+		extra = &TCXInfo{
+			Ifindex:    tcxInfo.Ifindex,
+			AttachType: tcxInfo.AttachType,
+		}
+	case NetfilterType:
+		var netfilterInfo sys.NetfilterLinkInfo
+		if err := sys.ObjInfo(l.fd, &netfilterInfo); err != nil {
+			return nil, fmt.Errorf("netfilter link info: %s", err)
+		}
+		extra = &NetfilterInfo{
+			Pf:       netfilterInfo.Pf,
+			Hooknum:  netfilterInfo.Hooknum,
+			Priority: netfilterInfo.Priority,
+			Flags:    netfilterInfo.Flags,
+		}
+	case NetkitType:
+		var netkitInfo sys.NetkitLinkInfo
+		if err := sys.ObjInfo(l.fd, &netkitInfo); err != nil {
+			return nil, fmt.Errorf("netkit link info: %s", err)
+		}
+		extra = &NetkitInfo{
+			Ifindex:    netkitInfo.Ifindex,
+			AttachType: netkitInfo.AttachType,
+		}
+	case KprobeMultiType:
+		var kprobeMultiInfo sys.KprobeMultiLinkInfo
+		if err := sys.ObjInfo(l.fd, &kprobeMultiInfo); err != nil {
+			return nil, fmt.Errorf("kprobe multi link info: %s", err)
+		}
+		extra = &KprobeMultiInfo{
+			count:  kprobeMultiInfo.Count,
+			flags:  kprobeMultiInfo.Flags,
+			missed: kprobeMultiInfo.Missed,
+		}
 	case PerfEventType:
-		// no extra
-	default:
-		return nil, fmt.Errorf("unknown link info type: %d", info.Type)
-	}
+		var perfEventInfo sys.PerfEventLinkInfo
+		if err := sys.ObjInfo(l.fd, &perfEventInfo); err != nil {
+			return nil, fmt.Errorf("perf event link info: %s", err)
+		}
 
-	if info.Type != RawTracepointType && info.Type != IterType && info.Type != PerfEventType {
-		buf := bytes.NewReader(info.Extra[:])
-		err := binary.Read(buf, internal.NativeEndian, extra)
-		if err != nil {
-			return nil, fmt.Errorf("can not read extra link info: %w", err)
+		var extra2 interface{}
+		switch perfEventInfo.PerfEventType {
+		case sys.BPF_PERF_EVENT_KPROBE, sys.BPF_PERF_EVENT_KRETPROBE:
+			var kprobeInfo sys.KprobeLinkInfo
+			if err := sys.ObjInfo(l.fd, &kprobeInfo); err != nil {
+				return nil, fmt.Errorf("kprobe link info: %s", err)
+			}
+			extra2 = &KprobeInfo{
+				address: kprobeInfo.Addr,
+				missed:  kprobeInfo.Missed,
+			}
+		}
+
+		extra = &PerfEventInfo{
+			Type:  perfEventInfo.PerfEventType,
+			extra: extra2,
 		}
+	default:
+		return nil, fmt.Errorf("unknown link info type: %d", info.Type)
 	}
 
 	return &Info{
@@ -311,3 +558,74 @@ func (l *RawLink) Info() (*Info, error) {
 		extra,
 	}, nil
 }
+
+// Iterator allows iterating over links attached to the kernel.
+type Iterator struct {
+	// The ID of the current link. Only valid after a call to Next.
+	ID ID
+	// The current link. Only valid until a call to Next.
+	// See Take if you want to retain the link.
+	Link Link
+	err  error
+}
+
+// Next retrieves the next link.
+//
+// Returns true if another link was found. Call [Iterator.Err] after the function returns false.
+func (it *Iterator) Next() bool {
+	id := it.ID
+	for {
+		getIdAttr := &sys.LinkGetNextIdAttr{Id: id}
+		err := sys.LinkGetNextId(getIdAttr)
+		if errors.Is(err, os.ErrNotExist) {
+			// There are no more links.
+			break
+		} else if err != nil {
+			it.err = fmt.Errorf("get next link ID: %w", err)
+			break
+		}
+
+		id = getIdAttr.NextId
+		l, err := NewFromID(id)
+		if errors.Is(err, os.ErrNotExist) {
+			// Couldn't load the link fast enough. Try next ID.
+			continue
+		} else if err != nil {
+			it.err = fmt.Errorf("get link for ID %d: %w", id, err)
+			break
+		}
+
+		if it.Link != nil {
+			it.Link.Close()
+		}
+		it.ID, it.Link = id, l
+		return true
+	}
+
+	// No more links or we encountered an error.
+	if it.Link != nil {
+		it.Link.Close()
+	}
+	it.Link = nil
+	return false
+}
+
+// Take takes ownership of the current link.
+//
+// It's the caller's responsibility to close the link.
+func (it *Iterator) Take() Link {
+	l := it.Link
+	it.Link = nil
+	return l
+}
+
+// Err returns an error if iteration failed for some reason.
+func (it *Iterator) Err() error {
+	return it.err
+}
+
+func (it *Iterator) Close() {
+	if it.Link != nil {
+		it.Link.Close()
+	}
+}
diff --git a/vendor/github.com/cilium/ebpf/link/netfilter.go b/vendor/github.com/cilium/ebpf/link/netfilter.go
new file mode 100644
index 0000000000..250c87677b
--- /dev/null
+++ b/vendor/github.com/cilium/ebpf/link/netfilter.go
@@ -0,0 +1,70 @@
+package link
+
+import (
+	"fmt"
+
+	"github.com/cilium/ebpf"
+	"github.com/cilium/ebpf/internal/sys"
+)
+
+const NetfilterIPDefrag NetfilterAttachFlags = (1 << 0) // Enable IP packet defragmentation
+
+type NetfilterAttachFlags uint32
+
+type NetfilterOptions struct {
+	// Program must be a netfilter BPF program.
+	Program *ebpf.Program
+	// The protocol family.
+	ProtocolFamily uint32
+	// The number of the hook you are interested in.
+	HookNumber uint32
+	// Priority within hook
+	Priority int32
+	// Extra link flags
+	Flags uint32
+	// Netfilter flags
+	NetfilterFlags NetfilterAttachFlags
+}
+
+type netfilterLink struct {
+	RawLink
+}
+
+// AttachNetfilter links a netfilter BPF program to a netfilter hook.
+func AttachNetfilter(opts NetfilterOptions) (Link, error) {
+	if opts.Program == nil {
+		return nil, fmt.Errorf("netfilter program is nil")
+	}
+
+	if t := opts.Program.Type(); t != ebpf.Netfilter {
+		return nil, fmt.Errorf("invalid program type %s, expected netfilter", t)
+	}
+
+	progFd := opts.Program.FD()
+	if progFd < 0 {
+		return nil, fmt.Errorf("invalid program: %s", sys.ErrClosedFd)
+	}
+
+	attr := sys.LinkCreateNetfilterAttr{
+		ProgFd:         uint32(opts.Program.FD()),
+		AttachType:     sys.BPF_NETFILTER,
+		Flags:          opts.Flags,
+		Pf:             uint32(opts.ProtocolFamily),
+		Hooknum:        uint32(opts.HookNumber),
+		Priority:       opts.Priority,
+		NetfilterFlags: uint32(opts.NetfilterFlags),
+	}
+
+	fd, err := sys.LinkCreateNetfilter(&attr)
+	if err != nil {
+		return nil, fmt.Errorf("attach netfilter link: %w", err)
+	}
+
+	return &netfilterLink{RawLink{fd, ""}}, nil
+}
+
+func (*netfilterLink) Update(new *ebpf.Program) error {
+	return fmt.Errorf("netfilter update: %w", ErrNotSupported)
+}
+
+var _ Link = (*netfilterLink)(nil)
diff --git a/vendor/github.com/cilium/ebpf/link/netkit.go b/vendor/github.com/cilium/ebpf/link/netkit.go
new file mode 100644
index 0000000000..36ed72a480
--- /dev/null
+++ b/vendor/github.com/cilium/ebpf/link/netkit.go
@@ -0,0 +1,71 @@
+package link
+
+import (
+	"fmt"
+	"runtime"
+
+	"github.com/cilium/ebpf"
+	"github.com/cilium/ebpf/internal/sys"
+)
+
+type NetkitOptions struct {
+	// Index of the interface to attach to.
+	Interface int
+	// Program to attach.
+	Program *ebpf.Program
+	// One of the AttachNetkit* constants.
+	Attach ebpf.AttachType
+	// Attach relative to an anchor. Optional.
+	Anchor Anchor
+	// Only attach if the expected revision matches.
+	ExpectedRevision uint64
+	// Flags control the attach behaviour. Specify an Anchor instead of
+	// F_LINK, F_ID, F_BEFORE, F_AFTER and F_REPLACE. Optional.
+ Flags uint32 +} + +func AttachNetkit(opts NetkitOptions) (Link, error) { + if opts.Interface < 0 { + return nil, fmt.Errorf("interface %d is out of bounds", opts.Interface) + } + + if opts.Flags&anchorFlags != 0 { + return nil, fmt.Errorf("disallowed flags: use Anchor to specify attach target") + } + + attr := sys.LinkCreateNetkitAttr{ + ProgFd: uint32(opts.Program.FD()), + AttachType: sys.AttachType(opts.Attach), + TargetIfindex: uint32(opts.Interface), + ExpectedRevision: opts.ExpectedRevision, + Flags: opts.Flags, + } + + if opts.Anchor != nil { + fdOrID, flags, err := opts.Anchor.anchor() + if err != nil { + return nil, fmt.Errorf("attach netkit link: %w", err) + } + + attr.RelativeFdOrId = fdOrID + attr.Flags |= flags + } + + fd, err := sys.LinkCreateNetkit(&attr) + runtime.KeepAlive(opts.Program) + runtime.KeepAlive(opts.Anchor) + if err != nil { + if haveFeatErr := haveNetkit(); haveFeatErr != nil { + return nil, haveFeatErr + } + return nil, fmt.Errorf("attach netkit link: %w", err) + } + + return &netkitLink{RawLink{fd, ""}}, nil +} + +type netkitLink struct { + RawLink +} + +var _ Link = (*netkitLink)(nil) diff --git a/vendor/github.com/cilium/ebpf/link/perf_event.go b/vendor/github.com/cilium/ebpf/link/perf_event.go index 0e5bd47911..5f7a628b3d 100644 --- a/vendor/github.com/cilium/ebpf/link/perf_event.go +++ b/vendor/github.com/cilium/ebpf/link/perf_event.go @@ -1,20 +1,16 @@ package link import ( - "bytes" "errors" "fmt" - "os" - "path/filepath" "runtime" - "strconv" - "strings" "unsafe" "github.com/cilium/ebpf" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/tracefs" "github.com/cilium/ebpf/internal/unix" ) @@ -42,67 +38,41 @@ import ( // stops any further invocations of the attached eBPF program. var ( - tracefsPath = "/sys/kernel/debug/tracing" - - errInvalidInput = errors.New("invalid input") + errInvalidInput = tracefs.ErrInvalidInput ) const ( perfAllThreads = -1 ) -type perfEventType uint8 - -const ( - tracepointEvent perfEventType = iota - kprobeEvent - kretprobeEvent - uprobeEvent - uretprobeEvent -) - // A perfEvent represents a perf event kernel object. Exactly one eBPF program // can be attached to it. It is created based on a tracefs trace event or a // Performance Monitoring Unit (PMU). type perfEvent struct { - // The event type determines the types of programs that can be attached. - typ perfEventType - - // Group and name of the tracepoint/kprobe/uprobe. - group string - name string - - // PMU event ID read from sysfs. Valid IDs are non-zero. - pmuID uint64 - // ID of the trace event read from tracefs. Valid IDs are non-zero. - tracefsID uint64 - - // User provided arbitrary value. - cookie uint64 + // Trace event backing this perfEvent. May be nil. + tracefsEvent *tracefs.Event // This is the perf event FD. fd *sys.FD } +func newPerfEvent(fd *sys.FD, event *tracefs.Event) *perfEvent { + pe := &perfEvent{event, fd} + // Both event and fd have their own finalizer, but we want to + // guarantee that they are closed in a certain order. + runtime.SetFinalizer(pe, (*perfEvent).Close) + return pe +} + func (pe *perfEvent) Close() error { + runtime.SetFinalizer(pe, nil) + if err := pe.fd.Close(); err != nil { return fmt.Errorf("closing perf event fd: %w", err) } - switch pe.typ { - case kprobeEvent, kretprobeEvent: - // Clean up kprobe tracefs entry. 
- if pe.tracefsID != 0 { - return closeTraceFSProbeEvent(kprobeType, pe.group, pe.name) - } - case uprobeEvent, uretprobeEvent: - // Clean up uprobe tracefs entry. - if pe.tracefsID != 0 { - return closeTraceFSProbeEvent(uprobeType, pe.group, pe.name) - } - case tracepointEvent: - // Tracepoint trace events don't hold any extra resources. - return nil + if pe.tracefsEvent != nil { + return pe.tracefsEvent.Close() } return nil @@ -136,10 +106,14 @@ func (pl *perfEventLink) Unpin() error { } func (pl *perfEventLink) Close() error { + if err := pl.fd.Close(); err != nil { + return fmt.Errorf("perf link close: %w", err) + } + if err := pl.pe.Close(); err != nil { - return fmt.Errorf("perf event link close: %w", err) + return fmt.Errorf("perf event close: %w", err) } - return pl.fd.Close() + return nil } func (pl *perfEventLink) Update(prog *ebpf.Program) error { @@ -183,7 +157,7 @@ func (pi *perfEventIoctl) Info() (*Info, error) { // attach the given eBPF prog to the perf event stored in pe. // pe must contain a valid perf event fd. // prog's type must match the program type stored in pe. -func attachPerfEvent(pe *perfEvent, prog *ebpf.Program) (Link, error) { +func attachPerfEvent(pe *perfEvent, prog *ebpf.Program, cookie uint64) (Link, error) { if prog == nil { return nil, errors.New("cannot attach a nil program") } @@ -191,30 +165,18 @@ func attachPerfEvent(pe *perfEvent, prog *ebpf.Program) (Link, error) { return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd) } - switch pe.typ { - case kprobeEvent, kretprobeEvent, uprobeEvent, uretprobeEvent: - if t := prog.Type(); t != ebpf.Kprobe { - return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.Kprobe, t) - } - case tracepointEvent: - if t := prog.Type(); t != ebpf.TracePoint { - return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.TracePoint, t) - } - default: - return nil, fmt.Errorf("unknown perf event type: %d", pe.typ) + if err := haveBPFLinkPerfEvent(); err == nil { + return attachPerfEventLink(pe, prog, cookie) } - if err := haveBPFLinkPerfEvent(); err == nil { - return attachPerfEventLink(pe, prog) + if cookie != 0 { + return nil, fmt.Errorf("cookies are not supported: %w", ErrNotSupported) } + return attachPerfEventIoctl(pe, prog) } func attachPerfEventIoctl(pe *perfEvent, prog *ebpf.Program) (*perfEventIoctl, error) { - if pe.cookie != 0 { - return nil, fmt.Errorf("cookies are not supported: %w", ErrNotSupported) - } - // Assign the eBPF program to the perf event. err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_SET_BPF, prog.FD()) if err != nil { @@ -226,32 +188,24 @@ func attachPerfEventIoctl(pe *perfEvent, prog *ebpf.Program) (*perfEventIoctl, e return nil, fmt.Errorf("enable perf event: %s", err) } - pi := &perfEventIoctl{pe} - - // Close the perf event when its reference is lost to avoid leaking system resources. - runtime.SetFinalizer(pi, (*perfEventIoctl).Close) - return pi, nil + return &perfEventIoctl{pe}, nil } // Use the bpf api to attach the perf event (BPF_LINK_TYPE_PERF_EVENT, 5.15+). 
// // https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e -func attachPerfEventLink(pe *perfEvent, prog *ebpf.Program) (*perfEventLink, error) { +func attachPerfEventLink(pe *perfEvent, prog *ebpf.Program, cookie uint64) (*perfEventLink, error) { fd, err := sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{ ProgFd: uint32(prog.FD()), TargetFd: pe.fd.Uint(), AttachType: sys.BPF_PERF_EVENT, - BpfCookie: pe.cookie, + BpfCookie: cookie, }) if err != nil { return nil, fmt.Errorf("cannot create bpf perf link: %v", err) } - pl := &perfEventLink{RawLink{fd: fd}, pe} - - // Close the perf event when its reference is lost to avoid leaking system resources. - runtime.SetFinalizer(pl, (*perfEventLink).Close) - return pl, nil + return &perfEventLink{RawLink{fd: fd}, pe}, nil } // unsafeStringPtr returns an unsafe.Pointer to a NUL-terminated copy of str. @@ -263,40 +217,6 @@ func unsafeStringPtr(str string) (unsafe.Pointer, error) { return unsafe.Pointer(p), nil } -// getTraceEventID reads a trace event's ID from tracefs given its group and name. -// The kernel requires group and name to be alphanumeric or underscore. -// -// name automatically has its invalid symbols converted to underscores so the caller -// can pass a raw symbol name, e.g. a kernel symbol containing dots. -func getTraceEventID(group, name string) (uint64, error) { - name = sanitizeSymbol(name) - tid, err := uint64FromFile(tracefsPath, "events", group, name, "id") - if errors.Is(err, os.ErrNotExist) { - return 0, fmt.Errorf("trace event %s/%s: %w", group, name, os.ErrNotExist) - } - if err != nil { - return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err) - } - - return tid, nil -} - -// getPMUEventType reads a Performance Monitoring Unit's type (numeric identifier) -// from /sys/bus/event_source/devices//type. -// -// Returns ErrNotSupported if the pmu type is not supported. -func getPMUEventType(typ probeType) (uint64, error) { - et, err := uint64FromFile("/sys/bus/event_source/devices", typ.String(), "type") - if errors.Is(err, os.ErrNotExist) { - return 0, fmt.Errorf("pmu type %s: %w", typ, ErrNotSupported) - } - if err != nil { - return 0, fmt.Errorf("reading pmu type %s: %w", typ, err) - } - - return et, nil -} - // openTracepointPerfEvent opens a tracepoint-type perf event. System-wide // [k,u]probes created by writing to /[k,u]probe_events are tracepoints // behind the scenes, and can be attached to using these perf events. @@ -317,30 +237,11 @@ func openTracepointPerfEvent(tid uint64, pid int) (*sys.FD, error) { return sys.NewFD(fd) } -// uint64FromFile reads a uint64 from a file. All elements of path are sanitized -// and joined onto base. Returns error if base no longer prefixes the path after -// joining all components. -func uint64FromFile(base string, path ...string) (uint64, error) { - l := filepath.Join(path...) - p := filepath.Join(base, l) - if !strings.HasPrefix(p, base) { - return 0, fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, errInvalidInput) - } - - data, err := os.ReadFile(p) - if err != nil { - return 0, fmt.Errorf("reading file %s: %w", p, err) - } - - et := bytes.TrimSpace(data) - return strconv.ParseUint(string(et), 10, 64) -} - // Probe BPF perf link. 
// // https://elixir.bootlin.com/linux/v5.16.8/source/kernel/bpf/syscall.c#L4307 // https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e -var haveBPFLinkPerfEvent = internal.FeatureTest("bpf_link_perf_event", "5.15", func() error { +var haveBPFLinkPerfEvent = internal.NewFeatureTest("bpf_link_perf_event", "5.15", func() error { prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ Name: "probe_bpf_perf_link", Type: ebpf.Kprobe, @@ -367,28 +268,3 @@ var haveBPFLinkPerfEvent = internal.FeatureTest("bpf_link_perf_event", "5.15", f } return err }) - -// isValidTraceID implements the equivalent of a regex match -// against "^[a-zA-Z_][0-9a-zA-Z_]*$". -// -// Trace event groups, names and kernel symbols must adhere to this set -// of characters. Non-empty, first character must not be a number, all -// characters must be alphanumeric or underscore. -func isValidTraceID(s string) bool { - if len(s) < 1 { - return false - } - for i, c := range []byte(s) { - switch { - case c >= 'a' && c <= 'z': - case c >= 'A' && c <= 'Z': - case c == '_': - case i > 0 && c >= '0' && c <= '9': - - default: - return false - } - } - - return true -} diff --git a/vendor/github.com/cilium/ebpf/link/platform.go b/vendor/github.com/cilium/ebpf/link/platform.go deleted file mode 100644 index eb6f7b7a37..0000000000 --- a/vendor/github.com/cilium/ebpf/link/platform.go +++ /dev/null @@ -1,25 +0,0 @@ -package link - -import ( - "fmt" - "runtime" -) - -func platformPrefix(symbol string) string { - - prefix := runtime.GOARCH - - // per https://github.com/golang/go/blob/master/src/go/build/syslist.go - switch prefix { - case "386": - prefix = "ia32" - case "amd64", "amd64p32": - prefix = "x64" - case "arm64", "arm64be": - prefix = "arm64" - default: - return symbol - } - - return fmt.Sprintf("__%s_%s", prefix, symbol) -} diff --git a/vendor/github.com/cilium/ebpf/link/program.go b/vendor/github.com/cilium/ebpf/link/program.go index ea31817377..d8a2a15f93 100644 --- a/vendor/github.com/cilium/ebpf/link/program.go +++ b/vendor/github.com/cilium/ebpf/link/program.go @@ -2,22 +2,27 @@ package link import ( "fmt" + "runtime" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal/sys" ) type RawAttachProgramOptions struct { - // File descriptor to attach to. This differs for each attach type. + // Target to query. This is usually a file descriptor but may refer to + // something else based on the attach type. Target int // Program to attach. Program *ebpf.Program - // Program to replace (cgroups). - Replace *ebpf.Program - // Attach must match the attach type of Program (and Replace). + // Attach must match the attach type of Program. Attach ebpf.AttachType - // Flags control the attach behaviour. This differs for each attach type. + // Attach relative to an anchor. Optional. + Anchor Anchor + // Flags control the attach behaviour. Specify an Anchor instead of + // F_LINK, F_ID, F_BEFORE, F_AFTER and F_REPLACE. Optional. Flags uint32 + // Only attach if the internal revision matches the given value. + ExpectedRevision uint64 } // RawAttachProgram is a low level wrapper around BPF_PROG_ATTACH. @@ -25,50 +30,76 @@ type RawAttachProgramOptions struct { // You should use one of the higher level abstractions available in this // package if possible. 
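As the comment above says, prefer the higher-level helpers; for completeness, a direct sketch of the reworked options (the cgroup path is illustrative; assumes prog is a loaded CGroupSKB program):

package sketch

import (
	"os"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/link"
)

func rawAttach(prog *ebpf.Program) error {
	cg, err := os.Open("/sys/fs/cgroup") // illustrative target
	if err != nil {
		return err
	}
	defer cg.Close()

	// The old Replace field is gone; replacement is now expressed
	// through the new Anchor option instead.
	return link.RawAttachProgram(link.RawAttachProgramOptions{
		Target:  int(cg.Fd()),
		Program: prog,
		Attach:  ebpf.AttachCGroupInetIngress,
	})
}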
func RawAttachProgram(opts RawAttachProgramOptions) error { - if err := haveProgAttach(); err != nil { - return err + if opts.Flags&anchorFlags != 0 { + return fmt.Errorf("disallowed flags: use Anchor to specify attach target") } - var replaceFd uint32 - if opts.Replace != nil { - replaceFd = uint32(opts.Replace.FD()) + attr := sys.ProgAttachAttr{ + TargetFdOrIfindex: uint32(opts.Target), + AttachBpfFd: uint32(opts.Program.FD()), + AttachType: uint32(opts.Attach), + AttachFlags: uint32(opts.Flags), + ExpectedRevision: opts.ExpectedRevision, } - attr := sys.ProgAttachAttr{ - TargetFd: uint32(opts.Target), - AttachBpfFd: uint32(opts.Program.FD()), - ReplaceBpfFd: replaceFd, - AttachType: uint32(opts.Attach), - AttachFlags: uint32(opts.Flags), + if opts.Anchor != nil { + fdOrID, flags, err := opts.Anchor.anchor() + if err != nil { + return fmt.Errorf("attach program: %w", err) + } + + if flags == sys.BPF_F_REPLACE { + // Ensure that replacing a program works on old kernels. + attr.ReplaceBpfFd = fdOrID + } else { + attr.RelativeFdOrId = fdOrID + attr.AttachFlags |= flags + } } if err := sys.ProgAttach(&attr); err != nil { - return fmt.Errorf("can't attach program: %w", err) + if haveFeatErr := haveProgAttach(); haveFeatErr != nil { + return haveFeatErr + } + return fmt.Errorf("attach program: %w", err) } + runtime.KeepAlive(opts.Program) + return nil } -type RawDetachProgramOptions struct { - Target int - Program *ebpf.Program - Attach ebpf.AttachType -} +type RawDetachProgramOptions RawAttachProgramOptions // RawDetachProgram is a low level wrapper around BPF_PROG_DETACH. // // You should use one of the higher level abstractions available in this // package if possible. func RawDetachProgram(opts RawDetachProgramOptions) error { - if err := haveProgAttach(); err != nil { - return err + if opts.Flags&anchorFlags != 0 { + return fmt.Errorf("disallowed flags: use Anchor to specify attach target") } attr := sys.ProgDetachAttr{ - TargetFd: uint32(opts.Target), - AttachBpfFd: uint32(opts.Program.FD()), - AttachType: uint32(opts.Attach), + TargetFdOrIfindex: uint32(opts.Target), + AttachBpfFd: uint32(opts.Program.FD()), + AttachType: uint32(opts.Attach), + ExpectedRevision: opts.ExpectedRevision, } + + if opts.Anchor != nil { + fdOrID, flags, err := opts.Anchor.anchor() + if err != nil { + return fmt.Errorf("detach program: %w", err) + } + + attr.RelativeFdOrId = fdOrID + attr.AttachFlags |= flags + } + if err := sys.ProgDetach(&attr); err != nil { + if haveFeatErr := haveProgAttach(); haveFeatErr != nil { + return haveFeatErr + } return fmt.Errorf("can't detach program: %w", err) } diff --git a/vendor/github.com/cilium/ebpf/link/query.go b/vendor/github.com/cilium/ebpf/link/query.go new file mode 100644 index 0000000000..fe534f8efa --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/query.go @@ -0,0 +1,111 @@ +package link + +import ( + "fmt" + "unsafe" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/internal/sys" +) + +// QueryOptions defines additional parameters when querying for programs. +type QueryOptions struct { + // Target to query. This is usually a file descriptor but may refer to + // something else based on the attach type. + Target int + // Attach specifies the AttachType of the programs queried for + Attach ebpf.AttachType + // QueryFlags are flags for BPF_PROG_QUERY, e.g. BPF_F_QUERY_EFFECTIVE + QueryFlags uint32 +} + +// QueryResult describes which programs and links are active. +type QueryResult struct { + // List of attached programs. 
+	Programs []AttachedProgram
+
+	// Incremented by one every time the set of attached programs changes.
+	// May be zero if not supported by the [ebpf.AttachType].
+	Revision uint64
+}
+
+// HaveLinkInfo returns true if the kernel supports querying link information
+// for a particular [ebpf.AttachType].
+func (qr *QueryResult) HaveLinkInfo() bool {
+	return qr.Revision > 0
+}
+
+type AttachedProgram struct {
+	ID     ebpf.ProgramID
+	linkID ID
+}
+
+// LinkID returns the ID associated with the program.
+//
+// Returns 0, false if the kernel doesn't support retrieving the ID or if the
+// program wasn't attached via a link. See [QueryResult.HaveLinkInfo] if you
+// need to tell the two apart.
+func (ap *AttachedProgram) LinkID() (ID, bool) {
+	return ap.linkID, ap.linkID != 0
+}
+
+// QueryPrograms retrieves a list of programs for the given AttachType.
+//
+// Returns a slice of attached programs, which may be empty.
+// The returned Revision counts how many times the set of attached programs
+// has changed and may be zero if not supported by the [ebpf.AttachType].
+// Returns ErrNotSupported on a kernel without BPF_PROG_QUERY.
+func QueryPrograms(opts QueryOptions) (*QueryResult, error) {
+	// Query the number of programs first to allocate a correctly sized slice.
+	attr := sys.ProgQueryAttr{
+		TargetFdOrIfindex: uint32(opts.Target),
+		AttachType:        sys.AttachType(opts.Attach),
+		QueryFlags:        opts.QueryFlags,
+	}
+	err := sys.ProgQuery(&attr)
+	if err != nil {
+		if haveFeatErr := haveProgQuery(); haveFeatErr != nil {
+			return nil, fmt.Errorf("query programs: %w", haveFeatErr)
+		}
+		return nil, fmt.Errorf("query programs: %w", err)
+	}
+	if attr.Count == 0 {
+		return &QueryResult{Revision: attr.Revision}, nil
+	}
+
+	// The minimum bpf_mprog revision is 1, so we can use the field to detect
+	// whether the attach type supports link ids.
+	haveLinkIDs := attr.Revision != 0
+
+	count := attr.Count
+	progIds := make([]ebpf.ProgramID, count)
+	attr = sys.ProgQueryAttr{
+		TargetFdOrIfindex: uint32(opts.Target),
+		AttachType:        sys.AttachType(opts.Attach),
+		QueryFlags:        opts.QueryFlags,
+		Count:             count,
+		ProgIds:           sys.NewPointer(unsafe.Pointer(&progIds[0])),
+	}
+
+	var linkIds []ID
+	if haveLinkIDs {
+		linkIds = make([]ID, count)
+		attr.LinkIds = sys.NewPointer(unsafe.Pointer(&linkIds[0]))
+	}
+
+	if err := sys.ProgQuery(&attr); err != nil {
+		return nil, fmt.Errorf("query programs: %w", err)
+	}
+
+	// NB: attr.Count might have changed between the two syscalls.
+ var programs []AttachedProgram + for i, id := range progIds[:attr.Count] { + ap := AttachedProgram{ID: id} + if haveLinkIDs { + ap.linkID = linkIds[i] + } + programs = append(programs, ap) + } + + return &QueryResult{programs, attr.Revision}, nil +} diff --git a/vendor/github.com/cilium/ebpf/link/socket_filter.go b/vendor/github.com/cilium/ebpf/link/socket_filter.go index 94f3958cc4..84f0b656f8 100644 --- a/vendor/github.com/cilium/ebpf/link/socket_filter.go +++ b/vendor/github.com/cilium/ebpf/link/socket_filter.go @@ -15,7 +15,7 @@ func AttachSocketFilter(conn syscall.Conn, program *ebpf.Program) error { } var ssoErr error err = rawConn.Control(func(fd uintptr) { - ssoErr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_ATTACH_BPF, program.FD()) + ssoErr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_ATTACH_BPF, program.FD()) }) if ssoErr != nil { return ssoErr @@ -31,7 +31,7 @@ func DetachSocketFilter(conn syscall.Conn) error { } var ssoErr error err = rawConn.Control(func(fd uintptr) { - ssoErr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_DETACH_BPF, 0) + ssoErr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_DETACH_BPF, 0) }) if ssoErr != nil { return ssoErr diff --git a/vendor/github.com/cilium/ebpf/link/syscalls.go b/vendor/github.com/cilium/ebpf/link/syscalls.go index a661395b36..d09b5acb0f 100644 --- a/vendor/github.com/cilium/ebpf/link/syscalls.go +++ b/vendor/github.com/cilium/ebpf/link/syscalls.go @@ -23,9 +23,14 @@ const ( NetNsType = sys.BPF_LINK_TYPE_NETNS XDPType = sys.BPF_LINK_TYPE_XDP PerfEventType = sys.BPF_LINK_TYPE_PERF_EVENT + KprobeMultiType = sys.BPF_LINK_TYPE_KPROBE_MULTI + TCXType = sys.BPF_LINK_TYPE_TCX + UprobeMultiType = sys.BPF_LINK_TYPE_UPROBE_MULTI + NetfilterType = sys.BPF_LINK_TYPE_NETFILTER + NetkitType = sys.BPF_LINK_TYPE_NETKIT ) -var haveProgAttach = internal.FeatureTest("BPF_PROG_ATTACH", "4.10", func() error { +var haveProgAttach = internal.NewFeatureTest("BPF_PROG_ATTACH", "4.10", func() error { prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ Type: ebpf.CGroupSKB, License: "MIT", @@ -45,7 +50,7 @@ var haveProgAttach = internal.FeatureTest("BPF_PROG_ATTACH", "4.10", func() erro return nil }) -var haveProgAttachReplace = internal.FeatureTest("BPF_PROG_ATTACH atomic replacement", "5.5", func() error { +var haveProgAttachReplace = internal.NewFeatureTest("BPF_PROG_ATTACH atomic replacement of MULTI progs", "5.5", func() error { if err := haveProgAttach(); err != nil { return err } @@ -59,9 +64,11 @@ var haveProgAttachReplace = internal.FeatureTest("BPF_PROG_ATTACH atomic replace asm.Return(), }, }) + if err != nil { return internal.ErrNotSupported } + defer prog.Close() // We know that we have BPF_PROG_ATTACH since we can load CGroupSKB programs. @@ -69,10 +76,10 @@ var haveProgAttachReplace = internal.FeatureTest("BPF_PROG_ATTACH atomic replace // present. attr := sys.ProgAttachAttr{ // We rely on this being checked after attachFlags. 
- TargetFd: ^uint32(0), - AttachBpfFd: uint32(prog.FD()), - AttachType: uint32(ebpf.AttachCGroupInetIngress), - AttachFlags: uint32(flagReplace), + TargetFdOrIfindex: ^uint32(0), + AttachBpfFd: uint32(prog.FD()), + AttachType: uint32(ebpf.AttachCGroupInetIngress), + AttachFlags: uint32(flagReplace), } err = sys.ProgAttach(&attr) @@ -85,7 +92,7 @@ var haveProgAttachReplace = internal.FeatureTest("BPF_PROG_ATTACH atomic replace return err }) -var haveBPFLink = internal.FeatureTest("bpf_link", "5.7", func() error { +var haveBPFLink = internal.NewFeatureTest("bpf_link", "5.7", func() error { attr := sys.LinkCreateAttr{ // This is a hopefully invalid file descriptor, which triggers EBADF. TargetFd: ^uint32(0), @@ -101,3 +108,93 @@ var haveBPFLink = internal.FeatureTest("bpf_link", "5.7", func() error { } return err }) + +var haveProgQuery = internal.NewFeatureTest("BPF_PROG_QUERY", "4.15", func() error { + attr := sys.ProgQueryAttr{ + // We rely on this being checked during the syscall. + // With an otherwise correct payload we expect EBADF here + // as an indication that the feature is present. + TargetFdOrIfindex: ^uint32(0), + AttachType: sys.AttachType(ebpf.AttachCGroupInetIngress), + } + + err := sys.ProgQuery(&attr) + + if errors.Is(err, unix.EBADF) { + return nil + } + if err != nil { + return ErrNotSupported + } + return errors.New("syscall succeeded unexpectedly") +}) + +var haveTCX = internal.NewFeatureTest("tcx", "6.6", func() error { + prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ + Type: ebpf.SchedCLS, + License: "MIT", + Instructions: asm.Instructions{ + asm.Mov.Imm(asm.R0, 0), + asm.Return(), + }, + }) + + if err != nil { + return internal.ErrNotSupported + } + + defer prog.Close() + attr := sys.LinkCreateTcxAttr{ + // We rely on this being checked during the syscall. + // With an otherwise correct payload we expect ENODEV here + // as an indication that the feature is present. + TargetIfindex: ^uint32(0), + ProgFd: uint32(prog.FD()), + AttachType: sys.AttachType(ebpf.AttachTCXIngress), + } + + _, err = sys.LinkCreateTcx(&attr) + + if errors.Is(err, unix.ENODEV) { + return nil + } + if err != nil { + return ErrNotSupported + } + return errors.New("syscall succeeded unexpectedly") +}) + +var haveNetkit = internal.NewFeatureTest("netkit", "6.7", func() error { + prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ + Type: ebpf.SchedCLS, + License: "MIT", + Instructions: asm.Instructions{ + asm.Mov.Imm(asm.R0, 0), + asm.Return(), + }, + }) + + if err != nil { + return internal.ErrNotSupported + } + + defer prog.Close() + attr := sys.LinkCreateNetkitAttr{ + // We rely on this being checked during the syscall. + // With an otherwise correct payload we expect ENODEV here + // as an indication that the feature is present. + TargetIfindex: ^uint32(0), + ProgFd: uint32(prog.FD()), + AttachType: sys.AttachType(ebpf.AttachNetkitPrimary), + } + + _, err = sys.LinkCreateNetkit(&attr) + + if errors.Is(err, unix.ENODEV) { + return nil + } + if err != nil { + return ErrNotSupported + } + return errors.New("syscall succeeded unexpectedly") +}) diff --git a/vendor/github.com/cilium/ebpf/link/tcx.go b/vendor/github.com/cilium/ebpf/link/tcx.go new file mode 100644 index 0000000000..88f2237d29 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/tcx.go @@ -0,0 +1,71 @@ +package link + +import ( + "fmt" + "runtime" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/internal/sys" +) + +type TCXOptions struct { + // Index of the interface to attach to. + Interface int + // Program to attach. 
+ Program *ebpf.Program + // One of the AttachTCX* constants. + Attach ebpf.AttachType + // Attach relative to an anchor. Optional. + Anchor Anchor + // Only attach if the expected revision matches. + ExpectedRevision uint64 + // Flags control the attach behaviour. Specify an Anchor instead of + // F_LINK, F_ID, F_BEFORE, F_AFTER and R_REPLACE. Optional. + Flags uint32 +} + +func AttachTCX(opts TCXOptions) (Link, error) { + if opts.Interface < 0 { + return nil, fmt.Errorf("interface %d is out of bounds", opts.Interface) + } + + if opts.Flags&anchorFlags != 0 { + return nil, fmt.Errorf("disallowed flags: use Anchor to specify attach target") + } + + attr := sys.LinkCreateTcxAttr{ + ProgFd: uint32(opts.Program.FD()), + AttachType: sys.AttachType(opts.Attach), + TargetIfindex: uint32(opts.Interface), + ExpectedRevision: opts.ExpectedRevision, + Flags: opts.Flags, + } + + if opts.Anchor != nil { + fdOrID, flags, err := opts.Anchor.anchor() + if err != nil { + return nil, fmt.Errorf("attach tcx link: %w", err) + } + + attr.RelativeFdOrId = fdOrID + attr.Flags |= flags + } + + fd, err := sys.LinkCreateTcx(&attr) + runtime.KeepAlive(opts.Program) + runtime.KeepAlive(opts.Anchor) + if err != nil { + if haveFeatErr := haveTCX(); haveFeatErr != nil { + return nil, haveFeatErr + } + return nil, fmt.Errorf("attach tcx link: %w", err) + } + + return &tcxLink{RawLink{fd, ""}}, nil +} + +type tcxLink struct { + RawLink +} + +var _ Link = (*tcxLink)(nil) diff --git a/vendor/github.com/cilium/ebpf/link/tracepoint.go b/vendor/github.com/cilium/ebpf/link/tracepoint.go index a59ef9d1c5..95f5fae3b0 100644 --- a/vendor/github.com/cilium/ebpf/link/tracepoint.go +++ b/vendor/github.com/cilium/ebpf/link/tracepoint.go @@ -4,6 +4,7 @@ import ( "fmt" "github.com/cilium/ebpf" + "github.com/cilium/ebpf/internal/tracefs" ) // TracepointOptions defines additional parameters that will be used @@ -17,7 +18,7 @@ type TracepointOptions struct { } // Tracepoint attaches the given eBPF program to the tracepoint with the given -// group and name. See /sys/kernel/debug/tracing/events to find available +// group and name. See /sys/kernel/tracing/events to find available // tracepoints. The top-level directory is the group, the event's subdirectory // is the name. 
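For reference, the new tcx attach point can be driven like this. A minimal sketch, assuming prog is an already-loaded SchedCLS program; the eth0 lookup and the attachTCX helper are illustrative, not part of the patch:

```go
package main

import (
	"errors"
	"fmt"
	"net"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/link"
)

// attachTCX is a hypothetical helper: it attaches prog to the ingress side
// of eth0 using the tcx link type introduced in kernel 6.6.
func attachTCX(prog *ebpf.Program) (link.Link, error) {
	iface, err := net.InterfaceByName("eth0")
	if err != nil {
		return nil, err
	}

	l, err := link.AttachTCX(link.TCXOptions{
		Interface: iface.Index,
		Program:   prog,
		Attach:    ebpf.AttachTCXIngress,
	})
	if errors.Is(err, ebpf.ErrNotSupported) {
		// Kernels before 6.6: a netlink/tc fallback would go here.
		return nil, fmt.Errorf("tcx unavailable: %w", err)
	}
	return l, err
}
```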
Example: // @@ -36,14 +37,11 @@ func Tracepoint(group, name string, prog *ebpf.Program, opts *TracepointOptions) if prog == nil { return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput) } - if !isValidTraceID(group) || !isValidTraceID(name) { - return nil, fmt.Errorf("group and name '%s/%s' must be alphanumeric or underscore: %w", group, name, errInvalidInput) - } if prog.Type() != ebpf.TracePoint { return nil, fmt.Errorf("eBPF program type %s is not a Tracepoint: %w", prog.Type(), errInvalidInput) } - tid, err := getTraceEventID(group, name) + tid, err := tracefs.EventID(group, name) if err != nil { return nil, err } @@ -58,16 +56,9 @@ func Tracepoint(group, name string, prog *ebpf.Program, opts *TracepointOptions) cookie = opts.Cookie } - pe := &perfEvent{ - typ: tracepointEvent, - group: group, - name: name, - tracefsID: tid, - cookie: cookie, - fd: fd, - } + pe := newPerfEvent(fd, nil) - lnk, err := attachPerfEvent(pe, prog) + lnk, err := attachPerfEvent(pe, prog, cookie) if err != nil { pe.Close() return nil, err diff --git a/vendor/github.com/cilium/ebpf/link/tracing.go b/vendor/github.com/cilium/ebpf/link/tracing.go index e47e61a3b8..1e1a7834d8 100644 --- a/vendor/github.com/cilium/ebpf/link/tracing.go +++ b/vendor/github.com/cilium/ebpf/link/tracing.go @@ -1,11 +1,13 @@ package link import ( + "errors" "fmt" "github.com/cilium/ebpf" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/unix" ) type tracing struct { @@ -70,6 +72,10 @@ func AttachFreplace(targetProg *ebpf.Program, name string, prog *ebpf.Program) ( Attach: ebpf.AttachNone, BTF: typeID, }) + if errors.Is(err, sys.ENOTSUPP) { + // This may be returned by bpf_tracing_prog_attach via bpf_arch_text_poke. + return nil, fmt.Errorf("create raw tracepoint: %w", ErrNotSupported) + } if err != nil { return nil, err } @@ -82,25 +88,71 @@ type TracingOptions struct { // AttachTraceFEntry/AttachTraceFExit/AttachModifyReturn or // AttachTraceRawTp. Program *ebpf.Program + // Program attach type. Can be one of: + // - AttachTraceFEntry + // - AttachTraceFExit + // - AttachModifyReturn + // - AttachTraceRawTp + // This field is optional. + AttachType ebpf.AttachType + // Arbitrary value that can be fetched from an eBPF program + // via `bpf_get_attach_cookie()`. + Cookie uint64 } type LSMOptions struct { // Program must be of type LSM with attach type // AttachLSMMac. Program *ebpf.Program + // Arbitrary value that can be fetched from an eBPF program + // via `bpf_get_attach_cookie()`. + Cookie uint64 } // attachBTFID links all BPF program types (Tracing/LSM) that they attach to a btf_id. 
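TracingOptions now carries an optional AttachType and a Cookie, so fentry/fexit attachments can be created via bpf_link with an explicit attach type. A sketch, assuming prog is a Tracing program loaded elsewhere:

```go
package main

import (
	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/link"
)

// attachFentry shows the new optional fields; leaving AttachType as
// AttachNone keeps the old raw_tracepoint_open behaviour.
func attachFentry(prog *ebpf.Program) (link.Link, error) {
	return link.AttachTracing(link.TracingOptions{
		Program:    prog,
		AttachType: ebpf.AttachTraceFEntry,
		Cookie:     42, // readable in the program via bpf_get_attach_cookie()
	})
}
```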
-func attachBTFID(program *ebpf.Program) (Link, error) { +func attachBTFID(program *ebpf.Program, at ebpf.AttachType, cookie uint64) (Link, error) { if program.FD() < 0 { return nil, fmt.Errorf("invalid program %w", sys.ErrClosedFd) } - fd, err := sys.RawTracepointOpen(&sys.RawTracepointOpenAttr{ - ProgFd: uint32(program.FD()), - }) - if err != nil { - return nil, err + var ( + fd *sys.FD + err error + ) + switch at { + case ebpf.AttachTraceFEntry, ebpf.AttachTraceFExit, ebpf.AttachTraceRawTp, + ebpf.AttachModifyReturn, ebpf.AttachLSMMac: + // Attach via BPF link + fd, err = sys.LinkCreateTracing(&sys.LinkCreateTracingAttr{ + ProgFd: uint32(program.FD()), + AttachType: sys.AttachType(at), + Cookie: cookie, + }) + if err == nil { + break + } + if !errors.Is(err, unix.EINVAL) && !errors.Is(err, sys.ENOTSUPP) { + return nil, fmt.Errorf("create tracing link: %w", err) + } + fallthrough + case ebpf.AttachNone: + // Attach via RawTracepointOpen + if cookie > 0 { + return nil, fmt.Errorf("create raw tracepoint with cookie: %w", ErrNotSupported) + } + + fd, err = sys.RawTracepointOpen(&sys.RawTracepointOpenAttr{ + ProgFd: uint32(program.FD()), + }) + if errors.Is(err, sys.ENOTSUPP) { + // This may be returned by bpf_tracing_prog_attach via bpf_arch_text_poke. + return nil, fmt.Errorf("create raw tracepoint: %w", ErrNotSupported) + } + if err != nil { + return nil, fmt.Errorf("create raw tracepoint: %w", err) + } + default: + return nil, fmt.Errorf("invalid attach type: %s", at.String()) } raw := RawLink{fd: fd} @@ -115,8 +167,7 @@ func attachBTFID(program *ebpf.Program) (Link, error) { // a raw_tracepoint link. Other types return a tracing link. return &rawTracepoint{raw}, nil } - - return &tracing{RawLink: RawLink{fd: fd}}, nil + return &tracing{raw}, nil } // AttachTracing links a tracing (fentry/fexit/fmod_ret) BPF program or @@ -127,7 +178,14 @@ func AttachTracing(opts TracingOptions) (Link, error) { return nil, fmt.Errorf("invalid program type %s, expected Tracing", t) } - return attachBTFID(opts.Program) + switch opts.AttachType { + case ebpf.AttachTraceFEntry, ebpf.AttachTraceFExit, ebpf.AttachModifyReturn, + ebpf.AttachTraceRawTp, ebpf.AttachNone: + default: + return nil, fmt.Errorf("invalid attach type: %s", opts.AttachType.String()) + } + + return attachBTFID(opts.Program, opts.AttachType, opts.Cookie) } // AttachLSM links a Linux security module (LSM) BPF Program to a BPF @@ -137,5 +195,5 @@ func AttachLSM(opts LSMOptions) (Link, error) { return nil, fmt.Errorf("invalid program type %s, expected LSM", t) } - return attachBTFID(opts.Program) + return attachBTFID(opts.Program, ebpf.AttachLSMMac, opts.Cookie) } diff --git a/vendor/github.com/cilium/ebpf/link/uprobe.go b/vendor/github.com/cilium/ebpf/link/uprobe.go index edf925b570..ad85024e38 100644 --- a/vendor/github.com/cilium/ebpf/link/uprobe.go +++ b/vendor/github.com/cilium/ebpf/link/uprobe.go @@ -5,31 +5,25 @@ import ( "errors" "fmt" "os" - "path/filepath" - "strings" "sync" "github.com/cilium/ebpf" "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/tracefs" ) var ( - uprobeEventsPath = filepath.Join(tracefsPath, "uprobe_events") - - uprobeRetprobeBit = struct { - once sync.Once - value uint64 - err error - }{} - uprobeRefCtrOffsetPMUPath = "/sys/bus/event_source/devices/uprobe/format/ref_ctr_offset" // elixir.bootlin.com/linux/v5.15-rc7/source/kernel/events/core.c#L9799 uprobeRefCtrOffsetShift = 32 - haveRefCtrOffsetPMU = internal.FeatureTest("RefCtrOffsetPMU", "4.20", func() error { + haveRefCtrOffsetPMU = 
internal.NewFeatureTest("RefCtrOffsetPMU", "4.20", func() error { _, err := os.Stat(uprobeRefCtrOffsetPMUPath) - if err != nil { + if errors.Is(err, os.ErrNotExist) { return internal.ErrNotSupported } + if err != nil { + return err + } return nil }) @@ -42,8 +36,10 @@ var ( type Executable struct { // Path of the executable on the filesystem. path string - // Parsed ELF and dynamic symbols' addresses. - addresses map[string]uint64 + // Parsed ELF and dynamic symbols' cachedAddresses. + cachedAddresses map[string]uint64 + // Keep track of symbol table lazy load. + cacheAddressesOnce sync.Once } // UprobeOptions defines additional parameters that will be used @@ -77,11 +73,22 @@ type UprobeOptions struct { // // Needs kernel 5.15+. Cookie uint64 + // Prefix used for the event name if the uprobe must be attached using tracefs. + // The group name will be formatted as `_`. + // The default empty string is equivalent to "ebpf" as the prefix. + TraceFSPrefix string +} + +func (uo *UprobeOptions) cookie() uint64 { + if uo == nil { + return 0 + } + return uo.Cookie } // To open a new Executable, use: // -// OpenExecutable("/bin/bash") +// OpenExecutable("/bin/bash") // // The returned value can then be used to open Uprobe(s). func OpenExecutable(path string) (*Executable, error) { @@ -89,32 +96,21 @@ func OpenExecutable(path string) (*Executable, error) { return nil, fmt.Errorf("path cannot be empty") } - f, err := os.Open(path) - if err != nil { - return nil, fmt.Errorf("open file '%s': %w", path, err) - } - defer f.Close() - - se, err := internal.NewSafeELFFile(f) + f, err := internal.OpenSafeELFFile(path) if err != nil { return nil, fmt.Errorf("parse ELF file: %w", err) } + defer f.Close() - if se.Type != elf.ET_EXEC && se.Type != elf.ET_DYN { + if f.Type != elf.ET_EXEC && f.Type != elf.ET_DYN { // ELF is not an executable or a shared object. return nil, errors.New("the given file is not an executable or a shared object") } - ex := Executable{ - path: path, - addresses: make(map[string]uint64), - } - - if err := ex.load(se); err != nil { - return nil, err - } - - return &ex, nil + return &Executable{ + path: path, + cachedAddresses: make(map[string]uint64), + }, nil } func (ex *Executable) load(f *internal.SafeELFFile) error { @@ -157,7 +153,7 @@ func (ex *Executable) load(f *internal.SafeELFFile) error { } } - ex.addresses[s.Name] = address + ex.cachedAddresses[s.Name] = address } return nil @@ -166,12 +162,28 @@ func (ex *Executable) load(f *internal.SafeELFFile) error { // address calculates the address of a symbol in the executable. // // opts must not be nil. 
-func (ex *Executable) address(symbol string, opts *UprobeOptions) (uint64, error) {
-	if opts.Address > 0 {
-		return opts.Address + opts.Offset, nil
+func (ex *Executable) address(symbol string, address, offset uint64) (uint64, error) {
+	if address > 0 {
+		return address + offset, nil
 	}
 
-	address, ok := ex.addresses[symbol]
+	var err error
+	ex.cacheAddressesOnce.Do(func() {
+		var f *internal.SafeELFFile
+		f, err = internal.OpenSafeELFFile(ex.path)
+		if err != nil {
+			err = fmt.Errorf("parse ELF file: %w", err)
+			return
+		}
+		defer f.Close()
+
+		err = ex.load(f)
+	})
+	if err != nil {
+		return 0, fmt.Errorf("lazy load symbols: %w", err)
+	}
+
+	address, ok := ex.cachedAddresses[symbol]
 	if !ok {
 		return 0, fmt.Errorf("symbol %s: %w", symbol, ErrNoSymbol)
 	}
@@ -187,20 +199,20 @@
 			"(consider providing UprobeOptions.Address)", ex.path, symbol, ErrNotSupported)
 	}
 
-	return address + opts.Offset, nil
+	return address + offset, nil
 }
 
 // Uprobe attaches the given eBPF program to a perf event that fires when the
 // given symbol starts executing in the given Executable.
 // For example, /bin/bash::main():
 //
-//  ex, _ = OpenExecutable("/bin/bash")
-//  ex.Uprobe("main", prog, nil)
+//	ex, _ = OpenExecutable("/bin/bash")
+//	ex.Uprobe("main", prog, nil)
 //
 // When using symbols which belong to shared libraries,
 // an offset must be provided via options:
 //
-//  up, err := ex.Uprobe("main", prog, &UprobeOptions{Offset: 0x123})
+//	up, err := ex.Uprobe("main", prog, &UprobeOptions{Offset: 0x123})
 //
 // Note: Setting the Offset field in the options supersedes the symbol's offset.
 //
@@ -216,7 +228,7 @@
 		return nil, err
 	}
 
-	lnk, err := attachPerfEvent(u, prog)
+	lnk, err := attachPerfEvent(u, prog, opts.cookie())
 	if err != nil {
 		u.Close()
 		return nil, err
@@ -228,13 +240,13 @@
 // Uretprobe attaches the given eBPF program to a perf event that fires right
 // before the given symbol exits. For example, /bin/bash::main():
 //
-//  ex, _ = OpenExecutable("/bin/bash")
-//  ex.Uretprobe("main", prog, nil)
+//	ex, _ = OpenExecutable("/bin/bash")
+//	ex.Uretprobe("main", prog, nil)
 //
 // When using symbols which belong to shared libraries,
 // an offset must be provided via options:
 //
-//  up, err := ex.Uretprobe("main", prog, &UprobeOptions{Offset: 0x123})
+//	up, err := ex.Uretprobe("main", prog, &UprobeOptions{Offset: 0x123})
 //
 // Note: Setting the Offset field in the options supersedes the symbol's offset.
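A usage sketch for the two options added to UprobeOptions; the binary, symbol and prefix are placeholders, and prog is assumed to be a Kprobe-type program loaded elsewhere:

```go
package main

import (
	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/link"
)

// attachReadline instruments bash's readline and demonstrates Cookie plus
// the new TraceFSPrefix, which names the tracefs group on the non-PMU
// fallback path.
func attachReadline(prog *ebpf.Program) (link.Link, error) {
	ex, err := link.OpenExecutable("/bin/bash")
	if err != nil {
		return nil, err
	}
	return ex.Uprobe("readline", prog, &link.UprobeOptions{
		Cookie:        1,        // bpf_get_attach_cookie() returns this
		TraceFSPrefix: "kepler", // tracefs group becomes kepler_<random>
	})
}
```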
// @@ -250,7 +262,7 @@ func (ex *Executable) Uretprobe(symbol string, prog *ebpf.Program, opts *UprobeO return nil, err } - lnk, err := attachPerfEvent(u, prog) + lnk, err := attachPerfEvent(u, prog, opts.cookie()) if err != nil { u.Close() return nil, err @@ -272,7 +284,7 @@ func (ex *Executable) uprobe(symbol string, prog *ebpf.Program, opts *UprobeOpti opts = &UprobeOptions{} } - offset, err := ex.address(symbol, opts) + offset, err := ex.address(symbol, opts.Address, opts.Offset) if err != nil { return nil, err } @@ -288,18 +300,20 @@ func (ex *Executable) uprobe(symbol string, prog *ebpf.Program, opts *UprobeOpti } } - args := probeArgs{ - symbol: symbol, - path: ex.path, - offset: offset, - pid: pid, - refCtrOffset: opts.RefCtrOffset, - ret: ret, - cookie: opts.Cookie, + args := tracefs.ProbeArgs{ + Type: tracefs.Uprobe, + Symbol: symbol, + Path: ex.path, + Offset: offset, + Pid: pid, + RefCtrOffset: opts.RefCtrOffset, + Ret: ret, + Cookie: opts.Cookie, + Group: opts.TraceFSPrefix, } // Use uprobe PMU if the kernel has it available. - tp, err := pmuUprobe(args) + tp, err := pmuProbe(args) if err == nil { return tp, nil } @@ -308,66 +322,10 @@ func (ex *Executable) uprobe(symbol string, prog *ebpf.Program, opts *UprobeOpti } // Use tracefs if uprobe PMU is missing. - args.symbol = sanitizeSymbol(symbol) - tp, err = tracefsUprobe(args) + tp, err = tracefsProbe(args) if err != nil { return nil, fmt.Errorf("creating trace event '%s:%s' in tracefs: %w", ex.path, symbol, err) } return tp, nil } - -// pmuUprobe opens a perf event based on the uprobe PMU. -func pmuUprobe(args probeArgs) (*perfEvent, error) { - return pmuProbe(uprobeType, args) -} - -// tracefsUprobe creates a Uprobe tracefs entry. -func tracefsUprobe(args probeArgs) (*perfEvent, error) { - return tracefsProbe(uprobeType, args) -} - -// sanitizeSymbol replaces every invalid character for the tracefs api with an underscore. -// It is equivalent to calling regexp.MustCompile("[^a-zA-Z0-9]+").ReplaceAllString("_"). -func sanitizeSymbol(s string) string { - var b strings.Builder - b.Grow(len(s)) - var skip bool - for _, c := range []byte(s) { - switch { - case c >= 'a' && c <= 'z', - c >= 'A' && c <= 'Z', - c >= '0' && c <= '9': - skip = false - b.WriteByte(c) - - default: - if !skip { - b.WriteByte('_') - skip = true - } - } - } - - return b.String() -} - -// uprobeToken creates the PATH:OFFSET(REF_CTR_OFFSET) token for the tracefs api. -func uprobeToken(args probeArgs) string { - po := fmt.Sprintf("%s:%#x", args.path, args.offset) - - if args.refCtrOffset != 0 { - // This is not documented in Documentation/trace/uprobetracer.txt. 
- // elixir.bootlin.com/linux/v5.15-rc7/source/kernel/trace/trace.c#L5564 - po += fmt.Sprintf("(%#x)", args.refCtrOffset) - } - - return po -} - -func uretprobeBit() (uint64, error) { - uprobeRetprobeBit.once.Do(func() { - uprobeRetprobeBit.value, uprobeRetprobeBit.err = determineRetprobeBit(uprobeType) - }) - return uprobeRetprobeBit.value, uprobeRetprobeBit.err -} diff --git a/vendor/github.com/cilium/ebpf/link/uprobe_multi.go b/vendor/github.com/cilium/ebpf/link/uprobe_multi.go new file mode 100644 index 0000000000..9a8d329c8f --- /dev/null +++ b/vendor/github.com/cilium/ebpf/link/uprobe_multi.go @@ -0,0 +1,224 @@ +package link + +import ( + "errors" + "fmt" + "os" + "unsafe" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/asm" + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/unix" +) + +// UprobeMultiOptions defines additional parameters that will be used +// when opening a UprobeMulti Link. +type UprobeMultiOptions struct { + // Symbol addresses. If set, overrides the addresses eventually parsed from + // the executable. Mutually exclusive with UprobeMulti's symbols argument. + Addresses []uint64 + + // Offsets into functions provided by UprobeMulti's symbols argument. + // For example: to set uprobes to main+5 and _start+10, call UprobeMulti + // with: + // symbols: "main", "_start" + // opt.Offsets: 5, 10 + Offsets []uint64 + + // Optional list of associated ref counter offsets. + RefCtrOffsets []uint64 + + // Optional list of associated BPF cookies. + Cookies []uint64 + + // Only set the uprobe_multi link on the given process ID, zero PID means + // system-wide. + PID uint32 +} + +func (ex *Executable) UprobeMulti(symbols []string, prog *ebpf.Program, opts *UprobeMultiOptions) (Link, error) { + return ex.uprobeMulti(symbols, prog, opts, 0) +} + +func (ex *Executable) UretprobeMulti(symbols []string, prog *ebpf.Program, opts *UprobeMultiOptions) (Link, error) { + + // The return probe is not limited for symbols entry, so there's no special + // setup for return uprobes (other than the extra flag). The symbols, opts.Offsets + // and opts.Addresses arrays follow the same logic as for entry uprobes. 
+ return ex.uprobeMulti(symbols, prog, opts, unix.BPF_F_UPROBE_MULTI_RETURN) +} + +func (ex *Executable) uprobeMulti(symbols []string, prog *ebpf.Program, opts *UprobeMultiOptions, flags uint32) (Link, error) { + if prog == nil { + return nil, errors.New("cannot attach a nil program") + } + + if opts == nil { + opts = &UprobeMultiOptions{} + } + + addresses, err := ex.addresses(symbols, opts.Addresses, opts.Offsets) + if err != nil { + return nil, err + } + + addrs := len(addresses) + cookies := len(opts.Cookies) + refCtrOffsets := len(opts.RefCtrOffsets) + + if addrs == 0 { + return nil, fmt.Errorf("Addresses are required: %w", errInvalidInput) + } + if refCtrOffsets > 0 && refCtrOffsets != addrs { + return nil, fmt.Errorf("RefCtrOffsets must be exactly Addresses in length: %w", errInvalidInput) + } + if cookies > 0 && cookies != addrs { + return nil, fmt.Errorf("Cookies must be exactly Addresses in length: %w", errInvalidInput) + } + + attr := &sys.LinkCreateUprobeMultiAttr{ + Path: sys.NewStringPointer(ex.path), + ProgFd: uint32(prog.FD()), + AttachType: sys.BPF_TRACE_UPROBE_MULTI, + UprobeMultiFlags: flags, + Count: uint32(addrs), + Offsets: sys.NewPointer(unsafe.Pointer(&addresses[0])), + Pid: opts.PID, + } + + if refCtrOffsets != 0 { + attr.RefCtrOffsets = sys.NewPointer(unsafe.Pointer(&opts.RefCtrOffsets[0])) + } + if cookies != 0 { + attr.Cookies = sys.NewPointer(unsafe.Pointer(&opts.Cookies[0])) + } + + fd, err := sys.LinkCreateUprobeMulti(attr) + if errors.Is(err, unix.ESRCH) { + return nil, fmt.Errorf("%w (specified pid not found?)", os.ErrNotExist) + } + if errors.Is(err, unix.EINVAL) { + return nil, fmt.Errorf("%w (missing symbol or prog's AttachType not AttachTraceUprobeMulti?)", err) + } + + if err != nil { + if haveFeatErr := haveBPFLinkUprobeMulti(); haveFeatErr != nil { + return nil, haveFeatErr + } + return nil, err + } + + return &uprobeMultiLink{RawLink{fd, ""}}, nil +} + +func (ex *Executable) addresses(symbols []string, addresses, offsets []uint64) ([]uint64, error) { + n := len(symbols) + if n == 0 { + n = len(addresses) + } + + if n == 0 { + return nil, fmt.Errorf("%w: neither symbols nor addresses given", errInvalidInput) + } + + if symbols != nil && len(symbols) != n { + return nil, fmt.Errorf("%w: have %d symbols but want %d", errInvalidInput, len(symbols), n) + } + + if addresses != nil && len(addresses) != n { + return nil, fmt.Errorf("%w: have %d addresses but want %d", errInvalidInput, len(addresses), n) + } + + if offsets != nil && len(offsets) != n { + return nil, fmt.Errorf("%w: have %d offsets but want %d", errInvalidInput, len(offsets), n) + } + + results := make([]uint64, 0, n) + for i := 0; i < n; i++ { + var sym string + if symbols != nil { + sym = symbols[i] + } + + var addr, off uint64 + if addresses != nil { + addr = addresses[i] + } + + if offsets != nil { + off = offsets[i] + } + + result, err := ex.address(sym, addr, off) + if err != nil { + return nil, err + } + + results = append(results, result) + } + + return results, nil +} + +type uprobeMultiLink struct { + RawLink +} + +var _ Link = (*uprobeMultiLink)(nil) + +func (kml *uprobeMultiLink) Update(prog *ebpf.Program) error { + return fmt.Errorf("update uprobe_multi: %w", ErrNotSupported) +} + +func (kml *uprobeMultiLink) Pin(string) error { + return fmt.Errorf("pin uprobe_multi: %w", ErrNotSupported) +} + +func (kml *uprobeMultiLink) Unpin() error { + return fmt.Errorf("unpin uprobe_multi: %w", ErrNotSupported) +} + +var haveBPFLinkUprobeMulti = 
internal.NewFeatureTest("bpf_link_uprobe_multi", "6.6", func() error { + prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ + Name: "probe_upm_link", + Type: ebpf.Kprobe, + Instructions: asm.Instructions{ + asm.Mov.Imm(asm.R0, 0), + asm.Return(), + }, + AttachType: ebpf.AttachTraceUprobeMulti, + License: "MIT", + }) + if errors.Is(err, unix.E2BIG) { + // Kernel doesn't support AttachType field. + return internal.ErrNotSupported + } + if err != nil { + return err + } + defer prog.Close() + + // We try to create uprobe multi link on '/' path which results in + // error with -EBADF in case uprobe multi link is supported. + fd, err := sys.LinkCreateUprobeMulti(&sys.LinkCreateUprobeMultiAttr{ + ProgFd: uint32(prog.FD()), + AttachType: sys.BPF_TRACE_UPROBE_MULTI, + Path: sys.NewStringPointer("/"), + Offsets: sys.NewPointer(unsafe.Pointer(&[]uint64{0})), + Count: 1, + }) + switch { + case errors.Is(err, unix.EBADF): + return nil + case errors.Is(err, unix.EINVAL): + return internal.ErrNotSupported + case err != nil: + return err + } + + // should not happen + fd.Close() + return errors.New("successfully attached uprobe_multi to /, kernel bug?") +}) diff --git a/vendor/github.com/cilium/ebpf/linker.go b/vendor/github.com/cilium/ebpf/linker.go index e6276b1829..788f21b7b6 100644 --- a/vendor/github.com/cilium/ebpf/linker.go +++ b/vendor/github.com/cilium/ebpf/linker.go @@ -1,14 +1,56 @@ package ebpf import ( + "debug/elf" + "encoding/binary" "errors" "fmt" - "sync" + "io" + "io/fs" + "math" + "slices" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/btf" + "github.com/cilium/ebpf/internal" ) +// handles stores handle objects to avoid gc cleanup +type handles []*btf.Handle + +func (hs *handles) add(h *btf.Handle) (int, error) { + if h == nil { + return 0, nil + } + + if len(*hs) == math.MaxInt16 { + return 0, fmt.Errorf("can't add more than %d module FDs to fdArray", math.MaxInt16) + } + + *hs = append(*hs, h) + + // return length of slice so that indexes start at 1 + return len(*hs), nil +} + +func (hs handles) fdArray() []int32 { + // first element of fda is reserved as no module can be indexed with 0 + fda := []int32{0} + for _, h := range hs { + fda = append(fda, int32(h.FD())) + } + + return fda +} + +func (hs *handles) Close() error { + var errs []error + for _, h := range *hs { + errs = append(errs, h.Close()) + } + return errors.Join(errs...) +} + // splitSymbols splits insns into subsections delimited by Symbol Instructions. // insns cannot be empty and must start with a Symbol Instruction. // @@ -18,21 +60,33 @@ func splitSymbols(insns asm.Instructions) (map[string]asm.Instructions, error) { return nil, errors.New("insns is empty") } - if insns[0].Symbol() == "" { + currentSym := insns[0].Symbol() + if currentSym == "" { return nil, errors.New("insns must start with a Symbol") } - var name string + start := 0 progs := make(map[string]asm.Instructions) - for _, ins := range insns { - if sym := ins.Symbol(); sym != "" { - if progs[sym] != nil { - return nil, fmt.Errorf("insns contains duplicate Symbol %s", sym) - } - name = sym + for i, ins := range insns[1:] { + i := i + 1 + + sym := ins.Symbol() + if sym == "" { + continue + } + + // New symbol, flush the old one out. 
+ progs[currentSym] = slices.Clone(insns[start:i]) + + if progs[sym] != nil { + return nil, fmt.Errorf("insns contains duplicate Symbol %s", sym) } + currentSym = sym + start = i + } - progs[name] = append(progs[name], ins) + if tail := insns[start:]; len(tail) > 0 { + progs[currentSym] = slices.Clone(tail) } return progs, nil @@ -67,7 +121,7 @@ func hasFunctionReferences(insns asm.Instructions) bool { // // Passing a nil target will relocate against the running kernel. insns are // modified in place. -func applyRelocations(insns asm.Instructions, local, target *btf.Spec) error { +func applyRelocations(insns asm.Instructions, targets []*btf.Spec, kmodName string, bo binary.ByteOrder, b *btf.Builder) error { var relos []*btf.CORERelocation var reloInsns []*asm.Instruction iter := insns.Iterate() @@ -82,19 +136,37 @@ func applyRelocations(insns asm.Instructions, local, target *btf.Spec) error { return nil } - target, err := maybeLoadKernelBTF(target) - if err != nil { - return err + if bo == nil { + bo = internal.NativeEndian } - fixups, err := btf.CORERelocate(local, target, relos) + if len(targets) == 0 { + kernelTarget, err := btf.LoadKernelSpec() + if err != nil { + return fmt.Errorf("load kernel spec: %w", err) + } + targets = append(targets, kernelTarget) + + if kmodName != "" { + kmodTarget, err := btf.LoadKernelModuleSpec(kmodName) + // Ignore ErrNotExists to cater to kernels which have CONFIG_DEBUG_INFO_BTF_MODULES disabled. + if err != nil && !errors.Is(err, fs.ErrNotExist) { + return fmt.Errorf("load kernel module spec: %w", err) + } + if err == nil { + targets = append(targets, kmodTarget) + } + } + } + + fixups, err := btf.CORERelocate(relos, targets, bo, b.Add) if err != nil { return err } for i, fixup := range fixups { if err := fixup.Apply(reloInsns[i]); err != nil { - return fmt.Errorf("apply fixup %s: %w", &fixup, err) + return fmt.Errorf("fixup for %s: %w", relos[i], err) } } @@ -181,8 +253,9 @@ func fixupAndValidate(insns asm.Instructions) error { ins := iter.Ins // Map load was tagged with a Reference, but does not contain a Map pointer. - if ins.IsLoadFromMap() && ins.Reference() != "" && ins.Map() == nil { - return fmt.Errorf("instruction %d: map %s: %w", iter.Index, ins.Reference(), asm.ErrUnsatisfiedMapReference) + needsMap := ins.Reference() != "" || ins.Metadata.Get(kconfigMetaKey{}) != nil + if ins.IsLoadFromMap() && needsMap && ins.Map() == nil { + return fmt.Errorf("instruction %d: %w", iter.Index, asm.ErrUnsatisfiedMapReference) } fixupProbeReadKernel(ins) @@ -191,6 +264,120 @@ func fixupAndValidate(insns asm.Instructions) error { return nil } +// POISON_CALL_KFUNC_BASE in libbpf. +// https://github.com/libbpf/libbpf/blob/2778cbce609aa1e2747a69349f7f46a2f94f0522/src/libbpf.c#L5767 +const kfuncCallPoisonBase = 2002000000 + +// fixupKfuncs loops over all instructions in search for kfunc calls. +// If at least one is found, the current kernels BTF and module BTFis are searched to set Instruction.Constant +// and Instruction.Offset to the correct values. 
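From the caller's side, the practical effect is that loading a collection now resolves kfunc calls against kernel (and module) BTF, and a missing, non-weak kfunc surfaces as an error wrapping ErrNotSupported. A hypothetical loader sketch:

```go
package main

import (
	"errors"
	"log"

	"github.com/cilium/ebpf"
)

// loadCollection is illustrative: a program calling a kfunc the running
// kernel doesn't export fails with an error wrapping ebpf.ErrNotSupported.
func loadCollection(spec *ebpf.CollectionSpec) (*ebpf.Collection, error) {
	coll, err := ebpf.NewCollection(spec)
	if errors.Is(err, ebpf.ErrNotSupported) {
		log.Println("kernel lacks a required kfunc; consider a fallback path")
	}
	return coll, err
}
```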
+func fixupKfuncs(insns asm.Instructions) (_ handles, err error) { + closeOnError := func(c io.Closer) { + if err != nil { + c.Close() + } + } + + iter := insns.Iterate() + for iter.Next() { + ins := iter.Ins + if metadata := ins.Metadata.Get(kfuncMetaKey{}); metadata != nil { + goto fixups + } + } + + return nil, nil + +fixups: + // only load the kernel spec if we found at least one kfunc call + kernelSpec, err := btf.LoadKernelSpec() + if err != nil { + return nil, err + } + + fdArray := make(handles, 0) + defer closeOnError(&fdArray) + + for { + ins := iter.Ins + + metadata := ins.Metadata.Get(kfuncMetaKey{}) + if metadata == nil { + if !iter.Next() { + // break loop if this was the last instruction in the stream. + break + } + continue + } + + // check meta, if no meta return err + kfm, _ := metadata.(*kfuncMeta) + if kfm == nil { + return nil, fmt.Errorf("kfuncMetaKey doesn't contain kfuncMeta") + } + + target := btf.Type((*btf.Func)(nil)) + spec, module, err := findTargetInKernel(kernelSpec, kfm.Func.Name, &target) + if kfm.Binding == elf.STB_WEAK && errors.Is(err, btf.ErrNotFound) { + if ins.IsKfuncCall() { + // If the kfunc call is weak and not found, poison the call. Use a recognizable constant + // to make it easier to debug. And set src to zero so the verifier doesn't complain + // about the invalid imm/offset values before dead-code elimination. + ins.Constant = kfuncCallPoisonBase + ins.Src = 0 + } else if ins.OpCode.IsDWordLoad() { + // If the kfunc DWordLoad is weak and not found, set its address to 0. + ins.Constant = 0 + ins.Src = 0 + } else { + return nil, fmt.Errorf("only kfunc calls and dword loads may have kfunc metadata") + } + + iter.Next() + continue + } + // Error on non-weak kfunc not found. + if errors.Is(err, btf.ErrNotFound) { + return nil, fmt.Errorf("kfunc %q: %w", kfm.Func.Name, ErrNotSupported) + } + if err != nil { + return nil, err + } + + idx, err := fdArray.add(module) + if err != nil { + return nil, err + } + + if err := btf.CheckTypeCompatibility(kfm.Func.Type, target.(*btf.Func).Type); err != nil { + return nil, &incompatibleKfuncError{kfm.Func.Name, err} + } + + id, err := spec.TypeID(target) + if err != nil { + return nil, err + } + + ins.Constant = int64(id) + ins.Offset = int16(idx) + + if !iter.Next() { + break + } + } + + return fdArray, nil +} + +type incompatibleKfuncError struct { + name string + err error +} + +func (ike *incompatibleKfuncError) Error() string { + return fmt.Sprintf("kfunc %q: %s", ike.name, ike.err) +} + // fixupProbeReadKernel replaces calls to bpf_probe_read_{kernel,user}(_str) // with bpf_probe_read(_str) on kernels that don't support it yet. func fixupProbeReadKernel(ins *asm.Instruction) { @@ -211,28 +398,62 @@ func fixupProbeReadKernel(ins *asm.Instruction) { } } -var kernelBTF struct { - sync.Mutex - spec *btf.Spec -} - -// maybeLoadKernelBTF loads the current kernel's BTF if spec is nil, otherwise -// it returns spec unchanged. +// resolveKconfigReferences creates and populates a .kconfig map if necessary. // -// The kernel BTF is cached for the lifetime of the process. -func maybeLoadKernelBTF(spec *btf.Spec) (*btf.Spec, error) { - if spec != nil { - return spec, nil +// Returns a nil Map and no error if no references exist. 
+func resolveKconfigReferences(insns asm.Instructions) (_ *Map, err error) { + closeOnError := func(c io.Closer) { + if err != nil { + c.Close() + } + } + + var spec *MapSpec + iter := insns.Iterate() + for iter.Next() { + meta, _ := iter.Ins.Metadata.Get(kconfigMetaKey{}).(*kconfigMeta) + if meta != nil { + spec = meta.Map + break + } + } + + if spec == nil { + return nil, nil } - kernelBTF.Lock() - defer kernelBTF.Unlock() + cpy := spec.Copy() + if err := resolveKconfig(cpy); err != nil { + return nil, err + } + + kconfig, err := NewMap(cpy) + if err != nil { + return nil, err + } + defer closeOnError(kconfig) + + // Resolve all instructions which load from .kconfig map with actual map + // and offset inside it. + iter = insns.Iterate() + for iter.Next() { + meta, _ := iter.Ins.Metadata.Get(kconfigMetaKey{}).(*kconfigMeta) + if meta == nil { + continue + } + + if meta.Map != spec { + return nil, fmt.Errorf("instruction %d: reference to multiple .kconfig maps is not allowed", iter.Index) + } + + if err := iter.Ins.AssociateMap(kconfig); err != nil { + return nil, fmt.Errorf("instruction %d: %w", iter.Index, err) + } - if kernelBTF.spec != nil { - return kernelBTF.spec, nil + // Encode a map read at the offset of the var in the datasec. + iter.Ins.Constant = int64(uint64(meta.Offset) << 32) + iter.Ins.Metadata.Set(kconfigMetaKey{}, nil) } - var err error - kernelBTF.spec, err = btf.LoadKernelSpec() - return kernelBTF.spec, err + return kconfig, nil } diff --git a/vendor/github.com/cilium/ebpf/map.go b/vendor/github.com/cilium/ebpf/map.go index e4a6c87e92..e46fa3f12e 100644 --- a/vendor/github.com/cilium/ebpf/map.go +++ b/vendor/github.com/cilium/ebpf/map.go @@ -6,14 +6,18 @@ import ( "fmt" "io" "math/rand" + "os" "path/filepath" "reflect" + "strings" + "sync" "time" "unsafe" "github.com/cilium/ebpf/btf" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/sysenc" "github.com/cilium/ebpf/internal/unix" ) @@ -77,9 +81,6 @@ type MapSpec struct { // The key and value type of this map. May be nil. Key, Value btf.Type - - // The BTF associated with this map. - BTF *btf.Spec } func (ms *MapSpec) String() string { @@ -104,32 +105,55 @@ func (ms *MapSpec) Copy() *MapSpec { return &cpy } -// hasBTF returns true if the MapSpec has a valid BTF spec and if its -// map type supports associated BTF metadata in the kernel. -func (ms *MapSpec) hasBTF() bool { - return ms.BTF != nil && ms.Type.hasBTF() -} +// fixupMagicFields fills fields of MapSpec which are usually +// left empty in ELF or which depend on runtime information. +// +// The method doesn't modify Spec, instead returning a copy. +// The copy is only performed if fixups are necessary, so callers mustn't mutate +// the returned spec. 
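A sketch of the fixups described above: a perf event array spec may leave KeySize, ValueSize and MaxEntries at zero, and creation fills them in, clamping MaxEntries to the possible-CPU count. The map name is illustrative:

```go
package main

import "github.com/cilium/ebpf"

// newPerfEventArray relies on fixupMagicFields: KeySize and ValueSize
// become 4, MaxEntries defaults to the number of possible CPUs.
func newPerfEventArray() (*ebpf.Map, error) {
	return ebpf.NewMap(&ebpf.MapSpec{
		Name: "events",
		Type: ebpf.PerfEventArray,
	})
}
```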
+func (spec *MapSpec) fixupMagicFields() (*MapSpec, error) { + switch spec.Type { + case ArrayOfMaps, HashOfMaps: + if spec.ValueSize != 0 && spec.ValueSize != 4 { + return nil, errors.New("ValueSize must be zero or four for map of map") + } -func (ms *MapSpec) clampPerfEventArraySize() error { - if ms.Type != PerfEventArray { - return nil - } + spec = spec.Copy() + spec.ValueSize = 4 - n, err := internal.PossibleCPUs() - if err != nil { - return fmt.Errorf("perf event array: %w", err) - } + case PerfEventArray: + if spec.KeySize != 0 && spec.KeySize != 4 { + return nil, errors.New("KeySize must be zero or four for perf event array") + } + + if spec.ValueSize != 0 && spec.ValueSize != 4 { + return nil, errors.New("ValueSize must be zero or four for perf event array") + } + + spec = spec.Copy() + spec.KeySize = 4 + spec.ValueSize = 4 + + n, err := PossibleCPU() + if err != nil { + return nil, fmt.Errorf("fixup perf event array: %w", err) + } - if n := uint32(n); ms.MaxEntries > n { - ms.MaxEntries = n + if n := uint32(n); spec.MaxEntries == 0 || spec.MaxEntries > n { + // MaxEntries should be zero most of the time, but there is code + // out there which hardcodes large constants. Clamp the number + // of entries to the number of CPUs at most. Allow creating maps with + // less than n items since some kernel selftests relied on this + // behaviour in the past. + spec.MaxEntries = n + } } - return nil + return spec, nil } // dataSection returns the contents and BTF Datasec descriptor of the spec. func (ms *MapSpec) dataSection() ([]byte, *btf.Datasec, error) { - if ms.Value == nil { return nil, nil, errMapNoBTFValue } @@ -158,25 +182,42 @@ type MapKV struct { Value interface{} } -func (ms *MapSpec) checkCompatibility(m *Map) error { - switch { - case m.typ != ms.Type: - return fmt.Errorf("expected type %v, got %v: %w", ms.Type, m.typ, ErrMapIncompatible) - - case m.keySize != ms.KeySize: - return fmt.Errorf("expected key size %v, got %v: %w", ms.KeySize, m.keySize, ErrMapIncompatible) +// Compatible returns nil if an existing map may be used instead of creating +// one from the spec. +// +// Returns an error wrapping [ErrMapIncompatible] otherwise. +func (ms *MapSpec) Compatible(m *Map) error { + ms, err := ms.fixupMagicFields() + if err != nil { + return err + } - case m.valueSize != ms.ValueSize: - return fmt.Errorf("expected value size %v, got %v: %w", ms.ValueSize, m.valueSize, ErrMapIncompatible) + diffs := []string{} + if m.typ != ms.Type { + diffs = append(diffs, fmt.Sprintf("Type: %s changed to %s", m.typ, ms.Type)) + } + if m.keySize != ms.KeySize { + diffs = append(diffs, fmt.Sprintf("KeySize: %d changed to %d", m.keySize, ms.KeySize)) + } + if m.valueSize != ms.ValueSize { + diffs = append(diffs, fmt.Sprintf("ValueSize: %d changed to %d", m.valueSize, ms.ValueSize)) + } + if m.maxEntries != ms.MaxEntries { + diffs = append(diffs, fmt.Sprintf("MaxEntries: %d changed to %d", m.maxEntries, ms.MaxEntries)) + } - case !(ms.Type == PerfEventArray && ms.MaxEntries == 0) && - m.maxEntries != ms.MaxEntries: - return fmt.Errorf("expected max entries %v, got %v: %w", ms.MaxEntries, m.maxEntries, ErrMapIncompatible) + // BPF_F_RDONLY_PROG is set unconditionally for devmaps. Explicitly allow this + // mismatch. 
+ if !((ms.Type == DevMap || ms.Type == DevMapHash) && m.flags^ms.Flags == unix.BPF_F_RDONLY_PROG) && + m.flags != ms.Flags { + diffs = append(diffs, fmt.Sprintf("Flags: %d changed to %d", m.flags, ms.Flags)) + } - case m.flags != ms.Flags: - return fmt.Errorf("expected flags %v, got %v: %w", ms.Flags, m.flags, ErrMapIncompatible) + if len(diffs) == 0 { + return nil } - return nil + + return fmt.Errorf("%s: %w", strings.Join(diffs, ", "), ErrMapIncompatible) } // Map represents a Map file descriptor. @@ -241,10 +282,7 @@ func NewMap(spec *MapSpec) (*Map, error) { // // May return an error wrapping ErrMapIncompatible. func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) { - handles := newHandleCache() - defer handles.close() - - m, err := newMapWithOptions(spec, opts, handles) + m, err := newMapWithOptions(spec, opts) if err != nil { return nil, fmt.Errorf("creating map: %w", err) } @@ -257,7 +295,7 @@ func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) { return m, nil } -func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_ *Map, err error) { +func newMapWithOptions(spec *MapSpec, opts MapOptions) (_ *Map, err error) { closeOnError := func(c io.Closer) { if err != nil { c.Close() @@ -284,7 +322,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_ } defer closeOnError(m) - if err := spec.checkCompatibility(m); err != nil { + if err := spec.Compatible(m); err != nil { return nil, fmt.Errorf("use pinned map %s: %w", spec.Name, err) } @@ -307,7 +345,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_ return nil, errors.New("inner maps cannot be pinned") } - template, err := spec.InnerMap.createMap(nil, opts, handles) + template, err := spec.InnerMap.createMap(nil, opts) if err != nil { return nil, fmt.Errorf("inner map: %w", err) } @@ -319,7 +357,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_ innerFd = template.fd } - m, err := spec.createMap(innerFd, opts, handles) + m, err := spec.createMap(innerFd, opts) if err != nil { return nil, err } @@ -328,7 +366,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_ if spec.Pinning == PinByName { path := filepath.Join(opts.PinPath, spec.Name) if err := m.Pin(path); err != nil { - return nil, fmt.Errorf("pin map: %w", err) + return nil, fmt.Errorf("pin map to %s: %w", path, err) } } @@ -337,15 +375,13 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_ // createMap validates the spec's properties and creates the map in the kernel // using the given opts. It does not populate or freeze the map. -func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions, handles *handleCache) (_ *Map, err error) { +func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions) (_ *Map, err error) { closeOnError := func(closer io.Closer) { if err != nil { closer.Close() } } - spec = spec.Copy() - // Kernels 4.13 through 5.4 used a struct bpf_map_def that contained // additional 'inner_map_idx' and later 'numa_node' fields. 
// In order to support loading these definitions, tolerate the presence of @@ -356,56 +392,9 @@ func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions, handles *handleCa } } - switch spec.Type { - case ArrayOfMaps, HashOfMaps: - if err := haveNestedMaps(); err != nil { - return nil, err - } - - if spec.ValueSize != 0 && spec.ValueSize != 4 { - return nil, errors.New("ValueSize must be zero or four for map of map") - } - spec.ValueSize = 4 - - case PerfEventArray: - if spec.KeySize != 0 && spec.KeySize != 4 { - return nil, errors.New("KeySize must be zero or four for perf event array") - } - spec.KeySize = 4 - - if spec.ValueSize != 0 && spec.ValueSize != 4 { - return nil, errors.New("ValueSize must be zero or four for perf event array") - } - spec.ValueSize = 4 - - if spec.MaxEntries == 0 { - n, err := internal.PossibleCPUs() - if err != nil { - return nil, fmt.Errorf("perf event array: %w", err) - } - spec.MaxEntries = uint32(n) - } - } - - if spec.Flags&(unix.BPF_F_RDONLY_PROG|unix.BPF_F_WRONLY_PROG) > 0 || spec.Freeze { - if err := haveMapMutabilityModifiers(); err != nil { - return nil, fmt.Errorf("map create: %w", err) - } - } - if spec.Flags&unix.BPF_F_MMAPABLE > 0 { - if err := haveMmapableMaps(); err != nil { - return nil, fmt.Errorf("map create: %w", err) - } - } - if spec.Flags&unix.BPF_F_INNER_MAP > 0 { - if err := haveInnerMaps(); err != nil { - return nil, fmt.Errorf("map create: %w", err) - } - } - if spec.Flags&unix.BPF_F_NO_PREALLOC > 0 { - if err := haveNoPreallocMaps(); err != nil { - return nil, fmt.Errorf("map create: %w", err) - } + spec, err = spec.fixupMagicFields() + if err != nil { + return nil, err } attr := sys.MapCreateAttr{ @@ -413,7 +402,7 @@ func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions, handles *handleCa KeySize: spec.KeySize, ValueSize: spec.ValueSize, MaxEntries: spec.MaxEntries, - MapFlags: spec.Flags, + MapFlags: sys.MapFlags(spec.Flags), NumaNode: spec.NumaNode, } @@ -425,52 +414,98 @@ func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions, handles *handleCa attr.MapName = sys.NewObjName(spec.Name) } - if spec.hasBTF() { - handle, err := handles.btfHandle(spec.BTF) + if spec.Key != nil || spec.Value != nil { + handle, keyTypeID, valueTypeID, err := btf.MarshalMapKV(spec.Key, spec.Value) if err != nil && !errors.Is(err, btf.ErrNotSupported) { return nil, fmt.Errorf("load BTF: %w", err) } if handle != nil { - keyTypeID, err := spec.BTF.TypeID(spec.Key) - if err != nil { - return nil, err - } - - valueTypeID, err := spec.BTF.TypeID(spec.Value) - if err != nil { - return nil, err - } + defer handle.Close() + // Use BTF k/v during map creation. attr.BtfFd = uint32(handle.FD()) - attr.BtfKeyTypeId = uint32(keyTypeID) - attr.BtfValueTypeId = uint32(valueTypeID) + attr.BtfKeyTypeId = keyTypeID + attr.BtfValueTypeId = valueTypeID } } fd, err := sys.MapCreate(&attr) + + // Some map types don't support BTF k/v in earlier kernel versions. + // Remove BTF metadata and retry map creation. 
+ if (errors.Is(err, sys.ENOTSUPP) || errors.Is(err, unix.EINVAL)) && attr.BtfFd != 0 { + attr.BtfFd, attr.BtfKeyTypeId, attr.BtfValueTypeId = 0, 0, 0 + fd, err = sys.MapCreate(&attr) + } if err != nil { - if errors.Is(err, unix.EPERM) { - return nil, fmt.Errorf("map create: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err) - } - if !spec.hasBTF() { - return nil, fmt.Errorf("map create without BTF: %w", err) - } - if errors.Is(err, unix.EINVAL) && attr.MaxEntries == 0 { - return nil, fmt.Errorf("map create: %w (MaxEntries may be incorrectly set to zero)", err) - } - return nil, fmt.Errorf("map create: %w", err) + return nil, handleMapCreateError(attr, spec, err) } - defer closeOnError(fd) + defer closeOnError(fd) m, err := newMap(fd, spec.Name, spec.Type, spec.KeySize, spec.ValueSize, spec.MaxEntries, spec.Flags) if err != nil { return nil, fmt.Errorf("map create: %w", err) } - return m, nil } +func handleMapCreateError(attr sys.MapCreateAttr, spec *MapSpec, err error) error { + if errors.Is(err, unix.EPERM) { + return fmt.Errorf("map create: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err) + } + if errors.Is(err, unix.EINVAL) && spec.MaxEntries == 0 { + return fmt.Errorf("map create: %w (MaxEntries may be incorrectly set to zero)", err) + } + if errors.Is(err, unix.EINVAL) && spec.Type == UnspecifiedMap { + return fmt.Errorf("map create: cannot use type %s", UnspecifiedMap) + } + if errors.Is(err, unix.EINVAL) && spec.Flags&unix.BPF_F_NO_PREALLOC > 0 { + return fmt.Errorf("map create: %w (noPrealloc flag may be incompatible with map type %s)", err, spec.Type) + } + + switch spec.Type { + case ArrayOfMaps, HashOfMaps: + if haveFeatErr := haveNestedMaps(); haveFeatErr != nil { + return fmt.Errorf("map create: %w", haveFeatErr) + } + } + if spec.Flags&(unix.BPF_F_RDONLY_PROG|unix.BPF_F_WRONLY_PROG) > 0 || spec.Freeze { + if haveFeatErr := haveMapMutabilityModifiers(); haveFeatErr != nil { + return fmt.Errorf("map create: %w", haveFeatErr) + } + } + if spec.Flags&unix.BPF_F_MMAPABLE > 0 { + if haveFeatErr := haveMmapableMaps(); haveFeatErr != nil { + return fmt.Errorf("map create: %w", haveFeatErr) + } + } + if spec.Flags&unix.BPF_F_INNER_MAP > 0 { + if haveFeatErr := haveInnerMaps(); haveFeatErr != nil { + return fmt.Errorf("map create: %w", haveFeatErr) + } + } + if spec.Flags&unix.BPF_F_NO_PREALLOC > 0 { + if haveFeatErr := haveNoPreallocMaps(); haveFeatErr != nil { + return fmt.Errorf("map create: %w", haveFeatErr) + } + } + // BPF_MAP_TYPE_RINGBUF's max_entries must be a power-of-2 multiple of kernel's page size. + if errors.Is(err, unix.EINVAL) && + (attr.MapType == sys.BPF_MAP_TYPE_RINGBUF || attr.MapType == sys.BPF_MAP_TYPE_USER_RINGBUF) { + pageSize := uint32(os.Getpagesize()) + maxEntries := attr.MaxEntries + if maxEntries%pageSize != 0 || !internal.IsPow(maxEntries) { + return fmt.Errorf("map create: %w (ring map size %d not a multiple of page size %d)", err, maxEntries, pageSize) + } + } + if attr.BtfFd == 0 { + return fmt.Errorf("map create: %w (without BTF k/v)", err) + } + + return fmt.Errorf("map create: %w", err) +} + // newMap allocates and returns a new Map structure. // Sets the fullValueSize on per-CPU maps. 
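One of the cases handleMapCreateError explains is the ring buffer sizing rule. A sketch of a spec that satisfies it; the name and size are assumptions:

```go
package main

import (
	"os"

	"github.com/cilium/ebpf"
)

// newRingBuf sizes max_entries as a power-of-2 multiple of the page size,
// which BPF_MAP_TYPE_RINGBUF requires.
func newRingBuf() (*ebpf.Map, error) {
	return ebpf.NewMap(&ebpf.MapSpec{
		Name:       "rb",
		Type:       ebpf.RingBuf,
		MaxEntries: uint32(os.Getpagesize()) * 8, // 8 pages
	})
}
```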
func newMap(fd *sys.FD, name string, typ MapType, keySize, valueSize, maxEntries, flags uint32) (*Map, error) { @@ -490,12 +525,12 @@ func newMap(fd *sys.FD, name string, typ MapType, keySize, valueSize, maxEntries return m, nil } - possibleCPUs, err := internal.PossibleCPUs() + possibleCPUs, err := PossibleCPU() if err != nil { return nil, err } - m.fullValueSize = internal.Align(int(valueSize), 8) * possibleCPUs + m.fullValueSize = int(internal.Align(valueSize, 8)) * possibleCPUs return m, nil } @@ -540,7 +575,7 @@ func (m *Map) Info() (*MapInfo, error) { type MapLookupFlags uint64 // LookupLock look up the value of a spin-locked map. -const LookupLock MapLookupFlags = 4 +const LookupLock MapLookupFlags = unix.BPF_F_LOCK // Lookup retrieves a value from a Map. // @@ -549,12 +584,7 @@ const LookupLock MapLookupFlags = 4 // // Returns an error if the key doesn't exist, see ErrKeyNotExist. func (m *Map) Lookup(key, valueOut interface{}) error { - valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize) - if err := m.lookup(key, valuePtr, 0); err != nil { - return err - } - - return m.unmarshalValue(valueOut, valueBytes) + return m.LookupWithFlags(key, valueOut, 0) } // LookupWithFlags retrieves a value from a Map with flags. @@ -568,8 +598,12 @@ func (m *Map) Lookup(key, valueOut interface{}) error { // // Returns an error if the key doesn't exist, see ErrKeyNotExist. func (m *Map) LookupWithFlags(key, valueOut interface{}, flags MapLookupFlags) error { - valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize) - if err := m.lookup(key, valuePtr, flags); err != nil { + if m.typ.hasPerCPUValue() { + return m.lookupPerCPU(key, valueOut, flags) + } + + valueBytes := makeMapSyscallOutput(valueOut, m.fullValueSize) + if err := m.lookup(key, valueBytes.Pointer(), flags); err != nil { return err } @@ -580,7 +614,7 @@ func (m *Map) LookupWithFlags(key, valueOut interface{}, flags MapLookupFlags) e // // Returns ErrKeyNotExist if the key doesn't exist. func (m *Map) LookupAndDelete(key, valueOut interface{}) error { - return m.lookupAndDelete(key, valueOut, 0) + return m.LookupAndDeleteWithFlags(key, valueOut, 0) } // LookupAndDeleteWithFlags retrieves and deletes a value from a Map. @@ -591,7 +625,15 @@ func (m *Map) LookupAndDelete(key, valueOut interface{}) error { // // Returns ErrKeyNotExist if the key doesn't exist. func (m *Map) LookupAndDeleteWithFlags(key, valueOut interface{}, flags MapLookupFlags) error { - return m.lookupAndDelete(key, valueOut, flags) + if m.typ.hasPerCPUValue() { + return m.lookupAndDeletePerCPU(key, valueOut, flags) + } + + valueBytes := makeMapSyscallOutput(valueOut, m.fullValueSize) + if err := m.lookupAndDelete(key, valueBytes.Pointer(), flags); err != nil { + return err + } + return m.unmarshalValue(valueOut, valueBytes) } // LookupBytes gets a value from Map. 
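The per-CPU dispatch above, together with ensurePerCPUSlice in the next hunk, lets a caller hand Lookup a pointer to a nil slice and have it sized to the possible-CPU count. A sketch for a per-CPU map with 8-byte values:

```go
package main

import "github.com/cilium/ebpf"

// readPerCPU is illustrative: for a per-CPU map the library allocates one
// element per possible CPU when given a pointer to a nil slice.
func readPerCPU(m *ebpf.Map, key uint32) ([]uint64, error) {
	var perCPU []uint64
	if err := m.Lookup(key, &perCPU); err != nil {
		return nil, err
	}
	return perCPU, nil
}
```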
@@ -609,6 +651,18 @@ func (m *Map) LookupBytes(key interface{}) ([]byte, error) { return valueBytes, err } +func (m *Map) lookupPerCPU(key, valueOut any, flags MapLookupFlags) error { + slice, err := ensurePerCPUSlice(valueOut, int(m.valueSize)) + if err != nil { + return err + } + valueBytes := make([]byte, m.fullValueSize) + if err := m.lookup(key, sys.NewSlicePointer(valueBytes), flags); err != nil { + return err + } + return unmarshalPerCPUValue(slice, int(m.valueSize), valueBytes) +} + func (m *Map) lookup(key interface{}, valueOut sys.Pointer, flags MapLookupFlags) error { keyPtr, err := m.marshalKey(key) if err != nil { @@ -628,9 +682,57 @@ func (m *Map) lookup(key interface{}, valueOut sys.Pointer, flags MapLookupFlags return nil } -func (m *Map) lookupAndDelete(key, valueOut interface{}, flags MapLookupFlags) error { - valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize) +func (m *Map) lookupAndDeletePerCPU(key, valueOut any, flags MapLookupFlags) error { + slice, err := ensurePerCPUSlice(valueOut, int(m.valueSize)) + if err != nil { + return err + } + valueBytes := make([]byte, m.fullValueSize) + if err := m.lookupAndDelete(key, sys.NewSlicePointer(valueBytes), flags); err != nil { + return err + } + return unmarshalPerCPUValue(slice, int(m.valueSize), valueBytes) +} +// ensurePerCPUSlice allocates a slice for a per-CPU value if necessary. +func ensurePerCPUSlice(sliceOrPtr any, elemLength int) (any, error) { + sliceOrPtrType := reflect.TypeOf(sliceOrPtr) + if sliceOrPtrType.Kind() == reflect.Slice { + // The target is a slice, the caller is responsible for ensuring that + // size is correct. + return sliceOrPtr, nil + } + + slicePtrType := sliceOrPtrType + if slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice { + return nil, fmt.Errorf("per-cpu value requires a slice or a pointer to slice") + } + + possibleCPUs, err := PossibleCPU() + if err != nil { + return nil, err + } + + sliceType := slicePtrType.Elem() + slice := reflect.MakeSlice(sliceType, possibleCPUs, possibleCPUs) + + sliceElemType := sliceType.Elem() + sliceElemIsPointer := sliceElemType.Kind() == reflect.Ptr + reflect.ValueOf(sliceOrPtr).Elem().Set(slice) + if !sliceElemIsPointer { + return slice.Interface(), nil + } + sliceElemType = sliceElemType.Elem() + + for i := 0; i < possibleCPUs; i++ { + newElem := reflect.New(sliceElemType) + slice.Index(i).Set(newElem) + } + + return slice.Interface(), nil +} + +func (m *Map) lookupAndDelete(key any, valuePtr sys.Pointer, flags MapLookupFlags) error { keyPtr, err := m.marshalKey(key) if err != nil { return fmt.Errorf("can't marshal key: %w", err) @@ -647,7 +749,7 @@ func (m *Map) lookupAndDelete(key, valueOut interface{}, flags MapLookupFlags) e return fmt.Errorf("lookup and delete: %w", wrapMapError(err)) } - return m.unmarshalValue(valueOut, valueBytes) + return nil } // MapUpdateFlags controls the behaviour of the Map.Update call. @@ -674,15 +776,32 @@ func (m *Map) Put(key, value interface{}) error { } // Update changes the value of a key. 
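The reworked Update below routes per-CPU maps through updatePerCPU, which expects one value per possible CPU. A sketch that zeroes a per-CPU counter:

```go
package main

import "github.com/cilium/ebpf"

// zeroPerCPU is illustrative: Update on a per-CPU map takes a slice with
// one element per possible CPU.
func zeroPerCPU(m *ebpf.Map, key uint32) error {
	n, err := ebpf.PossibleCPU()
	if err != nil {
		return err
	}
	return m.Update(key, make([]uint64, n), ebpf.UpdateAny)
}
```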
-func (m *Map) Update(key, value interface{}, flags MapUpdateFlags) error {
-	keyPtr, err := m.marshalKey(key)
-	if err != nil {
-		return fmt.Errorf("can't marshal key: %w", err)
+func (m *Map) Update(key, value any, flags MapUpdateFlags) error {
+	if m.typ.hasPerCPUValue() {
+		return m.updatePerCPU(key, value, flags)
 	}
 
 	valuePtr, err := m.marshalValue(value)
 	if err != nil {
-		return fmt.Errorf("can't marshal value: %w", err)
+		return fmt.Errorf("marshal value: %w", err)
+	}
+
+	return m.update(key, valuePtr, flags)
+}
+
+func (m *Map) updatePerCPU(key, value any, flags MapUpdateFlags) error {
+	valuePtr, err := marshalPerCPUValue(value, int(m.valueSize))
+	if err != nil {
+		return fmt.Errorf("marshal value: %w", err)
+	}
+
+	return m.update(key, valuePtr, flags)
+}
+
+func (m *Map) update(key any, valuePtr sys.Pointer, flags MapUpdateFlags) error {
+	keyPtr, err := m.marshalKey(key)
+	if err != nil {
+		return fmt.Errorf("marshal key: %w", err)
 	}
 
 	attr := sys.MapUpdateElemAttr{
@@ -725,13 +844,13 @@ func (m *Map) Delete(key interface{}) error {
 //
 // Returns ErrKeyNotExist if there is no next key.
 func (m *Map) NextKey(key, nextKeyOut interface{}) error {
-	nextKeyPtr, nextKeyBytes := makeBuffer(nextKeyOut, int(m.keySize))
+	nextKeyBytes := makeMapSyscallOutput(nextKeyOut, int(m.keySize))
 
-	if err := m.nextKey(key, nextKeyPtr); err != nil {
+	if err := m.nextKey(key, nextKeyBytes.Pointer()); err != nil {
 		return err
 	}
 
-	if err := m.unmarshalKey(nextKeyOut, nextKeyBytes); err != nil {
+	if err := nextKeyBytes.Unmarshal(nextKeyOut); err != nil {
 		return fmt.Errorf("can't unmarshal next key: %w", err)
 	}
 	return nil
@@ -798,12 +917,22 @@ func (m *Map) nextKey(key interface{}, nextKeyOut sys.Pointer) error {
 	return nil
 }
 
+var mmapProtectedPage = sync.OnceValues(func() ([]byte, error) {
+	return unix.Mmap(-1, 0, os.Getpagesize(), unix.PROT_NONE, unix.MAP_ANON|unix.MAP_SHARED)
+})
+
 // guessNonExistentKey attempts to perform a map lookup that returns ENOENT.
 // This is necessary on kernels before 4.4.132, since those don't support
 // iterating maps from the start by providing an invalid key pointer.
 func (m *Map) guessNonExistentKey() ([]byte, error) {
-	// Provide an invalid value pointer to prevent a copy on the kernel side.
-	valuePtr := sys.NewPointer(unsafe.Pointer(^uintptr(0)))
+	// Map a protected page and use that as the value pointer. This saves some
+	// work copying out the value, which we're not interested in.
+	page, err := mmapProtectedPage()
+	if err != nil {
+		return nil, err
+	}
+	valuePtr := sys.NewSlicePointer(page)
+
 	randKey := make([]byte, int(m.keySize))
 
 	for i := 0; i < 4; i++ {
@@ -844,14 +973,19 @@
 //
 // "keysOut" and "valuesOut" must be of type slice, a pointer
 // to a slice or buffer will not work.
-// "prevKey" is the key to start the batch lookup from, it will
-// *not* be included in the results. Use nil to start at the first key.
+// "cursor" is a pointer to an opaque handle. It must be non-nil. Pass
+// "cursor" to subsequent calls of this function to continue the batching
+// operation in the case of chunking.
+//
+// Warning: This API is not very safe to use as the kernel implementation for
+// batching relies on the user to be aware of subtle details with regard to
+// different map type implementations.
 //
 // ErrKeyNotExist is returned when the batch lookup has reached
 // the end of all possible results, even when partial results
 // are returned. It should be used to evaluate when lookup is "done".
-func (m *Map) BatchLookup(prevKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
-	return m.batchLookup(sys.BPF_MAP_LOOKUP_BATCH, prevKey, nextKeyOut, keysOut, valuesOut, opts)
+func (m *Map) BatchLookup(cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
+	return m.batchLookup(sys.BPF_MAP_LOOKUP_BATCH, cursor, keysOut, valuesOut, opts)
 }
 
 // BatchLookupAndDelete looks up many elements in a map at once,
@@ -859,47 +993,113 @@ func (m *Map) BatchLookup(prevKey, nextKeyOut, keysOut, valuesOut interface{}, o
 // It then deletes all those elements.
 // "keysOut" and "valuesOut" must be of type slice, a pointer
 // to a slice or buffer will not work.
-// "prevKey" is the key to start the batch lookup from, it will
-// *not* be included in the results. Use nil to start at the first key.
+// "cursor" is a pointer to an opaque handle. It must be non-nil. Pass
+// "cursor" to subsequent calls of this function to continue the batching
+// operation in the case of chunking.
+//
+// Warning: This API is not very safe to use as the kernel implementation for
+// batching relies on the user to be aware of subtle details with regard to
+// different map type implementations.
 //
 // ErrKeyNotExist is returned when the batch lookup has reached
 // the end of all possible results, even when partial results
 // are returned. It should be used to evaluate when lookup is "done".
-func (m *Map) BatchLookupAndDelete(prevKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
-	return m.batchLookup(sys.BPF_MAP_LOOKUP_AND_DELETE_BATCH, prevKey, nextKeyOut, keysOut, valuesOut, opts)
+func (m *Map) BatchLookupAndDelete(cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
+	return m.batchLookup(sys.BPF_MAP_LOOKUP_AND_DELETE_BATCH, cursor, keysOut, valuesOut, opts)
 }
 
-func (m *Map) batchLookup(cmd sys.Cmd, startKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
-	if err := haveBatchAPI(); err != nil {
-		return 0, err
-	}
+// MapBatchCursor represents a starting point for a batch operation.
+type MapBatchCursor struct {
+	m      *Map
+	opaque []byte
+}
+
+func (m *Map) batchLookup(cmd sys.Cmd, cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
 	if m.typ.hasPerCPUValue() {
-		return 0, ErrNotSupported
+		return m.batchLookupPerCPU(cmd, cursor, keysOut, valuesOut, opts)
 	}
-	keysValue := reflect.ValueOf(keysOut)
-	if keysValue.Kind() != reflect.Slice {
-		return 0, fmt.Errorf("keys must be a slice")
+
+	count, err := batchCount(keysOut, valuesOut)
+	if err != nil {
+		return 0, err
+	}
+
+	valueBuf := sysenc.SyscallOutput(valuesOut, count*int(m.fullValueSize))
+
+	n, err := m.batchLookupCmd(cmd, cursor, count, keysOut, valueBuf.Pointer(), opts)
+	if err != nil {
+		return n, err
 	}
-	valuesValue := reflect.ValueOf(valuesOut)
-	if valuesValue.Kind() != reflect.Slice {
-		return 0, fmt.Errorf("valuesOut must be a slice")
+
+	err = valueBuf.Unmarshal(valuesOut)
+	if err != nil {
+		return 0, err
 	}
-	count := keysValue.Len()
-	if count != valuesValue.Len() {
-		return 0, fmt.Errorf("keysOut and valuesOut must be the same length")
+
+	return n, nil
+}
+
+func (m *Map) batchLookupPerCPU(cmd sys.Cmd, cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
+	count, err := sliceLen(keysOut)
+	if err != nil {
+		return 0, fmt.Errorf("keys: %w", err)
 	}
-	keyBuf := make([]byte, count*int(m.keySize))
-	keyPtr := sys.NewSlicePointer(keyBuf)
+
 	valueBuf := make([]byte, count*int(m.fullValueSize))
 	valuePtr := sys.NewSlicePointer(valueBuf)
-	nextPtr, nextBuf := makeBuffer(nextKeyOut, int(m.keySize))
+
+	n, sysErr := m.batchLookupCmd(cmd, cursor, count, keysOut, valuePtr, opts)
+	if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) {
+		return 0, sysErr
+	}
+
+	err = unmarshalBatchPerCPUValue(valuesOut, count, int(m.valueSize), valueBuf)
+	if err != nil {
+		return 0, err
+	}
+
+	return n, sysErr
+}
+
+func (m *Map) batchLookupCmd(cmd sys.Cmd, cursor *MapBatchCursor, count int, keysOut any, valuePtr sys.Pointer, opts *BatchOptions) (int, error) {
+	cursorLen := int(m.keySize)
+	if cursorLen < 4 {
+		// * generic_map_lookup_batch requires that batch_out is key_size bytes.
+		//   This is used by array and LPM maps.
+		//
+		// * __htab_map_lookup_and_delete_batch requires u32. This is used by the
+		//   various hash maps.
+		//
+		// Use a minimum of 4 bytes to avoid having to distinguish between the two.
+		cursorLen = 4
+	}
+
+	inBatch := cursor.opaque
+	if inBatch == nil {
+		// This is the first lookup, allocate a buffer to hold the cursor.
+		cursor.opaque = make([]byte, cursorLen)
+		cursor.m = m
+	} else if cursor.m != m {
+		// Prevent reuse of a cursor across maps. First, it's unlikely to work.
+		// Second, the maps may require different cursorLen and cursor.opaque
+		// may therefore be too short. This could lead to the kernel clobbering
+		// user space memory.
+ return 0, errors.New("a cursor may not be reused across maps") + } + + if err := haveBatchAPI(); err != nil { + return 0, err + } + + keyBuf := sysenc.SyscallOutput(keysOut, count*int(m.keySize)) attr := sys.MapLookupBatchAttr{ MapFd: m.fd.Uint(), - Keys: keyPtr, + Keys: keyBuf.Pointer(), Values: valuePtr, Count: uint32(count), - OutBatch: nextPtr, + InBatch: sys.NewSlicePointer(inBatch), + OutBatch: sys.NewSlicePointer(cursor.opaque), } if opts != nil { @@ -907,30 +1107,13 @@ func (m *Map) batchLookup(cmd sys.Cmd, startKey, nextKeyOut, keysOut, valuesOut attr.Flags = opts.Flags } - var err error - if startKey != nil { - attr.InBatch, err = marshalPtr(startKey, int(m.keySize)) - if err != nil { - return 0, err - } - } - _, sysErr := sys.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) sysErr = wrapMapError(sysErr) if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) { return 0, sysErr } - err = m.unmarshalKey(nextKeyOut, nextBuf) - if err != nil { - return 0, err - } - err = unmarshalBytes(keysOut, keyBuf) - if err != nil { - return 0, err - } - err = unmarshalBytes(valuesOut, valueBuf) - if err != nil { + if err := keyBuf.Unmarshal(keysOut); err != nil { return 0, err } @@ -942,33 +1125,25 @@ func (m *Map) batchLookup(cmd sys.Cmd, startKey, nextKeyOut, keysOut, valuesOut // "keys" and "values" must be of type slice, a pointer // to a slice or buffer will not work. func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, error) { - if err := haveBatchAPI(); err != nil { - return 0, err - } if m.typ.hasPerCPUValue() { - return 0, ErrNotSupported - } - keysValue := reflect.ValueOf(keys) - if keysValue.Kind() != reflect.Slice { - return 0, fmt.Errorf("keys must be a slice") + return m.batchUpdatePerCPU(keys, values, opts) } - valuesValue := reflect.ValueOf(values) - if valuesValue.Kind() != reflect.Slice { - return 0, fmt.Errorf("values must be a slice") - } - var ( - count = keysValue.Len() - valuePtr sys.Pointer - err error - ) - if count != valuesValue.Len() { - return 0, fmt.Errorf("keys and values must be the same length") + + count, err := batchCount(keys, values) + if err != nil { + return 0, err } - keyPtr, err := marshalPtr(keys, count*int(m.keySize)) + + valuePtr, err := marshalMapSyscallInput(values, count*int(m.valueSize)) if err != nil { return 0, err } - valuePtr, err = marshalPtr(values, count*int(m.valueSize)) + + return m.batchUpdate(count, keys, valuePtr, opts) +} + +func (m *Map) batchUpdate(count int, keys any, valuePtr sys.Pointer, opts *BatchOptions) (int, error) { + keyPtr, err := marshalMapSyscallInput(keys, count*int(m.keySize)) if err != nil { return 0, err } @@ -986,27 +1161,38 @@ func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, er err = sys.MapUpdateBatch(&attr) if err != nil { + if haveFeatErr := haveBatchAPI(); haveFeatErr != nil { + return 0, haveFeatErr + } return int(attr.Count), fmt.Errorf("batch update: %w", wrapMapError(err)) } return int(attr.Count), nil } +func (m *Map) batchUpdatePerCPU(keys, values any, opts *BatchOptions) (int, error) { + count, err := sliceLen(keys) + if err != nil { + return 0, fmt.Errorf("keys: %w", err) + } + + valueBuf, err := marshalBatchPerCPUValue(values, count, int(m.valueSize)) + if err != nil { + return 0, err + } + + return m.batchUpdate(count, keys, sys.NewSlicePointer(valueBuf), opts) +} + // BatchDelete batch deletes entries in the map by keys. // "keys" must be of type slice, a pointer to a slice or buffer will not work. 
func (m *Map) BatchDelete(keys interface{}, opts *BatchOptions) (int, error) { - if err := haveBatchAPI(); err != nil { - return 0, err - } - if m.typ.hasPerCPUValue() { - return 0, ErrNotSupported - } - keysValue := reflect.ValueOf(keys) - if keysValue.Kind() != reflect.Slice { - return 0, fmt.Errorf("keys must be a slice") + count, err := sliceLen(keys) + if err != nil { + return 0, fmt.Errorf("keys: %w", err) } - count := keysValue.Len() - keyPtr, err := marshalPtr(keys, count*int(m.keySize)) + + keyPtr, err := marshalMapSyscallInput(keys, count*int(m.keySize)) if err != nil { return 0, fmt.Errorf("cannot marshal keys: %v", err) } @@ -1023,12 +1209,33 @@ func (m *Map) BatchDelete(keys interface{}, opts *BatchOptions) (int, error) { } if err = sys.MapDeleteBatch(&attr); err != nil { + if haveFeatErr := haveBatchAPI(); haveFeatErr != nil { + return 0, haveFeatErr + } return int(attr.Count), fmt.Errorf("batch delete: %w", wrapMapError(err)) } return int(attr.Count), nil } +func batchCount(keys, values any) (int, error) { + keysLen, err := sliceLen(keys) + if err != nil { + return 0, fmt.Errorf("keys: %w", err) + } + + valuesLen, err := sliceLen(values) + if err != nil { + return 0, fmt.Errorf("values: %w", err) + } + + if keysLen != valuesLen { + return 0, fmt.Errorf("keys and values must have the same length") + } + + return keysLen, nil +} + // Iterate traverses a map. // // It's safe to create multiple iterators at the same time. @@ -1095,7 +1302,8 @@ func (m *Map) Clone() (*Map, error) { // the new path already exists. Re-pinning across filesystems is not supported. // You can Clone a map to pin it to a different path. // -// This requires bpffs to be mounted above fileName. See https://docs.cilium.io/en/k8s-doc/admin/#admin-mount-bpffs +// This requires bpffs to be mounted above fileName. +// See https://docs.cilium.io/en/stable/network/kubernetes/configuration/#mounting-bpffs-with-systemd func (m *Map) Pin(fileName string) error { if err := internal.Pin(m.pinnedPath, fileName, m.fd); err != nil { return err @@ -1126,15 +1334,14 @@ func (m *Map) IsPinned() bool { // // It makes no changes to kernel-side restrictions. func (m *Map) Freeze() error { - if err := haveMapMutabilityModifiers(); err != nil { - return fmt.Errorf("can't freeze map: %w", err) - } - attr := sys.MapFreezeAttr{ MapFd: m.fd.Uint(), } if err := sys.MapFreeze(&attr); err != nil { + if haveFeatErr := haveMapMutabilityModifiers(); haveFeatErr != nil { + return fmt.Errorf("can't freeze map: %w", haveFeatErr) + } return fmt.Errorf("can't freeze map: %w", err) } return nil @@ -1167,23 +1374,10 @@ func (m *Map) marshalKey(data interface{}) (sys.Pointer, error) { return sys.Pointer{}, errors.New("can't use nil as key of map") } - return marshalPtr(data, int(m.keySize)) -} - -func (m *Map) unmarshalKey(data interface{}, buf []byte) error { - if buf == nil { - // This is from a makeBuffer call, nothing do do here. 
- return nil - } - - return unmarshalBytes(data, buf) + return marshalMapSyscallInput(data, int(m.keySize)) } func (m *Map) marshalValue(data interface{}) (sys.Pointer, error) { - if m.typ.hasPerCPUValue() { - return marshalPerCPUValue(data, int(m.valueSize)) - } - var ( buf []byte err error @@ -1203,7 +1397,7 @@ func (m *Map) marshalValue(data interface{}) (sys.Pointer, error) { buf, err = marshalProgram(value, int(m.valueSize)) default: - return marshalPtr(data, int(m.valueSize)) + return marshalMapSyscallInput(data, int(m.valueSize)) } if err != nil { @@ -1213,16 +1407,7 @@ func (m *Map) marshalValue(data interface{}) (sys.Pointer, error) { return sys.NewSlicePointer(buf), nil } -func (m *Map) unmarshalValue(value interface{}, buf []byte) error { - if buf == nil { - // This is from a makeBuffer call, nothing do do here. - return nil - } - - if m.typ.hasPerCPUValue() { - return unmarshalPerCPUValue(value, int(m.valueSize), buf) - } - +func (m *Map) unmarshalValue(value any, buf sysenc.Buffer) error { switch value := value.(type) { case **Map: if !m.typ.canStoreMap() { @@ -1269,7 +1454,7 @@ func (m *Map) unmarshalValue(value interface{}, buf []byte) error { return errors.New("require pointer to *Program") } - return unmarshalBytes(value, buf) + return buf.Unmarshal(value) } // LoadPinnedMap loads a Map from a BPF file. @@ -1291,12 +1476,11 @@ func LoadPinnedMap(fileName string, opts *LoadPinOptions) (*Map, error) { } // unmarshalMap creates a map from a map ID encoded in host endianness. -func unmarshalMap(buf []byte) (*Map, error) { - if len(buf) != 4 { - return nil, errors.New("map id requires 4 byte value") +func unmarshalMap(buf sysenc.Buffer) (*Map, error) { + var id uint32 + if err := buf.Unmarshal(&id); err != nil { + return nil, err } - - id := internal.NativeEndian.Uint32(buf) return NewMapFromID(MapID(id)) } @@ -1315,9 +1499,10 @@ func marshalMap(m *Map, length int) ([]byte, error) { // // See Map.Iterate. type MapIterator struct { - target *Map - prevKey interface{} - prevBytes []byte + target *Map + // Temporary storage to avoid allocations in Next(). This is any instead + // of []byte to avoid allocations. + cursor any count, maxEntries uint32 done bool err error @@ -1327,7 +1512,6 @@ func newMapIterator(target *Map) *MapIterator { return &MapIterator{ target: target, maxEntries: target.maxEntries, - prevBytes: make([]byte, target.keySize), } } @@ -1346,29 +1530,30 @@ func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool { return false } - // For array-like maps NextKeyBytes returns nil only on after maxEntries + // For array-like maps NextKey returns nil only after maxEntries // iterations. for mi.count <= mi.maxEntries { - var nextBytes []byte - nextBytes, mi.err = mi.target.NextKeyBytes(mi.prevKey) - if mi.err != nil { - return false + if mi.cursor == nil { + // Pass nil interface to NextKey to make sure the Map's first key + // is returned. If we pass an uninitialized []byte instead, it'll see a + // non-nil interface and try to marshal it. + mi.cursor = make([]byte, mi.target.keySize) + mi.err = mi.target.NextKey(nil, mi.cursor) + } else { + mi.err = mi.target.NextKey(mi.cursor, mi.cursor) } - if nextBytes == nil { + if errors.Is(mi.err, ErrKeyNotExist) { mi.done = true + mi.err = nil + return false + } else if mi.err != nil { + mi.err = fmt.Errorf("get next key: %w", mi.err) return false } - // The user can get access to nextBytes since unmarshalBytes - // does not copy when unmarshaling into a []byte. 
- // Make a copy to prevent accidental corruption of - // iterator state. - copy(mi.prevBytes, nextBytes) - mi.prevKey = mi.prevBytes - mi.count++ - mi.err = mi.target.Lookup(nextBytes, valueOut) + mi.err = mi.target.Lookup(mi.cursor, valueOut) if errors.Is(mi.err, ErrKeyNotExist) { // Even though the key should be valid, we couldn't look up // its value. If we're iterating a hash map this is probably @@ -1381,10 +1566,17 @@ func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool { continue } if mi.err != nil { + mi.err = fmt.Errorf("look up next key: %w", mi.err) return false } - mi.err = mi.target.unmarshalKey(keyOut, nextBytes) + buf := mi.cursor.([]byte) + if ptr, ok := keyOut.(unsafe.Pointer); ok { + copy(unsafe.Slice((*byte)(ptr), len(buf)), buf) + } else { + mi.err = sysenc.Unmarshal(keyOut, buf) + } + return mi.err == nil } @@ -1422,3 +1614,12 @@ func NewMapFromID(id MapID) (*Map, error) { return newMapFromFD(fd) } + +// sliceLen returns the length if the value is a slice or an error otherwise. +func sliceLen(slice any) (int, error) { + sliceValue := reflect.ValueOf(slice) + if sliceValue.Kind() != reflect.Slice { + return 0, fmt.Errorf("%T is not a slice", slice) + } + return sliceValue.Len(), nil +} diff --git a/vendor/github.com/cilium/ebpf/marshalers.go b/vendor/github.com/cilium/ebpf/marshalers.go index 544d17f35e..57a0a8e88a 100644 --- a/vendor/github.com/cilium/ebpf/marshalers.go +++ b/vendor/github.com/cilium/ebpf/marshalers.go @@ -1,247 +1,210 @@ package ebpf import ( - "bytes" "encoding" - "encoding/binary" "errors" "fmt" "reflect" - "runtime" - "sync" + "slices" "unsafe" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/sysenc" ) -// marshalPtr converts an arbitrary value into a pointer suitable +// marshalMapSyscallInput converts an arbitrary value into a pointer suitable // to be passed to the kernel. // // As an optimization, it returns the original value if it is an // unsafe.Pointer. -func marshalPtr(data interface{}, length int) (sys.Pointer, error) { +func marshalMapSyscallInput(data any, length int) (sys.Pointer, error) { if ptr, ok := data.(unsafe.Pointer); ok { return sys.NewPointer(ptr), nil } - buf, err := marshalBytes(data, length) + buf, err := sysenc.Marshal(data, length) if err != nil { return sys.Pointer{}, err } - return sys.NewSlicePointer(buf), nil + return buf.Pointer(), nil } -// marshalBytes converts an arbitrary value into a byte buffer. -// -// Prefer using Map.marshalKey and Map.marshalValue if possible, since -// those have special cases that allow more types to be encoded. -// -// Returns an error if the given value isn't representable in exactly -// length bytes. 
-func marshalBytes(data interface{}, length int) (buf []byte, err error) { - if data == nil { - return nil, errors.New("can't marshal a nil value") - } - - switch value := data.(type) { - case encoding.BinaryMarshaler: - buf, err = value.MarshalBinary() - case string: - buf = []byte(value) - case []byte: - buf = value - case unsafe.Pointer: - err = errors.New("can't marshal from unsafe.Pointer") - case Map, *Map, Program, *Program: - err = fmt.Errorf("can't marshal %T", value) - default: - var wr bytes.Buffer - err = binary.Write(&wr, internal.NativeEndian, value) - if err != nil { - err = fmt.Errorf("encoding %T: %v", value, err) - } - buf = wr.Bytes() - } - if err != nil { - return nil, err +func makeMapSyscallOutput(dst any, length int) sysenc.Buffer { + if ptr, ok := dst.(unsafe.Pointer); ok { + return sysenc.UnsafeBuffer(ptr) } - if len(buf) != length { - return nil, fmt.Errorf("%T doesn't marshal to %d bytes", data, length) + _, ok := dst.(encoding.BinaryUnmarshaler) + if ok { + return sysenc.SyscallOutput(nil, length) } - return buf, nil + + return sysenc.SyscallOutput(dst, length) } -func makeBuffer(dst interface{}, length int) (sys.Pointer, []byte) { - if ptr, ok := dst.(unsafe.Pointer); ok { - return sys.NewPointer(ptr), nil +// appendPerCPUSlice encodes a slice containing one value per +// possible CPU into a buffer of bytes. +// +// Values are initialized to zero if the slice has less elements than CPUs. +func appendPerCPUSlice(buf []byte, slice any, possibleCPUs, elemLength, alignedElemLength int) ([]byte, error) { + sliceType := reflect.TypeOf(slice) + if sliceType.Kind() != reflect.Slice { + return nil, errors.New("per-CPU value requires slice") } - buf := make([]byte, length) - return sys.NewSlicePointer(buf), buf -} - -var bytesReaderPool = sync.Pool{ - New: func() interface{} { - return new(bytes.Reader) - }, -} + sliceValue := reflect.ValueOf(slice) + sliceLen := sliceValue.Len() + if sliceLen > possibleCPUs { + return nil, fmt.Errorf("per-CPU value greater than number of CPUs") + } -// unmarshalBytes converts a byte buffer into an arbitrary value. -// -// Prefer using Map.unmarshalKey and Map.unmarshalValue if possible, since -// those have special cases that allow more types to be encoded. -// -// The common int32 and int64 types are directly handled to avoid -// unnecessary heap allocations as happening in the default case. 
-func unmarshalBytes(data interface{}, buf []byte) error { - switch value := data.(type) { - case unsafe.Pointer: - dst := unsafe.Slice((*byte)(value), len(buf)) - copy(dst, buf) - runtime.KeepAlive(value) - return nil - case Map, *Map, Program, *Program: - return fmt.Errorf("can't unmarshal into %T", value) - case encoding.BinaryUnmarshaler: - return value.UnmarshalBinary(buf) - case *string: - *value = string(buf) - return nil - case *[]byte: - *value = buf - return nil - case *int32: - if len(buf) < 4 { - return errors.New("int32 requires 4 bytes") - } - *value = int32(internal.NativeEndian.Uint32(buf)) - return nil - case *uint32: - if len(buf) < 4 { - return errors.New("uint32 requires 4 bytes") - } - *value = internal.NativeEndian.Uint32(buf) - return nil - case *int64: - if len(buf) < 8 { - return errors.New("int64 requires 8 bytes") - } - *value = int64(internal.NativeEndian.Uint64(buf)) - return nil - case *uint64: - if len(buf) < 8 { - return errors.New("uint64 requires 8 bytes") - } - *value = internal.NativeEndian.Uint64(buf) - return nil - case string: - return errors.New("require pointer to string") - case []byte: - return errors.New("require pointer to []byte") - default: - rd := bytesReaderPool.Get().(*bytes.Reader) - rd.Reset(buf) - defer bytesReaderPool.Put(rd) - if err := binary.Read(rd, internal.NativeEndian, value); err != nil { - return fmt.Errorf("decoding %T: %v", value, err) + // Grow increases the slice's capacity, _if_necessary_ + buf = slices.Grow(buf, alignedElemLength*possibleCPUs) + for i := 0; i < sliceLen; i++ { + elem := sliceValue.Index(i).Interface() + elemBytes, err := sysenc.Marshal(elem, elemLength) + if err != nil { + return nil, err } - return nil + + buf = elemBytes.AppendTo(buf) + buf = append(buf, make([]byte, alignedElemLength-elemLength)...) } + + // Ensure buf is zero-padded full size. + buf = append(buf, make([]byte, (possibleCPUs-sliceLen)*alignedElemLength)...) + + return buf, nil } // marshalPerCPUValue encodes a slice containing one value per // possible CPU into a buffer of bytes. // // Values are initialized to zero if the slice has less elements than CPUs. -// -// slice must have a type like []elementType. -func marshalPerCPUValue(slice interface{}, elemLength int) (sys.Pointer, error) { - sliceType := reflect.TypeOf(slice) - if sliceType.Kind() != reflect.Slice { - return sys.Pointer{}, errors.New("per-CPU value requires slice") +func marshalPerCPUValue(slice any, elemLength int) (sys.Pointer, error) { + possibleCPUs, err := PossibleCPU() + if err != nil { + return sys.Pointer{}, err } - possibleCPUs, err := internal.PossibleCPUs() + alignedElemLength := internal.Align(elemLength, 8) + buf := make([]byte, 0, alignedElemLength*possibleCPUs) + buf, err = appendPerCPUSlice(buf, slice, possibleCPUs, elemLength, alignedElemLength) if err != nil { return sys.Pointer{}, err } - sliceValue := reflect.ValueOf(slice) - sliceLen := sliceValue.Len() - if sliceLen > possibleCPUs { - return sys.Pointer{}, fmt.Errorf("per-CPU value exceeds number of CPUs") + return sys.NewSlicePointer(buf), nil +} + +// marshalBatchPerCPUValue encodes a batch-sized slice of slices containing +// one value per possible CPU into a buffer of bytes. 
+func marshalBatchPerCPUValue(slice any, batchLen, elemLength int) ([]byte, error) { + sliceType := reflect.TypeOf(slice) + if sliceType.Kind() != reflect.Slice { + return nil, fmt.Errorf("batch value requires a slice") } + sliceValue := reflect.ValueOf(slice) + possibleCPUs, err := PossibleCPU() + if err != nil { + return nil, err + } + if sliceValue.Len() != batchLen*possibleCPUs { + return nil, fmt.Errorf("per-CPU slice has incorrect length, expected %d, got %d", + batchLen*possibleCPUs, sliceValue.Len()) + } alignedElemLength := internal.Align(elemLength, 8) - buf := make([]byte, alignedElemLength*possibleCPUs) - - for i := 0; i < sliceLen; i++ { - elem := sliceValue.Index(i).Interface() - elemBytes, err := marshalBytes(elem, elemLength) + buf := make([]byte, 0, batchLen*alignedElemLength*possibleCPUs) + for i := 0; i < batchLen; i++ { + batch := sliceValue.Slice(i*possibleCPUs, (i+1)*possibleCPUs).Interface() + buf, err = appendPerCPUSlice(buf, batch, possibleCPUs, elemLength, alignedElemLength) if err != nil { - return sys.Pointer{}, err + return nil, fmt.Errorf("batch %d: %w", i, err) } - - offset := i * alignedElemLength - copy(buf[offset:offset+elemLength], elemBytes) } - - return sys.NewSlicePointer(buf), nil + return buf, nil } // unmarshalPerCPUValue decodes a buffer into a slice containing one value per // possible CPU. // -// valueOut must have a type like *[]elementType -func unmarshalPerCPUValue(slicePtr interface{}, elemLength int, buf []byte) error { - slicePtrType := reflect.TypeOf(slicePtr) - if slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice { - return fmt.Errorf("per-cpu value requires pointer to slice") +// slice must be a literal slice and not a pointer. +func unmarshalPerCPUValue(slice any, elemLength int, buf []byte) error { + sliceType := reflect.TypeOf(slice) + if sliceType.Kind() != reflect.Slice { + return fmt.Errorf("per-CPU value requires a slice") } - possibleCPUs, err := internal.PossibleCPUs() + possibleCPUs, err := PossibleCPU() if err != nil { return err } - sliceType := slicePtrType.Elem() - slice := reflect.MakeSlice(sliceType, possibleCPUs, possibleCPUs) + sliceValue := reflect.ValueOf(slice) + if sliceValue.Len() != possibleCPUs { + return fmt.Errorf("per-CPU slice has incorrect length, expected %d, got %d", + possibleCPUs, sliceValue.Len()) + } sliceElemType := sliceType.Elem() sliceElemIsPointer := sliceElemType.Kind() == reflect.Ptr - if sliceElemIsPointer { - sliceElemType = sliceElemType.Elem() - } - - step := len(buf) / possibleCPUs - if step < elemLength { - return fmt.Errorf("per-cpu element length is larger than available data") - } + stride := internal.Align(elemLength, 8) for i := 0; i < possibleCPUs; i++ { - var elem interface{} + var elem any + v := sliceValue.Index(i) if sliceElemIsPointer { - newElem := reflect.New(sliceElemType) - slice.Index(i).Set(newElem) - elem = newElem.Interface() + if !v.Elem().CanAddr() { + return fmt.Errorf("per-CPU slice elements cannot be nil") + } + elem = v.Elem().Addr().Interface() } else { - elem = slice.Index(i).Addr().Interface() + elem = v.Addr().Interface() } - - // Make a copy, since unmarshal can hold on to itemBytes - elemBytes := make([]byte, elemLength) - copy(elemBytes, buf[:elemLength]) - - err := unmarshalBytes(elem, elemBytes) + err := sysenc.Unmarshal(elem, buf[:elemLength]) if err != nil { return fmt.Errorf("cpu %d: %w", i, err) } - buf = buf[step:] + buf = buf[stride:] + } + return nil +} + +// unmarshalBatchPerCPUValue decodes a buffer into a 
+// batch-sized slice
+// containing one value per possible CPU.
+//
+// slice must have length batchLen * PossibleCPUs().
+func unmarshalBatchPerCPUValue(slice any, batchLen, elemLength int, buf []byte) error {
+	sliceType := reflect.TypeOf(slice)
+	if sliceType.Kind() != reflect.Slice {
+		return fmt.Errorf("batch requires a slice")
+	}
+
+	sliceValue := reflect.ValueOf(slice)
+	possibleCPUs, err := PossibleCPU()
+	if err != nil {
+		return err
+	}
+	if sliceValue.Len() != batchLen*possibleCPUs {
+		return fmt.Errorf("per-CPU slice has incorrect length, expected %d, got %d",
+			sliceValue.Len(), batchLen*possibleCPUs)
+	}
+
+	fullValueSize := possibleCPUs * internal.Align(elemLength, 8)
+	if len(buf) != batchLen*fullValueSize {
+		return fmt.Errorf("input buffer has incorrect length, expected %d, got %d",
+			len(buf), batchLen*fullValueSize)
+	}
+
+	for i := 0; i < batchLen; i++ {
+		elem := sliceValue.Slice(i*possibleCPUs, (i+1)*possibleCPUs).Interface()
+		if err := unmarshalPerCPUValue(elem, elemLength, buf[:fullValueSize]); err != nil {
+			return fmt.Errorf("batch %d: %w", i, err)
+		}
+		buf = buf[fullValueSize:]
+	}
 	return nil
 }
diff --git a/vendor/github.com/cilium/ebpf/netlify.toml b/vendor/github.com/cilium/ebpf/netlify.toml
new file mode 100644
index 0000000000..67c83f3b30
--- /dev/null
+++ b/vendor/github.com/cilium/ebpf/netlify.toml
@@ -0,0 +1,4 @@
+[build]
+  base    = "docs/"
+  publish = "site/"
+  command = "mkdocs build"
diff --git a/vendor/github.com/cilium/ebpf/prog.go b/vendor/github.com/cilium/ebpf/prog.go
index 675edc711d..f4f3af7c36 100644
--- a/vendor/github.com/cilium/ebpf/prog.go
+++ b/vendor/github.com/cilium/ebpf/prog.go
@@ -10,17 +10,32 @@ import (
 	"runtime"
 	"strings"
 	"time"
+	"unsafe"
 
 	"github.com/cilium/ebpf/asm"
 	"github.com/cilium/ebpf/btf"
 	"github.com/cilium/ebpf/internal"
+	"github.com/cilium/ebpf/internal/kallsyms"
 	"github.com/cilium/ebpf/internal/sys"
+	"github.com/cilium/ebpf/internal/sysenc"
 	"github.com/cilium/ebpf/internal/unix"
 )
 
 // ErrNotSupported is returned whenever the kernel doesn't support a feature.
 var ErrNotSupported = internal.ErrNotSupported
 
+// errBadRelocation is returned when the verifier rejects a program due to a
+// bad CO-RE relocation.
+//
+// This error is detected based on heuristics and therefore may not be reliable.
+var errBadRelocation = errors.New("bad CO-RE relocation")
+
+// errUnknownKfunc is returned when the verifier rejects a program due to an
+// unknown kfunc.
+//
+// This error is detected based on heuristics and therefore may not be reliable.
+var errUnknownKfunc = errors.New("unknown kfunc")
+
 // ProgramID represents the unique ID of an eBPF program.
 type ProgramID uint32
 
@@ -35,21 +50,57 @@ const (
 // verifier log.
 const DefaultVerifierLogSize = 64 * 1024
 
+// maxVerifierLogSize is the maximum size of verifier log buffer the kernel
+// will accept before returning EINVAL.
+const maxVerifierLogSize = math.MaxUint32 >> 2
+
 // ProgramOptions control loading a program into the kernel.
 type ProgramOptions struct {
-	// Controls the detail emitted by the kernel verifier. Set to non-zero
-	// to enable logging.
-	LogLevel uint32
-	// Controls the output buffer size for the verifier. Defaults to
-	// DefaultVerifierLogSize.
+	// Bitmap controlling the detail emitted by the kernel's eBPF verifier log.
+	// LogLevel-type values can be ORed together to request specific kinds of
+	// verifier output. See the documentation on [ebpf.LogLevel] for details.
+ // + // opts.LogLevel = (ebpf.LogLevelBranch | ebpf.LogLevelStats) + // + // If left to its default value, the program will first be loaded without + // verifier output enabled. Upon error, the program load will be repeated + // with LogLevelBranch and the given (or default) LogSize value. + // + // Unless LogDisabled is set, setting this to a non-zero value will enable the verifier + // log, populating the [ebpf.Program.VerifierLog] field on successful loads + // and including detailed verifier errors if the program is rejected. This + // will always allocate an output buffer, but will result in only a single + // attempt at loading the program. + LogLevel LogLevel + + // Controls the output buffer size for the verifier log, in bytes. See the + // documentation on ProgramOptions.LogLevel for details about how this value + // is used. + // + // If this value is set too low to fit the verifier log, the resulting + // [ebpf.VerifierError]'s Truncated flag will be true, and the error string + // will also contain a hint to that effect. + // + // Defaults to DefaultVerifierLogSize. LogSize int - // Type information used for CO-RE relocations and when attaching to - // kernel functions. + + // Disables the verifier log completely, regardless of other options. + LogDisabled bool + + // Type information used for CO-RE relocations. // // This is useful in environments where the kernel BTF is not available // (containers) or where it is in a non-standard location. Defaults to // use the kernel BTF from a well-known location if nil. KernelTypes *btf.Spec + + // Type information used for CO-RE relocations of kernel modules, + // indexed by module name. + // + // This is useful in environments where the kernel BTF is not available + // (containers) or where it is in a non-standard location. Defaults to + // use the kernel module BTF from a well-known location if nil. + KernelModuleTypes map[string]*btf.Spec } // ProgramSpec defines a Program. @@ -74,7 +125,7 @@ type ProgramSpec struct { // The program to attach to. Must be provided manually. AttachTarget *Program - // The name of the ELF section this program orininated from. + // The name of the ELF section this program originated from. SectionName string Instructions asm.Instructions @@ -95,11 +146,6 @@ type ProgramSpec struct { // detect this value automatically. KernelVersion uint32 - // The BTF associated with this program. Changing Instructions - // will most likely invalidate the contained data, and may - // result in errors when attempting to load it into the kernel. - BTF *btf.Spec - // The byte order this program was compiled for, may be nil. ByteOrder binary.ByteOrder } @@ -123,6 +169,32 @@ func (ps *ProgramSpec) Tag() (string, error) { return ps.Instructions.Tag(internal.NativeEndian) } +// KernelModule returns the kernel module, if any, the AttachTo function is contained in. +func (ps *ProgramSpec) KernelModule() (string, error) { + if ps.AttachTo == "" { + return "", nil + } + + switch ps.Type { + default: + return "", nil + case Tracing: + switch ps.AttachType { + default: + return "", nil + case AttachTraceFEntry: + case AttachTraceFExit: + } + fallthrough + case Kprobe: + return kallsyms.KernelModule(ps.AttachTo) + } +} + +// VerifierError is returned by [NewProgram] and [NewProgramWithOptions] if a +// program is rejected by the verifier. +// +// Use [errors.As] to access the error. type VerifierError = internal.VerifierError // Program represents BPF program loaded into the kernel. 
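The new logging fields above are meant to be combined; a brief sketch of requesting verbose verifier output in a single load attempt, rather than relying on the retry-on-error path (the helper name and the 1 MiB buffer size are illustrative assumptions, not part of the patch):

	// loadVerbose asks the verifier for branch-level detail plus statistics
	// up front, avoiding the second load attempt on failure.
	func loadVerbose(spec *ebpf.ProgramSpec) (*ebpf.Program, error) {
		return ebpf.NewProgramWithOptions(spec, ebpf.ProgramOptions{
			LogLevel: ebpf.LogLevelBranch | ebpf.LogLevelStats,
			LogSize:  1 << 20, // VerifierError.Truncated reports if this was too small
		})
	}
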
@@ -141,7 +213,10 @@ type Program struct { // NewProgram creates a new Program. // -// See NewProgramWithOptions for details. +// See [NewProgramWithOptions] for details. +// +// Returns a [VerifierError] containing the full verifier log if the program is +// rejected by the kernel. func NewProgram(spec *ProgramSpec) (*Program, error) { return NewProgramWithOptions(spec, ProgramOptions{}) } @@ -151,24 +226,30 @@ func NewProgram(spec *ProgramSpec) (*Program, error) { // Loading a program for the first time will perform // feature detection by loading small, temporary programs. // -// Returns an error wrapping VerifierError if the program or its BTF is rejected -// by the kernel. +// Returns a [VerifierError] containing the full verifier log if the program is +// rejected by the kernel. func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) { if spec == nil { return nil, errors.New("can't load a program from a nil spec") } - handles := newHandleCache() - defer handles.close() - - prog, err := newProgramWithOptions(spec, opts, handles) + prog, err := newProgramWithOptions(spec, opts) if errors.Is(err, asm.ErrUnsatisfiedMapReference) { return nil, fmt.Errorf("cannot load program without loading its whole collection: %w", err) } return prog, err } -func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *handleCache) (*Program, error) { +var ( + coreBadLoad = []byte(fmt.Sprintf("(18) r10 = 0x%x\n", btf.COREBadRelocationSentinel)) + // This log message was introduced by ebb676daa1a3 ("bpf: Print function name in + // addition to function id") which first appeared in v4.10 and has remained + // unchanged since. + coreBadCall = []byte(fmt.Sprintf("invalid func unknown#%d\n", btf.COREBadRelocationSentinel)) + kfuncBadCall = []byte(fmt.Sprintf("invalid func unknown#%d\n", kfuncCallPoisonBase)) +) + +func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) { if len(spec.Instructions) == 0 { return nil, errors.New("instructions cannot be empty") } @@ -181,6 +262,10 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand return nil, fmt.Errorf("can't load %s program on %s", spec.ByteOrder, internal.NativeEndian) } + if opts.LogSize < 0 { + return nil, errors.New("ProgramOptions.LogSize must be a positive value; disable verifier logs using ProgramOptions.LogDisabled") + } + // Kernels before 5.0 (6c4fc209fcf9 "bpf: remove useless version check for prog load") // require the version field to be set to the value of the KERNEL_VERSION // macro for kprobe-type programs. 
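Since load failures now carry the full verifier log in a [VerifierError], callers are expected to unwrap it with errors.As; a minimal sketch (spec is a hypothetical *ebpf.ProgramSpec):

	prog, err := ebpf.NewProgram(spec)
	if err != nil {
		var verr *ebpf.VerifierError
		if errors.As(err, &verr) {
			// %+v formats the complete log captured during the failed load.
			fmt.Printf("verifier rejected program:\n%+v\n", verr)
		}
		return err
	}
	defer prog.Close()
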
@@ -206,47 +291,87 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand attr.ProgName = sys.NewObjName(spec.Name) } - kernelTypes := opts.KernelTypes - insns := make(asm.Instructions, len(spec.Instructions)) copy(insns, spec.Instructions) - var btfDisabled bool - if spec.BTF != nil { - if err := applyRelocations(insns, spec.BTF, kernelTypes); err != nil { - return nil, fmt.Errorf("apply CO-RE relocations: %w", err) - } + kmodName, err := spec.KernelModule() + if err != nil { + return nil, fmt.Errorf("kernel module search: %w", err) + } - handle, err := handles.btfHandle(spec.BTF) - btfDisabled = errors.Is(err, btf.ErrNotSupported) - if err != nil && !btfDisabled { - return nil, fmt.Errorf("load BTF: %w", err) + var targets []*btf.Spec + if opts.KernelTypes != nil { + targets = append(targets, opts.KernelTypes) + } + if kmodName != "" && opts.KernelModuleTypes != nil { + if modBTF, ok := opts.KernelModuleTypes[kmodName]; ok { + targets = append(targets, modBTF) } + } + + var b btf.Builder + if err := applyRelocations(insns, targets, kmodName, spec.ByteOrder, &b); err != nil { + return nil, fmt.Errorf("apply CO-RE relocations: %w", err) + } - if handle != nil { - attr.ProgBtfFd = uint32(handle.FD()) + errExtInfos := haveProgramExtInfos() + if !b.Empty() && errors.Is(errExtInfos, ErrNotSupported) { + // There is at least one CO-RE relocation which relies on a stable local + // type ID. + // Return ErrNotSupported instead of E2BIG if there is no BTF support. + return nil, errExtInfos + } - fib, lib, err := btf.MarshalExtInfos(insns, spec.BTF.TypeID) - if err != nil { - return nil, err - } + if errExtInfos == nil { + // Only add func and line info if the kernel supports it. This allows + // BPF compiled with modern toolchains to work on old kernels. 
+ fib, lib, err := btf.MarshalExtInfos(insns, &b) + if err != nil { + return nil, fmt.Errorf("marshal ext_infos: %w", err) + } + + attr.FuncInfoRecSize = btf.FuncInfoSize + attr.FuncInfoCnt = uint32(len(fib)) / btf.FuncInfoSize + attr.FuncInfo = sys.NewSlicePointer(fib) - attr.FuncInfoRecSize = btf.FuncInfoSize - attr.FuncInfoCnt = uint32(len(fib)) / btf.FuncInfoSize - attr.FuncInfo = sys.NewSlicePointer(fib) + attr.LineInfoRecSize = btf.LineInfoSize + attr.LineInfoCnt = uint32(len(lib)) / btf.LineInfoSize + attr.LineInfo = sys.NewSlicePointer(lib) + } - attr.LineInfoRecSize = btf.LineInfoSize - attr.LineInfoCnt = uint32(len(lib)) / btf.LineInfoSize - attr.LineInfo = sys.NewSlicePointer(lib) + if !b.Empty() { + handle, err := btf.NewHandle(&b) + if err != nil { + return nil, fmt.Errorf("load BTF: %w", err) } + defer handle.Close() + + attr.ProgBtfFd = uint32(handle.FD()) + } + + kconfig, err := resolveKconfigReferences(insns) + if err != nil { + return nil, fmt.Errorf("resolve .kconfig: %w", err) } + defer kconfig.Close() if err := fixupAndValidate(insns); err != nil { return nil, err } + handles, err := fixupKfuncs(insns) + if err != nil { + return nil, fmt.Errorf("fixing up kfuncs: %w", err) + } + defer handles.Close() + + if len(handles) > 0 { + fdArray := handles.fdArray() + attr.FdArray = sys.NewPointer(unsafe.Pointer(&fdArray[0])) + } + buf := bytes.NewBuffer(make([]byte, 0, insns.Size())) - err := insns.Marshal(buf, internal.NativeEndian) + err = insns.Marshal(buf, internal.NativeEndian) if err != nil { return nil, err } @@ -261,28 +386,32 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand return nil, fmt.Errorf("attach %s/%s: %w", spec.Type, spec.AttachType, err) } - attr.AttachBtfId = uint32(targetID) - attr.AttachProgFd = uint32(spec.AttachTarget.FD()) + attr.AttachBtfId = targetID + attr.AttachBtfObjFd = uint32(spec.AttachTarget.FD()) defer runtime.KeepAlive(spec.AttachTarget) } else if spec.AttachTo != "" { - targetID, err := findTargetInKernel(kernelTypes, spec.AttachTo, spec.Type, spec.AttachType) + module, targetID, err := findProgramTargetInKernel(spec.AttachTo, spec.Type, spec.AttachType) if err != nil && !errors.Is(err, errUnrecognizedAttachType) { // We ignore errUnrecognizedAttachType since AttachTo may be non-empty // for programs that don't attach anywhere. return nil, fmt.Errorf("attach %s/%s: %w", spec.Type, spec.AttachType, err) } - attr.AttachBtfId = uint32(targetID) + attr.AttachBtfId = targetID + if module != nil { + attr.AttachBtfObjFd = uint32(module.FD()) + defer module.Close() + } } - logSize := DefaultVerifierLogSize - if opts.LogSize > 0 { - logSize = opts.LogSize + if opts.LogSize == 0 { + opts.LogSize = DefaultVerifierLogSize } + // The caller requested a specific verifier log level. Set up the log buffer. var logBuf []byte - if opts.LogLevel > 0 { - logBuf = make([]byte, logSize) + if !opts.LogDisabled && opts.LogLevel != 0 { + logBuf = make([]byte, opts.LogSize) attr.LogLevel = opts.LogLevel attr.LogSize = uint32(len(logBuf)) attr.LogBuf = sys.NewSlicePointer(logBuf) @@ -293,15 +422,27 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand return &Program{unix.ByteSliceToString(logBuf), fd, spec.Name, "", spec.Type}, nil } - if opts.LogLevel == 0 && opts.LogSize >= 0 { - // Re-run with the verifier enabled to get better error messages. 
- logBuf = make([]byte, logSize) - attr.LogLevel = 1 + // An error occurred loading the program, but the caller did not explicitly + // enable the verifier log. Re-run with branch-level verifier logs enabled to + // obtain more info. Preserve the original error to return it to the caller. + // An undersized log buffer will result in ENOSPC regardless of the underlying + // cause. + var err2 error + if !opts.LogDisabled && opts.LogLevel == 0 { + logBuf = make([]byte, opts.LogSize) + attr.LogLevel = LogLevelBranch attr.LogSize = uint32(len(logBuf)) attr.LogBuf = sys.NewSlicePointer(logBuf) - _, _ = sys.ProgLoad(attr) + + _, err2 = sys.ProgLoad(attr) } + end := bytes.IndexByte(logBuf, 0) + if end < 0 { + end = len(logBuf) + } + + tail := logBuf[max(end-256, 0):end] switch { case errors.Is(err, unix.EPERM): if len(logBuf) > 0 && logBuf[0] == 0 { @@ -310,21 +451,36 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand return nil, fmt.Errorf("load program: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err) } - fallthrough - case errors.Is(err, unix.EINVAL): - if hasFunctionReferences(spec.Instructions) { - if err := haveBPFToBPFCalls(); err != nil { - return nil, fmt.Errorf("load program: %w", err) - } + if opts.LogSize > maxVerifierLogSize { + return nil, fmt.Errorf("load program: %w (ProgramOptions.LogSize exceeds maximum value of %d)", err, maxVerifierLogSize) + } + + if bytes.Contains(tail, coreBadCall) { + err = errBadRelocation + break + } else if bytes.Contains(tail, kfuncBadCall) { + err = errUnknownKfunc + break + } + + case errors.Is(err, unix.EACCES): + if bytes.Contains(tail, coreBadLoad) { + err = errBadRelocation + break } } - err = internal.ErrorWithLog(err, logBuf) - if btfDisabled { - return nil, fmt.Errorf("load program: %w (BTF disabled)", err) + // hasFunctionReferences may be expensive, so check it last. + if (errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM)) && + hasFunctionReferences(spec.Instructions) { + if err := haveBPFToBPFCalls(); err != nil { + return nil, fmt.Errorf("load program: %w", err) + } } - return nil, fmt.Errorf("load program: %w", err) + + truncated := errors.Is(err, unix.ENOSPC) || errors.Is(err2, unix.ENOSPC) + return nil, internal.ErrorWithLog("load program", err, logBuf, truncated) } // NewProgramFromFD creates a program from a raw fd. @@ -362,7 +518,7 @@ func newProgramFromFD(fd *sys.FD) (*Program, error) { return nil, fmt.Errorf("discover program type: %w", err) } - return &Program{"", fd, "", "", info.Type}, nil + return &Program{"", fd, info.Name, "", info.Type}, nil } func (p *Program) String() string { @@ -433,7 +589,8 @@ func (p *Program) Clone() (*Program, error) { // Calling Pin on a previously pinned program will overwrite the path, except when // the new path already exists. Re-pinning across filesystems is not supported. // -// This requires bpffs to be mounted above fileName. See https://docs.cilium.io/en/k8s-doc/admin/#admin-mount-bpffs +// This requires bpffs to be mounted above fileName. +// See https://docs.cilium.io/en/stable/network/kubernetes/configuration/#mounting-bpffs-with-systemd func (p *Program) Pin(fileName string) error { if err := internal.Pin(p.pinnedPath, fileName, p.fd); err != nil { return err @@ -474,6 +631,9 @@ func (p *Program) Close() error { // Various options for Run'ing a Program type RunOptions struct { // Program's data input. Required field. + // + // The kernel expects at least 14 bytes input for an ethernet header for + // XDP and SKB programs. 
Data []byte // Program's data after Program has run. Caller must allocate. Optional field. DataOut []byte @@ -481,7 +641,10 @@ type RunOptions struct { Context interface{} // Program's context after Program has run. Must be a pointer or slice. Optional field. ContextOut interface{} - // Number of times to run Program. Optional field. Defaults to 1. + // Minimum number of times to run Program. Optional field. Defaults to 1. + // + // The program may be executed more often than this due to interruptions, e.g. + // when runtime.AllThreadsSyscall is invoked. Repeat uint32 // Optional flags. Flags uint32 @@ -490,11 +653,13 @@ type RunOptions struct { CPU uint32 // Called whenever the syscall is interrupted, and should be set to testing.B.ResetTimer // or similar. Typically used during benchmarking. Optional field. + // + // Deprecated: use [testing.B.ReportMetric] with unit "ns/op" instead. Reset func() } // Test runs the Program in the kernel with the given input and returns the -// value returned by the eBPF program. outLen may be zero. +// value returned by the eBPF program. // // Note: the kernel expects at least 14 bytes input for an ethernet header for // XDP and SKB programs. @@ -517,9 +682,9 @@ func (p *Program) Test(in []byte) (uint32, []byte, error) { Repeat: 1, } - ret, _, err := p.testRun(&opts) + ret, _, err := p.run(&opts) if err != nil { - return ret, nil, fmt.Errorf("can't test program: %w", err) + return ret, nil, fmt.Errorf("test program: %w", err) } return ret, opts.DataOut, nil } @@ -528,9 +693,9 @@ func (p *Program) Test(in []byte) (uint32, []byte, error) { // // Note: the same restrictions from Test apply. func (p *Program) Run(opts *RunOptions) (uint32, error) { - ret, _, err := p.testRun(opts) + ret, _, err := p.run(opts) if err != nil { - return ret, fmt.Errorf("can't test program: %w", err) + return ret, fmt.Errorf("run program: %w", err) } return ret, nil } @@ -542,9 +707,6 @@ func (p *Program) Run(opts *RunOptions) (uint32, error) { // run or an error. reset is called whenever the benchmark syscall is // interrupted, and should be set to testing.B.ResetTimer or similar. // -// Note: profiling a call to this function will skew it's results, see -// https://github.com/cilium/ebpf/issues/24 -// // This function requires at least Linux 4.12. func (p *Program) Benchmark(in []byte, repeat int, reset func()) (uint32, time.Duration, error) { if uint(repeat) > math.MaxUint32 { @@ -557,14 +719,14 @@ func (p *Program) Benchmark(in []byte, repeat int, reset func()) (uint32, time.D Reset: reset, } - ret, total, err := p.testRun(&opts) + ret, total, err := p.run(&opts) if err != nil { - return ret, total, fmt.Errorf("can't benchmark program: %w", err) + return ret, total, fmt.Errorf("benchmark program: %w", err) } return ret, total, nil } -var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() error { +var haveProgRun = internal.NewFeatureTest("BPF_PROG_RUN", "4.12", func() error { prog, err := NewProgram(&ProgramSpec{ // SocketFilter does not require privileges on newer kernels. 
Type: SocketFilter, @@ -580,8 +742,7 @@ var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() e } defer prog.Close() - // Programs require at least 14 bytes input - in := make([]byte, 14) + in := internal.EmptyBPFContext attr := sys.ProgRunAttr{ ProgFd: uint32(prog.FD()), DataSizeIn: uint32(len(in)), @@ -599,7 +760,7 @@ var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() e // We know that PROG_TEST_RUN is supported if we get EINTR. return nil - case errors.Is(err, unix.ENOTSUPP): + case errors.Is(err, sys.ENOTSUPP): // The first PROG_TEST_RUN patches shipped in 4.12 didn't include // a test runner for SocketFilter. ENOTSUPP means PROG_TEST_RUN is // supported, but not for the program type used in the probe. @@ -609,12 +770,12 @@ var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() e return err }) -func (p *Program) testRun(opts *RunOptions) (uint32, time.Duration, error) { +func (p *Program) run(opts *RunOptions) (uint32, time.Duration, error) { if uint(len(opts.Data)) > math.MaxUint32 { return 0, 0, fmt.Errorf("input is too long") } - if err := haveProgTestRun(); err != nil { + if err := haveProgRun(); err != nil { return 0, 0, err } @@ -647,24 +808,41 @@ func (p *Program) testRun(opts *RunOptions) (uint32, time.Duration, error) { Cpu: opts.CPU, } +retry: for { err := sys.ProgRun(&attr) if err == nil { - break + break retry } if errors.Is(err, unix.EINTR) { + if attr.Repeat <= 1 { + // Older kernels check whether enough repetitions have been + // executed only after checking for pending signals. + // + // run signal? done? run ... + // + // As a result we can get EINTR for repeat==1 even though + // the program was run exactly once. Treat this as a + // successful run instead. + // + // Since commit 607b9cc92bd7 ("bpf: Consolidate shared test timing code") + // the conditions are reversed: + // run done? signal? ... + break retry + } + if opts.Reset != nil { opts.Reset() } - continue + continue retry } - if errors.Is(err, unix.ENOTSUPP) { - return 0, 0, fmt.Errorf("kernel doesn't support testing program type %s: %w", p.Type(), ErrNotSupported) + if errors.Is(err, sys.ENOTSUPP) { + return 0, 0, fmt.Errorf("kernel doesn't support running %s: %w", p.Type(), ErrNotSupported) } - return 0, 0, fmt.Errorf("can't run test: %w", err) + return 0, 0, err } if opts.DataOut != nil { @@ -687,14 +865,14 @@ func (p *Program) testRun(opts *RunOptions) (uint32, time.Duration, error) { return attr.Retval, total, nil } -func unmarshalProgram(buf []byte) (*Program, error) { - if len(buf) != 4 { - return nil, errors.New("program id requires 4 byte value") +func unmarshalProgram(buf sysenc.Buffer) (*Program, error) { + var id uint32 + if err := buf.Unmarshal(&id); err != nil { + return nil, err } // Looking up an entry in a nested map or prog array returns an id, // not an fd. - id := internal.NativeEndian.Uint32(buf) return NewProgramFromID(ProgramID(id)) } @@ -726,7 +904,14 @@ func LoadPinnedProgram(fileName string, opts *LoadPinOptions) (*Program, error) return nil, fmt.Errorf("info for %s: %w", fileName, err) } - return &Program{"", fd, filepath.Base(fileName), fileName, info.Type}, nil + var progName string + if haveObjName() == nil { + progName = info.Name + } else { + progName = filepath.Base(fileName) + } + + return &Program{"", fd, progName, fileName, info.Type}, nil } // SanitizeName replaces all invalid characters in name with replacement. 
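The renamed run path keeps the RunOptions semantics documented above; a short sketch of a single test run (prog is a hypothetical loaded *ebpf.Program of a type the kernel can run):

	// XDP and SKB programs require at least a 14-byte Ethernet header as input.
	in := make([]byte, 14)
	ret, err := prog.Run(&ebpf.RunOptions{
		Data:   in,
		Repeat: 1, // may execute more often if the syscall is interrupted
	})
	if errors.Is(err, ebpf.ErrNotSupported) {
		// The kernel cannot run this program type; see haveProgRun above.
	}
	_ = ret // the eBPF program's return value
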
@@ -770,11 +955,15 @@ var errUnrecognizedAttachType = errors.New("unrecognized attach type") // find an attach target type in the kernel. // -// spec may be nil and defaults to the canonical kernel BTF. name together with -// progType and attachType determine which type we need to attach to. +// name, progType and attachType determine which type we need to attach to. // -// Returns errUnrecognizedAttachType. -func findTargetInKernel(spec *btf.Spec, name string, progType ProgramType, attachType AttachType) (btf.TypeID, error) { +// The attach target may be in a loaded kernel module. +// In that case the returned handle will be non-nil. +// The caller is responsible for closing the handle. +// +// Returns errUnrecognizedAttachType if the combination of progType and attachType +// is not recognised. +func findProgramTargetInKernel(name string, progType ProgramType, attachType AttachType) (*btf.Handle, btf.TypeID, error) { type match struct { p ProgramType a AttachType @@ -782,59 +971,128 @@ func findTargetInKernel(spec *btf.Spec, name string, progType ProgramType, attac var ( typeName, featureName string - isBTFTypeFunc = true + target btf.Type ) switch (match{progType, attachType}) { case match{LSM, AttachLSMMac}: typeName = "bpf_lsm_" + name featureName = name + " LSM hook" + target = (*btf.Func)(nil) case match{Tracing, AttachTraceIter}: typeName = "bpf_iter_" + name featureName = name + " iterator" + target = (*btf.Func)(nil) case match{Tracing, AttachTraceFEntry}: typeName = name featureName = fmt.Sprintf("fentry %s", name) + target = (*btf.Func)(nil) case match{Tracing, AttachTraceFExit}: typeName = name featureName = fmt.Sprintf("fexit %s", name) + target = (*btf.Func)(nil) case match{Tracing, AttachModifyReturn}: typeName = name featureName = fmt.Sprintf("fmod_ret %s", name) + target = (*btf.Func)(nil) case match{Tracing, AttachTraceRawTp}: typeName = fmt.Sprintf("btf_trace_%s", name) featureName = fmt.Sprintf("raw_tp %s", name) - isBTFTypeFunc = false + target = (*btf.Typedef)(nil) default: - return 0, errUnrecognizedAttachType + return nil, 0, errUnrecognizedAttachType } - spec, err := maybeLoadKernelBTF(spec) + spec, err := btf.LoadKernelSpec() if err != nil { - return 0, fmt.Errorf("load kernel spec: %w", err) + return nil, 0, fmt.Errorf("load kernel spec: %w", err) } - var target btf.Type - if isBTFTypeFunc { - var targetFunc *btf.Func - err = spec.TypeByName(typeName, &targetFunc) - target = targetFunc - } else { - var targetTypedef *btf.Typedef - err = spec.TypeByName(typeName, &targetTypedef) - target = targetTypedef + spec, module, err := findTargetInKernel(spec, typeName, &target) + if errors.Is(err, btf.ErrNotFound) { + return nil, 0, &internal.UnsupportedFeatureError{Name: featureName} + } + // See cilium/ebpf#894. Until we can disambiguate between equally-named kernel + // symbols, we should explicitly refuse program loads. They will not reliably + // do what the caller intended. + if errors.Is(err, btf.ErrMultipleMatches) { + return nil, 0, fmt.Errorf("attaching to ambiguous kernel symbol is not supported: %w", err) + } + if err != nil { + return nil, 0, fmt.Errorf("find target for %s: %w", featureName, err) } + id, err := spec.TypeID(target) if err != nil { + module.Close() + return nil, 0, err + } + + return module, id, nil +} + +// findTargetInKernel attempts to find a named type in the current kernel. +// +// target will point at the found type after a successful call. Searches both +// vmlinux and any loaded modules. 
+// +// Returns a non-nil handle if the type was found in a module, or btf.ErrNotFound +// if the type wasn't found at all. +func findTargetInKernel(kernelSpec *btf.Spec, typeName string, target *btf.Type) (*btf.Spec, *btf.Handle, error) { + err := kernelSpec.TypeByName(typeName, target) + if errors.Is(err, btf.ErrNotFound) { + spec, module, err := findTargetInModule(kernelSpec, typeName, target) + if err != nil { + return nil, nil, fmt.Errorf("find target in modules: %w", err) + } + return spec, module, nil + } + if err != nil { + return nil, nil, fmt.Errorf("find target in vmlinux: %w", err) + } + return kernelSpec, nil, err +} + +// findTargetInModule attempts to find a named type in any loaded module. +// +// base must contain the kernel's types and is used to parse kmod BTF. Modules +// are searched in the order they were loaded. +// +// Returns btf.ErrNotFound if the target can't be found in any module. +func findTargetInModule(base *btf.Spec, typeName string, target *btf.Type) (*btf.Spec, *btf.Handle, error) { + it := new(btf.HandleIterator) + defer it.Handle.Close() + + for it.Next() { + info, err := it.Handle.Info() + if err != nil { + return nil, nil, fmt.Errorf("get info for BTF ID %d: %w", it.ID, err) + } + + if !info.IsModule() { + continue + } + + spec, err := it.Handle.Spec(base) + if err != nil { + return nil, nil, fmt.Errorf("parse types for module %s: %w", info.Name, err) + } + + err = spec.TypeByName(typeName, target) if errors.Is(err, btf.ErrNotFound) { - return 0, &internal.UnsupportedFeatureError{ - Name: featureName, - } + continue } - return 0, fmt.Errorf("find target for %s: %w", featureName, err) + if err != nil { + return nil, nil, fmt.Errorf("lookup type in module %s: %w", info.Name, err) + } + + return spec, it.Take(), nil + } + if err := it.Err(); err != nil { + return nil, nil, fmt.Errorf("iterate modules: %w", err) } - return spec.TypeID(target) + return nil, nil, btf.ErrNotFound } // find an attach target type in a program. @@ -848,7 +1106,9 @@ func findTargetInProgram(prog *Program, name string, progType ProgramType, attac var typeName string switch (match{progType, attachType}) { - case match{Extension, AttachNone}: + case match{Extension, AttachNone}, + match{Tracing, AttachTraceFEntry}, + match{Tracing, AttachTraceFExit}: typeName = name default: return 0, errUnrecognizedAttachType diff --git a/vendor/github.com/cilium/ebpf/rlimit/rlimit.go b/vendor/github.com/cilium/ebpf/rlimit/rlimit.go new file mode 100644 index 0000000000..2a6973744f --- /dev/null +++ b/vendor/github.com/cilium/ebpf/rlimit/rlimit.go @@ -0,0 +1,123 @@ +// Package rlimit allows raising RLIMIT_MEMLOCK if necessary for the use of BPF. +package rlimit + +import ( + "errors" + "fmt" + "sync" + + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/unix" +) + +var ( + unsupportedMemcgAccounting = &internal.UnsupportedFeatureError{ + MinimumVersion: internal.Version{5, 11, 0}, + Name: "memcg-based accounting for BPF memory", + } + haveMemcgAccounting error + + rlimitMu sync.Mutex +) + +func init() { + // We have to run this feature test at init, since it relies on changing + // RLIMIT_MEMLOCK. Doing so is not safe in a concurrent program. Instead, + // we rely on the initialization order guaranteed by the Go runtime to + // execute the test in a safe environment: + // + // the invocation of init functions happens in a single goroutine, + // sequentially, one package at a time. 
+ // + // This is also the reason why RemoveMemlock is in its own package: + // we only want to run the initializer if RemoveMemlock is called + // from somewhere. + haveMemcgAccounting = detectMemcgAccounting() +} + +func detectMemcgAccounting() error { + // Retrieve the original limit to prevent lowering Max, since + // doing so is a permanent operation when running unprivileged. + var oldLimit unix.Rlimit + if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, nil, &oldLimit); err != nil { + return fmt.Errorf("getting original memlock rlimit: %s", err) + } + + // Drop the current limit to zero, maintaining the old Max value. + // This is always permitted by the kernel for unprivileged users. + // Retrieve a new copy of the old limit tuple to minimize the chances + // of failing the restore operation below. + zeroLimit := unix.Rlimit{Cur: 0, Max: oldLimit.Max} + if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, &zeroLimit, &oldLimit); err != nil { + return fmt.Errorf("lowering memlock rlimit: %s", err) + } + + attr := sys.MapCreateAttr{ + MapType: 2, /* Array */ + KeySize: 4, + ValueSize: 4, + MaxEntries: 1, + } + + // Creating a map allocates shared (and locked) memory that counts against + // the rlimit on pre-5.11 kernels, but against the memory cgroup budget on + // kernels 5.11 and over. If this call succeeds with the process' memlock + // rlimit set to 0, we can reasonably assume memcg accounting is supported. + fd, mapErr := sys.MapCreate(&attr) + + // Restore old limits regardless of what happened. + if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, &oldLimit, nil); err != nil { + return fmt.Errorf("restoring old memlock rlimit: %s", err) + } + + // Map creation successful, memcg accounting supported. + if mapErr == nil { + fd.Close() + return nil + } + + // EPERM shows up when map creation would exceed the memory budget. + if errors.Is(mapErr, unix.EPERM) { + return unsupportedMemcgAccounting + } + + // This shouldn't happen really. + return fmt.Errorf("unexpected error detecting memory cgroup accounting: %s", mapErr) +} + +// RemoveMemlock removes the limit on the amount of memory the current +// process can lock into RAM, if necessary. +// +// This is not required to load eBPF resources on kernel versions 5.11+ +// due to the introduction of cgroup-based memory accounting. On such kernels +// the function is a no-op. +// +// Since the function may change global per-process limits it should be invoked +// at program start up, in main() or init(). +// +// This function exists as a convenience and should only be used when +// permanently raising RLIMIT_MEMLOCK to infinite is appropriate. Consider +// invoking prlimit(2) directly with a more reasonable limit if desired. +// +// Requires CAP_SYS_RESOURCE on kernels < 5.11. +func RemoveMemlock() error { + if haveMemcgAccounting == nil { + return nil + } + + if !errors.Is(haveMemcgAccounting, unsupportedMemcgAccounting) { + return haveMemcgAccounting + } + + rlimitMu.Lock() + defer rlimitMu.Unlock() + + // pid 0 affects the current process. Requires CAP_SYS_RESOURCE. 
+ newLimit := unix.Rlimit{Cur: unix.RLIM_INFINITY, Max: unix.RLIM_INFINITY} + if err := unix.Prlimit(0, unix.RLIMIT_MEMLOCK, &newLimit, nil); err != nil { + return fmt.Errorf("failed to set memlock rlimit: %w", err) + } + + return nil +} diff --git a/vendor/github.com/cilium/ebpf/run-tests.sh b/vendor/github.com/cilium/ebpf/run-tests.sh index c21cca9e5e..c7ff7ea333 100644 --- a/vendor/github.com/cilium/ebpf/run-tests.sh +++ b/vendor/github.com/cilium/ebpf/run-tests.sh @@ -6,12 +6,31 @@ # $ ./run-tests.sh 5.4 # Run a subset of tests: # $ ./run-tests.sh 5.4 ./link +# Run using a local kernel image +# $ ./run-tests.sh /path/to/bzImage set -euo pipefail script="$(realpath "$0")" readonly script +source "$(dirname "$script")/testdata/sh/lib.sh" + +quote_env() { + for var in "$@"; do + if [ -v "$var" ]; then + printf "%s=%q " "$var" "${!var}" + fi + done +} + +declare -a preserved_env=( + PATH + CI_MAX_KERNEL_VERSION + TEST_SEED + KERNEL_VERSION +) + # This script is a bit like a Matryoshka doll since it keeps re-executing itself # in various different contexts: # @@ -48,43 +67,34 @@ if [[ "${1:-}" = "--exec-vm" ]]; then rm "${output}/fake-stdin" fi - for ((i = 0; i < 3; i++)); do - if ! $sudo virtme-run --kimg "${input}/bzImage" --memory 768M --pwd \ - --rwdir="${testdir}=${testdir}" \ - --rodir=/run/input="${input}" \ - --rwdir=/run/output="${output}" \ - --script-sh "PATH=\"$PATH\" CI_MAX_KERNEL_VERSION="${CI_MAX_KERNEL_VERSION:-}" \"$script\" --exec-test $cmd" \ - --kopt possible_cpus=2; then # need at least two CPUs for some tests - exit 23 - fi - - if [[ -e "${output}/status" ]]; then - break - fi - - if [[ -v CI ]]; then - echo "Retrying test run due to qemu crash" - continue - fi + if ! $sudo virtme-run --kimg "${input}/boot/vmlinuz" --cpus 2 --memory 1G --pwd \ + --rwdir="${testdir}=${testdir}" \ + --rodir=/run/input="${input}" \ + --rwdir=/run/output="${output}" \ + --script-sh "$(quote_env "${preserved_env[@]}") \"$script\" \ + --exec-test $cmd"; then + exit 23 + fi + if ! [[ -e "${output}/status" ]]; then exit 42 - done + fi rc=$(<"${output}/status") $sudo rm -r "$output" - exit $rc + exit "$rc" elif [[ "${1:-}" = "--exec-test" ]]; then shift mount -t bpf bpf /sys/fs/bpf mount -t tracefs tracefs /sys/kernel/debug/tracing - if [[ -d "/run/input/bpf" ]]; then - export KERNEL_SELFTESTS="/run/input/bpf" + if [[ -d "/run/input/usr/src/linux/tools/testing/selftests/bpf" ]]; then + export KERNEL_SELFTESTS="/run/input/usr/src/linux/tools/testing/selftests/bpf" fi - if [[ -f "/run/input/bpf/bpf_testmod/bpf_testmod.ko" ]]; then - insmod "/run/input/bpf/bpf_testmod/bpf_testmod.ko" + if [[ -d "/run/input/lib/modules" ]]; then + find /run/input/lib/modules -type f -name bpf_testmod.ko -exec insmod {} \; fi dmesg --clear @@ -95,38 +105,29 @@ elif [[ "${1:-}" = "--exec-test" ]]; then exit $rc # this return code is "swallowed" by qemu fi -readonly kernel_version="${1:-}" -if [[ -z "${kernel_version}" ]]; then - echo "Expecting kernel version as first argument" +if [[ -z "${1:-}" ]]; then + echo "Expecting kernel version or path as first argument" exit 1 fi -shift - -readonly kernel="linux-${kernel_version}.bz" -readonly selftests="linux-${kernel_version}-selftests-bpf.tgz" -readonly input="$(mktemp -d)" -readonly tmp_dir="${TMPDIR:-/tmp}" -readonly branch="${BRANCH:-master}" - -fetch() { - echo Fetching "${1}" - pushd "${tmp_dir}" > /dev/null - curl -s -L -O --fail --etag-compare "${1}.etag" --etag-save "${1}.etag" "https://github.com/cilium/ci-kernels/raw/${branch}/${1}" - local ret=$? 
- popd > /dev/null - return $ret -} -fetch "${kernel}" -cp "${tmp_dir}/${kernel}" "${input}/bzImage" +input="$(mktemp -d)" +readonly input -if fetch "${selftests}"; then - echo "Decompressing selftests" - mkdir "${input}/bpf" - tar --strip-components=4 -xf "${tmp_dir}/${selftests}" -C "${input}/bpf" +if [[ -f "${1}" ]]; then + # First argument is a local file. + readonly kernel="${1}" + cp "${1}" "${input}/boot/vmlinuz" else - echo "No selftests found, disabling" + readonly kernel="${1}" + + # LINUX_VERSION_CODE test compares this to discovered value. + export KERNEL_VERSION="${1}" + + if ! extract_oci_image "ghcr.io/cilium/ci-kernels:${kernel}-selftests" "${input}"; then + extract_oci_image "ghcr.io/cilium/ci-kernels:${kernel}" "${input}" + fi fi +shift args=(-short -coverpkg=./... -coverprofile=coverage.out -count 1 ./...) if (( $# > 0 )); then @@ -135,11 +136,9 @@ fi export GOFLAGS=-mod=readonly export CGO_ENABLED=0 -# LINUX_VERSION_CODE test compares this to discovered value. -export KERNEL_VERSION="${kernel_version}" -echo Testing on "${kernel_version}" +echo Testing on "${kernel}" go test -exec "$script --exec-vm $input" "${args[@]}" -echo "Test successful on ${kernel_version}" +echo "Test successful on ${kernel}" rm -r "${input}" diff --git a/vendor/github.com/cilium/ebpf/syscalls.go b/vendor/github.com/cilium/ebpf/syscalls.go index e5c270a558..4aef7faebc 100644 --- a/vendor/github.com/cilium/ebpf/syscalls.go +++ b/vendor/github.com/cilium/ebpf/syscalls.go @@ -4,13 +4,26 @@ import ( "bytes" "errors" "fmt" + "math" + "os" + "runtime" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/sys" + "github.com/cilium/ebpf/internal/tracefs" "github.com/cilium/ebpf/internal/unix" ) +var ( + // pre-allocating these here since they may + // get called in hot code paths and cause + // unnecessary memory allocations + sysErrKeyNotExist = sys.Error(ErrKeyNotExist, unix.ENOENT) + sysErrKeyExist = sys.Error(ErrKeyExist, unix.EEXIST) + sysErrNotSupported = sys.Error(ErrNotSupported, sys.ENOTSUPP) +) + // invalidBPFObjNameChar returns true if char may not appear in // a BPF object name. func invalidBPFObjNameChar(char rune) bool { @@ -47,7 +60,7 @@ func progLoad(insns asm.Instructions, typ ProgramType, license string) (*sys.FD, }) } -var haveNestedMaps = internal.FeatureTest("nested maps", "4.12", func() error { +var haveNestedMaps = internal.NewFeatureTest("nested maps", "4.12", func() error { _, err := sys.MapCreate(&sys.MapCreateAttr{ MapType: sys.MapType(ArrayOfMaps), KeySize: 4, @@ -65,7 +78,7 @@ var haveNestedMaps = internal.FeatureTest("nested maps", "4.12", func() error { return err }) -var haveMapMutabilityModifiers = internal.FeatureTest("read- and write-only maps", "5.2", func() error { +var haveMapMutabilityModifiers = internal.NewFeatureTest("read- and write-only maps", "5.2", func() error { // This checks BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG. Since // BPF_MAP_FREEZE appeared in 5.2 as well we don't do a separate check. m, err := sys.MapCreate(&sys.MapCreateAttr{ @@ -82,7 +95,7 @@ var haveMapMutabilityModifiers = internal.FeatureTest("read- and write-only maps return nil }) -var haveMmapableMaps = internal.FeatureTest("mmapable maps", "5.5", func() error { +var haveMmapableMaps = internal.NewFeatureTest("mmapable maps", "5.5", func() error { // This checks BPF_F_MMAPABLE, which appeared in 5.5 for array maps. 
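 	// Like the other probes in this file, it creates a throwaway map with the
 	// flag set and treats any creation failure as ErrNotSupported.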
m, err := sys.MapCreate(&sys.MapCreateAttr{ MapType: sys.MapType(Array), @@ -98,7 +111,7 @@ var haveMmapableMaps = internal.FeatureTest("mmapable maps", "5.5", func() error return nil }) -var haveInnerMaps = internal.FeatureTest("inner maps", "5.10", func() error { +var haveInnerMaps = internal.NewFeatureTest("inner maps", "5.10", func() error { // This checks BPF_F_INNER_MAP, which appeared in 5.10. m, err := sys.MapCreate(&sys.MapCreateAttr{ MapType: sys.MapType(Array), @@ -107,6 +120,7 @@ var haveInnerMaps = internal.FeatureTest("inner maps", "5.10", func() error { MaxEntries: 1, MapFlags: unix.BPF_F_INNER_MAP, }) + if err != nil { return internal.ErrNotSupported } @@ -114,7 +128,7 @@ var haveInnerMaps = internal.FeatureTest("inner maps", "5.10", func() error { return nil }) -var haveNoPreallocMaps = internal.FeatureTest("prealloc maps", "4.6", func() error { +var haveNoPreallocMaps = internal.NewFeatureTest("prealloc maps", "4.6", func() error { // This checks BPF_F_NO_PREALLOC, which appeared in 4.6. m, err := sys.MapCreate(&sys.MapCreateAttr{ MapType: sys.MapType(Hash), @@ -123,6 +137,7 @@ var haveNoPreallocMaps = internal.FeatureTest("prealloc maps", "4.6", func() err MaxEntries: 1, MapFlags: unix.BPF_F_NO_PREALLOC, }) + if err != nil { return internal.ErrNotSupported } @@ -136,15 +151,15 @@ func wrapMapError(err error) error { } if errors.Is(err, unix.ENOENT) { - return sys.Error(ErrKeyNotExist, unix.ENOENT) + return sysErrKeyNotExist } if errors.Is(err, unix.EEXIST) { - return sys.Error(ErrKeyExist, unix.EEXIST) + return sysErrKeyExist } - if errors.Is(err, unix.ENOTSUPP) { - return sys.Error(ErrNotSupported, unix.ENOTSUPP) + if errors.Is(err, sys.ENOTSUPP) { + return sysErrNotSupported } if errors.Is(err, unix.E2BIG) { @@ -154,7 +169,7 @@ func wrapMapError(err error) error { return err } -var haveObjName = internal.FeatureTest("object names", "4.15", func() error { +var haveObjName = internal.NewFeatureTest("object names", "4.15", func() error { attr := sys.MapCreateAttr{ MapType: sys.MapType(Array), KeySize: 4, @@ -172,7 +187,7 @@ var haveObjName = internal.FeatureTest("object names", "4.15", func() error { return nil }) -var objNameAllowsDot = internal.FeatureTest("dot in object names", "5.2", func() error { +var objNameAllowsDot = internal.NewFeatureTest("dot in object names", "5.2", func() error { if err := haveObjName(); err != nil { return err } @@ -194,7 +209,7 @@ var objNameAllowsDot = internal.FeatureTest("dot in object names", "5.2", func() return nil }) -var haveBatchAPI = internal.FeatureTest("map batch api", "5.6", func() error { +var haveBatchAPI = internal.NewFeatureTest("map batch api", "5.6", func() error { var maxEntries uint32 = 2 attr := sys.MapCreateAttr{ MapType: sys.MapType(Hash), @@ -211,8 +226,8 @@ var haveBatchAPI = internal.FeatureTest("map batch api", "5.6", func() error { keys := []uint32{1, 2} values := []uint32{3, 4} - kp, _ := marshalPtr(keys, 8) - vp, _ := marshalPtr(values, 8) + kp, _ := marshalMapSyscallInput(keys, 8) + vp, _ := marshalMapSyscallInput(values, 8) err = sys.MapUpdateBatch(&sys.MapUpdateBatchAttr{ MapFd: fd.Uint(), @@ -226,7 +241,7 @@ var haveBatchAPI = internal.FeatureTest("map batch api", "5.6", func() error { return nil }) -var haveProbeReadKernel = internal.FeatureTest("bpf_probe_read_kernel", "5.5", func() error { +var haveProbeReadKernel = internal.NewFeatureTest("bpf_probe_read_kernel", "5.5", func() error { insns := asm.Instructions{ asm.Mov.Reg(asm.R1, asm.R10), asm.Add.Imm(asm.R1, -8), @@ -244,7 +259,7 @@ var 
haveProbeReadKernel = internal.FeatureTest("bpf_probe_read_kernel", "5.5", f return nil }) -var haveBPFToBPFCalls = internal.FeatureTest("bpf2bpf calls", "4.16", func() error { +var haveBPFToBPFCalls = internal.NewFeatureTest("bpf2bpf calls", "4.16", func() error { insns := asm.Instructions{ asm.Call.Label("prog2").WithSymbol("prog1"), asm.Return(), @@ -253,12 +268,70 @@ var haveBPFToBPFCalls = internal.FeatureTest("bpf2bpf calls", "4.16", func() err } fd, err := progLoad(insns, SocketFilter, "MIT") - if errors.Is(err, unix.EINVAL) { + if err != nil { return internal.ErrNotSupported } + _ = fd.Close() + return nil +}) + +var haveSyscallWrapper = internal.NewFeatureTest("syscall wrapper", "4.17", func() error { + prefix := internal.PlatformPrefix() + if prefix == "" { + return fmt.Errorf("unable to find the platform prefix for (%s)", runtime.GOARCH) + } + + args := tracefs.ProbeArgs{ + Type: tracefs.Kprobe, + Symbol: prefix + "sys_bpf", + Pid: -1, + } + + var err error + args.Group, err = tracefs.RandomGroup("ebpf_probe") if err != nil { return err } - _ = fd.Close() - return nil + + evt, err := tracefs.NewEvent(args) + if errors.Is(err, os.ErrNotExist) { + return internal.ErrNotSupported + } + if err != nil { + return err + } + + return evt.Close() +}) + +var haveProgramExtInfos = internal.NewFeatureTest("program ext_infos", "5.0", func() error { + insns := asm.Instructions{ + asm.Mov.Imm(asm.R0, 0), + asm.Return(), + } + + buf := bytes.NewBuffer(make([]byte, 0, insns.Size())) + if err := insns.Marshal(buf, internal.NativeEndian); err != nil { + return err + } + bytecode := buf.Bytes() + + _, err := sys.ProgLoad(&sys.ProgLoadAttr{ + ProgType: sys.ProgType(SocketFilter), + License: sys.NewStringPointer("MIT"), + Insns: sys.NewSlicePointer(bytecode), + InsnCnt: uint32(len(bytecode) / asm.InstructionSize), + FuncInfoCnt: 1, + ProgBtfFd: math.MaxUint32, + }) + + if errors.Is(err, unix.EBADF) { + return nil + } + + if errors.Is(err, unix.E2BIG) { + return ErrNotSupported + } + + return err }) diff --git a/vendor/github.com/cilium/ebpf/types.go b/vendor/github.com/cilium/ebpf/types.go index a27b442474..542c2397ca 100644 --- a/vendor/github.com/cilium/ebpf/types.go +++ b/vendor/github.com/cilium/ebpf/types.go @@ -1,20 +1,16 @@ package ebpf import ( + "github.com/cilium/ebpf/internal/sys" "github.com/cilium/ebpf/internal/unix" ) -//go:generate stringer -output types_string.go -type=MapType,ProgramType,PinType +//go:generate go run golang.org/x/tools/cmd/stringer@latest -output types_string.go -type=MapType,ProgramType,PinType // MapType indicates the type map structure // that will be initialized in the kernel. type MapType uint32 -// Max returns the latest supported MapType. -func (MapType) Max() MapType { - return maxMapType - 1 -} - // All the various map types that can be created const ( UnspecifiedMap MapType = iota @@ -48,7 +44,7 @@ const ( // if an skb is from a socket belonging to a specific cgroup CGroupArray // LRUHash - This allows you to create a small hash structure that will purge the - // least recently used items rather than thow an error when you run out of memory + // least recently used items rather than throw an error when you run out of memory LRUHash // LRUCPUHash - This is NOT like PerCPUHash, this structure is shared among the CPUs, // it has more to do with including the CPU id with the LRU calculation so that if a @@ -99,8 +95,6 @@ const ( InodeStorage // TaskStorage - Specialized local storage map for task_struct. 
TaskStorage - // maxMapType - Bound enum of MapTypes, has to be last in enum. - maxMapType ) // hasPerCPUValue returns true if the Map stores a value per CPU. @@ -108,6 +102,12 @@ func (mt MapType) hasPerCPUValue() bool { return mt == PerCPUHash || mt == PerCPUArray || mt == LRUCPUHash || mt == PerCPUCGroupStorage } +// canStoreMapOrProgram returns true if the Map stores references to another Map +// or Program. +func (mt MapType) canStoreMapOrProgram() bool { + return mt.canStoreMap() || mt.canStoreProgram() +} + // canStoreMap returns true if the map type accepts a map fd // for update and returns a map id for lookup. func (mt MapType) canStoreMap() bool { @@ -120,60 +120,44 @@ func (mt MapType) canStoreProgram() bool { return mt == ProgramArray } -// hasBTF returns true if the map type supports BTF key/value metadata. -func (mt MapType) hasBTF() bool { - switch mt { - case PerfEventArray, CGroupArray, StackTrace, ArrayOfMaps, HashOfMaps, DevMap, - DevMapHash, CPUMap, XSKMap, SockMap, SockHash, Queue, Stack, RingBuf: - return false - default: - return true - } -} - // ProgramType of the eBPF program type ProgramType uint32 -// Max return the latest supported ProgramType. -func (ProgramType) Max() ProgramType { - return maxProgramType - 1 -} - // eBPF program types const ( - UnspecifiedProgram ProgramType = iota - SocketFilter - Kprobe - SchedCLS - SchedACT - TracePoint - XDP - PerfEvent - CGroupSKB - CGroupSock - LWTIn - LWTOut - LWTXmit - SockOps - SkSKB - CGroupDevice - SkMsg - RawTracepoint - CGroupSockAddr - LWTSeg6Local - LircMode2 - SkReuseport - FlowDissector - CGroupSysctl - RawTracepointWritable - CGroupSockopt - Tracing - StructOps - Extension - LSM - SkLookup - Syscall - maxProgramType + UnspecifiedProgram = ProgramType(sys.BPF_PROG_TYPE_UNSPEC) + SocketFilter = ProgramType(sys.BPF_PROG_TYPE_SOCKET_FILTER) + Kprobe = ProgramType(sys.BPF_PROG_TYPE_KPROBE) + SchedCLS = ProgramType(sys.BPF_PROG_TYPE_SCHED_CLS) + SchedACT = ProgramType(sys.BPF_PROG_TYPE_SCHED_ACT) + TracePoint = ProgramType(sys.BPF_PROG_TYPE_TRACEPOINT) + XDP = ProgramType(sys.BPF_PROG_TYPE_XDP) + PerfEvent = ProgramType(sys.BPF_PROG_TYPE_PERF_EVENT) + CGroupSKB = ProgramType(sys.BPF_PROG_TYPE_CGROUP_SKB) + CGroupSock = ProgramType(sys.BPF_PROG_TYPE_CGROUP_SOCK) + LWTIn = ProgramType(sys.BPF_PROG_TYPE_LWT_IN) + LWTOut = ProgramType(sys.BPF_PROG_TYPE_LWT_OUT) + LWTXmit = ProgramType(sys.BPF_PROG_TYPE_LWT_XMIT) + SockOps = ProgramType(sys.BPF_PROG_TYPE_SOCK_OPS) + SkSKB = ProgramType(sys.BPF_PROG_TYPE_SK_SKB) + CGroupDevice = ProgramType(sys.BPF_PROG_TYPE_CGROUP_DEVICE) + SkMsg = ProgramType(sys.BPF_PROG_TYPE_SK_MSG) + RawTracepoint = ProgramType(sys.BPF_PROG_TYPE_RAW_TRACEPOINT) + CGroupSockAddr = ProgramType(sys.BPF_PROG_TYPE_CGROUP_SOCK_ADDR) + LWTSeg6Local = ProgramType(sys.BPF_PROG_TYPE_LWT_SEG6LOCAL) + LircMode2 = ProgramType(sys.BPF_PROG_TYPE_LIRC_MODE2) + SkReuseport = ProgramType(sys.BPF_PROG_TYPE_SK_REUSEPORT) + FlowDissector = ProgramType(sys.BPF_PROG_TYPE_FLOW_DISSECTOR) + CGroupSysctl = ProgramType(sys.BPF_PROG_TYPE_CGROUP_SYSCTL) + RawTracepointWritable = ProgramType(sys.BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) + CGroupSockopt = ProgramType(sys.BPF_PROG_TYPE_CGROUP_SOCKOPT) + Tracing = ProgramType(sys.BPF_PROG_TYPE_TRACING) + StructOps = ProgramType(sys.BPF_PROG_TYPE_STRUCT_OPS) + Extension = ProgramType(sys.BPF_PROG_TYPE_EXT) + LSM = ProgramType(sys.BPF_PROG_TYPE_LSM) + SkLookup = ProgramType(sys.BPF_PROG_TYPE_SK_LOOKUP) + Syscall = ProgramType(sys.BPF_PROG_TYPE_SYSCALL) + Netfilter = 
ProgramType(sys.BPF_PROG_TYPE_NETFILTER) ) // AttachType of the eBPF program, needed to differentiate allowed context accesses in @@ -181,61 +165,75 @@ const ( // Will cause invalid argument (EINVAL) at program load time if set incorrectly. type AttachType uint32 -//go:generate stringer -type AttachType -trimprefix Attach +//go:generate go run golang.org/x/tools/cmd/stringer@latest -type AttachType -trimprefix Attach // AttachNone is an alias for AttachCGroupInetIngress for readability reasons. const AttachNone AttachType = 0 const ( - AttachCGroupInetIngress AttachType = iota - AttachCGroupInetEgress - AttachCGroupInetSockCreate - AttachCGroupSockOps - AttachSkSKBStreamParser - AttachSkSKBStreamVerdict - AttachCGroupDevice - AttachSkMsgVerdict - AttachCGroupInet4Bind - AttachCGroupInet6Bind - AttachCGroupInet4Connect - AttachCGroupInet6Connect - AttachCGroupInet4PostBind - AttachCGroupInet6PostBind - AttachCGroupUDP4Sendmsg - AttachCGroupUDP6Sendmsg - AttachLircMode2 - AttachFlowDissector - AttachCGroupSysctl - AttachCGroupUDP4Recvmsg - AttachCGroupUDP6Recvmsg - AttachCGroupGetsockopt - AttachCGroupSetsockopt - AttachTraceRawTp - AttachTraceFEntry - AttachTraceFExit - AttachModifyReturn - AttachLSMMac - AttachTraceIter - AttachCgroupInet4GetPeername - AttachCgroupInet6GetPeername - AttachCgroupInet4GetSockname - AttachCgroupInet6GetSockname - AttachXDPDevMap - AttachCgroupInetSockRelease - AttachXDPCPUMap - AttachSkLookup - AttachXDP - AttachSkSKBVerdict - AttachSkReuseportSelect - AttachSkReuseportSelectOrMigrate - AttachPerfEvent + AttachCGroupInetIngress = AttachType(sys.BPF_CGROUP_INET_INGRESS) + AttachCGroupInetEgress = AttachType(sys.BPF_CGROUP_INET_EGRESS) + AttachCGroupInetSockCreate = AttachType(sys.BPF_CGROUP_INET_SOCK_CREATE) + AttachCGroupSockOps = AttachType(sys.BPF_CGROUP_SOCK_OPS) + AttachSkSKBStreamParser = AttachType(sys.BPF_SK_SKB_STREAM_PARSER) + AttachSkSKBStreamVerdict = AttachType(sys.BPF_SK_SKB_STREAM_VERDICT) + AttachCGroupDevice = AttachType(sys.BPF_CGROUP_DEVICE) + AttachSkMsgVerdict = AttachType(sys.BPF_SK_MSG_VERDICT) + AttachCGroupInet4Bind = AttachType(sys.BPF_CGROUP_INET4_BIND) + AttachCGroupInet6Bind = AttachType(sys.BPF_CGROUP_INET6_BIND) + AttachCGroupInet4Connect = AttachType(sys.BPF_CGROUP_INET4_CONNECT) + AttachCGroupInet6Connect = AttachType(sys.BPF_CGROUP_INET6_CONNECT) + AttachCGroupInet4PostBind = AttachType(sys.BPF_CGROUP_INET4_POST_BIND) + AttachCGroupInet6PostBind = AttachType(sys.BPF_CGROUP_INET6_POST_BIND) + AttachCGroupUDP4Sendmsg = AttachType(sys.BPF_CGROUP_UDP4_SENDMSG) + AttachCGroupUDP6Sendmsg = AttachType(sys.BPF_CGROUP_UDP6_SENDMSG) + AttachLircMode2 = AttachType(sys.BPF_LIRC_MODE2) + AttachFlowDissector = AttachType(sys.BPF_FLOW_DISSECTOR) + AttachCGroupSysctl = AttachType(sys.BPF_CGROUP_SYSCTL) + AttachCGroupUDP4Recvmsg = AttachType(sys.BPF_CGROUP_UDP4_RECVMSG) + AttachCGroupUDP6Recvmsg = AttachType(sys.BPF_CGROUP_UDP6_RECVMSG) + AttachCGroupGetsockopt = AttachType(sys.BPF_CGROUP_GETSOCKOPT) + AttachCGroupSetsockopt = AttachType(sys.BPF_CGROUP_SETSOCKOPT) + AttachTraceRawTp = AttachType(sys.BPF_TRACE_RAW_TP) + AttachTraceFEntry = AttachType(sys.BPF_TRACE_FENTRY) + AttachTraceFExit = AttachType(sys.BPF_TRACE_FEXIT) + AttachModifyReturn = AttachType(sys.BPF_MODIFY_RETURN) + AttachLSMMac = AttachType(sys.BPF_LSM_MAC) + AttachTraceIter = AttachType(sys.BPF_TRACE_ITER) + AttachCgroupInet4GetPeername = AttachType(sys.BPF_CGROUP_INET4_GETPEERNAME) + AttachCgroupInet6GetPeername = AttachType(sys.BPF_CGROUP_INET6_GETPEERNAME) + 
AttachCgroupInet4GetSockname = AttachType(sys.BPF_CGROUP_INET4_GETSOCKNAME) + AttachCgroupInet6GetSockname = AttachType(sys.BPF_CGROUP_INET6_GETSOCKNAME) + AttachXDPDevMap = AttachType(sys.BPF_XDP_DEVMAP) + AttachCgroupInetSockRelease = AttachType(sys.BPF_CGROUP_INET_SOCK_RELEASE) + AttachXDPCPUMap = AttachType(sys.BPF_XDP_CPUMAP) + AttachSkLookup = AttachType(sys.BPF_SK_LOOKUP) + AttachXDP = AttachType(sys.BPF_XDP) + AttachSkSKBVerdict = AttachType(sys.BPF_SK_SKB_VERDICT) + AttachSkReuseportSelect = AttachType(sys.BPF_SK_REUSEPORT_SELECT) + AttachSkReuseportSelectOrMigrate = AttachType(sys.BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) + AttachPerfEvent = AttachType(sys.BPF_PERF_EVENT) + AttachTraceKprobeMulti = AttachType(sys.BPF_TRACE_KPROBE_MULTI) + AttachLSMCgroup = AttachType(sys.BPF_LSM_CGROUP) + AttachStructOps = AttachType(sys.BPF_STRUCT_OPS) + AttachNetfilter = AttachType(sys.BPF_NETFILTER) + AttachTCXIngress = AttachType(sys.BPF_TCX_INGRESS) + AttachTCXEgress = AttachType(sys.BPF_TCX_EGRESS) + AttachTraceUprobeMulti = AttachType(sys.BPF_TRACE_UPROBE_MULTI) + AttachCgroupUnixConnect = AttachType(sys.BPF_CGROUP_UNIX_CONNECT) + AttachCgroupUnixSendmsg = AttachType(sys.BPF_CGROUP_UNIX_SENDMSG) + AttachCgroupUnixRecvmsg = AttachType(sys.BPF_CGROUP_UNIX_RECVMSG) + AttachCgroupUnixGetpeername = AttachType(sys.BPF_CGROUP_UNIX_GETPEERNAME) + AttachCgroupUnixGetsockname = AttachType(sys.BPF_CGROUP_UNIX_GETSOCKNAME) + AttachNetkitPrimary = AttachType(sys.BPF_NETKIT_PRIMARY) + AttachNetkitPeer = AttachType(sys.BPF_NETKIT_PEER) ) // AttachFlags of the eBPF program used in BPF_PROG_ATTACH command type AttachFlags uint32 // PinType determines whether a map is pinned into a BPFFS. -type PinType int +type PinType uint32 // Valid pin types. // @@ -282,3 +280,20 @@ type BatchOptions struct { ElemFlags uint64 Flags uint64 } + +// LogLevel controls the verbosity of the kernel's eBPF program verifier. +// These constants can be used for the ProgramOptions.LogLevel field. +type LogLevel = sys.LogLevel + +const ( + // Print verifier state at branch points. + LogLevelBranch = sys.BPF_LOG_LEVEL1 + + // Print verifier state for every instruction. + // Available since Linux v5.2. + LogLevelInstruction = sys.BPF_LOG_LEVEL2 + + // Print verifier errors and stats at the end of the verification process. + // Available since Linux v5.2. 
+ LogLevelStats = sys.BPF_LOG_STATS +) diff --git a/vendor/github.com/cilium/ebpf/types_string.go b/vendor/github.com/cilium/ebpf/types_string.go index e80b948b09..ee60b5be5b 100644 --- a/vendor/github.com/cilium/ebpf/types_string.go +++ b/vendor/github.com/cilium/ebpf/types_string.go @@ -38,12 +38,11 @@ func _() { _ = x[RingBuf-27] _ = x[InodeStorage-28] _ = x[TaskStorage-29] - _ = x[maxMapType-30] } -const _MapType_name = "UnspecifiedMapHashArrayProgramArrayPerfEventArrayPerCPUHashPerCPUArrayStackTraceCGroupArrayLRUHashLRUCPUHashLPMTrieArrayOfMapsHashOfMapsDevMapSockMapCPUMapXSKMapSockHashCGroupStorageReusePortSockArrayPerCPUCGroupStorageQueueStackSkStorageDevMapHashStructOpsMapRingBufInodeStorageTaskStoragemaxMapType" +const _MapType_name = "UnspecifiedMapHashArrayProgramArrayPerfEventArrayPerCPUHashPerCPUArrayStackTraceCGroupArrayLRUHashLRUCPUHashLPMTrieArrayOfMapsHashOfMapsDevMapSockMapCPUMapXSKMapSockHashCGroupStorageReusePortSockArrayPerCPUCGroupStorageQueueStackSkStorageDevMapHashStructOpsMapRingBufInodeStorageTaskStorage" -var _MapType_index = [...]uint16{0, 14, 18, 23, 35, 49, 59, 70, 80, 91, 98, 108, 115, 126, 136, 142, 149, 155, 161, 169, 182, 200, 219, 224, 229, 238, 248, 260, 267, 279, 290, 300} +var _MapType_index = [...]uint16{0, 14, 18, 23, 35, 49, 59, 70, 80, 91, 98, 108, 115, 126, 136, 142, 149, 155, 161, 169, 182, 200, 219, 224, 229, 238, 248, 260, 267, 279, 290} func (i MapType) String() string { if i >= MapType(len(_MapType_index)-1) { @@ -87,12 +86,12 @@ func _() { _ = x[LSM-29] _ = x[SkLookup-30] _ = x[Syscall-31] - _ = x[maxProgramType-32] + _ = x[Netfilter-32] } -const _ProgramType_name = "UnspecifiedProgramSocketFilterKprobeSchedCLSSchedACTTracePointXDPPerfEventCGroupSKBCGroupSockLWTInLWTOutLWTXmitSockOpsSkSKBCGroupDeviceSkMsgRawTracepointCGroupSockAddrLWTSeg6LocalLircMode2SkReuseportFlowDissectorCGroupSysctlRawTracepointWritableCGroupSockoptTracingStructOpsExtensionLSMSkLookupSyscallmaxProgramType" +const _ProgramType_name = "UnspecifiedProgramSocketFilterKprobeSchedCLSSchedACTTracePointXDPPerfEventCGroupSKBCGroupSockLWTInLWTOutLWTXmitSockOpsSkSKBCGroupDeviceSkMsgRawTracepointCGroupSockAddrLWTSeg6LocalLircMode2SkReuseportFlowDissectorCGroupSysctlRawTracepointWritableCGroupSockoptTracingStructOpsExtensionLSMSkLookupSyscallNetfilter" -var _ProgramType_index = [...]uint16{0, 18, 30, 36, 44, 52, 62, 65, 74, 83, 93, 98, 104, 111, 118, 123, 135, 140, 153, 167, 179, 188, 199, 212, 224, 245, 258, 265, 274, 283, 286, 294, 301, 315} +var _ProgramType_index = [...]uint16{0, 18, 30, 36, 44, 52, 62, 65, 74, 83, 93, 98, 104, 111, 118, 123, 135, 140, 153, 167, 179, 188, 199, 212, 224, 245, 258, 265, 274, 283, 286, 294, 301, 310} func (i ProgramType) String() string { if i >= ProgramType(len(_ProgramType_index)-1) { @@ -113,7 +112,7 @@ const _PinType_name = "PinNonePinByName" var _PinType_index = [...]uint8{0, 7, 16} func (i PinType) String() string { - if i < 0 || i >= PinType(len(_PinType_index)-1) { + if i >= PinType(len(_PinType_index)-1) { return "PinType(" + strconv.FormatInt(int64(i), 10) + ")" } return _PinType_name[_PinType_index[i]:_PinType_index[i+1]] diff --git a/vendor/github.com/godbus/dbus/v5/auth.go b/vendor/github.com/godbus/dbus/v5/auth.go index a59b4c0eb7..0f3b252c07 100644 --- a/vendor/github.com/godbus/dbus/v5/auth.go +++ b/vendor/github.com/godbus/dbus/v5/auth.go @@ -176,9 +176,10 @@ func (conn *Conn) tryAuth(m Auth, state authState, in *bufio.Reader) (error, boo return err, false } state = waitingForReject + } else { + conn.uuid = string(s[1]) + return 
nil, true } - conn.uuid = string(s[1]) - return nil, true case state == waitingForData: err = authWriteLine(conn.transport, []byte("ERROR")) if err != nil { @@ -191,9 +192,10 @@ func (conn *Conn) tryAuth(m Auth, state authState, in *bufio.Reader) (error, boo return err, false } state = waitingForReject + } else { + conn.uuid = string(s[1]) + return nil, true } - conn.uuid = string(s[1]) - return nil, true case state == waitingForOk && string(s[0]) == "DATA": err = authWriteLine(conn.transport, []byte("DATA")) if err != nil { diff --git a/vendor/github.com/godbus/dbus/v5/conn.go b/vendor/github.com/godbus/dbus/v5/conn.go index 76fc5cde3d..69978ea26a 100644 --- a/vendor/github.com/godbus/dbus/v5/conn.go +++ b/vendor/github.com/godbus/dbus/v5/conn.go @@ -169,7 +169,7 @@ func Connect(address string, opts ...ConnOption) (*Conn, error) { // SystemBusPrivate returns a new private connection to the system bus. // Note: this connection is not ready to use. One must perform Auth and Hello -// on the connection before it is useable. +// on the connection before it is usable. func SystemBusPrivate(opts ...ConnOption) (*Conn, error) { return Dial(getSystemBusPlatformAddress(), opts...) } @@ -284,10 +284,6 @@ func newConn(tr transport, opts ...ConnOption) (*Conn, error) { conn.ctx = context.Background() } conn.ctx, conn.cancelCtx = context.WithCancel(conn.ctx) - go func() { - <-conn.ctx.Done() - conn.Close() - }() conn.calls = newCallTracker() if conn.handler == nil { @@ -302,6 +298,11 @@ func newConn(tr transport, opts ...ConnOption) (*Conn, error) { conn.outHandler = &outputHandler{conn: conn} conn.names = newNameTracker() conn.busObj = conn.Object("org.freedesktop.DBus", "/org/freedesktop/DBus") + + go func() { + <-conn.ctx.Done() + conn.Close() + }() return conn, nil } @@ -550,6 +551,11 @@ func (conn *Conn) send(ctx context.Context, msg *Message, ch chan *Call) *Call { call.ctx = ctx call.ctxCanceler = canceler conn.calls.track(msg.serial, call) + if ctx.Err() != nil { + // short path: don't even send the message if context already cancelled + conn.calls.handleSendError(msg, ctx.Err()) + return call + } go func() { <-ctx.Done() conn.calls.handleSendError(msg, ctx.Err()) @@ -649,7 +655,9 @@ func (conn *Conn) RemoveMatchSignalContext(ctx context.Context, options ...Match // Signal registers the given channel to be passed all received signal messages. // -// Multiple of these channels can be registered at the same time. +// Multiple of these channels can be registered at the same time. The channel is +// closed if the Conn is closed; it should not be closed by the caller before +// RemoveSignal was called on it. // // These channels are "overwritten" by Eavesdrop; i.e., if there currently is a // channel for eavesdropped messages, this channel receives all signals, and @@ -765,7 +773,12 @@ func getKey(s, key string) string { for _, keyEqualsValue := range strings.Split(s, ",") { keyValue := strings.SplitN(keyEqualsValue, "=", 2) if len(keyValue) == 2 && keyValue[0] == key { - return keyValue[1] + val, err := UnescapeBusAddressValue(keyValue[1]) + if err != nil { + // No way to return an error. 
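+				// Callers treat an empty value the same as a missing key.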
+ return "" + } + return val } } return "" diff --git a/vendor/github.com/godbus/dbus/v5/conn_other.go b/vendor/github.com/godbus/dbus/v5/conn_other.go index 616dcf6644..90289ca85a 100644 --- a/vendor/github.com/godbus/dbus/v5/conn_other.go +++ b/vendor/github.com/godbus/dbus/v5/conn_other.go @@ -54,7 +54,7 @@ func tryDiscoverDbusSessionBusAddress() string { if runUserBusFile := path.Join(runtimeDirectory, "bus"); fileExists(runUserBusFile) { // if /run/user//bus exists, that file itself // *is* the unix socket, so return its path - return fmt.Sprintf("unix:path=%s", runUserBusFile) + return fmt.Sprintf("unix:path=%s", EscapeBusAddressValue(runUserBusFile)) } if runUserSessionDbusFile := path.Join(runtimeDirectory, "dbus-session"); fileExists(runUserSessionDbusFile) { // if /run/user//dbus-session exists, it's a @@ -85,9 +85,6 @@ func getRuntimeDirectory() (string, error) { } func fileExists(filename string) bool { - if _, err := os.Stat(filename); !os.IsNotExist(err) { - return true - } else { - return false - } + _, err := os.Stat(filename) + return !os.IsNotExist(err) } diff --git a/vendor/github.com/godbus/dbus/v5/dbus.go b/vendor/github.com/godbus/dbus/v5/dbus.go index ddf3b7afde..c188d10485 100644 --- a/vendor/github.com/godbus/dbus/v5/dbus.go +++ b/vendor/github.com/godbus/dbus/v5/dbus.go @@ -122,8 +122,11 @@ func isConvertibleTo(dest, src reflect.Type) bool { case dest.Kind() == reflect.Slice: return src.Kind() == reflect.Slice && isConvertibleTo(dest.Elem(), src.Elem()) + case dest.Kind() == reflect.Ptr: + dest = dest.Elem() + return isConvertibleTo(dest, src) case dest.Kind() == reflect.Struct: - return src == interfacesType + return src == interfacesType || dest.Kind() == src.Kind() default: return src.ConvertibleTo(dest) } @@ -274,13 +277,8 @@ func storeSliceIntoInterface(dest, src reflect.Value) error { func storeSliceIntoSlice(dest, src reflect.Value) error { if dest.IsNil() || dest.Len() < src.Len() { dest.Set(reflect.MakeSlice(dest.Type(), src.Len(), src.Cap())) - } - if dest.Len() != src.Len() { - return fmt.Errorf( - "dbus.Store: type mismatch: "+ - "slices are different lengths "+ - "need: %d have: %d", - src.Len(), dest.Len()) + } else if dest.Len() > src.Len() { + dest.Set(dest.Slice(0, src.Len())) } for i := 0; i < src.Len(); i++ { err := store(dest.Index(i), getVariantValue(src.Index(i))) diff --git a/vendor/github.com/godbus/dbus/v5/doc.go b/vendor/github.com/godbus/dbus/v5/doc.go index ade1df951c..8f25a00d61 100644 --- a/vendor/github.com/godbus/dbus/v5/doc.go +++ b/vendor/github.com/godbus/dbus/v5/doc.go @@ -10,8 +10,10 @@ value. Conversion Rules For outgoing messages, Go types are automatically converted to the -corresponding D-Bus types. The following types are directly encoded as their -respective D-Bus equivalents: +corresponding D-Bus types. See the official specification at +https://dbus.freedesktop.org/doc/dbus-specification.html#type-system for more +information on the D-Bus type system. The following types are directly encoded +as their respective D-Bus equivalents: Go type | D-Bus type ------------+----------- @@ -39,8 +41,8 @@ Maps encode as DICTs, provided that their key type can be used as a key for a DICT. Structs other than Variant and Signature encode as a STRUCT containing their -exported fields. Fields whose tags contain `dbus:"-"` and unexported fields will -be skipped. +exported fields in order. Fields whose tags contain `dbus:"-"` and unexported +fields will be skipped. Pointers encode as the value they're pointed to. 
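The struct-encoding rule documented just above is easiest to see in a short example. The sketch below is illustrative only (the Config type is hypothetical); it assumes the godbus/dbus/v5 API vendored in this change:

package main

import (
	"fmt"

	"github.com/godbus/dbus/v5"
)

// Config is a hypothetical type. It encodes as a D-Bus STRUCT built from its
// exported fields in order: Secret is skipped because of the `dbus:"-"` tag,
// and counter is skipped because it is unexported.
type Config struct {
	Name    string
	Port    uint32
	Secret  string `dbus:"-"`
	counter int
}

func main() {
	// Only Name and Port are encoded, so the wire signature is (su).
	fmt.Println(dbus.SignatureOf(Config{}).String())
}

Field order, not field names, determines the wire layout, so reordering the exported fields of Config changes its signature.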
diff --git a/vendor/github.com/godbus/dbus/v5/escape.go b/vendor/github.com/godbus/dbus/v5/escape.go new file mode 100644 index 0000000000..d1509d9458 --- /dev/null +++ b/vendor/github.com/godbus/dbus/v5/escape.go @@ -0,0 +1,84 @@ +package dbus + +import "net/url" + +// EscapeBusAddressValue implements a requirement to escape the values +// in D-Bus server addresses, as defined by the D-Bus specification at +// https://dbus.freedesktop.org/doc/dbus-specification.html#addresses. +func EscapeBusAddressValue(val string) string { + toEsc := strNeedsEscape(val) + if toEsc == 0 { + // Avoid unneeded allocation/copying. + return val + } + + // Avoid allocation for short paths. + var buf [64]byte + var out []byte + // Every to-be-escaped byte needs 2 extra bytes. + required := len(val) + 2*toEsc + if required <= len(buf) { + out = buf[:required] + } else { + out = make([]byte, required) + } + + j := 0 + for i := 0; i < len(val); i++ { + if ch := val[i]; needsEscape(ch) { + // Convert ch to %xx, where xx is hex value. + out[j] = '%' + out[j+1] = hexchar(ch >> 4) + out[j+2] = hexchar(ch & 0x0F) + j += 3 + } else { + out[j] = ch + j++ + } + } + + return string(out) +} + +// UnescapeBusAddressValue unescapes values in D-Bus server addresses, +// as defined by the D-Bus specification at +// https://dbus.freedesktop.org/doc/dbus-specification.html#addresses. +func UnescapeBusAddressValue(val string) (string, error) { + // Looks like url.PathUnescape does exactly what is required. + return url.PathUnescape(val) +} + +// hexchar returns an octal representation of a n, where n < 16. +// For invalid values of n, the function panics. +func hexchar(n byte) byte { + const hex = "0123456789abcdef" + + // For n >= len(hex), runtime will panic. + return hex[n] +} + +// needsEscape tells if a byte is NOT one of optionally-escaped bytes. +func needsEscape(c byte) bool { + if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' { + return false + } + switch c { + case '-', '_', '/', '\\', '.', '*': + return false + } + + return true +} + +// strNeedsEscape tells how many bytes in the string need escaping. +func strNeedsEscape(val string) int { + count := 0 + + for i := 0; i < len(val); i++ { + if needsEscape(val[i]) { + count++ + } + } + + return count +} diff --git a/vendor/github.com/godbus/dbus/v5/export.go b/vendor/github.com/godbus/dbus/v5/export.go index 522334715b..d3dd9f7cd6 100644 --- a/vendor/github.com/godbus/dbus/v5/export.go +++ b/vendor/github.com/godbus/dbus/v5/export.go @@ -3,6 +3,7 @@ package dbus import ( "errors" "fmt" + "os" "reflect" "strings" ) @@ -209,28 +210,23 @@ func (conn *Conn) handleCall(msg *Message) { } reply.Headers[FieldSignature] = MakeVariant(SignatureOf(reply.Body...)) - conn.sendMessageAndIfClosed(reply, nil) + if err := reply.IsValid(); err != nil { + fmt.Fprintf(os.Stderr, "dbus: dropping invalid reply to %s.%s on obj %s: %s\n", ifaceName, name, path, err) + } else { + conn.sendMessageAndIfClosed(reply, nil) + } } } // Emit emits the given signal on the message bus. The name parameter must be // formatted as "interface.member", e.g., "org.freedesktop.DBus.NameLost". 
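 // The assembled message is validated with Message.IsValid before it is sent.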
func (conn *Conn) Emit(path ObjectPath, name string, values ...interface{}) error { - if !path.IsValid() { - return errors.New("dbus: invalid object path") - } i := strings.LastIndex(name, ".") if i == -1 { return errors.New("dbus: invalid method name") } iface := name[:i] member := name[i+1:] - if !isValidMember(member) { - return errors.New("dbus: invalid method name") - } - if !isValidInterface(iface) { - return errors.New("dbus: invalid interface name") - } msg := new(Message) msg.Type = TypeSignal msg.Headers = make(map[HeaderField]Variant) @@ -241,6 +237,9 @@ func (conn *Conn) Emit(path ObjectPath, name string, values ...interface{}) erro if len(values) > 0 { msg.Headers[FieldSignature] = MakeVariant(SignatureOf(values...)) } + if err := msg.IsValid(); err != nil { + return err + } var closed bool conn.sendMessageAndIfClosed(msg, func() { diff --git a/vendor/github.com/godbus/dbus/v5/homedir.go b/vendor/github.com/godbus/dbus/v5/homedir.go index 0b745f9313..c44d9b5fc2 100644 --- a/vendor/github.com/godbus/dbus/v5/homedir.go +++ b/vendor/github.com/godbus/dbus/v5/homedir.go @@ -2,27 +2,24 @@ package dbus import ( "os" - "sync" -) - -var ( - homeDir string - homeDirLock sync.Mutex + "os/user" ) +// Get returns the home directory of the current user, which is usually the +// value of HOME environment variable. In case it is not set or empty, os/user +// package is used. +// +// If linking statically with cgo enabled against glibc, make sure the +// osusergo build tag is used. +// +// If needing to do nss lookups, do not disable cgo or set osusergo. func getHomeDir() string { - homeDirLock.Lock() - defer homeDirLock.Unlock() - + homeDir := os.Getenv("HOME") if homeDir != "" { return homeDir } - - homeDir = os.Getenv("HOME") - if homeDir != "" { - return homeDir + if u, err := user.Current(); err == nil { + return u.HomeDir } - - homeDir = lookupHomeDir() - return homeDir + return "/" } diff --git a/vendor/github.com/godbus/dbus/v5/homedir_dynamic.go b/vendor/github.com/godbus/dbus/v5/homedir_dynamic.go deleted file mode 100644 index 2732081e73..0000000000 --- a/vendor/github.com/godbus/dbus/v5/homedir_dynamic.go +++ /dev/null @@ -1,15 +0,0 @@ -// +build !static_build - -package dbus - -import ( - "os/user" -) - -func lookupHomeDir() string { - u, err := user.Current() - if err != nil { - return "/" - } - return u.HomeDir -} diff --git a/vendor/github.com/godbus/dbus/v5/homedir_static.go b/vendor/github.com/godbus/dbus/v5/homedir_static.go deleted file mode 100644 index b9d9cb5525..0000000000 --- a/vendor/github.com/godbus/dbus/v5/homedir_static.go +++ /dev/null @@ -1,45 +0,0 @@ -// +build static_build - -package dbus - -import ( - "bufio" - "os" - "strconv" - "strings" -) - -func lookupHomeDir() string { - myUid := os.Getuid() - - f, err := os.Open("/etc/passwd") - if err != nil { - return "/" - } - defer f.Close() - - s := bufio.NewScanner(f) - - for s.Scan() { - if err := s.Err(); err != nil { - break - } - - line := strings.TrimSpace(s.Text()) - if line == "" { - continue - } - - parts := strings.Split(line, ":") - - if len(parts) >= 6 { - uid, err := strconv.Atoi(parts[2]) - if err == nil && uid == myUid { - return parts[5] - } - } - } - - // Default to / if we can't get a better value - return "/" -} diff --git a/vendor/github.com/godbus/dbus/v5/message.go b/vendor/github.com/godbus/dbus/v5/message.go index 16693eb301..bdf43fdd6e 100644 --- a/vendor/github.com/godbus/dbus/v5/message.go +++ b/vendor/github.com/godbus/dbus/v5/message.go @@ -208,7 +208,7 @@ func DecodeMessageWithFDs(rd 
io.Reader, fds []int) (msg *Message, err error) { // The possibly returned error can be an error of the underlying reader, an // InvalidMessageError or a FormatError. func DecodeMessage(rd io.Reader) (msg *Message, err error) { - return DecodeMessageWithFDs(rd, make([]int, 0)); + return DecodeMessageWithFDs(rd, make([]int, 0)) } type nullwriter struct{} @@ -227,8 +227,8 @@ func (msg *Message) CountFds() (int, error) { } func (msg *Message) EncodeToWithFDs(out io.Writer, order binary.ByteOrder) (fds []int, err error) { - if err := msg.IsValid(); err != nil { - return make([]int, 0), err + if err := msg.validateHeader(); err != nil { + return nil, err } var vs [7]interface{} switch order { @@ -237,7 +237,7 @@ func (msg *Message) EncodeToWithFDs(out io.Writer, order binary.ByteOrder) (fds case binary.BigEndian: vs[0] = byte('B') default: - return make([]int, 0), errors.New("dbus: invalid byte order") + return nil, errors.New("dbus: invalid byte order") } body := new(bytes.Buffer) fds = make([]int, 0) @@ -284,8 +284,13 @@ func (msg *Message) EncodeTo(out io.Writer, order binary.ByteOrder) (err error) } // IsValid checks whether msg is a valid message and returns an -// InvalidMessageError if it is not. +// InvalidMessageError or FormatError if it is not. func (msg *Message) IsValid() error { + var b bytes.Buffer + return msg.EncodeTo(&b, nativeEndian) +} + +func (msg *Message) validateHeader() error { if msg.Flags & ^(FlagNoAutoStart|FlagNoReplyExpected|FlagAllowInteractiveAuthorization) != 0 { return InvalidMessageError("invalid flags") } @@ -330,6 +335,7 @@ func (msg *Message) IsValid() error { return InvalidMessageError("missing signature") } } + return nil } diff --git a/vendor/github.com/godbus/dbus/v5/server_interfaces.go b/vendor/github.com/godbus/dbus/v5/server_interfaces.go index 79d97edf3e..e4e0389fdf 100644 --- a/vendor/github.com/godbus/dbus/v5/server_interfaces.go +++ b/vendor/github.com/godbus/dbus/v5/server_interfaces.go @@ -63,7 +63,7 @@ type Method interface { // any other decoding scheme. type ArgumentDecoder interface { // To decode the arguments of a method the sender and message are - // provided incase the semantics of the implementer provides access + // provided in case the semantics of the implementer provides access // to these as part of the method invocation. 
DecodeArguments(conn *Conn, sender string, msg *Message, args []interface{}) ([]interface{}, error) } diff --git a/vendor/github.com/godbus/dbus/v5/sig.go b/vendor/github.com/godbus/dbus/v5/sig.go index 41a0398129..6b9cadb5fb 100644 --- a/vendor/github.com/godbus/dbus/v5/sig.go +++ b/vendor/github.com/godbus/dbus/v5/sig.go @@ -102,7 +102,7 @@ func getSignature(t reflect.Type, depth *depthCounter) (sig string) { } } if len(s) == 0 { - panic("empty struct") + panic(InvalidTypeError{t}) } return "(" + s + ")" case reflect.Array, reflect.Slice: diff --git a/vendor/github.com/godbus/dbus/v5/transport_unix.go b/vendor/github.com/godbus/dbus/v5/transport_unix.go index 2212e7fa7f..0a8c712ebd 100644 --- a/vendor/github.com/godbus/dbus/v5/transport_unix.go +++ b/vendor/github.com/godbus/dbus/v5/transport_unix.go @@ -154,17 +154,15 @@ func (t *unixTransport) ReadMessage() (*Message, error) { // substitute the values in the message body (which are indices for the // array receiver via OOB) with the actual values for i, v := range msg.Body { - switch v.(type) { + switch index := v.(type) { case UnixFDIndex: - j := v.(UnixFDIndex) - if uint32(j) >= unixfds { + if uint32(index) >= unixfds { return nil, InvalidMessageError("invalid index for unix fd") } - msg.Body[i] = UnixFD(fds[j]) + msg.Body[i] = UnixFD(fds[index]) case []UnixFDIndex: - idxArray := v.([]UnixFDIndex) - fdArray := make([]UnixFD, len(idxArray)) - for k, j := range idxArray { + fdArray := make([]UnixFD, len(index)) + for k, j := range index { if uint32(j) >= unixfds { return nil, InvalidMessageError("invalid index for unix fd") } diff --git a/vendor/github.com/godbus/dbus/v5/transport_zos.go b/vendor/github.com/godbus/dbus/v5/transport_zos.go new file mode 100644 index 0000000000..1bba0d6bf7 --- /dev/null +++ b/vendor/github.com/godbus/dbus/v5/transport_zos.go @@ -0,0 +1,6 @@ +package dbus + +func (t *unixTransport) SendNullByte() error { + _, err := t.Write([]byte{0}) + return err +} diff --git a/vendor/github.com/godbus/dbus/v5/variant.go b/vendor/github.com/godbus/dbus/v5/variant.go index f1e81f3ede..ca3dbe16a4 100644 --- a/vendor/github.com/godbus/dbus/v5/variant.go +++ b/vendor/github.com/godbus/dbus/v5/variant.go @@ -49,7 +49,7 @@ func ParseVariant(s string, sig Signature) (Variant, error) { } // format returns a formatted version of v and whether this string can be parsed -// unambigously. +// unambiguously. func (v Variant) format() (string, bool) { switch v.sig.str[0] { case 'b', 'i': diff --git a/vendor/github.com/moby/sys/mountinfo/mounted_linux.go b/vendor/github.com/moby/sys/mountinfo/mounted_linux.go index 5c9e3e30e6..e78e726196 100644 --- a/vendor/github.com/moby/sys/mountinfo/mounted_linux.go +++ b/vendor/github.com/moby/sys/mountinfo/mounted_linux.go @@ -7,6 +7,34 @@ import ( "golang.org/x/sys/unix" ) +// MountedFast is a method of detecting a mount point without reading +// mountinfo from procfs. A caller can only trust the result if no error +// and sure == true are returned. Otherwise, other methods (e.g. parsing +// /proc/mounts) have to be used. If unsure, use Mounted instead (which +// uses MountedFast, but falls back to parsing mountinfo if needed). +// +// If a non-existent path is specified, an appropriate error is returned. +// In case the caller is not interested in this particular error, it should +// be handled separately using e.g. errors.Is(err, fs.ErrNotExist). +// +// This function is only available on Linux. When available (since kernel +// v5.6), openat2(2) syscall is used to reliably detect all mounts. 
Otherwise, +// the implementation falls back to using stat(2), which can reliably detect +// normal (but not bind) mounts. +func MountedFast(path string) (mounted, sure bool, err error) { + // Root is always mounted. + if path == string(os.PathSeparator) { + return true, true, nil + } + + path, err = normalizePath(path) + if err != nil { + return false, false, err + } + mounted, sure, err = mountedFast(path) + return +} + // mountedByOpenat2 is a method of detecting a mount that works for all kinds // of mounts (incl. bind mounts), but requires a recent (v5.6+) linux kernel. func mountedByOpenat2(path string) (bool, error) { @@ -34,24 +62,40 @@ func mountedByOpenat2(path string) (bool, error) { return false, &os.PathError{Op: "openat2", Path: path, Err: err} } -func mounted(path string) (bool, error) { - path, err := normalizePath(path) - if err != nil { - return false, err +// mountedFast is similar to MountedFast, except it expects a normalized path. +func mountedFast(path string) (mounted, sure bool, err error) { + // Root is always mounted. + if path == string(os.PathSeparator) { + return true, true, nil } + // Try a fast path, using openat2() with RESOLVE_NO_XDEV. - mounted, err := mountedByOpenat2(path) + mounted, err = mountedByOpenat2(path) if err == nil { - return mounted, nil + return mounted, true, nil } + // Another fast path: compare st.st_dev fields. mounted, err = mountedByStat(path) // This does not work for bind mounts, so false negative // is possible, therefore only trust if return is true. if mounted && err == nil { + return true, true, nil + } + + return +} + +func mounted(path string) (bool, error) { + path, err := normalizePath(path) + if err != nil { + return false, err + } + mounted, sure, err := mountedFast(path) + if sure && err == nil { return mounted, nil } - // Fallback to parsing mountinfo + // Fallback to parsing mountinfo. return mountedByMountinfo(path) } diff --git a/vendor/github.com/moby/sys/mountinfo/mounted_unix.go b/vendor/github.com/moby/sys/mountinfo/mounted_unix.go index 45ddad236f..c7b7678f9a 100644 --- a/vendor/github.com/moby/sys/mountinfo/mounted_unix.go +++ b/vendor/github.com/moby/sys/mountinfo/mounted_unix.go @@ -1,10 +1,9 @@ -//go:build linux || (freebsd && cgo) || (openbsd && cgo) || (darwin && cgo) -// +build linux freebsd,cgo openbsd,cgo darwin,cgo +//go:build linux || freebsd || openbsd || darwin +// +build linux freebsd openbsd darwin package mountinfo import ( - "fmt" "os" "path/filepath" @@ -33,13 +32,13 @@ func mountedByStat(path string) (bool, error) { func normalizePath(path string) (realPath string, err error) { if realPath, err = filepath.Abs(path); err != nil { - return "", fmt.Errorf("unable to get absolute path for %q: %w", path, err) + return "", err } if realPath, err = filepath.EvalSymlinks(realPath); err != nil { - return "", fmt.Errorf("failed to canonicalise path for %q: %w", path, err) + return "", err } if _, err := os.Stat(realPath); err != nil { - return "", fmt.Errorf("failed to stat target of %q: %w", path, err) + return "", err } return realPath, nil } diff --git a/vendor/github.com/moby/sys/mountinfo/mountinfo.go b/vendor/github.com/moby/sys/mountinfo/mountinfo.go index 9867a66dd8..574aeb8767 100644 --- a/vendor/github.com/moby/sys/mountinfo/mountinfo.go +++ b/vendor/github.com/moby/sys/mountinfo/mountinfo.go @@ -13,9 +13,9 @@ func GetMounts(f FilterFunc) ([]*Info, error) { // Mounted determines if a specified path is a mount point. In case of any // error, false (and an error) is returned. 
// -// The non-existent path returns an error. If a caller is not interested -// in this particular error, it should handle it separately using e.g. -// errors.Is(err, os.ErrNotExist). +// If a non-existent path is specified, an appropriate error is returned. +// In case the caller is not interested in this particular error, it should +// be handled separately using e.g. errors.Is(err, fs.ErrNotExist). func Mounted(path string) (bool, error) { // root is always mounted if path == string(os.PathSeparator) { diff --git a/vendor/github.com/moby/sys/mountinfo/mountinfo_bsd.go b/vendor/github.com/moby/sys/mountinfo/mountinfo_bsd.go index d5513a26d2..8420f58c7a 100644 --- a/vendor/github.com/moby/sys/mountinfo/mountinfo_bsd.go +++ b/vendor/github.com/moby/sys/mountinfo/mountinfo_bsd.go @@ -1,53 +1,37 @@ -//go:build (freebsd && cgo) || (openbsd && cgo) || (darwin && cgo) -// +build freebsd,cgo openbsd,cgo darwin,cgo +//go:build freebsd || openbsd || darwin +// +build freebsd openbsd darwin package mountinfo -/* -#include -#include -#include -*/ -import "C" - -import ( - "fmt" - "reflect" - "unsafe" -) +import "golang.org/x/sys/unix" // parseMountTable returns information about mounted filesystems func parseMountTable(filter FilterFunc) ([]*Info, error) { - var rawEntries *C.struct_statfs - - count := int(C.getmntinfo(&rawEntries, C.MNT_WAIT)) - if count == 0 { - return nil, fmt.Errorf("failed to call getmntinfo") + count, err := unix.Getfsstat(nil, unix.MNT_WAIT) + if err != nil { + return nil, err } - var entries []C.struct_statfs - header := (*reflect.SliceHeader)(unsafe.Pointer(&entries)) - header.Cap = count - header.Len = count - header.Data = uintptr(unsafe.Pointer(rawEntries)) + entries := make([]unix.Statfs_t, count) + _, err = unix.Getfsstat(entries, unix.MNT_WAIT) + if err != nil { + return nil, err + } var out []*Info for _, entry := range entries { - var mountinfo Info var skip, stop bool - mountinfo.Mountpoint = C.GoString(&entry.f_mntonname[0]) - mountinfo.FSType = C.GoString(&entry.f_fstypename[0]) - mountinfo.Source = C.GoString(&entry.f_mntfromname[0]) + mountinfo := getMountinfo(&entry) if filter != nil { // filter out entries we're not interested in - skip, stop = filter(&mountinfo) + skip, stop = filter(mountinfo) if skip { continue } } - out = append(out, &mountinfo) + out = append(out, mountinfo) if stop { break } diff --git a/vendor/github.com/moby/sys/mountinfo/mountinfo_freebsdlike.go b/vendor/github.com/moby/sys/mountinfo/mountinfo_freebsdlike.go new file mode 100644 index 0000000000..ecaaa7a9c1 --- /dev/null +++ b/vendor/github.com/moby/sys/mountinfo/mountinfo_freebsdlike.go @@ -0,0 +1,14 @@ +//go:build freebsd || darwin +// +build freebsd darwin + +package mountinfo + +import "golang.org/x/sys/unix" + +func getMountinfo(entry *unix.Statfs_t) *Info { + return &Info{ + Mountpoint: unix.ByteSliceToString(entry.Mntonname[:]), + FSType: unix.ByteSliceToString(entry.Fstypename[:]), + Source: unix.ByteSliceToString(entry.Mntfromname[:]), + } +} diff --git a/vendor/github.com/moby/sys/mountinfo/mountinfo_linux.go b/vendor/github.com/moby/sys/mountinfo/mountinfo_linux.go index 59332b07bf..b32b5c9b15 100644 --- a/vendor/github.com/moby/sys/mountinfo/mountinfo_linux.go +++ b/vendor/github.com/moby/sys/mountinfo/mountinfo_linux.go @@ -5,15 +5,19 @@ import ( "fmt" "io" "os" + "runtime" "strconv" "strings" + "sync" + + "golang.org/x/sys/unix" ) // GetMountsFromReader retrieves a list of mounts from the // reader provided, with an optional filter applied (use nil // for no filter). 
This can be useful in tests or benchmarks // that provide fake mountinfo data, or when a source other -// than /proc/self/mountinfo needs to be read from. +// than /proc/thread-self/mountinfo needs to be read from. // // This function is Linux-specific. func GetMountsFromReader(r io.Reader, filter FilterFunc) ([]*Info, error) { @@ -127,8 +131,40 @@ func GetMountsFromReader(r io.Reader, filter FilterFunc) ([]*Info, error) { return out, nil } -func parseMountTable(filter FilterFunc) ([]*Info, error) { - f, err := os.Open("/proc/self/mountinfo") +var ( + haveProcThreadSelf bool + haveProcThreadSelfOnce sync.Once +) + +func parseMountTable(filter FilterFunc) (_ []*Info, err error) { + haveProcThreadSelfOnce.Do(func() { + _, err := os.Stat("/proc/thread-self/mountinfo") + haveProcThreadSelf = err == nil + }) + + // We need to lock ourselves to the current OS thread in order to make sure + // that the thread referenced by /proc/thread-self stays alive until we + // finish parsing the file. + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + var f *os.File + if haveProcThreadSelf { + f, err = os.Open("/proc/thread-self/mountinfo") + } else { + // On pre-3.17 kernels (such as CentOS 7), we don't have + // /proc/thread-self/ so we need to manually construct + // /proc/self/task// as a fallback. + f, err = os.Open("/proc/self/task/" + strconv.Itoa(unix.Gettid()) + "/mountinfo") + if os.IsNotExist(err) { + // If /proc/self/task/... failed, it means that our active pid + // namespace doesn't match the pid namespace of the /proc mount. In + // this case we just have to make do with /proc/self, since there + // is no other way of figuring out our tid in a parent pid + // namespace on pre-3.17 kernels. + f, err = os.Open("/proc/self/mountinfo") + } + } if err != nil { return nil, err } @@ -158,10 +194,10 @@ func PidMountInfo(pid int) ([]*Info, error) { // A few specific characters in mountinfo path entries (root and mountpoint) // are escaped using a backslash followed by a character's ascii code in octal. // -// space -- as \040 -// tab (aka \t) -- as \011 -// newline (aka \n) -- as \012 -// backslash (aka \\) -- as \134 +// space -- as \040 +// tab (aka \t) -- as \011 +// newline (aka \n) -- as \012 +// backslash (aka \\) -- as \134 // // This function converts path from mountinfo back, i.e. it unescapes the above sequences. 
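 // For example, the mountinfo entry "a\040b" unescapes to "a b".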
func unescape(path string) (string, error) { diff --git a/vendor/github.com/moby/sys/mountinfo/mountinfo_openbsd.go b/vendor/github.com/moby/sys/mountinfo/mountinfo_openbsd.go new file mode 100644 index 0000000000..f682c2d3b5 --- /dev/null +++ b/vendor/github.com/moby/sys/mountinfo/mountinfo_openbsd.go @@ -0,0 +1,11 @@ +package mountinfo + +import "golang.org/x/sys/unix" + +func getMountinfo(entry *unix.Statfs_t) *Info { + return &Info{ + Mountpoint: unix.ByteSliceToString(entry.F_mntonname[:]), + FSType: unix.ByteSliceToString(entry.F_fstypename[:]), + Source: unix.ByteSliceToString(entry.F_mntfromname[:]), + } +} diff --git a/vendor/github.com/moby/sys/mountinfo/mountinfo_unsupported.go b/vendor/github.com/moby/sys/mountinfo/mountinfo_unsupported.go index 95769a76da..c2e64bc81c 100644 --- a/vendor/github.com/moby/sys/mountinfo/mountinfo_unsupported.go +++ b/vendor/github.com/moby/sys/mountinfo/mountinfo_unsupported.go @@ -1,5 +1,5 @@ -//go:build (!windows && !linux && !freebsd && !openbsd && !darwin) || (freebsd && !cgo) || (openbsd && !cgo) || (darwin && !cgo) -// +build !windows,!linux,!freebsd,!openbsd,!darwin freebsd,!cgo openbsd,!cgo darwin,!cgo +//go:build !windows && !linux && !freebsd && !openbsd && !darwin +// +build !windows,!linux,!freebsd,!openbsd,!darwin package mountinfo diff --git a/vendor/github.com/opencontainers/runc/NOTICE b/vendor/github.com/opencontainers/runc/NOTICE index 5c97abce4b..c29775c0d9 100644 --- a/vendor/github.com/opencontainers/runc/NOTICE +++ b/vendor/github.com/opencontainers/runc/NOTICE @@ -8,9 +8,9 @@ The following is courtesy of our legal counsel: Use and transfer of Docker may be subject to certain restrictions by the -United States and other governments. +United States and other governments. It is your responsibility to ensure that your use and/or transfer does not -violate applicable laws. +violate applicable laws. For more information, please see http://www.bis.doc.gov diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go index ba2b2266c9..b9ba889b7a 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go @@ -1,9 +1,24 @@ package cgroups import ( + "errors" + "github.com/opencontainers/runc/libcontainer/configs" ) +var ( + // ErrDevicesUnsupported is an error returned when a cgroup manager + // is not configured to set device rules. + ErrDevicesUnsupported = errors.New("cgroup manager is not configured to set device rules") + + // DevicesSetV1 and DevicesSetV2 are functions to set devices for + // cgroup v1 and v2, respectively. Unless libcontainer/cgroups/devices + // package is imported, it is set to nil, so cgroup managers can't + // manage devices. + DevicesSetV1 func(path string, r *configs.Resources) error + DevicesSetV2 func(path string, r *configs.Resources) error +) + type Manager interface { // Apply creates a cgroup, if not yet created, and adds a process // with the specified pid into that cgroup. 
A special value of -1 diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go deleted file mode 100644 index 6c61ee4c03..0000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go +++ /dev/null @@ -1,386 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -/* - * Copyright (C) 2020 Aleksa Sarai - * Copyright (C) 2020 SUSE LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package devices - -import ( - "bufio" - "fmt" - "io" - "sort" - "strconv" - "strings" - - "github.com/opencontainers/runc/libcontainer/devices" -) - -// deviceMeta is a Rule without the Allow or Permissions fields, and no -// wildcard-type support. It's effectively the "match" portion of a metadata -// rule, for the purposes of our emulation. -type deviceMeta struct { - node devices.Type - major int64 - minor int64 -} - -// deviceRule is effectively the tuple (deviceMeta, Permissions). -type deviceRule struct { - meta deviceMeta - perms devices.Permissions -} - -// deviceRules is a mapping of device metadata rules to the associated -// permissions in the ruleset. -type deviceRules map[deviceMeta]devices.Permissions - -func (r deviceRules) orderedEntries() []deviceRule { - var rules []deviceRule - for meta, perms := range r { - rules = append(rules, deviceRule{meta: meta, perms: perms}) - } - sort.Slice(rules, func(i, j int) bool { - // Sort by (major, minor, type). - a, b := rules[i].meta, rules[j].meta - return a.major < b.major || - (a.major == b.major && a.minor < b.minor) || - (a.major == b.major && a.minor == b.minor && a.node < b.node) - }) - return rules -} - -type Emulator struct { - defaultAllow bool - rules deviceRules -} - -func (e *Emulator) IsBlacklist() bool { - return e.defaultAllow -} - -func (e *Emulator) IsAllowAll() bool { - return e.IsBlacklist() && len(e.rules) == 0 -} - -func parseLine(line string) (*deviceRule, error) { - // Input: node major:minor perms. - fields := strings.FieldsFunc(line, func(r rune) bool { - return r == ' ' || r == ':' - }) - if len(fields) != 4 { - return nil, fmt.Errorf("malformed devices.list rule %s", line) - } - - var ( - rule deviceRule - node = fields[0] - major = fields[1] - minor = fields[2] - perms = fields[3] - ) - - // Parse the node type. - switch node { - case "a": - // Super-special case -- "a" always means every device with every - // access mode. In fact, for devices.list this actually indicates that - // the cgroup is in black-list mode. - // TODO: Double-check that the entire file is "a *:* rwm". - return nil, nil - case "b": - rule.meta.node = devices.BlockDevice - case "c": - rule.meta.node = devices.CharDevice - default: - return nil, fmt.Errorf("unknown device type %q", node) - } - - // Parse the major number. 
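// Hedged worked example for parseLine above: the devices.list entry
// "c 1:3 rwm" splits on spaces and ':' into exactly four fields,
//
//	fields := strings.FieldsFunc("c 1:3 rwm", func(r rune) bool {
//		return r == ' ' || r == ':'
//	})
//	// fields == []string{"c", "1", "3", "rwm"}
//
// yielding node=CharDevice, major=1, minor=3, perms="rwm", while an
// "a *:* rwm" entry returns (nil, nil) to signal black-list mode. The
// major/minor parsing continues below.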
- if major == "*" { - rule.meta.major = devices.Wildcard - } else { - val, err := strconv.ParseUint(major, 10, 32) - if err != nil { - return nil, fmt.Errorf("invalid major number: %w", err) - } - rule.meta.major = int64(val) - } - - // Parse the minor number. - if minor == "*" { - rule.meta.minor = devices.Wildcard - } else { - val, err := strconv.ParseUint(minor, 10, 32) - if err != nil { - return nil, fmt.Errorf("invalid minor number: %w", err) - } - rule.meta.minor = int64(val) - } - - // Parse the access permissions. - rule.perms = devices.Permissions(perms) - if !rule.perms.IsValid() || rule.perms.IsEmpty() { - return nil, fmt.Errorf("parse access mode: contained unknown modes or is empty: %q", perms) - } - return &rule, nil -} - -func (e *Emulator) addRule(rule deviceRule) error { //nolint:unparam - if e.rules == nil { - e.rules = make(map[deviceMeta]devices.Permissions) - } - - // Merge with any pre-existing permissions. - oldPerms := e.rules[rule.meta] - newPerms := rule.perms.Union(oldPerms) - e.rules[rule.meta] = newPerms - return nil -} - -func (e *Emulator) rmRule(rule deviceRule) error { - // Give an error if any of the permissions requested to be removed are - // present in a partially-matching wildcard rule, because such rules will - // be ignored by cgroupv1. - // - // This is a diversion from cgroupv1, but is necessary to avoid leading - // users into a false sense of security. cgroupv1 will silently(!) ignore - // requests to remove partial exceptions, but we really shouldn't do that. - // - // It may seem like we could just "split" wildcard rules which hit this - // issue, but unfortunately there are 2^32 possible major and minor - // numbers, which would exhaust kernel memory quickly if we did this. Not - // to mention it'd be really slow (the kernel side is implemented as a - // linked-list of exceptions). - for _, partialMeta := range []deviceMeta{ - {node: rule.meta.node, major: devices.Wildcard, minor: rule.meta.minor}, - {node: rule.meta.node, major: rule.meta.major, minor: devices.Wildcard}, - {node: rule.meta.node, major: devices.Wildcard, minor: devices.Wildcard}, - } { - // This wildcard rule is equivalent to the requested rule, so skip it. - if rule.meta == partialMeta { - continue - } - // Only give an error if the set of permissions overlap. - partialPerms := e.rules[partialMeta] - if !partialPerms.Intersection(rule.perms).IsEmpty() { - return fmt.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", rule.meta, rule.perms, partialMeta, partialPerms) - } - } - - // Subtract all of the permissions listed from the full match rule. If the - // rule didn't exist, all of this is a no-op. - newPerms := e.rules[rule.meta].Difference(rule.perms) - if newPerms.IsEmpty() { - delete(e.rules, rule.meta) - } else { - e.rules[rule.meta] = newPerms - } - // TODO: The actual cgroup code doesn't care if an exception didn't exist - // during removal, so not erroring out here is /accurate/ but quite - // worrying. Maybe we should do additional validation, but again we - // have to worry about backwards-compatibility. - return nil -} - -func (e *Emulator) allow(rule *deviceRule) error { - // This cgroup is configured as a black-list. Reset the entire emulator, - // and put it into black-list mode.
- if rule == nil || rule.meta.node == devices.WildcardDevice { - *e = Emulator{ - defaultAllow: true, - rules: nil, - } - return nil - } - - var err error - if e.defaultAllow { - err = wrapErr(e.rmRule(*rule), "unable to remove 'deny' exception") - } else { - err = wrapErr(e.addRule(*rule), "unable to add 'allow' exception") - } - return err -} - -func (e *Emulator) deny(rule *deviceRule) error { - // This cgroup is configured as a white-list. Reset the entire emulator, - // and put it into white-list mode. - if rule == nil || rule.meta.node == devices.WildcardDevice { - *e = Emulator{ - defaultAllow: false, - rules: nil, - } - return nil - } - - var err error - if e.defaultAllow { - err = wrapErr(e.addRule(*rule), "unable to add 'deny' exception") - } else { - err = wrapErr(e.rmRule(*rule), "unable to remove 'allow' exception") - } - return err -} - -func (e *Emulator) Apply(rule devices.Rule) error { - if !rule.Type.CanCgroup() { - return fmt.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type) - } - - innerRule := &deviceRule{ - meta: deviceMeta{ - node: rule.Type, - major: rule.Major, - minor: rule.Minor, - }, - perms: rule.Permissions, - } - if innerRule.meta.node == devices.WildcardDevice { - innerRule = nil - } - - if rule.Allow { - return e.allow(innerRule) - } - - return e.deny(innerRule) -} - -// EmulatorFromList takes a reader to a "devices.list"-like source, and returns -// a new Emulator that represents the state of the devices cgroup. Note that -// black-list devices cgroups cannot be fully reconstructed, due to limitations -// in the devices cgroup API. Instead, such cgroups are always treated as -// "allow all" cgroups. -func EmulatorFromList(list io.Reader) (*Emulator, error) { - // Normally cgroups are in black-list mode by default, but the way we - // figure out the current mode is whether or not devices.list has an - // allow-all rule. So we default to a white-list, and the existence of an - // "a *:* rwm" entry will tell us otherwise. - e := &Emulator{ - defaultAllow: false, - } - - // Parse the "devices.list". - s := bufio.NewScanner(list) - for s.Scan() { - line := s.Text() - deviceRule, err := parseLine(line) - if err != nil { - return nil, fmt.Errorf("error parsing line %q: %w", line, err) - } - // "devices.list" is an allow list. Note that this means that in - // black-list mode, we have no idea what rules are in play. As a - // result, we need to be very careful in Transition(). - if err := e.allow(deviceRule); err != nil { - return nil, fmt.Errorf("error adding devices.list rule: %w", err) - } - } - if err := s.Err(); err != nil { - return nil, fmt.Errorf("error reading devices.list lines: %w", err) - } - return e, nil -} - -// Transition calculates the minimally-disruptive set of rules that need to -// be applied to a devices cgroup in order to transition to the given target. -// This means that any already-existing rules will not be applied, and -// disruptive rules (like denying all device access) will only be applied if -// necessary. -// -// This function is the sole reason for all of Emulator -- to allow us -// to figure out how to update a container's cgroups without causing spurious -// device errors (if possible).
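// Hedged example of the intended behaviour: moving from an allow-list with
// "c 1:3 rwm" to one with only "c 1:3 rw" yields a single partial deny
// instead of a disruptive deny-all:
//
//	source, _ := EmulatorFromList(strings.NewReader("c 1:3 rwm"))
//	target, _ := EmulatorFromList(strings.NewReader("c 1:3 rw"))
//	rules, _ := source.Transition(target)
//	// rules: [{Type: 'c', Major: 1, Minor: 3, Permissions: "m", Allow: false}]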
-func (source *Emulator) Transition(target *Emulator) ([]*devices.Rule, error) { - var transitionRules []*devices.Rule - oldRules := source.rules - - // If the default policy doesn't match, we need to include a "disruptive" - // rule (either allow-all or deny-all) in order to switch the cgroup to the - // correct default policy. - // - // However, due to a limitation in "devices.list" we cannot be sure what - // deny rules are in place in a black-list cgroup. Thus if the source is a - // black-list we also have to include a disruptive rule. - if source.IsBlacklist() || source.defaultAllow != target.defaultAllow { - transitionRules = append(transitionRules, &devices.Rule{ - Type: 'a', - Major: -1, - Minor: -1, - Permissions: devices.Permissions("rwm"), - Allow: target.defaultAllow, - }) - // The old rules are only relevant if we aren't starting out with a - // disruptive rule. - oldRules = nil - } - - // NOTE: We traverse through the rules in a sorted order so we always write - // the same set of rules (this is to aid testing). - - // First, we create inverse rules for any old rules not in the new set. - // This includes partial-inverse rules for specific permissions. This is a - // no-op if we added a disruptive rule, since oldRules will be empty. - for _, rule := range oldRules.orderedEntries() { - meta, oldPerms := rule.meta, rule.perms - newPerms := target.rules[meta] - droppedPerms := oldPerms.Difference(newPerms) - if !droppedPerms.IsEmpty() { - transitionRules = append(transitionRules, &devices.Rule{ - Type: meta.node, - Major: meta.major, - Minor: meta.minor, - Permissions: droppedPerms, - Allow: target.defaultAllow, - }) - } - } - - // Add any additional rules which weren't in the old set. We happen to - // filter out rules which are present in both sets, though this isn't - // strictly necessary. - for _, rule := range target.rules.orderedEntries() { - meta, newPerms := rule.meta, rule.perms - oldPerms := oldRules[meta] - gainedPerms := newPerms.Difference(oldPerms) - if !gainedPerms.IsEmpty() { - transitionRules = append(transitionRules, &devices.Rule{ - Type: meta.node, - Major: meta.major, - Minor: meta.minor, - Permissions: gainedPerms, - Allow: !target.defaultAllow, - }) - } - } - return transitionRules, nil -} - -// Rules returns the minimum set of rules necessary to convert a *deny-all* -// cgroup to the emulated filter state (note that this is not the same as a -// default cgroupv1 cgroup -- which is allow-all). This is effectively just a -// wrapper around Transition() with the source emulator being an empty cgroup. 
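// For instance (hedged):
//
//	e, _ := EmulatorFromList(strings.NewReader("c 1:3 rwm"))
//	rules, _ := e.Rules()
//	// rules: [{Type: 'c', Major: 1, Minor: 3, Permissions: "rwm", Allow: true}]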
-func (e *Emulator) Rules() ([]*devices.Rule, error) { - defaultCgroup := &Emulator{defaultAllow: false} - return defaultCgroup.Transition(e) -} - -func wrapErr(err error, text string) error { - if err == nil { - return nil - } - return fmt.Errorf(text+": %w", err) -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go deleted file mode 100644 index 4e69b35bcd..0000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go +++ /dev/null @@ -1,208 +0,0 @@ -// Package devicefilter contains eBPF device filter program -// -// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c -// -// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano) -// agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397 -package devicefilter - -import ( - "errors" - "fmt" - "math" - "strconv" - - "github.com/cilium/ebpf/asm" - devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices" - "github.com/opencontainers/runc/libcontainer/devices" - "golang.org/x/sys/unix" -) - -const ( - // license string format is same as kernel MODULE_LICENSE macro - license = "Apache" -) - -// DeviceFilter returns eBPF device filter program and its license string -func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) { - // Generate the minimum ruleset for the device rules we are given. While we - // don't care about minimum transitions in cgroupv2, using the emulator - // gives us a guarantee that the behaviour of devices filtering is the same - // as cgroupv1, including security hardenings to avoid misconfiguration - // (such as punching holes in wildcard rules). - emu := new(devicesemulator.Emulator) - for _, rule := range rules { - if err := emu.Apply(*rule); err != nil { - return nil, "", err - } - } - cleanRules, err := emu.Rules() - if err != nil { - return nil, "", err - } - - p := &program{ - defaultAllow: emu.IsBlacklist(), - } - p.init() - - for idx, rule := range cleanRules { - if rule.Type == devices.WildcardDevice { - // We can safely skip over wildcard entries because there should - // only be one (at most) at the very start to instruct cgroupv1 to - // go into allow-list mode. However we do double-check this here. - if idx != 0 || rule.Allow != emu.IsBlacklist() { - return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString()) - } - continue - } - if rule.Allow == p.defaultAllow { - // There should be no rules which have an action equal to the - // default action, the emulator removes those. 
- return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString()) - } - if err := p.appendRule(rule); err != nil { - return nil, "", err - } - } - return p.finalize(), license, nil -} - -type program struct { - insts asm.Instructions - defaultAllow bool - blockID int -} - -func (p *program) init() { - // struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423 - /* - u32 access_type - u32 major - u32 minor - */ - // R2 <- type (lower 16 bit of u32 access_type at R1[0]) - p.insts = append(p.insts, - asm.LoadMem(asm.R2, asm.R1, 0, asm.Word), - asm.And.Imm32(asm.R2, 0xFFFF)) - - // R3 <- access (upper 16 bit of u32 access_type at R1[0]) - p.insts = append(p.insts, - asm.LoadMem(asm.R3, asm.R1, 0, asm.Word), - // RSh: bitwise shift right - asm.RSh.Imm32(asm.R3, 16)) - - // R4 <- major (u32 major at R1[4]) - p.insts = append(p.insts, - asm.LoadMem(asm.R4, asm.R1, 4, asm.Word)) - - // R5 <- minor (u32 minor at R1[8]) - p.insts = append(p.insts, - asm.LoadMem(asm.R5, asm.R1, 8, asm.Word)) -} - -// appendRule rule converts an OCI rule to the relevant eBPF block and adds it -// to the in-progress filter program. In order to operate properly, it must be -// called with a "clean" rule list (generated by devices.Emulator.Rules() -- -// with any "a" rules removed). -func (p *program) appendRule(rule *devices.Rule) error { - if p.blockID < 0 { - return errors.New("the program is finalized") - } - - var bpfType int32 - switch rule.Type { - case devices.CharDevice: - bpfType = int32(unix.BPF_DEVCG_DEV_CHAR) - case devices.BlockDevice: - bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK) - default: - // We do not permit 'a', nor any other types we don't know about. - return fmt.Errorf("invalid type %q", string(rule.Type)) - } - if rule.Major > math.MaxUint32 { - return fmt.Errorf("invalid major %d", rule.Major) - } - if rule.Minor > math.MaxUint32 { - return fmt.Errorf("invalid minor %d", rule.Major) - } - hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1 - hasMinor := rule.Minor >= 0 - bpfAccess := int32(0) - for _, r := range rule.Permissions { - switch r { - case 'r': - bpfAccess |= unix.BPF_DEVCG_ACC_READ - case 'w': - bpfAccess |= unix.BPF_DEVCG_ACC_WRITE - case 'm': - bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD - default: - return fmt.Errorf("unknown device access %v", r) - } - } - // If the access is rwm, skip the check. - hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD) - - var ( - blockSym = "block-" + strconv.Itoa(p.blockID) - nextBlockSym = "block-" + strconv.Itoa(p.blockID+1) - prevBlockLastIdx = len(p.insts) - 1 - ) - p.insts = append(p.insts, - // if (R2 != bpfType) goto next - asm.JNE.Imm(asm.R2, bpfType, nextBlockSym), - ) - if hasAccess { - p.insts = append(p.insts, - // if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next - asm.Mov.Reg32(asm.R1, asm.R3), - asm.And.Imm32(asm.R1, bpfAccess), - asm.JNE.Reg(asm.R1, asm.R3, nextBlockSym), - ) - } - if hasMajor { - p.insts = append(p.insts, - // if (R4 != major) goto next - asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym), - ) - } - if hasMinor { - p.insts = append(p.insts, - // if (R5 != minor) goto next - asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym), - ) - } - p.insts = append(p.insts, acceptBlock(rule.Allow)...) 
- // set blockSym to the first instruction we added in this iteration - p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym) - p.blockID++ - return nil -} - -func (p *program) finalize() asm.Instructions { - var v int32 - if p.defaultAllow { - v = 1 - } - blockSym := "block-" + strconv.Itoa(p.blockID) - p.insts = append(p.insts, - // R0 <- v - asm.Mov.Imm32(asm.R0, v).Sym(blockSym), - asm.Return(), - ) - p.blockID = -1 - return p.insts -} - -func acceptBlock(accept bool) asm.Instructions { - var v int32 - if accept { - v = 1 - } - return []asm.Instruction{ - // R0 <- v - asm.Mov.Imm32(asm.R0, v), - asm.Return(), - } -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go deleted file mode 100644 index 35b00aaf05..0000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go +++ /dev/null @@ -1,253 +0,0 @@ -package ebpf - -import ( - "errors" - "fmt" - "os" - "runtime" - "sync" - "unsafe" - - "github.com/cilium/ebpf" - "github.com/cilium/ebpf/asm" - "github.com/cilium/ebpf/link" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -func nilCloser() error { - return nil -} - -func findAttachedCgroupDeviceFilters(dirFd int) ([]*ebpf.Program, error) { - type bpfAttrQuery struct { - TargetFd uint32 - AttachType uint32 - QueryType uint32 - AttachFlags uint32 - ProgIds uint64 // __aligned_u64 - ProgCnt uint32 - } - - // Currently you can only have 64 eBPF programs attached to a cgroup. - size := 64 - retries := 0 - for retries < 10 { - progIds := make([]uint32, size) - query := bpfAttrQuery{ - TargetFd: uint32(dirFd), - AttachType: uint32(unix.BPF_CGROUP_DEVICE), - ProgIds: uint64(uintptr(unsafe.Pointer(&progIds[0]))), - ProgCnt: uint32(len(progIds)), - } - - // Fetch the list of program ids. - _, _, errno := unix.Syscall(unix.SYS_BPF, - uintptr(unix.BPF_PROG_QUERY), - uintptr(unsafe.Pointer(&query)), - unsafe.Sizeof(query)) - size = int(query.ProgCnt) - runtime.KeepAlive(query) - if errno != 0 { - // On ENOSPC we get the correct number of programs. - if errno == unix.ENOSPC { - retries++ - continue - } - return nil, fmt.Errorf("bpf_prog_query(BPF_CGROUP_DEVICE) failed: %w", errno) - } - - // Convert the ids to program handles. - progIds = progIds[:size] - programs := make([]*ebpf.Program, 0, len(progIds)) - for _, progId := range progIds { - program, err := ebpf.NewProgramFromID(ebpf.ProgramID(progId)) - if err != nil { - // We skip over programs that give us -EACCES or -EPERM. This - // is necessary because there may be BPF programs that have - // been attached (such as with --systemd-cgroup) which have an - // LSM label that blocks us from interacting with the program. - // - // Because additional BPF_CGROUP_DEVICE programs only can add - // restrictions, there's no real issue with just ignoring these - // programs (and stops runc from breaking on distributions with - // very strict SELinux policies). 
- if errors.Is(err, os.ErrPermission) { - logrus.Debugf("ignoring existing CGROUP_DEVICE program (prog_id=%v) which cannot be accessed by runc -- likely due to LSM policy: %v", progId, err) - continue - } - return nil, fmt.Errorf("cannot fetch program from id: %w", err) - } - programs = append(programs, program) - } - runtime.KeepAlive(progIds) - return programs, nil - } - - return nil, errors.New("could not get complete list of CGROUP_DEVICE programs") -} - -var ( - haveBpfProgReplaceBool bool - haveBpfProgReplaceOnce sync.Once -) - -// Loosely based on the BPF_F_REPLACE support check in -// https://github.com/cilium/ebpf/blob/v0.6.0/link/syscalls.go. -// -// TODO: move this logic to cilium/ebpf -func haveBpfProgReplace() bool { - haveBpfProgReplaceOnce.Do(func() { - prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ - Type: ebpf.CGroupDevice, - License: "MIT", - Instructions: asm.Instructions{ - asm.Mov.Imm(asm.R0, 0), - asm.Return(), - }, - }) - if err != nil { - logrus.Debugf("checking for BPF_F_REPLACE support: ebpf.NewProgram failed: %v", err) - return - } - defer prog.Close() - - devnull, err := os.Open("/dev/null") - if err != nil { - logrus.Debugf("checking for BPF_F_REPLACE support: open dummy target fd: %v", err) - return - } - defer devnull.Close() - - // We know that we have BPF_PROG_ATTACH since we can load - // BPF_CGROUP_DEVICE programs. If passing BPF_F_REPLACE gives us EINVAL - // we know that the feature isn't present. - err = link.RawAttachProgram(link.RawAttachProgramOptions{ - // We rely on this fd being checked after attachFlags. - Target: int(devnull.Fd()), - // Attempt to "replace" bad fds with this program. - Program: prog, - Attach: ebpf.AttachCGroupDevice, - Flags: unix.BPF_F_ALLOW_MULTI | unix.BPF_F_REPLACE, - }) - if errors.Is(err, unix.EINVAL) { - // not supported - return - } - // attach_flags test succeeded. - if !errors.Is(err, unix.EBADF) { - logrus.Debugf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", err) - } - haveBpfProgReplaceBool = true - }) - return haveBpfProgReplaceBool -} - -// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/ directory. -// -// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 . -// -// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92 -func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd int) (func() error, error) { - // Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167). - // This limit is not inherited into the container. - memlockLimit := &unix.Rlimit{ - Cur: unix.RLIM_INFINITY, - Max: unix.RLIM_INFINITY, - } - _ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit) - - // Get the list of existing programs. - oldProgs, err := findAttachedCgroupDeviceFilters(dirFd) - if err != nil { - return nilCloser, err - } - useReplaceProg := haveBpfProgReplace() && len(oldProgs) == 1 - - // Generate new program. - spec := &ebpf.ProgramSpec{ - Type: ebpf.CGroupDevice, - Instructions: insts, - License: license, - } - prog, err := ebpf.NewProgram(spec) - if err != nil { - return nilCloser, err - } - - // If there is only one old program, we can just replace it directly. 
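// In short (hedged summary of the probe above): EINVAL from the
// BPF_F_REPLACE attach means the kernel rejects the flag (feature absent),
// while EBADF means the flags were accepted and only the bogus target fd
// was refused (feature present). When the feature is present and exactly
// one old program is attached, the attach below swaps the program
// atomically rather than detaching the old ones afterwards.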
- var ( - replaceProg *ebpf.Program - attachFlags uint32 = unix.BPF_F_ALLOW_MULTI - ) - if useReplaceProg { - replaceProg = oldProgs[0] - attachFlags |= unix.BPF_F_REPLACE - } - err = link.RawAttachProgram(link.RawAttachProgramOptions{ - Target: dirFd, - Program: prog, - Replace: replaceProg, - Attach: ebpf.AttachCGroupDevice, - Flags: attachFlags, - }) - if err != nil { - return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err) - } - closer := func() error { - err = link.RawDetachProgram(link.RawDetachProgramOptions{ - Target: dirFd, - Program: prog, - Attach: ebpf.AttachCGroupDevice, - }) - if err != nil { - return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", err) - } - // TODO: Should we attach the old filters back in this case? Otherwise - // we fail-open on a security feature, which is a bit scary. - return nil - } - if !useReplaceProg { - logLevel := logrus.DebugLevel - // If there was more than one old program, give a warning (since this - // really shouldn't happen with runc-managed cgroups) and then detach - // all the old programs. - if len(oldProgs) > 1 { - // NOTE: Ideally this should be a warning but it turns out that - // systemd-managed cgroups trigger this warning (apparently - // systemd doesn't delete old non-systemd programs when - // setting properties). - logrus.Infof("found more than one filter (%d) attached to a cgroup -- removing extra filters!", len(oldProgs)) - logLevel = logrus.InfoLevel - } - for idx, oldProg := range oldProgs { - // Output some extra debug info. - if info, err := oldProg.Info(); err == nil { - fields := logrus.Fields{ - "type": info.Type.String(), - "tag": info.Tag, - "name": info.Name, - } - if id, ok := info.ID(); ok { - fields["id"] = id - } - if runCount, ok := info.RunCount(); ok { - fields["run_count"] = runCount - } - if runtime, ok := info.Runtime(); ok { - fields["runtime"] = runtime.String() - } - logrus.WithFields(fields).Logf(logLevel, "removing old filter %d from cgroup", idx) - } - err = link.RawDetachProgram(link.RawDetachProgramOptions{ - Target: dirFd, - Program: oldProg, - Attach: ebpf.AttachCGroupDevice, - }) - if err != nil { - return closer, fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE) on old filter program: %w", err) - } - } - } - return closer, nil -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go index f6e1b73bd9..16aae5a3b7 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go @@ -50,24 +50,13 @@ func WriteFile(dir, file, data string) error { return err } defer fd.Close() - if err := retryingWriteFile(fd, data); err != nil { + if _, err := fd.WriteString(data); err != nil { // Having data in the error message helps in debugging. 
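// Note (hedged): the plain WriteString above should suffice because the Go
// runtime transparently restarts file writes interrupted by EINTR, which
// is presumably why the retryingWriteFile helper is removed below.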
return fmt.Errorf("failed to write %q: %w", data, err) } return nil } -func retryingWriteFile(fd *os.File, data string) error { - for { - _, err := fd.Write([]byte(data)) - if errors.Is(err, unix.EINTR) { - logrus.Infof("interrupted while writing %s to %s", data, fd.Name()) - continue - } - return err - } -} - const ( cgroupfsDir = "/sys/fs/cgroup" cgroupfsPrefix = cgroupfsDir + "/" @@ -90,7 +79,7 @@ func prepareOpenat2() error { }) if err != nil { prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err} - if err != unix.ENOSYS { //nolint:errorlint // unix errors are bare + if err != unix.ENOSYS { logrus.Warnf("falling back to securejoin: %s", prepErr) } else { logrus.Debug("openat2 not available, falling back to securejoin") @@ -148,8 +137,9 @@ func openFile(dir, file string, flags int) (*os.File, error) { // // TODO: if such usage will ever be common, amend this // to reopen cgroupRootHandle and retry openat2. - fdStr := strconv.Itoa(int(cgroupRootHandle.Fd())) - fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr) + fdPath, closer := utils.ProcThreadSelf("fd/" + strconv.Itoa(int(cgroupRootHandle.Fd()))) + defer closer() + fdDest, _ := os.Readlink(fdPath) if fdDest != cgroupfsDir { // Wrap the error so it is clear that cgroupRootHandle // is opened to an unexpected/wrong directory. diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go index bbbae4d58c..8ee49d499f 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go @@ -2,16 +2,19 @@ package fs2 import ( "bufio" + "errors" "os" "strconv" + "golang.org/x/sys/unix" + "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" "github.com/opencontainers/runc/libcontainer/configs" ) func isCpuSet(r *configs.Resources) bool { - return r.CpuWeight != 0 || r.CpuQuota != 0 || r.CpuPeriod != 0 + return r.CpuWeight != 0 || r.CpuQuota != 0 || r.CpuPeriod != 0 || r.CPUIdle != nil || r.CpuBurst != nil } func setCpu(dirPath string, r *configs.Resources) error { @@ -19,6 +22,12 @@ func setCpu(dirPath string, r *configs.Resources) error { return nil } + if r.CPUIdle != nil { + if err := cgroups.WriteFile(dirPath, "cpu.idle", strconv.FormatInt(*r.CPUIdle, 10)); err != nil { + return err + } + } + // NOTE: .CpuShares is not used here. Conversion is the caller's responsibility. if r.CpuWeight != 0 { if err := cgroups.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(r.CpuWeight, 10)); err != nil { @@ -26,6 +35,23 @@ func setCpu(dirPath string, r *configs.Resources) error { } } + var burst string + if r.CpuBurst != nil { + burst = strconv.FormatUint(*r.CpuBurst, 10) + if err := cgroups.WriteFile(dirPath, "cpu.max.burst", burst); err != nil { + // Sometimes when the burst to be set is larger + // than the current one, it is rejected by the kernel + // (EINVAL) as old_quota/new_burst exceeds the parent + // cgroup quota limit. If this happens and the quota is + // going to be set, ignore the error for now and retry + // after setting the quota. 
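// I.e. (hedged restatement of the flow): cpu.max.burst is attempted first;
// on EINVAL with a quota update pending, the error is dropped and the burst
// is written again once cpu.max has been set:
//
//	cpu.max.burst <- "20000"        // may fail with EINVAL for now
//	cpu.max       <- "50000 100000" // quota/period written below
//	cpu.max.burst <- "20000"        // retried after cpu.max succeeds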
+ if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 { + return err + } + } else { + burst = "" + } + } if r.CpuQuota != 0 || r.CpuPeriod != 0 { str := "max" if r.CpuQuota > 0 { @@ -41,6 +67,11 @@ func setCpu(dirPath string, r *configs.Resources) error { if err := cgroups.WriteFile(dirPath, "cpu.max", str); err != nil { return err } + if burst != "" { + if err := cgroups.WriteFile(dirPath, "cpu.max.burst", burst); err != nil { + return err + } + } } return nil diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go index 9c949c91f0..8ac8312017 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go @@ -55,6 +55,9 @@ func _defaultDirPath(root, cgPath, cgParent, cgName string) (string, error) { return filepath.Join(root, innerPath), nil } + // We don't need to use /proc/thread-self here because runc always runs + // with every thread in the same cgroup. This lets us avoid having to do + // runtime.LockOSThread. ownCgroup, err := parseCgroupFile("/proc/self/cgroup") if err != nil { return "", err diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go deleted file mode 100644 index 0d23456072..0000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go +++ /dev/null @@ -1,75 +0,0 @@ -package fs2 - -import ( - "fmt" - - "golang.org/x/sys/unix" - - "github.com/opencontainers/runc/libcontainer/cgroups/ebpf" - "github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/devices" - "github.com/opencontainers/runc/libcontainer/userns" -) - -func isRWM(perms devices.Permissions) bool { - var r, w, m bool - for _, perm := range perms { - switch perm { - case 'r': - r = true - case 'w': - w = true - case 'm': - m = true - } - } - return r && w && m -} - -// This is similar to the logic applied in crun for handling errors from bpf(2). -func canSkipEBPFError(r *configs.Resources) bool { - // If we're running in a user namespace we can ignore eBPF rules because we - // usually cannot use bpf(2), as well as rootless containers usually don't - // have the necessary privileges to mknod(2) device inodes or access - // host-level instances (though ideally we would be blocking device access - // for rootless containers anyway). - if userns.RunningInUserNS() { - return true - } - - // We cannot ignore an eBPF load error if any rule is a block rule or it - // doesn't permit all access modes. - // - // NOTE: This will sometimes trigger in cases where access modes are split - // between different rules but to handle this correctly would require - // using ".../libcontainer/cgroup/devices".Emulator.
- for _, dev := range r.Devices { - if !dev.Allow || !isRWM(dev.Permissions) { - return false - } - } - return true -} - -func setDevices(dirPath string, r *configs.Resources) error { - if r.SkipDevices { - return nil - } - insts, license, err := devicefilter.DeviceFilter(r.Devices) - if err != nil { - return err - } - dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0o600) - if err != nil { - return fmt.Errorf("cannot get dir FD for %s", dirPath) - } - defer unix.Close(dirFD) - if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil { - if !canSkipEBPFError(r) { - return err - } - } - return nil -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go index 492778e310..0760be74b9 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go @@ -13,7 +13,7 @@ import ( type parseError = fscommon.ParseError -type manager struct { +type Manager struct { config *configs.Cgroup // dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope" dirPath string @@ -25,7 +25,7 @@ type manager struct { // NewManager creates a manager for cgroup v2 unified hierarchy. // dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope". // If dirPath is empty, it is automatically set using config. -func NewManager(config *configs.Cgroup, dirPath string) (cgroups.Manager, error) { +func NewManager(config *configs.Cgroup, dirPath string) (*Manager, error) { if dirPath == "" { var err error dirPath, err = defaultDirPath(config) @@ -34,14 +34,14 @@ func NewManager(config *configs.Cgroup, dirPath string) (cgroups.Manager, error) } } - m := &manager{ + m := &Manager{ config: config, dirPath: dirPath, } return m, nil } -func (m *manager) getControllers() error { +func (m *Manager) getControllers() error { if m.controllers != nil { return nil } @@ -62,7 +62,7 @@ func (m *manager) getControllers() error { return nil } -func (m *manager) Apply(pid int) error { +func (m *Manager) Apply(pid int) error { if err := CreateCgroupPath(m.dirPath, m.config); err != nil { // Related tests: // - "runc create (no limits + no cgrouppath + no permission) succeeds" @@ -84,15 +84,15 @@ func (m *manager) Apply(pid int) error { return nil } -func (m *manager) GetPids() ([]int, error) { +func (m *Manager) GetPids() ([]int, error) { return cgroups.GetPids(m.dirPath) } -func (m *manager) GetAllPids() ([]int, error) { +func (m *Manager) GetAllPids() ([]int, error) { return cgroups.GetAllPids(m.dirPath) } -func (m *manager) GetStats() (*cgroups.Stats, error) { +func (m *Manager) GetStats() (*cgroups.Stats, error) { var errs []error st := cgroups.NewStats() @@ -114,6 +114,17 @@ func (m *manager) GetStats() (*cgroups.Stats, error) { if err := statCpu(m.dirPath, st); err != nil && !os.IsNotExist(err) { errs = append(errs, err) } + // PSI (since kernel 4.20). 
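// The *.pressure files read below use the kernel's PSI format, for example
// (values illustrative; the parsing lives in statPSI later in this patch):
//
//	some avg10=0.12 avg60=0.08 avg300=0.02 total=123456
//	full avg10=0.00 avg60=0.00 avg300=0.00 total=0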
+ var err error + if st.CpuStats.PSI, err = statPSI(m.dirPath, "cpu.pressure"); err != nil { + errs = append(errs, err) + } + if st.MemoryStats.PSI, err = statPSI(m.dirPath, "memory.pressure"); err != nil { + errs = append(errs, err) + } + if st.BlkioStats.PSI, err = statPSI(m.dirPath, "io.pressure"); err != nil { + errs = append(errs, err) + } // hugetlb (since kernel 5.6) if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) { errs = append(errs, err) @@ -122,13 +133,17 @@ func (m *manager) GetStats() (*cgroups.Stats, error) { if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) { errs = append(errs, err) } + // misc (since kernel 5.13) + if err := statMisc(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } if len(errs) > 0 && !m.config.Rootless { return st, fmt.Errorf("error while statting cgroup v2: %+v", errs) } return st, nil } -func (m *manager) Freeze(state configs.FreezerState) error { +func (m *Manager) Freeze(state configs.FreezerState) error { if m.config.Resources == nil { return errors.New("cannot toggle freezer: cgroups not configured for container") } @@ -139,15 +154,15 @@ func (m *manager) Freeze(state configs.FreezerState) error { return nil } -func (m *manager) Destroy() error { +func (m *Manager) Destroy() error { return cgroups.RemovePath(m.dirPath) } -func (m *manager) Path(_ string) string { +func (m *Manager) Path(_ string) string { return m.dirPath } -func (m *manager) Set(r *configs.Resources) error { +func (m *Manager) Set(r *configs.Resources) error { if r == nil { return nil } @@ -175,8 +190,10 @@ func (m *manager) Set(r *configs.Resources) error { // When rootless is true, errors from the device subsystem are ignored because it is really not expected to work. // However, errors from other subsystems are not ignored. 
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" - if err := setDevices(m.dirPath, r); err != nil && !m.config.Rootless { - return err + if err := setDevices(m.dirPath, r); err != nil { + if !m.config.Rootless || errors.Is(err, cgroups.ErrDevicesUnsupported) { + return err + } } // cpuset (since kernel 5.0) if err := setCpuset(m.dirPath, r); err != nil { @@ -201,7 +218,17 @@ func (m *manager) Set(r *configs.Resources) error { return nil } -func (m *manager) setUnified(res map[string]string) error { +func setDevices(dirPath string, r *configs.Resources) error { + if cgroups.DevicesSetV2 == nil { + if len(r.Devices) > 0 { + return cgroups.ErrDevicesUnsupported + } + return nil + } + return cgroups.DevicesSetV2(dirPath, r) +} + +func (m *Manager) setUnified(res map[string]string) error { for k, v := range res { if strings.Contains(k, "/") { return fmt.Errorf("unified resource %q must be a file name (no slashes)", k) @@ -227,21 +254,21 @@ func (m *manager) setUnified(res map[string]string) error { return nil } -func (m *manager) GetPaths() map[string]string { +func (m *Manager) GetPaths() map[string]string { paths := make(map[string]string, 1) paths[""] = m.dirPath return paths } -func (m *manager) GetCgroups() (*configs.Cgroup, error) { +func (m *Manager) GetCgroups() (*configs.Cgroup, error) { return m.config, nil } -func (m *manager) GetFreezerState() (configs.FreezerState, error) { +func (m *Manager) GetFreezerState() (configs.FreezerState, error) { return getFreezer(m.dirPath) } -func (m *manager) Exists() bool { +func (m *Manager) Exists() bool { return cgroups.PathExists(m.dirPath) } @@ -249,7 +276,7 @@ func OOMKillCount(path string) (uint64, error) { return fscommon.GetValueByKey(path, "memory.events", "oom_kill") } -func (m *manager) OOMKillCount() (uint64, error) { +func (m *Manager) OOMKillCount() (uint64, error) { c, err := OOMKillCount(m.dirPath) if err != nil && m.config.Rootless && os.IsNotExist(err) { err = nil @@ -257,3 +284,35 @@ func (m *manager) OOMKillCount() (uint64, error) { return c, err } + +func CheckMemoryUsage(dirPath string, r *configs.Resources) error { + if !r.MemoryCheckBeforeUpdate { + return nil + } + + if r.Memory <= 0 && r.MemorySwap <= 0 { + return nil + } + + usage, err := fscommon.GetCgroupParamUint(dirPath, "memory.current") + if err != nil { + // This check is on best-effort basis, so if we can't read the + // current usage (cgroup not yet created, or any other error), + // we should not fail. 
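// Hedged example of the check below: with memory.current at 200 MiB, an
// update asking for a lower limit is rejected up front instead of risking
// an immediate OOM kill:
//
//	r.MemoryCheckBeforeUpdate = true
//	r.Memory = 100 << 20 // 100 MiB < usage -> "rejecting memory limit" error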
+ return nil + } + + if r.MemorySwap > 0 { + if uint64(r.MemorySwap) <= usage { + return fmt.Errorf("rejecting memory+swap limit %d <= usage %d", r.MemorySwap, usage) + } + } + + if r.Memory > 0 { + if uint64(r.Memory) <= usage { + return fmt.Errorf("rejecting memory limit %d <= usage %d", r.Memory, usage) + } + } + + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go index 01fe7d8e12..2965659742 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go @@ -40,6 +40,11 @@ func setMemory(dirPath string, r *configs.Resources) error { if !isMemorySet(r) { return nil } + + if err := CheckMemoryUsage(dirPath, r); err != nil { + return err + } + swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory) if err != nil { return err diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/misc.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/misc.go new file mode 100644 index 0000000000..f0b292aa01 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/misc.go @@ -0,0 +1,52 @@ +package fs2 + +import ( + "bufio" + "os" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" +) + +func statMisc(dirPath string, stats *cgroups.Stats) error { + for _, file := range []string{"current", "events"} { + fd, err := cgroups.OpenFile(dirPath, "misc."+file, os.O_RDONLY) + if err != nil { + return err + } + + s := bufio.NewScanner(fd) + for s.Scan() { + key, value, err := fscommon.ParseKeyValue(s.Text()) + if err != nil { + fd.Close() + return err + } + + key = strings.TrimSuffix(key, ".max") + + if _, ok := stats.MiscStats[key]; !ok { + stats.MiscStats[key] = cgroups.MiscStats{} + } + + tmp := stats.MiscStats[key] + + switch file { + case "current": + tmp.Usage = value + case "events": + tmp.Events = value + } + + stats.MiscStats[key] = tmp + } + fd.Close() + + if err := s.Err(); err != nil { + return err + } + } + + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/psi.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/psi.go new file mode 100644 index 0000000000..09f3488851 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/psi.go @@ -0,0 +1,89 @@ +package fs2 + +import ( + "bufio" + "errors" + "fmt" + "os" + "strconv" + "strings" + + "golang.org/x/sys/unix" + + "github.com/opencontainers/runc/libcontainer/cgroups" +) + +func statPSI(dirPath string, file string) (*cgroups.PSIStats, error) { + f, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + // Kernel < 4.20, or CONFIG_PSI is not set, + // or PSI stats are turned off for the cgroup + // ("echo 0 > cgroup.pressure", kernel >= 6.1). 
+ return nil, nil + } + return nil, err + } + defer f.Close() + + var psistats cgroups.PSIStats + sc := bufio.NewScanner(f) + for sc.Scan() { + parts := strings.Fields(sc.Text()) + var pv *cgroups.PSIData + switch parts[0] { + case "some": + pv = &psistats.Some + case "full": + pv = &psistats.Full + } + if pv != nil { + *pv, err = parsePSIData(parts[1:]) + if err != nil { + return nil, &parseError{Path: dirPath, File: file, Err: err} + } + } + } + if err := sc.Err(); err != nil { + if errors.Is(err, unix.ENOTSUP) { + // Some kernels (e.g. CS9) may return ENOTSUP on read + // if psi=1 kernel cmdline parameter is required. + return nil, nil + } + return nil, &parseError{Path: dirPath, File: file, Err: err} + } + return &psistats, nil +} + +func parsePSIData(psi []string) (cgroups.PSIData, error) { + data := cgroups.PSIData{} + for _, f := range psi { + kv := strings.SplitN(f, "=", 2) + if len(kv) != 2 { + return data, fmt.Errorf("invalid psi data: %q", f) + } + var pv *float64 + switch kv[0] { + case "avg10": + pv = &data.Avg10 + case "avg60": + pv = &data.Avg60 + case "avg300": + pv = &data.Avg300 + case "total": + v, err := strconv.ParseUint(kv[1], 10, 64) + if err != nil { + return data, fmt.Errorf("invalid %s PSI value: %w", kv[0], err) + } + data.Total = v + } + if pv != nil { + v, err := strconv.ParseFloat(kv[1], 64) + if err != nil { + return data, fmt.Errorf("invalid %s PSI value: %w", kv[0], err) + } + *pv = v + } + } + return data, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go index 0d8371b05f..b475567d82 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go @@ -32,9 +32,22 @@ type CpuUsage struct { UsageInUsermode uint64 `json:"usage_in_usermode"` } +type PSIData struct { + Avg10 float64 `json:"avg10"` + Avg60 float64 `json:"avg60"` + Avg300 float64 `json:"avg300"` + Total uint64 `json:"total"` +} + +type PSIStats struct { + Some PSIData `json:"some,omitempty"` + Full PSIData `json:"full,omitempty"` +} + type CpuStats struct { CpuUsage CpuUsage `json:"cpu_usage,omitempty"` ThrottlingData ThrottlingData `json:"throttling_data,omitempty"` + PSI *PSIStats `json:"psi,omitempty"` } type CPUSetStats struct { @@ -91,6 +104,7 @@ type MemoryStats struct { UseHierarchy bool `json:"use_hierarchy"` Stats map[string]uint64 `json:"stats,omitempty"` + PSI *PSIStats `json:"psi,omitempty"` } type PageUsageByNUMA struct { @@ -135,6 +149,7 @@ type BlkioStats struct { IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"` IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"` SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"` + PSI *PSIStats `json:"psi,omitempty"` } type HugetlbStats struct { @@ -157,6 +172,13 @@ type RdmaStats struct { RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"` } +type MiscStats struct { + // current resource usage for a key in misc + Usage uint64 `json:"usage,omitempty"` + // number of times the resource usage was about to go over the max boundary + Events uint64 `json:"events,omitempty"` +} + type Stats struct { CpuStats CpuStats `json:"cpu_stats,omitempty"` CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"` @@ -166,10 +188,13 @@ type Stats struct { // the map is in the format "size of hugepage: stats of the hugepage" HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` RdmaStats 
RdmaStats `json:"rdma_stats,omitempty"` + // the map is in the format "misc resource name: stats of the key" + MiscStats map[string]MiscStats `json:"misc_stats,omitempty"` } func NewStats() *Stats { memoryStats := MemoryStats{Stats: make(map[string]uint64)} hugetlbStats := make(map[string]HugetlbStats) - return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats} + miscStats := make(map[string]MiscStats) + return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats, MiscStats: miscStats} } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go index fc4ae44a48..186cbc6413 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go @@ -36,13 +36,13 @@ func IsCgroup2UnifiedMode() bool { var st unix.Statfs_t err := unix.Statfs(unifiedMountpoint, &st) if err != nil { + level := logrus.WarnLevel if os.IsNotExist(err) && userns.RunningInUserNS() { - // ignore the "not found" error if running in userns - logrus.WithError(err).Debugf("%s missing, assuming cgroup v1", unifiedMountpoint) - isUnified = false - return + // For rootless containers, sweep it under the rug. + level = logrus.DebugLevel } - panic(fmt.Sprintf("cannot statfs cgroup root: %s", err)) + logrus.StandardLogger().Logf(level, + "statfs %s: %v; assuming cgroup v1", unifiedMountpoint, err) } isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC }) @@ -217,21 +217,26 @@ func PathExists(path string) bool { return true } -func EnterPid(cgroupPaths map[string]string, pid int) error { - for _, path := range cgroupPaths { - if PathExists(path) { - if err := WriteCgroupProc(path, pid); err != nil { - return err - } - } - } - return nil -} +// rmdir tries to remove a directory, optionally retrying on EBUSY. +func rmdir(path string, retry bool) error { + delay := time.Millisecond + tries := 10 -func rmdir(path string) error { +again: err := unix.Rmdir(path) - if err == nil || err == unix.ENOENT { //nolint:errorlint // unix errors are bare + switch err { // nolint:errorlint // unix errors are bare + case nil, unix.ENOENT: return nil + case unix.EINTR: + goto again + case unix.EBUSY: + if retry && tries > 0 { + time.Sleep(delay) + delay *= 2 + tries-- + goto again + + } } return &os.PathError{Op: "rmdir", Path: path, Err: err} } @@ -239,68 +244,42 @@ func rmdir(path string) error { // RemovePath aims to remove cgroup path. It does so recursively, // by removing any subdirectories (sub-cgroups) first. func RemovePath(path string) error { - // try the fast path first - if err := rmdir(path); err == nil { + // Try the fast path first. + if err := rmdir(path, false); err == nil { return nil } infos, err := os.ReadDir(path) - if err != nil { - if os.IsNotExist(err) { - err = nil - } + if err != nil && !os.IsNotExist(err) { return err } for _, info := range infos { if info.IsDir() { - // We should remove subcgroups dir first + // We should remove subcgroup first. if err = RemovePath(filepath.Join(path, info.Name())); err != nil { break } } } if err == nil { - err = rmdir(path) + err = rmdir(path, true) } return err } // RemovePaths iterates over the provided paths removing them. -// We trying to remove all paths five times with increasing delay between tries. -// If after all there are not removed cgroups - appropriate error will be -// returned. 
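// Hedged aside on the rmdir retry above: the EBUSY backoff starts at 1ms
// and doubles for up to 10 attempts, i.e. roughly 1+2+...+512 = 1023ms in
// the worst case, which replaces the five whole-map retry passes that
// RemovePaths performed before this change (deleted below).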
func RemovePaths(paths map[string]string) (err error) { - const retries = 5 - delay := 10 * time.Millisecond - for i := 0; i < retries; i++ { - if i != 0 { - time.Sleep(delay) - delay *= 2 - } - for s, p := range paths { - if err := RemovePath(p); err != nil { - // do not log intermediate iterations - switch i { - case 0: - logrus.WithError(err).Warnf("Failed to remove cgroup (will retry)") - case retries - 1: - logrus.WithError(err).Error("Failed to remove cgroup") - } - } - _, err := os.Stat(p) - // We need this strange way of checking cgroups existence because - // RemoveAll almost always returns error, even on already removed - // cgroups - if os.IsNotExist(err) { - delete(paths, s) - } - } - if len(paths) == 0 { - //nolint:ineffassign,staticcheck // done to help garbage collecting: opencontainers/runc#2506 - paths = make(map[string]string) - return nil + for s, p := range paths { + if err := RemovePath(p); err == nil { + delete(paths, s) } } + if len(paths) == 0 { + //nolint:ineffassign,staticcheck // done to help garbage collecting: opencontainers/runc#2506 + // TODO: switch to clear once Go < 1.21 is not supported. + paths = make(map[string]string) + return nil + } return fmt.Errorf("Failed to remove paths: %v", paths) } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go index 47c75f22b4..81193e2098 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go @@ -99,11 +99,12 @@ func tryDefaultPath(cgroupPath, subsystem string) string { // expensive), so it is assumed that cgroup mounts are not being changed. func readCgroupMountinfo() ([]*mountinfo.Info, error) { readMountinfoOnce.Do(func() { + // mountinfo.GetMounts uses /proc/thread-self, so we can use it without + // issues. cgroupMountinfo, readMountinfoErr = mountinfo.GetMounts( mountinfo.FSTypeFilter("cgroup"), ) }) - return cgroupMountinfo, readMountinfoErr } @@ -196,6 +197,9 @@ func getCgroupMountsV1(all bool) ([]Mount, error) { return nil, err } + // We don't need to use /proc/thread-self here because runc always runs + // with every thread in the same cgroup. This lets us avoid having to do + // runtime.LockOSThread. allSubsystems, err := ParseCgroupFile("/proc/self/cgroup") if err != nil { return nil, err @@ -214,6 +218,10 @@ func GetOwnCgroup(subsystem string) (string, error) { if IsCgroup2UnifiedMode() { return "", errUnified } + + // We don't need to use /proc/thread-self here because runc always runs + // with every thread in the same cgroup. This lets us avoid having to do + // runtime.LockOSThread. 
cgroups, err := ParseCgroupFile("/proc/self/cgroup") if err != nil { return "", err @@ -236,27 +244,6 @@ func GetOwnCgroupPath(subsystem string) (string, error) { return getCgroupPathHelper(subsystem, cgroup) } -func GetInitCgroup(subsystem string) (string, error) { - if IsCgroup2UnifiedMode() { - return "", errUnified - } - cgroups, err := ParseCgroupFile("/proc/1/cgroup") - if err != nil { - return "", err - } - - return getControllerPath(subsystem, cgroups) -} - -func GetInitCgroupPath(subsystem string) (string, error) { - cgroup, err := GetInitCgroup(subsystem) - if err != nil { - return "", err - } - - return getCgroupPathHelper(subsystem, cgroup) -} - func getCgroupPathHelper(subsystem, cgroup string) (string, error) { mnt, root, err := FindCgroupMountpointAndRoot("", subsystem) if err != nil { diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go index fa195bf90f..865344f99c 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go @@ -2,8 +2,8 @@ package configs import "fmt" -// blockIODevice holds major:minor format supported in blkio cgroup -type blockIODevice struct { +// BlockIODevice holds major:minor format supported in blkio cgroup. +type BlockIODevice struct { // Major is the device's major number Major int64 `json:"major"` // Minor is the device's minor number @@ -12,7 +12,7 @@ type blockIODevice struct { // WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair type WeightDevice struct { - blockIODevice + BlockIODevice // Weight is the bandwidth rate for the device, range is from 10 to 1000 Weight uint16 `json:"weight"` // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only @@ -41,7 +41,7 @@ func (wd *WeightDevice) LeafWeightString() string { // ThrottleDevice struct holds a `major:minor rate_per_second` pair type ThrottleDevice struct { - blockIODevice + BlockIODevice // Rate is the IO rate limit per cgroup per device Rate uint64 `json:"rate"` } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go index 2d4a898710..4a34cf76fc 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go @@ -69,6 +69,9 @@ type Resources struct { // CPU hardcap limit (in usecs). Allowed cpu time in a given period. CpuQuota int64 `json:"cpu_quota"` + // CPU hardcap burst limit (in usecs). Allowed accumulated cpu time additionally for burst in a given period. + CpuBurst *uint64 `json:"cpu_burst"` //nolint:revive + // CPU period to be used for hardcapping (in usecs). 0 to use system default. CpuPeriod uint64 `json:"cpu_period"` @@ -84,6 +87,9 @@ type Resources struct { // MEM to use CpusetMems string `json:"cpuset_mems"` + // cgroup SCHED_IDLE + CPUIdle *int64 `json:"cpu_idle,omitempty"` + // Process limit; set <= `0' to disable limit. PidsLimit int64 `json:"pids_limit"` @@ -155,4 +161,9 @@ type Resources struct { // during Set() to figure out whether the freeze is required. Those // methods may be relatively slow, thus this flag. 
SkipFreezeOnSet bool `json:"-"` + + // MemoryCheckBeforeUpdate is a flag for cgroup v2 managers to check + // if the new memory limits (Memory and MemorySwap) being set are lower + // than the current memory usage, and reject if so. + MemoryCheckBeforeUpdate bool `json:"memory_check_before_update"` } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go index 6ebf5ec7b6..22fe0f9b4c 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go @@ -8,6 +8,7 @@ import ( "time" "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" "github.com/opencontainers/runc/libcontainer/devices" "github.com/opencontainers/runtime-spec/specs-go" @@ -31,12 +32,13 @@ type IDMap struct { // for syscalls. Additional architectures can be added by specifying them in // Architectures. type Seccomp struct { - DefaultAction Action `json:"default_action"` - Architectures []string `json:"architectures"` - Syscalls []*Syscall `json:"syscalls"` - DefaultErrnoRet *uint `json:"default_errno_ret"` - ListenerPath string `json:"listener_path,omitempty"` - ListenerMetadata string `json:"listener_metadata,omitempty"` + DefaultAction Action `json:"default_action"` + Architectures []string `json:"architectures"` + Flags []specs.LinuxSeccompFlag `json:"flags"` + Syscalls []*Syscall `json:"syscalls"` + DefaultErrnoRet *uint `json:"default_errno_ret"` + ListenerPath string `json:"listener_path,omitempty"` + ListenerMetadata string `json:"listener_metadata,omitempty"` } // Action is taken upon rule match in Seccomp @@ -83,9 +85,6 @@ type Syscall struct { Args []*Arg `json:"args"` } -// TODO Windows. Many of these fields should be factored out into those parts -// which are common across platforms, and those which are platform specific. - // Config defines configuration options for executing a process inside a contained environment. type Config struct { // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs @@ -121,6 +120,9 @@ type Config struct { // Hostname optionally sets the container's hostname if provided Hostname string `json:"hostname"` + // Domainname optionally sets the container's domainname if provided + Domainname string `json:"domainname"` + // Namespaces specifies the container's namespaces that it should setup when cloning the init process // If a namespace is not provided that namespace is shared from the container's parent process Namespaces Namespaces `json:"namespaces"` @@ -158,11 +160,11 @@ type Config struct { // More information about kernel oom score calculation here: https://lwn.net/Articles/317814/ OomScoreAdj *int `json:"oom_score_adj,omitempty"` - // UidMappings is an array of User ID mappings for User Namespaces - UidMappings []IDMap `json:"uid_mappings"` + // UIDMappings is an array of User ID mappings for User Namespaces + UIDMappings []IDMap `json:"uid_mappings"` - // GidMappings is an array of Group ID mappings for User Namespaces - GidMappings []IDMap `json:"gid_mappings"` + // GIDMappings is an array of Group ID mappings for User Namespaces + GIDMappings []IDMap `json:"gid_mappings"` // MaskPaths specifies paths within the container's rootfs to mask over with a bind // mount pointing to /dev/null as to prevent reads of the file. @@ -211,8 +213,87 @@ type Config struct { // RootlessCgroups is set when unlikely to have the full access to cgroups. 
// When RootlessCgroups is set, cgroups errors are ignored. RootlessCgroups bool `json:"rootless_cgroups,omitempty"` + + // TimeOffsets specifies the offset for supporting time namespaces. + TimeOffsets map[string]specs.LinuxTimeOffset `json:"time_offsets,omitempty"` + + // Scheduler represents the scheduling attributes for a process. + Scheduler *Scheduler `json:"scheduler,omitempty"` + + // Personality contains configuration for the Linux personality syscall. + Personality *LinuxPersonality `json:"personality,omitempty"` + + // IOPriority is the container's I/O priority. + IOPriority *IOPriority `json:"io_priority,omitempty"` } +// Scheduler is based on the Linux sched_setattr(2) syscall. +type Scheduler = specs.Scheduler + +// ToSchedAttr is to convert *configs.Scheduler to *unix.SchedAttr. +func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) { + var policy uint32 + switch scheduler.Policy { + case specs.SchedOther: + policy = 0 + case specs.SchedFIFO: + policy = 1 + case specs.SchedRR: + policy = 2 + case specs.SchedBatch: + policy = 3 + case specs.SchedISO: + policy = 4 + case specs.SchedIdle: + policy = 5 + case specs.SchedDeadline: + policy = 6 + default: + return nil, fmt.Errorf("invalid scheduler policy: %s", scheduler.Policy) + } + + var flags uint64 + for _, flag := range scheduler.Flags { + switch flag { + case specs.SchedFlagResetOnFork: + flags |= 0x01 + case specs.SchedFlagReclaim: + flags |= 0x02 + case specs.SchedFlagDLOverrun: + flags |= 0x04 + case specs.SchedFlagKeepPolicy: + flags |= 0x08 + case specs.SchedFlagKeepParams: + flags |= 0x10 + case specs.SchedFlagUtilClampMin: + flags |= 0x20 + case specs.SchedFlagUtilClampMax: + flags |= 0x40 + default: + return nil, fmt.Errorf("invalid scheduler flag: %s", flag) + } + } + + return &unix.SchedAttr{ + Size: unix.SizeofSchedAttr, + Policy: policy, + Flags: flags, + Nice: scheduler.Nice, + Priority: uint32(scheduler.Priority), + Runtime: scheduler.Runtime, + Deadline: scheduler.Deadline, + Period: scheduler.Period, + }, nil +} + +var IOPrioClassMapping = map[specs.IOPriorityClass]int{ + specs.IOPRIO_CLASS_RT: 1, + specs.IOPRIO_CLASS_BE: 2, + specs.IOPRIO_CLASS_IDLE: 3, +} + +type IOPriority = specs.LinuxIOPriority + type ( HookName string HookList []Hook @@ -277,6 +358,7 @@ type Capabilities struct { Ambient []string } +// Deprecated: use (Hooks).Run instead. func (hooks HookList) RunHooks(state *specs.State) error { for i, h := range hooks { if err := h.Run(state); err != nil { @@ -333,6 +415,18 @@ func (hooks *Hooks) MarshalJSON() ([]byte, error) { }) } +// Run executes all hooks for the given hook name. +func (hooks Hooks) Run(name HookName, state *specs.State) error { + list := hooks[name] + for i, h := range list { + if err := h.Run(state); err != nil { + return fmt.Errorf("error running %s hook #%d: %w", name, i, err) + } + } + + return nil +} + type Hook interface { // Run executes the hook with the provided state. 
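A sketch of how the ToSchedAttr conversion above might be applied. It assumes unix.SchedSetAttr from golang.org/x/sys/unix (where pid 0 addresses the calling thread); the priority value is illustrative:

// assumes: import ("github.com/opencontainers/runc/libcontainer/configs";
//                  specs "github.com/opencontainers/runtime-spec/specs-go";
//                  "golang.org/x/sys/unix")

func applyFIFO() error {
	attr, err := configs.ToSchedAttr(&configs.Scheduler{
		Policy:   specs.SchedFIFO,
		Priority: 10, // illustrative real-time priority
	})
	if err != nil {
		return err
	}
	return unix.SchedSetAttr(0, attr, 0) // pid 0: the calling thread
}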
Run(*specs.State) error @@ -393,7 +487,7 @@ func (c Command) Run(s *specs.State) error { go func() { err := cmd.Wait() if err != nil { - err = fmt.Errorf("error running hook: %w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String()) + err = fmt.Errorf("%w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String()) } errC <- err }() diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go index 51fe940748..e401f5331b 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go @@ -7,22 +7,33 @@ import ( ) var ( - errNoUIDMap = errors.New("User namespaces enabled, but no uid mappings found.") - errNoUserMap = errors.New("User namespaces enabled, but no user mapping found.") - errNoGIDMap = errors.New("User namespaces enabled, but no gid mappings found.") - errNoGroupMap = errors.New("User namespaces enabled, but no group mapping found.") + errNoUIDMap = errors.New("user namespaces enabled, but no uid mappings found") + errNoGIDMap = errors.New("user namespaces enabled, but no gid mappings found") ) +// Please check https://man7.org/linux/man-pages/man2/personality.2.html for const details. +// https://raw.githubusercontent.com/torvalds/linux/master/include/uapi/linux/personality.h +const ( + PerLinux = 0x0000 + PerLinux32 = 0x0008 +) + +type LinuxPersonality struct { + // Domain for the personality + // can only contain values "LINUX" and "LINUX32" + Domain int `json:"domain"` +} + // HostUID gets the translated uid for the process on host which could be // different when user namespaces are enabled. func (c Config) HostUID(containerId int) (int, error) { if c.Namespaces.Contains(NEWUSER) { - if c.UidMappings == nil { + if len(c.UIDMappings) == 0 { return -1, errNoUIDMap } - id, found := c.hostIDFromMapping(int64(containerId), c.UidMappings) + id, found := c.hostIDFromMapping(int64(containerId), c.UIDMappings) if !found { - return -1, errNoUserMap + return -1, fmt.Errorf("user namespaces enabled, but no mapping found for uid %d", containerId) } // If we are a 32-bit binary running on a 64-bit system, it's possible // the mapped user is too large to store in an int, which means we @@ -47,12 +58,12 @@ func (c Config) HostRootUID() (int, error) { // different when user namespaces are enabled. 
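Illustrating the renamed UIDMappings field and the new per-id error path in the config_linux.go hunk above (mapping values are made up):

// assumes: import "github.com/opencontainers/runc/libcontainer/configs"

func hostRootUID() (int, error) {
	cfg := configs.Config{
		Namespaces:  configs.Namespaces{{Type: configs.NEWUSER}},
		UIDMappings: []configs.IDMap{{ContainerID: 0, HostID: 100000, Size: 65536}},
	}
	// Container uid 0 falls inside the single mapping, so this yields 100000.
	// A uid outside 0..65535 would hit the new "no mapping found" error.
	return cfg.HostUID(0)
}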
func (c Config) HostGID(containerId int) (int, error) { if c.Namespaces.Contains(NEWUSER) { - if c.GidMappings == nil { + if len(c.GIDMappings) == 0 { return -1, errNoGIDMap } - id, found := c.hostIDFromMapping(int64(containerId), c.GidMappings) + id, found := c.hostIDFromMapping(int64(containerId), c.GIDMappings) if !found { - return -1, errNoGroupMap + return -1, fmt.Errorf("user namespaces enabled, but no mapping found for gid %d", containerId) } // If we are a 32-bit binary running on a 64-bit system, it's possible // the mapped user is too large to store in an int, which means we diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go index 784c618205..bfd356e497 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go @@ -1,48 +1,7 @@ package configs -import "golang.org/x/sys/unix" - const ( // EXT_COPYUP is a directive to copy up the contents of a directory when // a tmpfs is mounted over it. - EXT_COPYUP = 1 << iota //nolint:golint // ignore "don't use ALL_CAPS" warning + EXT_COPYUP = 1 << iota //nolint:golint,revive // ignore "don't use ALL_CAPS" warning ) - -type Mount struct { - // Source path for the mount. - Source string `json:"source"` - - // Destination path for the mount inside the container. - Destination string `json:"destination"` - - // Device the mount is for. - Device string `json:"device"` - - // Mount flags. - Flags int `json:"flags"` - - // Propagation Flags - PropagationFlags []int `json:"propagation_flags"` - - // Mount data applied to the mount. - Data string `json:"data"` - - // Relabel source if set, "z" indicates shared, "Z" indicates unshared. - Relabel string `json:"relabel"` - - // RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2). - RecAttr *unix.MountAttr `json:"rec_attr"` - - // Extensions are additional flags that are specific to runc. - Extensions int `json:"extensions"` - - // Optional Command to be run before Source is mounted. - PremountCmds []Command `json:"premount_cmds"` - - // Optional Command to be run after Source is mounted. - PostmountCmds []Command `json:"postmount_cmds"` -} - -func (m *Mount) IsBind() bool { - return m.Flags&unix.MS_BIND != 0 -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_linux.go new file mode 100644 index 0000000000..b69e9ab238 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_linux.go @@ -0,0 +1,66 @@ +package configs + +import "golang.org/x/sys/unix" + +type MountIDMapping struct { + // Recursive indicates if the mapping needs to be recursive. + Recursive bool `json:"recursive"` + + // UserNSPath is a path to a user namespace that indicates the necessary + // id-mappings for MOUNT_ATTR_IDMAP. If set to non-"", UIDMappings and + // GIDMappings must be set to nil. + UserNSPath string `json:"userns_path,omitempty"` + + // UIDMappings is the uid mapping set for this mount, to be used with + // MOUNT_ATTR_IDMAP. + UIDMappings []IDMap `json:"uid_mappings,omitempty"` + + // GIDMappings is the gid mapping set for this mount, to be used with + // MOUNT_ATTR_IDMAP. + GIDMappings []IDMap `json:"gid_mappings,omitempty"` +} + +type Mount struct { + // Source path for the mount. 
+ Source string `json:"source"` + + // Destination path for the mount inside the container. + Destination string `json:"destination"` + + // Device the mount is for. + Device string `json:"device"` + + // Mount flags. + Flags int `json:"flags"` + + // Mount flags that were explicitly cleared in the configuration (meaning + // the user explicitly requested that these flags *not* be set). + ClearedFlags int `json:"cleared_flags"` + + // Propagation Flags + PropagationFlags []int `json:"propagation_flags"` + + // Mount data applied to the mount. + Data string `json:"data"` + + // Relabel source if set, "z" indicates shared, "Z" indicates unshared. + Relabel string `json:"relabel"` + + // RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2). + RecAttr *unix.MountAttr `json:"rec_attr"` + + // Extensions are additional flags that are specific to runc. + Extensions int `json:"extensions"` + + // Mapping is the MOUNT_ATTR_IDMAP configuration for the mount. If non-nil, + // the mount is configured to use MOUNT_ATTR_IDMAP-style id mappings. + IDMapping *MountIDMapping `json:"id_mapping,omitempty"` +} + +func (m *Mount) IsBind() bool { + return m.Flags&unix.MS_BIND != 0 +} + +func (m *Mount) IsIDMapped() bool { + return m.IDMapping != nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_unsupported.go new file mode 100644 index 0000000000..2154191215 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_unsupported.go @@ -0,0 +1,10 @@ +//go:build !linux +// +build !linux + +package configs + +type Mount struct{} + +func (m *Mount) IsBind() bool { + return false +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go index d52d6fcd14..898f96fd0f 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go @@ -14,6 +14,7 @@ const ( NEWIPC NamespaceType = "NEWIPC" NEWUSER NamespaceType = "NEWUSER" NEWCGROUP NamespaceType = "NEWCGROUP" + NEWTIME NamespaceType = "NEWTIME" ) var ( @@ -38,6 +39,8 @@ func NsName(ns NamespaceType) string { return "uts" case NEWCGROUP: return "cgroup" + case NEWTIME: + return "time" } return "" } @@ -56,6 +59,9 @@ func IsNamespaceSupported(ns NamespaceType) bool { if nsFile == "" { return false } + // We don't need to use /proc/thread-self here because the list of + // namespace types is unrelated to the thread. This lets us avoid having to + // do runtime.LockOSThread. 
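Putting the new MountIDMapping and IsIDMapped pieces above together, a hypothetical bind mount whose ownership is shifted via MOUNT_ATTR_IDMAP (paths and ids are illustrative):

// assumes: import ("github.com/opencontainers/runc/libcontainer/configs";
//                  "golang.org/x/sys/unix")

func idMappedBind() *configs.Mount {
	m := &configs.Mount{
		Source:      "/volumes/data", // hypothetical host path
		Destination: "/data",
		Device:      "bind",
		Flags:       unix.MS_BIND,
		IDMapping: &configs.MountIDMapping{
			UIDMappings: []configs.IDMap{{ContainerID: 0, HostID: 100000, Size: 65536}},
			GIDMappings: []configs.IDMap{{ContainerID: 0, HostID: 100000, Size: 65536}},
		},
	}
	_ = m.IsBind()     // true: MS_BIND is set
	_ = m.IsIDMapped() // true: an id mapping is attached
	return m
}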
_, err := os.Stat("/proc/self/ns/" + nsFile) // a namespace is supported if it exists and we have permissions to read it supported = err == nil @@ -72,6 +78,7 @@ func NamespaceTypes() []NamespaceType { NEWPID, NEWNS, NEWCGROUP, + NEWTIME, } } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go index 0516dba8d0..15d8046f3d 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go @@ -17,6 +17,7 @@ var namespaceInfo = map[NamespaceType]int{ NEWUTS: unix.CLONE_NEWUTS, NEWPID: unix.CLONE_NEWPID, NEWCGROUP: unix.CLONE_NEWCGROUP, + NEWTIME: unix.CLONE_NEWTIME, } // CloneFlags parses the container's Namespaces options to set the correct @@ -31,3 +32,15 @@ func (n *Namespaces) CloneFlags() uintptr { } return uintptr(flag) } + +// IsPrivate tells whether the namespace of type t is configured as private +// (i.e. it exists and is not shared). +func (n Namespaces) IsPrivate(t NamespaceType) bool { + for _, v := range n { + if v.Type == t { + return v.Path == "" + } + } + // Not found, so implicitly sharing a parent namespace. + return false +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go deleted file mode 100644 index f95c1409fc..0000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go +++ /dev/null @@ -1,157 +0,0 @@ -//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris -// +build darwin dragonfly freebsd linux netbsd openbsd solaris - -package user - -import ( - "io" - "os" - "strconv" - - "golang.org/x/sys/unix" -) - -// Unix-specific path to the passwd and group formatted files. -const ( - unixPasswdPath = "/etc/passwd" - unixGroupPath = "/etc/group" -) - -// LookupUser looks up a user by their username in /etc/passwd. If the user -// cannot be found (or there is no /etc/passwd file on the filesystem), then -// LookupUser returns an error. -func LookupUser(username string) (User, error) { - return lookupUserFunc(func(u User) bool { - return u.Name == username - }) -} - -// LookupUid looks up a user by their user id in /etc/passwd. If the user cannot -// be found (or there is no /etc/passwd file on the filesystem), then LookupId -// returns an error. -func LookupUid(uid int) (User, error) { - return lookupUserFunc(func(u User) bool { - return u.Uid == uid - }) -} - -func lookupUserFunc(filter func(u User) bool) (User, error) { - // Get operating system-specific passwd reader-closer. - passwd, err := GetPasswd() - if err != nil { - return User{}, err - } - defer passwd.Close() - - // Get the users. - users, err := ParsePasswdFilter(passwd, filter) - if err != nil { - return User{}, err - } - - // No user entries found. - if len(users) == 0 { - return User{}, ErrNoPasswdEntries - } - - // Assume the first entry is the "correct" one. - return users[0], nil -} - -// LookupGroup looks up a group by its name in /etc/group. If the group cannot -// be found (or there is no /etc/group file on the filesystem), then LookupGroup -// returns an error. -func LookupGroup(groupname string) (Group, error) { - return lookupGroupFunc(func(g Group) bool { - return g.Name == groupname - }) -} - -// LookupGid looks up a group by its group id in /etc/group. 
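The IsPrivate helper added above distinguishes three cases; a quick illustration (the netns path is made up):

// assumes: import "github.com/opencontainers/runc/libcontainer/configs"

func privacyExamples() {
	ns := configs.Namespaces{
		{Type: configs.NEWUSER},                           // created fresh
		{Type: configs.NEWNET, Path: "/proc/1234/ns/net"}, // joins an existing netns
	}
	_ = ns.IsPrivate(configs.NEWUSER) // true: configured with no path
	_ = ns.IsPrivate(configs.NEWNET)  // false: shared via an explicit path
	_ = ns.IsPrivate(configs.NEWPID)  // false: absent, implicitly inherited
}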
If the group cannot -// be found (or there is no /etc/group file on the filesystem), then LookupGid -// returns an error. -func LookupGid(gid int) (Group, error) { - return lookupGroupFunc(func(g Group) bool { - return g.Gid == gid - }) -} - -func lookupGroupFunc(filter func(g Group) bool) (Group, error) { - // Get operating system-specific group reader-closer. - group, err := GetGroup() - if err != nil { - return Group{}, err - } - defer group.Close() - - // Get the users. - groups, err := ParseGroupFilter(group, filter) - if err != nil { - return Group{}, err - } - - // No user entries found. - if len(groups) == 0 { - return Group{}, ErrNoGroupEntries - } - - // Assume the first entry is the "correct" one. - return groups[0], nil -} - -func GetPasswdPath() (string, error) { - return unixPasswdPath, nil -} - -func GetPasswd() (io.ReadCloser, error) { - return os.Open(unixPasswdPath) -} - -func GetGroupPath() (string, error) { - return unixGroupPath, nil -} - -func GetGroup() (io.ReadCloser, error) { - return os.Open(unixGroupPath) -} - -// CurrentUser looks up the current user by their user id in /etc/passwd. If the -// user cannot be found (or there is no /etc/passwd file on the filesystem), -// then CurrentUser returns an error. -func CurrentUser() (User, error) { - return LookupUid(unix.Getuid()) -} - -// CurrentGroup looks up the current user's group by their primary group id's -// entry in /etc/passwd. If the group cannot be found (or there is no -// /etc/group file on the filesystem), then CurrentGroup returns an error. -func CurrentGroup() (Group, error) { - return LookupGid(unix.Getgid()) -} - -func currentUserSubIDs(fileName string) ([]SubID, error) { - u, err := CurrentUser() - if err != nil { - return nil, err - } - filter := func(entry SubID) bool { - return entry.Name == u.Name || entry.Name == strconv.Itoa(u.Uid) - } - return ParseSubIDFileFilter(fileName, filter) -} - -func CurrentUserSubUIDs() ([]SubID, error) { - return currentUserSubIDs("/etc/subuid") -} - -func CurrentUserSubGIDs() ([]SubID, error) { - return currentUserSubIDs("/etc/subgid") -} - -func CurrentProcessUIDMap() ([]IDMap, error) { - return ParseIDMapFile("/proc/self/uid_map") -} - -func CurrentProcessGIDMap() ([]IDMap, error) { - return ParseIDMapFile("/proc/self/gid_map") -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/user.go b/vendor/github.com/opencontainers/runc/libcontainer/user/user.go deleted file mode 100644 index 984466d1ab..0000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/user.go +++ /dev/null @@ -1,605 +0,0 @@ -package user - -import ( - "bufio" - "bytes" - "errors" - "fmt" - "io" - "os" - "strconv" - "strings" -) - -const ( - minID = 0 - maxID = 1<<31 - 1 // for 32-bit systems compatibility -) - -var ( - // ErrNoPasswdEntries is returned if no matching entries were found in /etc/group. - ErrNoPasswdEntries = errors.New("no matching entries in passwd file") - // ErrNoGroupEntries is returned if no matching entries were found in /etc/passwd. - ErrNoGroupEntries = errors.New("no matching entries in group file") - // ErrRange is returned if a UID or GID is outside of the valid range. 
- ErrRange = fmt.Errorf("uids and gids must be in range %d-%d", minID, maxID) -) - -type User struct { - Name string - Pass string - Uid int - Gid int - Gecos string - Home string - Shell string -} - -type Group struct { - Name string - Pass string - Gid int - List []string -} - -// SubID represents an entry in /etc/sub{u,g}id -type SubID struct { - Name string - SubID int64 - Count int64 -} - -// IDMap represents an entry in /proc/PID/{u,g}id_map -type IDMap struct { - ID int64 - ParentID int64 - Count int64 -} - -func parseLine(line []byte, v ...interface{}) { - parseParts(bytes.Split(line, []byte(":")), v...) -} - -func parseParts(parts [][]byte, v ...interface{}) { - if len(parts) == 0 { - return - } - - for i, p := range parts { - // Ignore cases where we don't have enough fields to populate the arguments. - // Some configuration files like to misbehave. - if len(v) <= i { - break - } - - // Use the type of the argument to figure out how to parse it, scanf() style. - // This is legit. - switch e := v[i].(type) { - case *string: - *e = string(p) - case *int: - // "numbers", with conversion errors ignored because of some misbehaving configuration files. - *e, _ = strconv.Atoi(string(p)) - case *int64: - *e, _ = strconv.ParseInt(string(p), 10, 64) - case *[]string: - // Comma-separated lists. - if len(p) != 0 { - *e = strings.Split(string(p), ",") - } else { - *e = []string{} - } - default: - // Someone goof'd when writing code using this function. Scream so they can hear us. - panic(fmt.Sprintf("parseLine only accepts {*string, *int, *int64, *[]string} as arguments! %#v is not a pointer!", e)) - } - } -} - -func ParsePasswdFile(path string) ([]User, error) { - passwd, err := os.Open(path) - if err != nil { - return nil, err - } - defer passwd.Close() - return ParsePasswd(passwd) -} - -func ParsePasswd(passwd io.Reader) ([]User, error) { - return ParsePasswdFilter(passwd, nil) -} - -func ParsePasswdFileFilter(path string, filter func(User) bool) ([]User, error) { - passwd, err := os.Open(path) - if err != nil { - return nil, err - } - defer passwd.Close() - return ParsePasswdFilter(passwd, filter) -} - -func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) { - if r == nil { - return nil, errors.New("nil source for passwd-formatted data") - } - - var ( - s = bufio.NewScanner(r) - out = []User{} - ) - - for s.Scan() { - line := bytes.TrimSpace(s.Bytes()) - if len(line) == 0 { - continue - } - - // see: man 5 passwd - // name:password:UID:GID:GECOS:directory:shell - // Name:Pass:Uid:Gid:Gecos:Home:Shell - // root:x:0:0:root:/root:/bin/bash - // adm:x:3:4:adm:/var/adm:/bin/false - p := User{} - parseLine(line, &p.Name, &p.Pass, &p.Uid, &p.Gid, &p.Gecos, &p.Home, &p.Shell) - - if filter == nil || filter(p) { - out = append(out, p) - } - } - if err := s.Err(); err != nil { - return nil, err - } - - return out, nil -} - -func ParseGroupFile(path string) ([]Group, error) { - group, err := os.Open(path) - if err != nil { - return nil, err - } - - defer group.Close() - return ParseGroup(group) -} - -func ParseGroup(group io.Reader) ([]Group, error) { - return ParseGroupFilter(group, nil) -} - -func ParseGroupFileFilter(path string, filter func(Group) bool) ([]Group, error) { - group, err := os.Open(path) - if err != nil { - return nil, err - } - defer group.Close() - return ParseGroupFilter(group, filter) -} - -func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) { - if r == nil { - return nil, errors.New("nil source for group-formatted data") - } - rd 
:= bufio.NewReader(r) - out := []Group{} - - // Read the file line-by-line. - for { - var ( - isPrefix bool - wholeLine []byte - err error - ) - - // Read the next line. We do so in chunks (as much as reader's - // buffer is able to keep), check if we read enough columns - // already on each step and store final result in wholeLine. - for { - var line []byte - line, isPrefix, err = rd.ReadLine() - - if err != nil { - // We should return no error if EOF is reached - // without a match. - if err == io.EOF { - err = nil - } - return out, err - } - - // Simple common case: line is short enough to fit in a - // single reader's buffer. - if !isPrefix && len(wholeLine) == 0 { - wholeLine = line - break - } - - wholeLine = append(wholeLine, line...) - - // Check if we read the whole line already. - if !isPrefix { - break - } - } - - // There's no spec for /etc/passwd or /etc/group, but we try to follow - // the same rules as the glibc parser, which allows comments and blank - // space at the beginning of a line. - wholeLine = bytes.TrimSpace(wholeLine) - if len(wholeLine) == 0 || wholeLine[0] == '#' { - continue - } - - // see: man 5 group - // group_name:password:GID:user_list - // Name:Pass:Gid:List - // root:x:0:root - // adm:x:4:root,adm,daemon - p := Group{} - parseLine(wholeLine, &p.Name, &p.Pass, &p.Gid, &p.List) - - if filter == nil || filter(p) { - out = append(out, p) - } - } -} - -type ExecUser struct { - Uid int - Gid int - Sgids []int - Home string -} - -// GetExecUserPath is a wrapper for GetExecUser. It reads data from each of the -// given file paths and uses that data as the arguments to GetExecUser. If the -// files cannot be opened for any reason, the error is ignored and a nil -// io.Reader is passed instead. -func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) { - var passwd, group io.Reader - - if passwdFile, err := os.Open(passwdPath); err == nil { - passwd = passwdFile - defer passwdFile.Close() - } - - if groupFile, err := os.Open(groupPath); err == nil { - group = groupFile - defer groupFile.Close() - } - - return GetExecUser(userSpec, defaults, passwd, group) -} - -// GetExecUser parses a user specification string (using the passwd and group -// readers as sources for /etc/passwd and /etc/group data, respectively). In -// the case of blank fields or missing data from the sources, the values in -// defaults is used. -// -// GetExecUser will return an error if a user or group literal could not be -// found in any entry in passwd and group respectively. -// -// Examples of valid user specifications are: -// - "" -// - "user" -// - "uid" -// - "user:group" -// - "uid:gid -// - "user:gid" -// - "uid:group" -// -// It should be noted that if you specify a numeric user or group id, they will -// not be evaluated as usernames (only the metadata will be filled). So attempting -// to parse a user with user.Name = "1337" will produce the user with a UID of -// 1337. -func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (*ExecUser, error) { - if defaults == nil { - defaults = new(ExecUser) - } - - // Copy over defaults. - user := &ExecUser{ - Uid: defaults.Uid, - Gid: defaults.Gid, - Sgids: defaults.Sgids, - Home: defaults.Home, - } - - // Sgids slice *cannot* be nil. 
- if user.Sgids == nil { - user.Sgids = []int{} - } - - // Allow for userArg to have either "user" syntax, or optionally "user:group" syntax - var userArg, groupArg string - parseLine([]byte(userSpec), &userArg, &groupArg) - - // Convert userArg and groupArg to be numeric, so we don't have to execute - // Atoi *twice* for each iteration over lines. - uidArg, uidErr := strconv.Atoi(userArg) - gidArg, gidErr := strconv.Atoi(groupArg) - - // Find the matching user. - users, err := ParsePasswdFilter(passwd, func(u User) bool { - if userArg == "" { - // Default to current state of the user. - return u.Uid == user.Uid - } - - if uidErr == nil { - // If the userArg is numeric, always treat it as a UID. - return uidArg == u.Uid - } - - return u.Name == userArg - }) - - // If we can't find the user, we have to bail. - if err != nil && passwd != nil { - if userArg == "" { - userArg = strconv.Itoa(user.Uid) - } - return nil, fmt.Errorf("unable to find user %s: %w", userArg, err) - } - - var matchedUserName string - if len(users) > 0 { - // First match wins, even if there's more than one matching entry. - matchedUserName = users[0].Name - user.Uid = users[0].Uid - user.Gid = users[0].Gid - user.Home = users[0].Home - } else if userArg != "" { - // If we can't find a user with the given username, the only other valid - // option is if it's a numeric username with no associated entry in passwd. - - if uidErr != nil { - // Not numeric. - return nil, fmt.Errorf("unable to find user %s: %w", userArg, ErrNoPasswdEntries) - } - user.Uid = uidArg - - // Must be inside valid uid range. - if user.Uid < minID || user.Uid > maxID { - return nil, ErrRange - } - - // Okay, so it's numeric. We can just roll with this. - } - - // On to the groups. If we matched a username, we need to do this because of - // the supplementary group IDs. - if groupArg != "" || matchedUserName != "" { - groups, err := ParseGroupFilter(group, func(g Group) bool { - // If the group argument isn't explicit, we'll just search for it. - if groupArg == "" { - // Check if user is a member of this group. - for _, u := range g.List { - if u == matchedUserName { - return true - } - } - return false - } - - if gidErr == nil { - // If the groupArg is numeric, always treat it as a GID. - return gidArg == g.Gid - } - - return g.Name == groupArg - }) - if err != nil && group != nil { - return nil, fmt.Errorf("unable to find groups for spec %v: %w", matchedUserName, err) - } - - // Only start modifying user.Gid if it is in explicit form. - if groupArg != "" { - if len(groups) > 0 { - // First match wins, even if there's more than one matching entry. - user.Gid = groups[0].Gid - } else { - // If we can't find a group with the given name, the only other valid - // option is if it's a numeric group name with no associated entry in group. - - if gidErr != nil { - // Not numeric. - return nil, fmt.Errorf("unable to find group %s: %w", groupArg, ErrNoGroupEntries) - } - user.Gid = gidArg - - // Must be inside valid gid range. - if user.Gid < minID || user.Gid > maxID { - return nil, ErrRange - } - - // Okay, so it's numeric. We can just roll with this. - } - } else if len(groups) > 0 { - // Supplementary group ids only make sense if in the implicit form. - user.Sgids = make([]int, len(groups)) - for i, group := range groups { - user.Sgids[i] = group.Gid - } - } - } - - return user, nil -} - -// GetAdditionalGroups looks up a list of groups by name or group id -// against the given /etc/group formatted data. 
If a group name cannot -// be found, an error will be returned. If a group id cannot be found, -// or the given group data is nil, the id will be returned as-is -// provided it is in the legal range. -func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, error) { - groups := []Group{} - if group != nil { - var err error - groups, err = ParseGroupFilter(group, func(g Group) bool { - for _, ag := range additionalGroups { - if g.Name == ag || strconv.Itoa(g.Gid) == ag { - return true - } - } - return false - }) - if err != nil { - return nil, fmt.Errorf("Unable to find additional groups %v: %w", additionalGroups, err) - } - } - - gidMap := make(map[int]struct{}) - for _, ag := range additionalGroups { - var found bool - for _, g := range groups { - // if we found a matched group either by name or gid, take the - // first matched as correct - if g.Name == ag || strconv.Itoa(g.Gid) == ag { - if _, ok := gidMap[g.Gid]; !ok { - gidMap[g.Gid] = struct{}{} - found = true - break - } - } - } - // we asked for a group but didn't find it. let's check to see - // if we wanted a numeric group - if !found { - gid, err := strconv.ParseInt(ag, 10, 64) - if err != nil { - // Not a numeric ID either. - return nil, fmt.Errorf("Unable to find group %s: %w", ag, ErrNoGroupEntries) - } - // Ensure gid is inside gid range. - if gid < minID || gid > maxID { - return nil, ErrRange - } - gidMap[int(gid)] = struct{}{} - } - } - gids := []int{} - for gid := range gidMap { - gids = append(gids, gid) - } - return gids, nil -} - -// GetAdditionalGroupsPath is a wrapper around GetAdditionalGroups -// that opens the groupPath given and gives it as an argument to -// GetAdditionalGroups. -func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) { - var group io.Reader - - if groupFile, err := os.Open(groupPath); err == nil { - group = groupFile - defer groupFile.Close() - } - return GetAdditionalGroups(additionalGroups, group) -} - -func ParseSubIDFile(path string) ([]SubID, error) { - subid, err := os.Open(path) - if err != nil { - return nil, err - } - defer subid.Close() - return ParseSubID(subid) -} - -func ParseSubID(subid io.Reader) ([]SubID, error) { - return ParseSubIDFilter(subid, nil) -} - -func ParseSubIDFileFilter(path string, filter func(SubID) bool) ([]SubID, error) { - subid, err := os.Open(path) - if err != nil { - return nil, err - } - defer subid.Close() - return ParseSubIDFilter(subid, filter) -} - -func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) { - if r == nil { - return nil, errors.New("nil source for subid-formatted data") - } - - var ( - s = bufio.NewScanner(r) - out = []SubID{} - ) - - for s.Scan() { - line := bytes.TrimSpace(s.Bytes()) - if len(line) == 0 { - continue - } - - // see: man 5 subuid - p := SubID{} - parseLine(line, &p.Name, &p.SubID, &p.Count) - - if filter == nil || filter(p) { - out = append(out, p) - } - } - if err := s.Err(); err != nil { - return nil, err - } - - return out, nil -} - -func ParseIDMapFile(path string) ([]IDMap, error) { - r, err := os.Open(path) - if err != nil { - return nil, err - } - defer r.Close() - return ParseIDMap(r) -} - -func ParseIDMap(r io.Reader) ([]IDMap, error) { - return ParseIDMapFilter(r, nil) -} - -func ParseIDMapFileFilter(path string, filter func(IDMap) bool) ([]IDMap, error) { - r, err := os.Open(path) - if err != nil { - return nil, err - } - defer r.Close() - return ParseIDMapFilter(r, filter) -} - -func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) 
([]IDMap, error) { - if r == nil { - return nil, errors.New("nil source for idmap-formatted data") - } - - var ( - s = bufio.NewScanner(r) - out = []IDMap{} - ) - - for s.Scan() { - line := bytes.TrimSpace(s.Bytes()) - if len(line) == 0 { - continue - } - - // see: man 7 user_namespaces - p := IDMap{} - parseParts(bytes.Fields(line), &p.ID, &p.ParentID, &p.Count) - - if filter == nil || filter(p) { - out = append(out, p) - } - } - if err := s.Err(); err != nil { - return nil, err - } - - return out, nil -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go b/vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go deleted file mode 100644 index e018eae614..0000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go +++ /dev/null @@ -1,43 +0,0 @@ -//go:build gofuzz -// +build gofuzz - -package user - -import ( - "io" - "strings" -) - -func IsDivisbleBy(n int, divisibleby int) bool { - return (n % divisibleby) == 0 -} - -func FuzzUser(data []byte) int { - if len(data) == 0 { - return -1 - } - if !IsDivisbleBy(len(data), 5) { - return -1 - } - - var divided [][]byte - - chunkSize := len(data) / 5 - - for i := 0; i < len(data); i += chunkSize { - end := i + chunkSize - - divided = append(divided, data[i:end]) - } - - _, _ = ParsePasswdFilter(strings.NewReader(string(divided[0])), nil) - - var passwd, group io.Reader - - group = strings.NewReader(string(divided[1])) - _, _ = GetAdditionalGroups([]string{string(divided[2])}, group) - - passwd = strings.NewReader(string(divided[3])) - _, _ = GetExecUser(string(divided[4]), nil, passwd, group) - return 1 -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns.go b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns.go index f6cb98e5e4..b225f18f2e 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns.go @@ -1,5 +1,4 @@ package userns // RunningInUserNS detects whether we are currently running in a user namespace. -// Originally copied from github.com/lxc/lxd/shared/util.go var RunningInUserNS = runningInUserNS diff --git a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go index 1e00ab8b50..bff03f8d85 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go @@ -3,14 +3,7 @@ package userns -import ( - "strings" - - "github.com/opencontainers/runc/libcontainer/user" -) - -func FuzzUIDMap(data []byte) int { - uidmap, _ := user.ParseIDMap(strings.NewReader(string(data))) - _ = uidMapInUserNS(uidmap) +func FuzzUIDMap(uidmap []byte) int { + _ = uidMapInUserNS(string(uidmap)) return 1 } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux.go index 724e6df012..a6710b321b 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux.go @@ -1,9 +1,10 @@ package userns import ( + "bufio" + "fmt" + "os" "sync" - - "github.com/opencontainers/runc/libcontainer/user" ) var ( @@ -12,26 +13,43 @@ var ( ) // runningInUserNS detects whether we are currently running in a user namespace. 
-// Originally copied from github.com/lxc/lxd/shared/util.go +// +// Originally copied from https://github.com/lxc/incus/blob/e45085dd42f826b3c8c3228e9733c0b6f998eafe/shared/util.go#L678-L700. func runningInUserNS() bool { nsOnce.Do(func() { - uidmap, err := user.CurrentProcessUIDMap() + file, err := os.Open("/proc/self/uid_map") + if err != nil { + // This kernel-provided file only exists if user namespaces are supported. + return + } + defer file.Close() + + buf := bufio.NewReader(file) + l, _, err := buf.ReadLine() if err != nil { - // This kernel-provided file only exists if user namespaces are supported return } - inUserNS = uidMapInUserNS(uidmap) + + inUserNS = uidMapInUserNS(string(l)) }) return inUserNS } -func uidMapInUserNS(uidmap []user.IDMap) bool { - /* - * We assume we are in the initial user namespace if we have a full - * range - 4294967295 uids starting at uid 0. - */ - if len(uidmap) == 1 && uidmap[0].ID == 0 && uidmap[0].ParentID == 0 && uidmap[0].Count == 4294967295 { +func uidMapInUserNS(uidMap string) bool { + if uidMap == "" { + // File exist but empty (the initial state when userns is created, + // see user_namespaces(7)). + return true + } + + var a, b, c int64 + if _, err := fmt.Sscanf(uidMap, "%d %d %d", &a, &b, &c); err != nil { + // Assume we are in a regular, non user namespace. return false } - return true + + // As per user_namespaces(7), /proc/self/uid_map of + // the initial user namespace shows 0 0 4294967295. + initNS := a == 0 && b == 0 && c == 4294967295 + return !initNS } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go index f35c13a10e..391c811c68 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go @@ -3,8 +3,6 @@ package userns -import "github.com/opencontainers/runc/libcontainer/user" - // runningInUserNS is a stub for non-Linux systems // Always returns false func runningInUserNS() bool { @@ -13,6 +11,6 @@ func runningInUserNS() bool { // uidMapInUserNS is a stub for non-Linux systems // Always returns false -func uidMapInUserNS(uidmap []user.IDMap) bool { +func uidMapInUserNS(uidMap string) bool { return false } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/userns/usernsfd_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/userns/usernsfd_linux.go new file mode 100644 index 0000000000..2eb64cf76c --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/userns/usernsfd_linux.go @@ -0,0 +1,156 @@ +package userns + +import ( + "fmt" + "os" + "sort" + "strings" + "sync" + "syscall" + + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +type Mapping struct { + UIDMappings []configs.IDMap + GIDMappings []configs.IDMap +} + +func (m Mapping) toSys() (uids, gids []syscall.SysProcIDMap) { + for _, uid := range m.UIDMappings { + uids = append(uids, syscall.SysProcIDMap{ + ContainerID: int(uid.ContainerID), + HostID: int(uid.HostID), + Size: int(uid.Size), + }) + } + for _, gid := range m.GIDMappings { + gids = append(gids, syscall.SysProcIDMap{ + ContainerID: int(gid.ContainerID), + HostID: int(gid.HostID), + Size: int(gid.Size), + }) + } + return +} + +// id returns a unique identifier for this mapping, agnostic of the order of +// the uid and gid mappings (because the order doesn't matter to the 
kernel). +// The set of userns handles is indexed using this ID. +func (m Mapping) id() string { + var uids, gids []string + for _, idmap := range m.UIDMappings { + uids = append(uids, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size)) + } + for _, idmap := range m.GIDMappings { + gids = append(gids, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size)) + } + // We don't care about the sort order -- just sort them. + sort.Strings(uids) + sort.Strings(gids) + return "uid=" + strings.Join(uids, ",") + ";gid=" + strings.Join(gids, ",") +} + +type Handles struct { + m sync.Mutex + maps map[string]*os.File +} + +// Release all resources associated with this Handle. All existing files +// returned from Get() will continue to work even after calling Release(). The +// same Handles can be re-used after calling Release(). +func (hs *Handles) Release() { + hs.m.Lock() + defer hs.m.Unlock() + + // Close the files for good measure, though GC will do that for us anyway. + for _, file := range hs.maps { + _ = file.Close() + } + hs.maps = nil +} + +func spawnProc(req Mapping) (*os.Process, error) { + // We need to spawn a subprocess with the requested mappings, which is + // unfortunately quite expensive. The "safe" way of doing this is natively + // with Go (and then spawning something like "sleep infinity"), but + // execve() is a waste of cycles because we just need some process to have + // the right mapping, we don't care what it's executing. The "unsafe" + // option of doing a clone() behind the back of Go is probably okay in + // theory as long as we just do kill(getpid(), SIGSTOP). However, if we + // tell Go to put the new process into PTRACE_TRACEME mode, we can avoid + // the exec and not have to faff around with the mappings. + // + // Note that Go's stdlib does not support newuidmap, but in the case of + // id-mapped mounts, it seems incredibly unlikely that the user will be + // requesting us to do a remapping as an unprivileged user with mappings + // they have privileges over. + logrus.Debugf("spawning dummy process for id-mapping %s", req.id()) + uidMappings, gidMappings := req.toSys() + // We don't need to use /proc/thread-self here because the exe mm of a + // thread-group is guaranteed to be the same for all threads by definition. + // This lets us avoid having to do runtime.LockOSThread. + return os.StartProcess("/proc/self/exe", []string{"runc", "--help"}, &os.ProcAttr{ + Sys: &syscall.SysProcAttr{ + Cloneflags: unix.CLONE_NEWUSER, + UidMappings: uidMappings, + GidMappings: gidMappings, + GidMappingsEnableSetgroups: false, + // Put the process into PTRACE_TRACEME mode to allow us to get the + // userns without having a proper execve() target. + Ptrace: true, + }, + }) +} + +func dupFile(f *os.File) (*os.File, error) { + newFd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0) + if err != nil { + return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err) + } + return os.NewFile(uintptr(newFd), f.Name()), nil +} + +// Get returns a handle to a /proc/$pid/ns/user nsfs file with the requested +// mapping. The processes spawned to produce userns nsfds are cached, so if +// equivalent user namespace mappings are requested, the same user namespace +// will be returned. The caller is responsible for closing the returned file +// descriptor. 
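A usage sketch for the Get method defined just below; the mapping values are illustrative, and per its contract the caller closes the returned file:

// assumes: import ("github.com/opencontainers/runc/libcontainer/configs";
//                  "github.com/opencontainers/runc/libcontainer/userns")

func usernsFdFor() error {
	var hs userns.Handles
	defer hs.Release() // drops the cached nsfs handles

	f, err := hs.Get(userns.Mapping{
		UIDMappings: []configs.IDMap{{ContainerID: 0, HostID: 100000, Size: 65536}},
		GIDMappings: []configs.IDMap{{ContainerID: 0, HostID: 100000, Size: 65536}},
	})
	if err != nil {
		return err
	}
	defer f.Close()
	// f is a /proc/<pid>/ns/user handle suitable as the userns fd for
	// MOUNT_ATTR_IDMAP; repeated Get calls with an equivalent mapping reuse
	// the cached namespace instead of spawning another helper process.
	return nil
}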
+func (hs *Handles) Get(req Mapping) (file *os.File, err error) { + hs.m.Lock() + defer hs.m.Unlock() + + if hs.maps == nil { + hs.maps = make(map[string]*os.File) + } + + file, ok := hs.maps[req.id()] + if !ok { + proc, err := spawnProc(req) + if err != nil { + return nil, fmt.Errorf("failed to spawn dummy process for map %s: %w", req.id(), err) + } + // Make sure we kill the helper process. We ignore errors because + // there's not much we can do about them anyway, and ultimately + defer func() { + _ = proc.Kill() + _, _ = proc.Wait() + }() + + // Stash away a handle to the userns file. This is neater than keeping + // the process alive, because Go's GC can handle files much better than + // leaked processes, and having long-living useless processes seems + // less than ideal. + file, err = os.Open(fmt.Sprintf("/proc/%d/ns/user", proc.Pid)) + if err != nil { + return nil, err + } + hs.maps[req.id()] = file + } + // Duplicate the file, to make sure the lifecycle of each *os.File we + // return is independent. + return dupFile(file) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go index 7ef9da21fd..2edd1417af 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go @@ -19,13 +19,14 @@ package utils import ( "fmt" "os" + "runtime" "golang.org/x/sys/unix" ) -// MaxSendfdLen is the maximum length of the name of a file descriptor being -// sent using SendFd. The name of the file handle returned by RecvFd will never -// be larger than this value. +// MaxNameLen is the maximum length of the name of a file descriptor being sent +// using SendFile. The name of the file handle returned by RecvFile will never be +// larger than this value. const MaxNameLen = 4096 // oobSpace is the size of the oob slice required to store a single FD. Note @@ -33,26 +34,21 @@ const MaxNameLen = 4096 // so sizeof(fd) = 4. var oobSpace = unix.CmsgSpace(4) -// RecvFd waits for a file descriptor to be sent over the given AF_UNIX +// RecvFile waits for a file descriptor to be sent over the given AF_UNIX // socket. The file name of the remote file descriptor will be recreated // locally (it is sent as non-auxiliary data in the same payload). -func RecvFd(socket *os.File) (*os.File, error) { - // For some reason, unix.Recvmsg uses the length rather than the capacity - // when passing the msg_controllen and other attributes to recvmsg. So we - // have to actually set the length. +func RecvFile(socket *os.File) (_ *os.File, Err error) { name := make([]byte, MaxNameLen) oob := make([]byte, oobSpace) sockfd := socket.Fd() - n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0) + n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, unix.MSG_CMSG_CLOEXEC) if err != nil { return nil, err } - if n >= MaxNameLen || oobn != oobSpace { - return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn) + return nil, fmt.Errorf("recvfile: incorrect number of bytes read (n=%d oobn=%d)", n, oobn) } - // Truncate. 
name = name[:n] oob = oob[:oobn] @@ -61,36 +57,63 @@ func RecvFd(socket *os.File) (*os.File, error) { if err != nil { return nil, err } - if len(scms) != 1 { - return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms)) + + // We cannot control how many SCM_RIGHTS we receive, and upon receiving + // them all of the descriptors are installed in our fd table, so we need to + // parse all of the SCM_RIGHTS we received in order to close all of the + // descriptors on error. + var fds []int + defer func() { + for i, fd := range fds { + if i == 0 && Err == nil { + // Only close the first one on error. + continue + } + // Always close extra ones. + _ = unix.Close(fd) + } + }() + var lastErr error + for _, scm := range scms { + if scm.Header.Type == unix.SCM_RIGHTS { + scmFds, err := unix.ParseUnixRights(&scm) + if err != nil { + lastErr = err + } else { + fds = append(fds, scmFds...) + } + } + } + if lastErr != nil { + return nil, lastErr } - scm := scms[0] - fds, err := unix.ParseUnixRights(&scm) - if err != nil { - return nil, err + // We do this after collecting the fds to make sure we close them all when + // returning an error here. + if len(scms) != 1 { + return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms)) } if len(fds) != 1 { return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds)) } - fd := uintptr(fds[0]) - - return os.NewFile(fd, string(name)), nil + return os.NewFile(uintptr(fds[0]), string(name)), nil } -// SendFd sends a file descriptor over the given AF_UNIX socket. In -// addition, the file.Name() of the given file will also be sent as -// non-auxiliary data in the same payload (allowing to send contextual -// information for a file descriptor). -func SendFd(socket *os.File, name string, fd uintptr) error { +// SendFile sends a file over the given AF_UNIX socket. file.Name() is also +// included so that if the other end uses RecvFile, the file will have the same +// name information. +func SendFile(socket *os.File, file *os.File) error { + name := file.Name() if len(name) >= MaxNameLen { return fmt.Errorf("sendfd: filename too long: %s", name) } - return SendFds(socket, []byte(name), int(fd)) + err := SendRawFd(socket, name, file.Fd()) + runtime.KeepAlive(file) + return err } -// SendFds sends a list of files descriptor and msg over the given AF_UNIX socket. -func SendFds(socket *os.File, msg []byte, fds ...int) error { - oob := unix.UnixRights(fds...) - return unix.Sendmsg(int(socket.Fd()), msg, oob, nil, 0) +// SendRawFd sends a specific file descriptor over the given AF_UNIX socket. +func SendRawFd(socket *os.File, msg string, fd uintptr) error { + oob := unix.UnixRights(int(fd)) + return unix.Sendmsg(int(socket.Fd()), []byte(msg), oob, nil, 0) } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go index 6b9fc34352..1b523d8ac5 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go @@ -3,15 +3,12 @@ package utils import ( "encoding/binary" "encoding/json" - "fmt" "io" "os" "path/filepath" - "strconv" "strings" "unsafe" - securejoin "github.com/cyphar/filepath-securejoin" "golang.org/x/sys/unix" ) @@ -43,6 +40,9 @@ func ExitStatus(status unix.WaitStatus) int { } // WriteJSON writes the provided struct v to w using standard json marshaling +// without a trailing newline. 
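The renamed SendFile/RecvFile pair above can be exercised end to end over a socket pair; a sketch with error handling trimmed to the essentials:

// assumes: import ("os"; "github.com/opencontainers/runc/libcontainer/utils")

func passFile(f *os.File) (*os.File, error) {
	parent, child, err := utils.NewSockPair("fdpass")
	if err != nil {
		return nil, err
	}
	defer parent.Close()
	defer child.Close()

	go func() {
		_ = utils.SendFile(child, f) // the file's name rides along as payload
	}()
	// Returns a duplicate of f; MSG_CMSG_CLOEXEC means it arrives CLOEXEC.
	return utils.RecvFile(parent)
}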
This is used instead of json.Encoder because +// there might be a problem in json decoder in some cases, see: +// https://github.com/docker/docker/issues/14203#issuecomment-174177790 func WriteJSON(w io.Writer, v interface{}) error { data, err := json.Marshal(v) if err != nil { @@ -99,52 +99,16 @@ func stripRoot(root, path string) string { return CleanPath("/" + path) } -// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...) -// corresponding to the unsafePath resolved within the root. Before passing the -// fd, this path is verified to have been inside the root -- so operating on it -// through the passed fdpath should be safe. Do not access this path through -// the original path strings, and do not attempt to use the pathname outside of -// the passed closure (the file handle will be freed once the closure returns). -func WithProcfd(root, unsafePath string, fn func(procfd string) error) error { - // Remove the root then forcefully resolve inside the root. - unsafePath = stripRoot(root, unsafePath) - path, err := securejoin.SecureJoin(root, unsafePath) - if err != nil { - return fmt.Errorf("resolving path inside rootfs failed: %w", err) - } - - // Open the target path. - fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0) - if err != nil { - return fmt.Errorf("open o_path procfd: %w", err) - } - defer fh.Close() - - // Double-check the path is the one we expected. - procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd())) - if realpath, err := os.Readlink(procfd); err != nil { - return fmt.Errorf("procfd verification failed: %w", err) - } else if realpath != path { - return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath) - } - - // Run the closure. - return fn(procfd) -} - -// SearchLabels searches a list of key-value pairs for the provided key and -// returns the corresponding value. The pairs must be separated with '='. -func SearchLabels(labels []string, query string) string { - for _, l := range labels { - parts := strings.SplitN(l, "=", 2) - if len(parts) < 2 { - continue - } - if parts[0] == query { - return parts[1] +// SearchLabels searches through a list of key=value pairs for a given key, +// returning its value, and the binary flag telling whether the key exist. +func SearchLabels(labels []string, key string) (string, bool) { + key += "=" + for _, s := range labels { + if strings.HasPrefix(s, key) { + return s[len(key):], true } } - return "" + return "", false } // Annotations returns the bundle path and user defined annotations from the diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go index bf3237a291..f57f0874a0 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go @@ -5,10 +5,16 @@ package utils import ( "fmt" + "math" "os" + "path/filepath" + "runtime" "strconv" + "sync" _ "unsafe" // for go:linkname + securejoin "github.com/cyphar/filepath-securejoin" + "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) @@ -24,12 +30,39 @@ func EnsureProcHandle(fh *os.File) error { return nil } +var ( + haveCloseRangeCloexecBool bool + haveCloseRangeCloexecOnce sync.Once +) + +func haveCloseRangeCloexec() bool { + haveCloseRangeCloexecOnce.Do(func() { + // Make sure we're not closing a random file descriptor. 
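The two-value SearchLabels above lets callers tell a missing key from an empty value, which the old single-value form conflated; for instance:

// assumes: import "github.com/opencontainers/runc/libcontainer/utils"

func labelLookups() {
	labels := []string{"bundle=/run/bundle", "empty="}
	v, ok := utils.SearchLabels(labels, "bundle") // "/run/bundle", true
	e, ok := utils.SearchLabels(labels, "empty")  // "", true
	_, ok = utils.SearchLabels(labels, "absent")  // "", false
	_, _, _ = v, e, ok
}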
+ tmpFd, err := unix.FcntlInt(0, unix.F_DUPFD_CLOEXEC, 0) + if err != nil { + return + } + defer unix.Close(tmpFd) + + err = unix.CloseRange(uint(tmpFd), uint(tmpFd), unix.CLOSE_RANGE_CLOEXEC) + // Any error means we cannot use close_range(CLOSE_RANGE_CLOEXEC). + // -ENOSYS and -EINVAL ultimately mean we don't have support, but any + // other potential error would imply that even the most basic close + // operation wouldn't work. + haveCloseRangeCloexecBool = err == nil + }) + return haveCloseRangeCloexecBool +} + type fdFunc func(fd int) // fdRangeFrom calls the passed fdFunc for each file descriptor that is open in // the current process. func fdRangeFrom(minFd int, fn fdFunc) error { - fdDir, err := os.Open("/proc/self/fd") + procSelfFd, closer := ProcThreadSelf("fd") + defer closer() + + fdDir, err := os.Open(procSelfFd) if err != nil { return err } @@ -67,6 +100,12 @@ func fdRangeFrom(minFd int, fn fdFunc) error { // CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or // equal to minFd in the current process. func CloseExecFrom(minFd int) error { + // Use close_range(CLOSE_RANGE_CLOEXEC) if possible. + if haveCloseRangeCloexec() { + err := unix.CloseRange(uint(minFd), math.MaxUint, unix.CLOSE_RANGE_CLOEXEC) + return os.NewSyscallError("close_range", err) + } + // Otherwise, fall back to the standard loop. return fdRangeFrom(minFd, unix.CloseOnExec) } @@ -89,7 +128,8 @@ func runtime_IsPollDescriptor(fd uintptr) bool //nolint:revive // *os.File operations would apply to the wrong file). This function is only // intended to be called from the last stage of runc init. func UnsafeCloseFrom(minFd int) error { - // We must not close some file descriptors. + // We cannot use close_range(2) even if it is available, because we must + // not close some file descriptors. return fdRangeFrom(minFd, func(fd int) { if runtime_IsPollDescriptor(uintptr(fd)) { // These are the Go runtimes internal netpoll file descriptors. @@ -107,11 +147,117 @@ func UnsafeCloseFrom(minFd int) error { }) } -// NewSockPair returns a new unix socket pair -func NewSockPair(name string) (parent *os.File, child *os.File, err error) { +// NewSockPair returns a new SOCK_STREAM unix socket pair. +func NewSockPair(name string) (parent, child *os.File, err error) { fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) if err != nil { return nil, nil, err } return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil } + +// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...) +// corresponding to the unsafePath resolved within the root. Before passing the +// fd, this path is verified to have been inside the root -- so operating on it +// through the passed fdpath should be safe. Do not access this path through +// the original path strings, and do not attempt to use the pathname outside of +// the passed closure (the file handle will be freed once the closure returns). +func WithProcfd(root, unsafePath string, fn func(procfd string) error) error { + // Remove the root then forcefully resolve inside the root. + unsafePath = stripRoot(root, unsafePath) + path, err := securejoin.SecureJoin(root, unsafePath) + if err != nil { + return fmt.Errorf("resolving path inside rootfs failed: %w", err) + } + + procSelfFd, closer := ProcThreadSelf("fd/") + defer closer() + + // Open the target path. 
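The fast path added to CloseExecFrom above boils down to a single syscall; a standalone sketch mirroring the calls the patch itself makes (CLOSE_RANGE_CLOEXEC needs Linux 5.11+):

// assumes: import ("math"; "os"; "golang.org/x/sys/unix")

func setCloexecFrom(minFd int) error {
	// Mark every fd >= minFd close-on-exec in one call.
	err := unix.CloseRange(uint(minFd), math.MaxUint, unix.CLOSE_RANGE_CLOEXEC)
	if err != nil {
		// ENOSYS/EINVAL mean no kernel support; a caller would fall back to
		// walking the fd directory as fdRangeFrom does above.
		return os.NewSyscallError("close_range", err)
	}
	return nil
}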
+	fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
+	if err != nil {
+		return fmt.Errorf("open o_path procfd: %w", err)
+	}
+	defer fh.Close()
+
+	procfd := filepath.Join(procSelfFd, strconv.Itoa(int(fh.Fd())))
+	// Double-check the path is the one we expected.
+	if realpath, err := os.Readlink(procfd); err != nil {
+		return fmt.Errorf("procfd verification failed: %w", err)
+	} else if realpath != path {
+		return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
+	}
+
+	return fn(procfd)
+}
+
+type ProcThreadSelfCloser func()
+
+var (
+	haveProcThreadSelf     bool
+	haveProcThreadSelfOnce sync.Once
+)
+
+// ProcThreadSelf returns a string that is equivalent to
+// /proc/thread-self/<subpath>, with a graceful fallback on older kernels where
+// /proc/thread-self doesn't exist. This method DOES NOT use SecureJoin,
+// meaning that the passed string needs to be trusted. The caller _must_ call
+// the returned ProcThreadSelfCloser function (which is runtime.UnlockOSThread)
+// *only once* after it has finished using the returned path string.
+func ProcThreadSelf(subpath string) (string, ProcThreadSelfCloser) {
+	haveProcThreadSelfOnce.Do(func() {
+		if _, err := os.Stat("/proc/thread-self/"); err == nil {
+			haveProcThreadSelf = true
+		} else {
+			logrus.Debugf("cannot stat /proc/thread-self (%v), falling back to /proc/self/task/<tid>", err)
+		}
+	})
+
+	// We need to lock our thread until the caller is done with the path string
+	// because any non-atomic operation on the path (such as opening a file,
+	// then reading it) could be interrupted by the Go runtime where the
+	// underlying thread is swapped out and the original thread is killed,
+	// resulting in pull-your-hair-out-hard-to-debug issues in the caller. In
+	// addition, the pre-3.17 fallback makes everything non-atomic because the
+	// same thing could happen between unix.Gettid() and the path operations.
+	//
+	// In theory, we don't need to lock in the atomic user case when using
+	// /proc/thread-self/, but it's better to be safe than sorry (and there are
+	// only one or two truly atomic users of /proc/thread-self/).
+	runtime.LockOSThread()
+
+	threadSelf := "/proc/thread-self/"
+	if !haveProcThreadSelf {
+		// Pre-3.17 kernels did not have /proc/thread-self, so do it manually.
+		threadSelf = "/proc/self/task/" + strconv.Itoa(unix.Gettid()) + "/"
+		if _, err := os.Stat(threadSelf); err != nil {
+			// Unfortunately, this code is called from rootfs_linux.go where we
+			// are running inside the pid namespace of the container but /proc
+			// is the host's procfs. Unfortunately there is no real way to get
+			// the correct tid to use here (the kernel age means we cannot do
+			// things like set up a private fsopen("proc") -- even scanning
+			// NSpid in all of the tasks in /proc/self/task/*/status requires
+			// Linux 4.1).
+			//
+			// So, we just have to assume that /proc/self is acceptable in this
+			// one specific case.
+			if os.Getpid() == 1 {
+				logrus.Debugf("/proc/thread-self (tid=%d) cannot be emulated inside the initial container setup -- using /proc/self instead: %v", unix.Gettid(), err)
+			} else {
+				// This should never happen, but the fallback should work in most cases...
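+				// (Descriptor tables are shared between the threads of a
+				// process, so for fd/ subpaths /proc/self resolves to the
+				// same entries that /proc/thread-self would have.)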
+ logrus.Warnf("/proc/thread-self could not be emulated for pid=%d (tid=%d) -- using more buggy /proc/self fallback instead: %v", os.Getpid(), unix.Gettid(), err) + } + threadSelf = "/proc/self/" + } + } + return threadSelf + subpath, runtime.UnlockOSThread +} + +// ProcThreadSelfFd is small wrapper around ProcThreadSelf to make it easier to +// create a /proc/thread-self handle for given file descriptor. +// +// It is basically equivalent to ProcThreadSelf(fmt.Sprintf("fd/%d", fd)), but +// without using fmt.Sprintf to avoid unneeded overhead. +func ProcThreadSelfFd(fd uintptr) (string, ProcThreadSelfCloser) { + return ProcThreadSelf("fd/" + strconv.FormatUint(uint64(fd), 10)) +} diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go index 4e7717d53f..d1236ba721 100644 --- a/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go +++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go @@ -187,6 +187,10 @@ type Hook struct { type Hooks struct { // Prestart is Deprecated. Prestart is a list of hooks to be run before the container process is executed. // It is called in the Runtime Namespace + // + // Deprecated: use [Hooks.CreateRuntime], [Hooks.CreateContainer], and + // [Hooks.StartContainer] instead, which allow more granular hook control + // during the create and start phase. Prestart []Hook `json:"prestart,omitempty"` // CreateRuntime is a list of hooks to be run after the container has been created but before pivot_root or any equivalent operation has been called // It is called in the Runtime Namespace @@ -371,6 +375,12 @@ type LinuxMemory struct { // Total memory limit (memory + swap). Swap *int64 `json:"swap,omitempty"` // Kernel memory limit (in bytes). + // + // Deprecated: kernel-memory limits are not supported in cgroups v2, and + // were obsoleted in [kernel v5.4]. This field should no longer be used, + // as it may be ignored by runtimes. + // + // [kernel v5.4]: https://github.com/torvalds/linux/commit/0158115f702b0ba208ab0 Kernel *int64 `json:"kernel,omitempty"` // Kernel memory limit for tcp (in bytes) KernelTCP *int64 `json:"kernelTCP,omitempty"` diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go index b3fca349cb..503971e058 100644 --- a/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go +++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go @@ -6,7 +6,7 @@ const ( // VersionMajor is for an API incompatible changes VersionMajor = 1 // VersionMinor is for functionality in a backwards-compatible manner - VersionMinor = 1 + VersionMinor = 2 // VersionPatch is for backwards-compatible bug fixes VersionPatch = 0 diff --git a/vendor/github.com/sirupsen/logrus/README.md b/vendor/github.com/sirupsen/logrus/README.md index b042c896f2..d1d4a85fd7 100644 --- a/vendor/github.com/sirupsen/logrus/README.md +++ b/vendor/github.com/sirupsen/logrus/README.md @@ -9,7 +9,7 @@ the last thing you want from your Logging library (again...). This does not mean Logrus is dead. Logrus will continue to be maintained for security, (backwards compatible) bug fixes, and performance (where we are -limited by the interface). +limited by the interface). I believe Logrus' biggest contribution is to have played a part in today's widespread use of structured logging in Golang. 
@@ -43,7 +43,7 @@ plain text):
 With `log.SetFormatter(&log.JSONFormatter{})`, for easy parsing by logstash
 or Splunk:
 
-```json
+```text
 {"animal":"walrus","level":"info","msg":"A group of walrus emerges from the
 ocean","size":10,"time":"2014-03-10 19:57:38.562264131 -0400 EDT"}
 
@@ -99,7 +99,7 @@ time="2015-03-26T01:27:38-04:00" level=fatal method=github.com/sirupsen/arcticcr
 ```
 
 Note that this does add measurable overhead - the cost will depend on the version of Go, but is between 20 and 40% in recent tests with 1.6 and 1.7. You can validate this in your
-environment via benchmarks: 
+environment via benchmarks:
 ```
 go test -bench=.*CallerTracing
 ```
@@ -317,6 +317,8 @@ log.SetLevel(log.InfoLevel)
 It may be useful to set `log.Level = logrus.DebugLevel` in a debug or verbose
 environment if your application has that.
 
+Note: If you want different log levels for global (`log.SetLevel(...)`) and syslog logging, please check the [syslog hook README](hooks/syslog/README.md#different-log-levels-for-local-and-remote-logging).
+
 #### Entries
 
 Besides the fields added with `WithField` or `WithFields` some fields are
diff --git a/vendor/github.com/sirupsen/logrus/writer.go b/vendor/github.com/sirupsen/logrus/writer.go
index 72e8e3a1b6..074fd4b8bd 100644
--- a/vendor/github.com/sirupsen/logrus/writer.go
+++ b/vendor/github.com/sirupsen/logrus/writer.go
@@ -4,6 +4,7 @@ import (
 	"bufio"
 	"io"
 	"runtime"
+	"strings"
 )
 
 // Writer at INFO level. See WriterLevel for details.
@@ -20,15 +21,18 @@ func (logger *Logger) WriterLevel(level Level) *io.PipeWriter {
 	return NewEntry(logger).WriterLevel(level)
 }
 
+// Writer returns an io.Writer that writes to the logger at the info log level
 func (entry *Entry) Writer() *io.PipeWriter {
 	return entry.WriterLevel(InfoLevel)
 }
 
+// WriterLevel returns an io.Writer that writes to the logger at the given log level
 func (entry *Entry) WriterLevel(level Level) *io.PipeWriter {
 	reader, writer := io.Pipe()
 
 	var printFunc func(args ...interface{})
 
+	// Determine which log function to use based on the specified log level
 	switch level {
 	case TraceLevel:
 		printFunc = entry.Trace
@@ -48,23 +52,51 @@ func (entry *Entry) WriterLevel(level Level) *io.PipeWriter {
 		printFunc = entry.Print
 	}
 
+	// Start a new goroutine to scan the input and write it to the logger using the specified print function.
+	// It splits the input into chunks of up to 64KB to avoid buffer overflows.
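+	// (bufio.Scanner fails with ErrTooLong once a single token outgrows its
+	// buffer, so without the chunked split function below a long unbroken
+	// write would stop the pipe from ever being drained.)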
 	go entry.writerScanner(reader, printFunc)
+
+	// Set a finalizer function to close the writer when it is garbage collected
 	runtime.SetFinalizer(writer, writerFinalizer)
 
 	return writer
 }
 
+// writerScanner scans the input from the reader and writes it to the logger
 func (entry *Entry) writerScanner(reader *io.PipeReader, printFunc func(args ...interface{})) {
 	scanner := bufio.NewScanner(reader)
+
+	// Set the buffer size to the maximum token size to avoid buffer overflows
+	scanner.Buffer(make([]byte, bufio.MaxScanTokenSize), bufio.MaxScanTokenSize)
+
+	// Define a split function to split the input into chunks of up to 64KB
+	chunkSize := bufio.MaxScanTokenSize // 64KB
+	splitFunc := func(data []byte, atEOF bool) (int, []byte, error) {
+		if len(data) >= chunkSize {
+			return chunkSize, data[:chunkSize], nil
+		}
+
+		return bufio.ScanLines(data, atEOF)
+	}
+
+	// Use the custom split function to split the input
+	scanner.Split(splitFunc)
+
+	// Scan the input and write it to the logger using the specified print function
 	for scanner.Scan() {
-		printFunc(scanner.Text())
+		printFunc(strings.TrimRight(scanner.Text(), "\r\n"))
 	}
+
+	// If there was an error while scanning the input, log an error
 	if err := scanner.Err(); err != nil {
 		entry.Errorf("Error while reading from Writer: %s", err)
 	}
+
+	// Close the reader when we are done
 	reader.Close()
 }
 
+// writerFinalizer is a finalizer function that closes the given writer when it is garbage collected
 func writerFinalizer(writer *io.PipeWriter) {
 	writer.Close()
 }
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 0fe0b16b89..78377da212 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -14,9 +14,6 @@ github.com/NVIDIA/go-nvml/pkg/nvml
 # github.com/StackExchange/wmi v1.2.1
 ## explicit; go 1.13
 github.com/StackExchange/wmi
-# github.com/aquasecurity/libbpfgo v0.6.0-libbpf-1.3
-## explicit; go 1.18
-github.com/aquasecurity/libbpfgo
 # github.com/beorn7/perks v1.0.1
 ## explicit; go 1.11
 github.com/beorn7/perks/quantile
@@ -26,15 +23,20 @@ github.com/bits-and-blooms/bitset
 # github.com/cespare/xxhash/v2 v2.2.0
 ## explicit; go 1.11
 github.com/cespare/xxhash/v2
-# github.com/cilium/ebpf v0.9.1
-## explicit; go 1.17
+# github.com/cilium/ebpf v0.15.0
+## explicit; go 1.21.0
 github.com/cilium/ebpf
 github.com/cilium/ebpf/asm
 github.com/cilium/ebpf/btf
 github.com/cilium/ebpf/internal
+github.com/cilium/ebpf/internal/kallsyms
+github.com/cilium/ebpf/internal/kconfig
 github.com/cilium/ebpf/internal/sys
+github.com/cilium/ebpf/internal/sysenc
+github.com/cilium/ebpf/internal/tracefs
 github.com/cilium/ebpf/internal/unix
 github.com/cilium/ebpf/link
+github.com/cilium/ebpf/rlimit
 # github.com/containerd/cgroups v1.1.0
 ## explicit; go 1.17
 github.com/containerd/cgroups
@@ -84,7 +86,7 @@ github.com/go-openapi/swag
 # github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572
 ## explicit; go 1.13
 github.com/go-task/slim-sprig
-# github.com/godbus/dbus/v5 v5.0.6
+# github.com/godbus/dbus/v5 v5.1.0
 ## explicit; go 1.12
 github.com/godbus/dbus/v5
 # github.com/gogo/protobuf v1.3.2
@@ -185,7 +187,7 @@ github.com/mitchellh/go-homedir
 ## explicit; go 1.13
 github.com/moby/spdystream
 github.com/moby/spdystream/spdy
-# github.com/moby/sys/mountinfo v0.5.0
+# github.com/moby/sys/mountinfo v0.7.1
 ## explicit; go 1.16
 github.com/moby/sys/mountinfo
 # github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd
@@ -231,20 +233,16 @@ github.com/onsi/gomega/matchers/support/goraph/edge
 github.com/onsi/gomega/matchers/support/goraph/node
 github.com/onsi/gomega/matchers/support/goraph/util
 github.com/onsi/gomega/types
-# github.com/opencontainers/runc v1.1.12
-## explicit; go 1.17
+# github.com/opencontainers/runc v1.2.0-rc.1
+## explicit; go 1.20
 github.com/opencontainers/runc/libcontainer/cgroups
-github.com/opencontainers/runc/libcontainer/cgroups/devices
-github.com/opencontainers/runc/libcontainer/cgroups/ebpf
-github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter
 github.com/opencontainers/runc/libcontainer/cgroups/fs2
 github.com/opencontainers/runc/libcontainer/cgroups/fscommon
 github.com/opencontainers/runc/libcontainer/configs
 github.com/opencontainers/runc/libcontainer/devices
-github.com/opencontainers/runc/libcontainer/user
 github.com/opencontainers/runc/libcontainer/userns
 github.com/opencontainers/runc/libcontainer/utils
-# github.com/opencontainers/runtime-spec v1.1.0
+# github.com/opencontainers/runtime-spec v1.2.0
 ## explicit
 github.com/opencontainers/runtime-spec/specs-go
 # github.com/pkg/errors v0.9.1
@@ -282,7 +280,7 @@ github.com/prometheus/prometheus/model/labels
 github.com/prometheus/prometheus/model/textparse
 github.com/prometheus/prometheus/model/value
 github.com/prometheus/prometheus/prompb/io/prometheus/client
-# github.com/sirupsen/logrus v1.9.0
+# github.com/sirupsen/logrus v1.9.3
 ## explicit; go 1.13
 github.com/sirupsen/logrus
 # github.com/spf13/pflag v1.0.5