diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000000000..8805283a42271 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,22 @@ +name: "lint" + +on: + pull_request: + push: + branches: + - master + +jobs: + shellcheck: + # This workflow gets injected into other Linux repositories, but we don't + # want it to run there. + if: ${{ github.repository == 'kernel-patches/vmtest' }} + name: ShellCheck + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + - name: Run ShellCheck + uses: ludeeus/action-shellcheck@master + env: + SHELLCHECK_OPTS: --severity=warning --exclude=SC1091 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000000000..2b3d642ee7c96 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,356 @@ +name: bpf-ci + +on: + pull_request: + push: + branches: + - bpf_base + - bpf-next_base + +concurrency: + group: ci-test-${{ github.ref_name }} + cancel-in-progress: true + +jobs: + set-matrix: + runs-on: ubuntu-latest + outputs: + build-matrix: ${{ steps.set-matrix-impl.outputs.build_matrix }} + test-matrix: ${{ steps.set-matrix-impl.outputs.test_matrix }} + steps: + - id: set-matrix-impl + shell: python3 -I {0} + run: | + from json import dumps + from enum import Enum + import os + + class Arch(Enum): + """ + CPU architecture supported by CI. 
+ """ + aarch64 = "aarch64" + s390x = "s390x" + x86_64 = "x86_64" + + def set_output(name, value): + """Write an output variable to the GitHub output file.""" + with open(os.getenv("GITHUB_OUTPUT"), "a") as f: + f.write(f"{name}={value}\n") + + def generate_test_config(test): + """Create the configuration for the provided test.""" + experimental = test.endswith("_parallel") + config = { + "test": test, + "continue_on_error": experimental, + # While in experimental mode, parallel jobs may get stuck + # anywhere, including in user space where the kernel won't detect + # a problem and panic. We add a second layer of (smaller) timeouts + # here such that if we get stuck in a parallel run, we hit this + # timeout and fail without affecting the overall job success (as + # would be the case if we hit the job-wide timeout). For + # non-experimental jobs, 360 is the default which will be + # superseded by the overall workflow timeout (but we need to + # specify something). + "timeout_minutes": 30 if experimental else 360, + } + return config + + matrix = [ + {"kernel": "LATEST", "runs_on": [], "arch": Arch.x86_64.value, "toolchain": "gcc", "llvm-version": "16"}, + {"kernel": "LATEST", "runs_on": [], "arch": Arch.x86_64.value, "toolchain": "llvm", "llvm-version": "15"}, + {"kernel": "LATEST", "runs_on": [], "arch": Arch.x86_64.value, "toolchain": "llvm", "llvm-version": "16"}, + {"kernel": "LATEST", "runs_on": [], "arch": Arch.aarch64.value, "toolchain": "gcc", "llvm-version": "16"}, + {"kernel": "LATEST", "runs_on": [], "arch": Arch.aarch64.value, "toolchain": "llvm", "llvm-version": "15"}, + {"kernel": "LATEST", "runs_on": [], "arch": Arch.aarch64.value, "toolchain": "llvm", "llvm-version": "16"}, + {"kernel": "LATEST", "runs_on": [], "arch": Arch.s390x.value, "toolchain": "gcc", "llvm-version": "16", "parallel_tests": False}, + ] + self_hosted_repos = [ + "kernel-patches/bpf", + "kernel-patches/vmtest", + ] + + for idx in range(len(matrix) - 1, -1, -1): + if 
+ matrix[idx]['toolchain'] == 'gcc': + matrix[idx]['toolchain_full'] = 'gcc' + else: + matrix[idx]['toolchain_full'] = 'llvm-' + matrix[idx]['llvm-version'] + # Only a few repositories within "kernel-patches" use self-hosted runners. + if "${{ github.repository_owner }}" != "kernel-patches" or "${{ github.repository }}" not in self_hosted_repos: + # Outside of those repositories, we only run on x86_64 GH hosted runners (ubuntu-latest) + for idx in range(len(matrix) - 1, -1, -1): + if matrix[idx]["arch"] != Arch.x86_64.value: + del matrix[idx] + else: + matrix[idx]["runs_on"] = ["ubuntu-latest"] + else: + # Otherwise, run on (self-hosted, arch) runners + for idx in range(len(matrix) - 1, -1, -1): + matrix[idx]["runs_on"].extend(["self-hosted", matrix[idx]["arch"]]) + + build_matrix = {"include": matrix} + set_output("build_matrix", dumps(build_matrix)) + + def get_tests(config): + tests = [ + "test_progs", + "test_progs_parallel", + "test_progs_no_alu32", + "test_progs_no_alu32_parallel", + "test_maps", + "test_verifier", + ] + if config.get("parallel_tests", True): + return tests + return [test for test in tests if not test.endswith("parallel") ] + + test_matrix = {"include": [{**config, **generate_test_config(test)} + for config in matrix + for test in get_tests(config) + ]} + set_output("test_matrix", dumps(test_matrix)) + build: + name: build for ${{ matrix.arch }} with ${{ matrix.toolchain_full }} + needs: set-matrix + runs-on: ${{ matrix.runs_on }} + timeout-minutes: 100 + strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.set-matrix.outputs.build-matrix) }} + env: + KERNEL: ${{ matrix.kernel }} + REPO_ROOT: ${{ github.workspace }} + REPO_PATH: "" + KBUILD_OUTPUT: kbuild-output/ + steps: + - uses: actions/checkout@v3 + # We fetch an actual bit of history here to facilitate incremental + # builds (which may check out some earlier upstream change). 
+ with: + fetch-depth: 50 + - if: ${{ github.repository == 'kernel-patches/vmtest' }} + name: Download bpf-next tree + uses: libbpf/ci/get-linux-source@master + with: + dest: '.kernel' + - if: ${{ github.repository == 'kernel-patches/vmtest' }} + name: Move linux source in place + shell: bash + run: | + rm -rf .kernel/.git + cp -rf .kernel/. . + rm -rf .kernel + - name: Get commit meta-data + id: get-commit-metadata + shell: bash + run: | + if [ ${{ github.event_name }} = 'push' ]; then + branch="${{ github.ref_name }}" + echo "branch=${branch}" >> "${GITHUB_OUTPUT}" + else + branch="${{ github.base_ref }}" + echo "branch=${branch}" >> "${GITHUB_OUTPUT}" + fi + + upstream=$(echo "${branch}" | sed 's@_base$@@') + commit="$( + git rev-parse "origin/${upstream}" &> /dev/null \ + || ( + git fetch --quiet --prune --no-tags --depth=1 --no-recurse-submodules origin +refs/heads/${upstream}:refs/remotes/origin/${upstream} \ + && git rev-parse "origin/${upstream}" + ) + )" + + echo "timestamp=$(TZ=utc git show --format='%cd' --no-patch --date=iso-strict-local ${commit})" >> "${GITHUB_OUTPUT}" + echo "commit=${commit}" >> "${GITHUB_OUTPUT}" + echo "Most recent upstream commit is ${commit}" + - name: Pull recent KBUILD_OUTPUT contents + uses: actions/cache@v3 + with: + path: ${{ env.KBUILD_OUTPUT }} + key: kbuild-output-${{ matrix.arch }}-${{ matrix.toolchain_full }}-${{ steps.get-commit-metadata.outputs.branch }}-${{ steps.get-commit-metadata.outputs.timestamp }}-${{ steps.get-commit-metadata.outputs.commit }} + restore-keys: | + kbuild-output-${{ matrix.arch }}-${{ matrix.toolchain_full }}-${{ steps.get-commit-metadata.outputs.branch }}-${{ steps.get-commit-metadata.outputs.timestamp }}- + kbuild-output-${{ matrix.arch }}-${{ matrix.toolchain_full }}-${{ steps.get-commit-metadata.outputs.branch }}- + kbuild-output-${{ matrix.arch }}-${{ matrix.toolchain_full }}- + - name: Prepare incremental build + shell: bash + run: | + set -e -u + + # $1 - the SHA-1 to fetch and check out 
+ fetch_and_checkout() { + local build_base_sha="${1}" + + # If cached artifacts became stale for one reason or another, we + # may not have the build base SHA available. Fetch it and retry. + git fetch origin "${build_base_sha}" && git checkout --quiet "${build_base_sha}" + } + + # $1 - value of KBUILD_OUTPUT + clear_cache_artifacts() { + local kbuild_output="${1}" + echo "Unable to find earlier upstream ref. Discarding KBUILD_OUTPUT contents..." + rm --recursive --force "${kbuild_output}" + mkdir "${kbuild_output}" + false + } + + # $1 - value of KBUILD_OUTPUT + # $2 - current time in ISO 8601 format + restore_source_code_times() { + local kbuild_output="${1}" + local current_time="${2}" + local src_time="$(date --iso-8601=ns --date="${current_time} - 2 minutes")" + local obj_time="$(date --iso-8601=ns --date="${current_time} - 1 minute")" + + git ls-files | xargs --max-args=10000 touch -m --no-create --date="${src_time}" + find "${kbuild_output}" -type f | xargs --max-args=10000 touch -m --no-create --date="${obj_time}" + git checkout --quiet - + echo "Adjusted src and obj time stamps relative to system time" + } + + mkdir --parents "${KBUILD_OUTPUT}" + current_time="$(date --iso-8601=ns)" + + if [ -f "${KBUILD_OUTPUT}/.build-base-sha" ]; then + build_base_sha="$(cat "${KBUILD_OUTPUT}/.build-base-sha")" + echo "Setting up base build state for ${build_base_sha}" + + ( + git checkout --quiet "${build_base_sha}" \ + || fetch_and_checkout "${build_base_sha}" \ + || clear_cache_artifacts "${KBUILD_OUTPUT}" + ) && restore_source_code_times "${KBUILD_OUTPUT}" "${current_time}" + else + echo "No previous build data found" + fi + + echo -n "${{ steps.get-commit-metadata.outputs.commit }}" > "${KBUILD_OUTPUT}/.build-base-sha" + - uses: libbpf/ci/patch-kernel@master + with: + patches-root: '${{ github.workspace }}/ci/diffs' + repo-root: '${{ github.workspace }}' + - name: Setup build environment + uses: libbpf/ci/setup-build-env@llvm-version + with: + llvm-version: ${{ 
matrix.llvm-version }} + - name: Build kernel image + uses: libbpf/ci/build-linux@llvm-version + with: + arch: ${{ matrix.arch }} + toolchain: ${{ matrix.toolchain }} + kbuild-output: ${{ env.KBUILD_OUTPUT }} + max-make-jobs: 32 + llvm-version: ${{ matrix.llvm-version }} + - if: ${{ github.event_name != 'push' }} + name: Build selftests + uses: libbpf/ci/build-selftests@llvm-version + with: + toolchain: ${{ matrix.toolchain }} + kbuild-output: ${{ env.KBUILD_OUTPUT }} + max-make-jobs: 32 + llvm-version: ${{ matrix.llvm-version }} + - if: ${{ github.event_name != 'push' }} + name: Build samples + uses: libbpf/ci/build-samples@llvm-version + with: + toolchain: ${{ matrix.toolchain }} + kbuild-output: ${{ env.KBUILD_OUTPUT }} + max-make-jobs: 32 + llvm-version: ${{ matrix.llvm-version }} + - if: ${{ github.event_name != 'push' }} + name: Tar artifacts + run: | + # Remove intermediate object files that we have no use for. Ideally + # we'd just exclude them from tar below, but it does not provide + # options to express the precise constraints. + find selftests/ -name "*.o" -a ! -name "*.bpf.o" -print0 | \ + xargs --null --max-args=10000 rm + + # Strip debug information, which is excessively large (consuming + # bandwidth) while not actually being used (the kernel does not use + # DWARF to symbolize stacktraces). + strip --strip-debug "${KBUILD_OUTPUT}"/vmlinux + + file_list="" + if [ "${{ github.repository }}" == "kernel-patches/vmtest" ]; then + # Package up a bunch of additional infrastructure to support running + # 'make kernelrelease' and bpf tool checks later on. + file_list="$(find . -iname Makefile | xargs) \ + scripts/ \ + tools/testing/selftests/bpf/ \ + tools/include/ \ + tools/bpf/bpftool/"; + fi + # zstd is installed by default in the runner images. 
+ tar -cf - \ + "${KBUILD_OUTPUT}"/.config \ + "${KBUILD_OUTPUT}"/$(KBUILD_OUTPUT="${KBUILD_OUTPUT}" make -s image_name) \ + "${KBUILD_OUTPUT}"/include/config/auto.conf \ + "${KBUILD_OUTPUT}"/include/generated/autoconf.h \ + "${KBUILD_OUTPUT}"/vmlinux \ + ${file_list} \ + --exclude '*.cmd' \ + --exclude '*.d' \ + --exclude '*.h' \ + --exclude '*.output' \ + selftests/bpf/ | zstd -T0 -19 -o vmlinux-${{ matrix.arch }}-${{ matrix.toolchain_full }}.tar.zst + - if: ${{ github.event_name != 'push' }} + name: Remove KBUILD_OUTPUT contents + shell: bash + run: | + # Remove $KBUILD_OUTPUT to prevent cache creation for pull requests. + # Only on pushed changes are build artifacts actually cached, because + # of github.com/actions/cache's cache isolation logic. + rm -rf "${KBUILD_OUTPUT}" + - if: ${{ github.event_name != 'push' }} + uses: actions/upload-artifact@v3 + with: + name: vmlinux-${{ matrix.arch }}-${{ matrix.toolchain_full }} + if-no-files-found: error + path: vmlinux-${{ matrix.arch }}-${{ matrix.toolchain_full }}.tar.zst + test: + if: ${{ github.event_name != 'push' }} + name: ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain_full }} + needs: [set-matrix, build] + strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.set-matrix.outputs.test-matrix) }} + runs-on: ${{ matrix.runs_on }} + timeout-minutes: 100 + env: + KERNEL: ${{ matrix.kernel }} + REPO_ROOT: ${{ github.workspace }} + REPO_PATH: "" + KBUILD_OUTPUT: kbuild-output/ + steps: + - uses: actions/checkout@v3 + - uses: actions/download-artifact@v3 + with: + name: vmlinux-${{ matrix.arch }}-${{ matrix.toolchain_full }} + path: . + - name: Untar artifacts + # zstd is installed by default in the runner images. + run: zstd -d -T0 vmlinux-${{ matrix.arch }}-${{ matrix.toolchain_full }}.tar.zst --stdout | tar -xf - + - name: Prepare rootfs + uses: libbpf/ci/prepare-rootfs@master + with: + project-name: 'libbpf' + arch: ${{ matrix.arch }} + kernel: ${{ matrix.kernel }} + kernel-root: '.' 
+ kbuild-output: ${{ env.KBUILD_OUTPUT }} + image-output: '/tmp/root.img' + test: ${{ matrix.test }} + - name: Run selftests + uses: libbpf/ci/run-qemu@master + continue-on-error: ${{ matrix.continue_on_error }} + timeout-minutes: ${{ matrix.timeout_minutes }} + with: + arch: ${{ matrix.arch}} + img: '/tmp/root.img' + vmlinuz: '${{ github.workspace }}/vmlinuz' + kernel-root: '.' + max-cpu: 8 diff --git a/README b/README index 669ac7c322927..e69de29bb2d1d 100644 --- a/README +++ b/README @@ -1,18 +0,0 @@ -Linux kernel -============ - -There are several guides for kernel developers and users. These guides can -be rendered in a number of formats, like HTML and PDF. Please read -Documentation/admin-guide/README.rst first. - -In order to build the documentation, use ``make htmldocs`` or -``make pdfdocs``. The formatted documentation can also be read online at: - - https://www.kernel.org/doc/html/latest/ - -There are various text files in the Documentation/ subdirectory, -several of them using the Restructured Text markup notation. - -Please read the Documentation/process/changes.rst file, as it contains the -requirements for building and running the kernel, and information about -the problems which may result by upgrading your kernel. diff --git a/ci/diffs/.keep b/ci/diffs/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/ci/diffs/0001-Revert-arch-fix-broken-BuildID-for-arm64-and-riscv.patch b/ci/diffs/0001-Revert-arch-fix-broken-BuildID-for-arm64-and-riscv.patch new file mode 100644 index 0000000000000..3d8ea87a1dbda --- /dev/null +++ b/ci/diffs/0001-Revert-arch-fix-broken-BuildID-for-arm64-and-riscv.patch @@ -0,0 +1,30 @@ +From cb50dac513235c6996b9d26f959886ba1d7be607 Mon Sep 17 00:00:00 2001 +From: Eduard Zingerman +Date: Fri, 6 Jan 2023 13:59:26 +0200 +Subject: [PATCH] Revert "arch: fix broken BuildID for arm64 and riscv" + +This reverts commit 99cb0d917ffa1ab628bb67364ca9b162c07699b1. 
+--- + include/asm-generic/vmlinux.lds.h | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index 659bf3b31c91..a94219e9916f 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -891,12 +891,7 @@ + #define PRINTK_INDEX + #endif + +-/* +- * Discard .note.GNU-stack, which is emitted as PROGBITS by the compiler. +- * Otherwise, the type of .notes section would become PROGBITS instead of NOTES. +- */ + #define NOTES \ +- /DISCARD/ : { *(.note.GNU-stack) } \ + .notes : AT(ADDR(.notes) - LOAD_OFFSET) { \ + BOUNDED_SECTION_BY(.note.*, _notes) \ + } NOTES_HEADERS \ +-- +2.39.0 + diff --git a/ci/diffs/0001-bpf-Add-missing-btf_put-to-register_btf_id_dtor_kfun.patch b/ci/diffs/0001-bpf-Add-missing-btf_put-to-register_btf_id_dtor_kfun.patch new file mode 100644 index 0000000000000..4fcc0146effc3 --- /dev/null +++ b/ci/diffs/0001-bpf-Add-missing-btf_put-to-register_btf_id_dtor_kfun.patch @@ -0,0 +1,41 @@ +From 74bc3a5acc82f020d2e126f56c535d02d1e74e37 Mon Sep 17 00:00:00 2001 +From: Jiri Olsa +Date: Fri, 20 Jan 2023 13:21:48 +0100 +Subject: [PATCH] bpf: Add missing btf_put to register_btf_id_dtor_kfuncs + +We take the BTF reference before we register dtors and we need +to put it back when it's done. + +We probably won't see a problem with kernel BTF, but module BTF +would stay loaded (because of the extra ref) even when its module +is removed. 
+ +Cc: Kumar Kartikeya Dwivedi +Fixes: 5ce937d613a4 ("bpf: Populate pairs of btf_id and destructor kfunc in btf") +Acked-by: Kumar Kartikeya Dwivedi +Signed-off-by: Jiri Olsa +Link: https://lore.kernel.org/r/20230120122148.1522359-1-jolsa@kernel.org +Signed-off-by: Alexei Starovoitov +--- + kernel/bpf/btf.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c +index f7dd8af06413..b7017cae6fd1 100644 +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -7782,9 +7782,9 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c + + sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL); + +- return 0; + end: +- btf_free_dtor_kfunc_tab(btf); ++ if (ret) ++ btf_free_dtor_kfunc_tab(btf); + btf_put(btf); + return ret; + } +-- +2.39.1 + diff --git a/ci/diffs/0001-bpf-Include-missing-nospec.h-to-avoid-build-error.patch b/ci/diffs/0001-bpf-Include-missing-nospec.h-to-avoid-build-error.patch new file mode 100644 index 0000000000000..669bde57d04f0 --- /dev/null +++ b/ci/diffs/0001-bpf-Include-missing-nospec.h-to-avoid-build-error.patch @@ -0,0 +1,45 @@ +From 345d24a91c79f408e355c8b7e873ccde0f097eea Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Wed, 22 Feb 2023 10:50:48 +0800 +Subject: [PATCH] bpf: Include missing nospec.h to avoid build error. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Commit 74e19ef0ff80 ("uaccess: Add speculation barrier to copy_from_user()") +defines a default barrier_nospec() and removes the +such a build error: + + CC kernel/bpf/core.o +kernel/bpf/core.c: In function ‘___bpf_prog_run’: +kernel/bpf/core.c:1913:3: error: implicit declaration of function ‘barrier_nospec’; did you mean ‘barrier_data’? [-Werror=implicit-function-declaration] + barrier_nospec(); + ^~~~~~~~~~~~~~ + barrier_data +cc1: some warnings being treated as errors + +So include nospec.h to avoid the build error. 
+ +Fixes: 74e19ef0ff80 ("uaccess: Add speculation barrier to copy_from_user()") +Signed-off-by: Huacai Chen +Link: https://lore.kernel.org/r/20230222025048.3677315-1-chenhuacai@loongson.cn +Signed-off-by: Alexei Starovoitov +--- + kernel/bpf/core.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c +index 933869983e2a..b297e9f60ca1 100644 +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include + #include + #include + +-- +2.30.2 + diff --git a/ci/diffs/0001-bpftool-Fix-NULL-pointer-dereference-when-pin-PROG-M.patch b/ci/diffs/0001-bpftool-Fix-NULL-pointer-dereference-when-pin-PROG-M.patch new file mode 100644 index 0000000000000..bfb7de10b4793 --- /dev/null +++ b/ci/diffs/0001-bpftool-Fix-NULL-pointer-dereference-when-pin-PROG-M.patch @@ -0,0 +1,45 @@ +From 0dd340f3549863e1289a872057743c9a177d1e3f Mon Sep 17 00:00:00 2001 +From: Pu Lehui +Date: Wed, 2 Nov 2022 16:40:34 +0800 +Subject: [PATCH 1/2] bpftool: Fix NULL pointer dereference when pin {PROG, + MAP, LINK} without FILE + +When using bpftool to pin {PROG, MAP, LINK} without FILE, +segmentation fault will occur. The reason is that the lack +of FILE will cause strlen to trigger NULL pointer dereference. +The corresponding stacktrace is shown below: + +do_pin + do_pin_any + do_pin_fd + mount_bpffs_for_pin + strlen(name) <- NULL pointer dereference + +Fix it by adding validation to the common process. 
+ +Fixes: 75a1e792c335 ("tools: bpftool: Allow all prog/map handles for pinning objects") +Signed-off-by: Pu Lehui +Signed-off-by: Daniel Borkmann +Reviewed-by: Quentin Monnet +Link: https://lore.kernel.org/bpf/20221102084034.3342995-1-pulehui@huaweicloud.com +--- + tools/bpf/bpftool/common.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c +index e4d33bc8bbbf..653c130a0aaa 100644 +--- a/tools/bpf/bpftool/common.c ++++ b/tools/bpf/bpftool/common.c +@@ -302,6 +302,9 @@ int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***)) + int err; + int fd; + ++ if (!REQ_ARGS(3)) ++ return -EINVAL; ++ + fd = get_fd(&argc, &argv); + if (fd < 0) + return fd; +-- +2.30.2 + diff --git a/ci/diffs/0001-selftests-bpf-Add-config.aarch64.patch b/ci/diffs/0001-selftests-bpf-Add-config.aarch64.patch new file mode 100644 index 0000000000000..1797384c1b5c8 --- /dev/null +++ b/ci/diffs/0001-selftests-bpf-Add-config.aarch64.patch @@ -0,0 +1,207 @@ +From ec99451f0a488e50aaf0ce467db8771411edc407 Mon Sep 17 00:00:00 2001 +From: Manu Bretelle +Date: Fri, 21 Oct 2022 14:06:59 -0700 +Subject: [PATCH] selftests/bpf: Add config.aarch64 + +config.aarch64, similarly to config.{s390x,x86_64} is a config enabling +building a kernel on aarch64 to be used in bpf's +selftests/kernel-patches CI. 
+ +Signed-off-by: Manu Bretelle +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20221021210701.728135-3-chantr4@gmail.com +--- + tools/testing/selftests/bpf/config.aarch64 | 181 +++++++++++++++++++++ + 1 file changed, 181 insertions(+) + create mode 100644 tools/testing/selftests/bpf/config.aarch64 + +diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 +new file mode 100644 +index 000000000000..1f0437644186 +--- /dev/null ++++ b/tools/testing/selftests/bpf/config.aarch64 +@@ -0,0 +1,181 @@ ++CONFIG_9P_FS=y ++CONFIG_ARCH_VEXPRESS=y ++CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y ++CONFIG_ARM_SMMU_V3=y ++CONFIG_ATA=y ++CONFIG_AUDIT=y ++CONFIG_BINFMT_MISC=y ++CONFIG_BLK_CGROUP=y ++CONFIG_BLK_DEV_BSGLIB=y ++CONFIG_BLK_DEV_INITRD=y ++CONFIG_BLK_DEV_IO_TRACE=y ++CONFIG_BLK_DEV_RAM=y ++CONFIG_BLK_DEV_SD=y ++CONFIG_BONDING=y ++CONFIG_BPFILTER=y ++CONFIG_BPF_JIT_ALWAYS_ON=y ++CONFIG_BPF_JIT_DEFAULT_ON=y ++CONFIG_BPF_PRELOAD_UMD=y ++CONFIG_BPF_PRELOAD=y ++CONFIG_BRIDGE=m ++CONFIG_CGROUP_CPUACCT=y ++CONFIG_CGROUP_DEVICE=y ++CONFIG_CGROUP_FREEZER=y ++CONFIG_CGROUP_HUGETLB=y ++CONFIG_CGROUP_NET_CLASSID=y ++CONFIG_CGROUP_PERF=y ++CONFIG_CGROUP_PIDS=y ++CONFIG_CGROUP_SCHED=y ++CONFIG_CGROUPS=y ++CONFIG_CHECKPOINT_RESTORE=y ++CONFIG_CHR_DEV_SG=y ++CONFIG_COMPAT=y ++CONFIG_CPUSETS=y ++CONFIG_CRASH_DUMP=y ++CONFIG_CRYPTO_USER_API_RNG=y ++CONFIG_CRYPTO_USER_API_SKCIPHER=y ++CONFIG_DEBUG_ATOMIC_SLEEP=y ++CONFIG_DEBUG_INFO_BTF=y ++CONFIG_DEBUG_INFO_DWARF4=y ++CONFIG_DEBUG_LIST=y ++CONFIG_DEBUG_LOCKDEP=y ++CONFIG_DEBUG_NOTIFIERS=y ++CONFIG_DEBUG_PAGEALLOC=y ++CONFIG_DEBUG_SECTION_MISMATCH=y ++CONFIG_DEBUG_SG=y ++CONFIG_DETECT_HUNG_TASK=y ++CONFIG_DEVTMPFS_MOUNT=y ++CONFIG_DEVTMPFS=y ++CONFIG_DRM_VIRTIO_GPU=y ++CONFIG_DRM=y ++CONFIG_DUMMY=y ++CONFIG_EXPERT=y ++CONFIG_EXT4_FS_POSIX_ACL=y ++CONFIG_EXT4_FS_SECURITY=y ++CONFIG_EXT4_FS=y ++CONFIG_FANOTIFY=y ++CONFIG_FB=y ++CONFIG_FUNCTION_PROFILER=y ++CONFIG_FUSE_FS=y 
++CONFIG_FW_CFG_SYSFS_CMDLINE=y ++CONFIG_FW_CFG_SYSFS=y ++CONFIG_GDB_SCRIPTS=y ++CONFIG_HAVE_EBPF_JIT=y ++CONFIG_HAVE_KPROBES_ON_FTRACE=y ++CONFIG_HAVE_KPROBES=y ++CONFIG_HAVE_KRETPROBES=y ++CONFIG_HEADERS_INSTALL=y ++CONFIG_HIGH_RES_TIMERS=y ++CONFIG_HUGETLBFS=y ++CONFIG_HW_RANDOM_VIRTIO=y ++CONFIG_HW_RANDOM=y ++CONFIG_HZ_100=y ++CONFIG_IDLE_PAGE_TRACKING=y ++CONFIG_IKHEADERS=y ++CONFIG_INET6_ESP=y ++CONFIG_INET_ESP=y ++CONFIG_INET=y ++CONFIG_INPUT_EVDEV=y ++CONFIG_IP_ADVANCED_ROUTER=y ++CONFIG_IP_MULTICAST=y ++CONFIG_IP_MULTIPLE_TABLES=y ++CONFIG_IP_NF_IPTABLES=y ++CONFIG_IPV6_SEG6_LWTUNNEL=y ++CONFIG_IPVLAN=y ++CONFIG_JUMP_LABEL=y ++CONFIG_KERNEL_UNCOMPRESSED=y ++CONFIG_KPROBES_ON_FTRACE=y ++CONFIG_KPROBES=y ++CONFIG_KRETPROBES=y ++CONFIG_KSM=y ++CONFIG_LATENCYTOP=y ++CONFIG_LIVEPATCH=y ++CONFIG_LOCK_STAT=y ++CONFIG_MACVLAN=y ++CONFIG_MACVTAP=y ++CONFIG_MAGIC_SYSRQ=y ++CONFIG_MAILBOX=y ++CONFIG_MEMCG=y ++CONFIG_MEMORY_HOTPLUG=y ++CONFIG_MEMORY_HOTREMOVE=y ++CONFIG_NAMESPACES=y ++CONFIG_NET_9P_VIRTIO=y ++CONFIG_NET_9P=y ++CONFIG_NET_ACT_BPF=y ++CONFIG_NET_ACT_GACT=y ++CONFIG_NETDEVICES=y ++CONFIG_NETFILTER_XT_MATCH_BPF=y ++CONFIG_NETFILTER_XT_TARGET_MARK=y ++CONFIG_NET_KEY=y ++CONFIG_NET_SCH_FQ=y ++CONFIG_NET_VRF=y ++CONFIG_NET=y ++CONFIG_NF_TABLES=y ++CONFIG_NLMON=y ++CONFIG_NO_HZ_IDLE=y ++CONFIG_NR_CPUS=256 ++CONFIG_NUMA=y ++CONFIG_OVERLAY_FS=y ++CONFIG_PACKET_DIAG=y ++CONFIG_PACKET=y ++CONFIG_PANIC_ON_OOPS=y ++CONFIG_PARTITION_ADVANCED=y ++CONFIG_PCI_HOST_GENERIC=y ++CONFIG_PCI=y ++CONFIG_PL320_MBOX=y ++CONFIG_POSIX_MQUEUE=y ++CONFIG_PROC_KCORE=y ++CONFIG_PROFILING=y ++CONFIG_PROVE_LOCKING=y ++CONFIG_PTDUMP_DEBUGFS=y ++CONFIG_RC_DEVICES=y ++CONFIG_RC_LOOPBACK=y ++CONFIG_RTC_CLASS=y ++CONFIG_RTC_DRV_PL031=y ++CONFIG_RT_GROUP_SCHED=y ++CONFIG_SAMPLE_SECCOMP=y ++CONFIG_SAMPLES=y ++CONFIG_SCHED_AUTOGROUP=y ++CONFIG_SCHED_TRACER=y ++CONFIG_SCSI_CONSTANTS=y ++CONFIG_SCSI_LOGGING=y ++CONFIG_SCSI_SCAN_ASYNC=y ++CONFIG_SCSI_VIRTIO=y ++CONFIG_SCSI=y 
++CONFIG_SECURITY_NETWORK=y ++CONFIG_SERIAL_AMBA_PL011_CONSOLE=y ++CONFIG_SERIAL_AMBA_PL011=y ++CONFIG_STACK_TRACER=y ++CONFIG_STATIC_KEYS_SELFTEST=y ++CONFIG_SYSVIPC=y ++CONFIG_TASK_DELAY_ACCT=y ++CONFIG_TASK_IO_ACCOUNTING=y ++CONFIG_TASKSTATS=y ++CONFIG_TASK_XACCT=y ++CONFIG_TCG_TIS=y ++CONFIG_TCG_TPM=y ++CONFIG_TCP_CONG_ADVANCED=y ++CONFIG_TCP_CONG_DCTCP=y ++CONFIG_TLS=y ++CONFIG_TMPFS_POSIX_ACL=y ++CONFIG_TMPFS=y ++CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y ++CONFIG_TRANSPARENT_HUGEPAGE=y ++CONFIG_TUN=y ++CONFIG_UNIX=y ++CONFIG_UPROBES=y ++CONFIG_USELIB=y ++CONFIG_USER_NS=y ++CONFIG_VETH=y ++CONFIG_VIRTIO_BALLOON=y ++CONFIG_VIRTIO_BLK=y ++CONFIG_VIRTIO_CONSOLE=y ++CONFIG_VIRTIO_FS=y ++CONFIG_VIRTIO_INPUT=y ++CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y ++CONFIG_VIRTIO_MMIO=y ++CONFIG_VIRTIO_NET=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VLAN_8021Q=y ++CONFIG_VSOCKETS=y ++CONFIG_XFRM_USER=y +-- +2.38.1 + diff --git a/ci/diffs/0001-selftests-bpf-Adjust-expected-error-message-for-test.patch b/ci/diffs/0001-selftests-bpf-Adjust-expected-error-message-for-test.patch new file mode 100644 index 0000000000000..11d5233552b07 --- /dev/null +++ b/ci/diffs/0001-selftests-bpf-Adjust-expected-error-message-for-test.patch @@ -0,0 +1,43 @@ +From fa95252a62bc120fb1f939c46991280ba1375196 Mon Sep 17 00:00:00 2001 +From: Song Liu +Date: Thu, 2 Mar 2023 13:49:44 -0800 +Subject: [PATCH] selftests/bpf: Adjust expected error message for + test_global_func10.c + +For test programs that are expected to fail the verifier, we use +__failure __msg(...) to specify the expected error message. However, the +error message may change slightly among different versions of llvm. For +example, in [1], the program compiled by llvm-17 gets + + "invalid indirect access to stack ..." + +but the same program compiled by llvm-16 gets + + "invalid indirect read from stack ..." + +To avoid such issues, only compare the "invalid indirect" part of the error +message for test_global_func10.c. 
+ +[1] https://github.com/kernel-patches/bpf/actions/runs/4288572350/jobs/7533052993 + +Signed-off-by: Song Liu +--- + tools/testing/selftests/bpf/progs/test_global_func10.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c +index 98327bdbbfd2..7a591d946027 100644 +--- a/tools/testing/selftests/bpf/progs/test_global_func10.c ++++ b/tools/testing/selftests/bpf/progs/test_global_func10.c +@@ -22,7 +22,7 @@ __noinline int foo(const struct Big *big) + } + + SEC("cgroup_skb/ingress") +-__failure __msg("invalid indirect read from stack") ++__failure __msg("invalid indirect") + int global_func10(struct __sk_buff *skb) + { + const struct Small small = {.x = skb->len }; +-- +2.30.2 + diff --git a/ci/diffs/0001-selftests-bpf-Fix-compilation-errors-Assign-a-value-.patch b/ci/diffs/0001-selftests-bpf-Fix-compilation-errors-Assign-a-value-.patch new file mode 100644 index 0000000000000..14a62c2d5d6c8 --- /dev/null +++ b/ci/diffs/0001-selftests-bpf-Fix-compilation-errors-Assign-a-value-.patch @@ -0,0 +1,50 @@ +From 11e456cae91e9044cb12c2b037b52c9b268925f7 Mon Sep 17 00:00:00 2001 +From: Rong Tao +Date: Fri, 24 Feb 2023 23:10:02 +0800 +Subject: [PATCH bpf] selftests/bpf: Fix compilation errors: Assign a value to + a constant + +Commit bc292ab00f6c("mm: introduce vma->vm_flags wrapper functions") +turns the vm_flags into a const variable. + +Added bpf_find_vma test in commit f108662b27c9("selftests/bpf: Add tests +for bpf_find_vma") to assign values to variables that declare const in +find_vma_fail1.c programs, which is an error to the compiler and does not +test BPF verifiers. It is better to replace 'const vm_flags_t vm_flags' +with 'unsigned long vm_start' for testing. + + $ make -C tools/testing/selftests/bpf/ -j8 + ... 
+ progs/find_vma_fail1.c:16:16: error: cannot assign to non-static data + member 'vm_flags' with const-qualified type 'const vm_flags_t' (aka + 'const unsigned long') + vma->vm_flags |= 0x55; + ~~~~~~~~~~~~~ ^ + ../tools/testing/selftests/bpf/tools/include/vmlinux.h:1898:20: + note: non-static data member 'vm_flags' declared const here + const vm_flags_t vm_flags; + ~~~~~~~~~~~`~~~~~~^~~~~~~~ + +Signed-off-by: Rong Tao +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/tencent_CB281722B3C1BD504C16CDE586CACC2BE706@qq.com +--- + tools/testing/selftests/bpf/progs/find_vma_fail1.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail1.c b/tools/testing/selftests/bpf/progs/find_vma_fail1.c +index b3b326b8e2d1..47d5dedff554 100644 +--- a/tools/testing/selftests/bpf/progs/find_vma_fail1.c ++++ b/tools/testing/selftests/bpf/progs/find_vma_fail1.c +@@ -13,7 +13,7 @@ static long write_vma(struct task_struct *task, struct vm_area_struct *vma, + struct callback_ctx *data) + { + /* writing to vma, which is illegal */ +- vma->vm_flags |= 0x55; ++ vma->vm_start = 0xffffffffff600000; + + return 0; + } +-- +2.39.0 + diff --git a/ci/diffs/0001-selftests-bpf-Fix-decap_sanity_ns-cleanup.patch b/ci/diffs/0001-selftests-bpf-Fix-decap_sanity_ns-cleanup.patch new file mode 100644 index 0000000000000..41fd6e38e8678 --- /dev/null +++ b/ci/diffs/0001-selftests-bpf-Fix-decap_sanity_ns-cleanup.patch @@ -0,0 +1,36 @@ +From: Ilya Leoshkevich +Subject: [PATCH bpf-next 07/24] selftests/bpf: Fix decap_sanity_ns cleanup +Date: Wed, 25 Jan 2023 22:38:00 +0100 + +decap_sanity prints the following on the 1st run: + + decap_sanity: sh: 1: Syntax error: Bad fd number + +and the following on the 2nd run: + + Cannot create namespace file "/run/netns/decap_sanity_ns": File exists + +The problem is that the cleanup command has a typo and does nothing. +Fix the typo. 
+ +Signed-off-by: Ilya Leoshkevich +--- + tools/testing/selftests/bpf/prog_tests/decap_sanity.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c +index 0b2f73b88c53..2853883b7cbb 100644 +--- a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c ++++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c +@@ -80,6 +80,6 @@ void test_decap_sanity(void) + bpf_tc_hook_destroy(&qdisc_hook); + close_netns(nstoken); + } +- system("ip netns del " NS_TEST " >& /dev/null"); ++ system("ip netns del " NS_TEST " &> /dev/null"); + decap_sanity__destroy(skel); + } +-- +2.39.1 + + diff --git a/ci/diffs/0001-selftests-bpf-Initial-DENYLIST-for-aarch64.patch b/ci/diffs/0001-selftests-bpf-Initial-DENYLIST-for-aarch64.patch new file mode 100644 index 0000000000000..7d3a35de2a636 --- /dev/null +++ b/ci/diffs/0001-selftests-bpf-Initial-DENYLIST-for-aarch64.patch @@ -0,0 +1,118 @@ +From 94d52a19180726ee8ddc70bea75d6605e1dd6029 Mon Sep 17 00:00:00 2001 +From: Manu Bretelle +Date: Fri, 21 Oct 2022 14:07:01 -0700 +Subject: [PATCH] selftests/bpf: Initial DENYLIST for aarch64 + +Those tests are currently failing on aarch64, ignore them until they are +individually addressed. 
+ +Using this deny list, vmtest.sh ran successfully using + +LLVM_STRIP=llvm-strip-16 CLANG=clang-16 \ + tools/testing/selftests/bpf/vmtest.sh -- \ + ./test_progs -d \ + \"$(cat tools/testing/selftests/bpf/DENYLIST{,.aarch64} \ + | cut -d'#' -f1 \ + | sed -e 's/^[[:space:]]*//' \ + -e 's/[[:space:]]*$//' \ + | tr -s '\n' ','\ + )\" + +Signed-off-by: Manu Bretelle +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20221021210701.728135-5-chantr4@gmail.com +--- + tools/testing/selftests/bpf/DENYLIST.aarch64 | 81 ++++++++++++++++++++ + 1 file changed, 81 insertions(+) + create mode 100644 tools/testing/selftests/bpf/DENYLIST.aarch64 + +diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 +new file mode 100644 +index 000000000000..09416d5d2e33 +--- /dev/null ++++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 +@@ -0,0 +1,81 @@ ++bloom_filter_map # libbpf: prog 'check_bloom': failed to attach: ERROR: strerror_r(-524)=22 ++bpf_cookie/lsm ++bpf_cookie/multi_kprobe_attach_api ++bpf_cookie/multi_kprobe_link_api ++bpf_cookie/trampoline ++bpf_loop/check_callback_fn_stop # link unexpected error: -524 ++bpf_loop/check_invalid_flags ++bpf_loop/check_nested_calls ++bpf_loop/check_non_constant_callback ++bpf_loop/check_nr_loops ++bpf_loop/check_null_callback_ctx ++bpf_loop/check_stack ++bpf_mod_race # bpf_mod_kfunc_race__attach unexpected error: -524 (errno 524) ++bpf_tcp_ca/dctcp_fallback ++btf_dump/btf_dump: var_data # find type id unexpected find type id: actual -2 < expected 0 ++cgroup_hierarchical_stats # attach unexpected error: -524 (errno 524) ++d_path/basic # setup attach failed: -524 ++deny_namespace # attach unexpected error: -524 (errno 524) ++fentry_fexit # fentry_attach unexpected error: -1 (errno 524) ++fentry_test # fentry_attach unexpected error: -1 (errno 524) ++fexit_sleep # fexit_attach fexit attach failed: -1 ++fexit_stress # fexit attach unexpected fexit attach: actual -524 < expected 0 
++fexit_test # fexit_attach unexpected error: -1 (errno 524) ++get_func_args_test # get_func_args_test__attach unexpected error: -524 (errno 524) (trampoline) ++get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (errno 524) (trampoline) ++htab_update/reenter_update ++kfree_skb # attach fentry unexpected error: -524 (trampoline) ++kfunc_call/subprog # extern (var ksym) 'bpf_prog_active': not found in kernel BTF ++kfunc_call/subprog_lskel # skel unexpected error: -2 ++kfunc_dynptr_param/dynptr_data_null # libbpf: prog 'dynptr_data_null': failed to attach: ERROR: strerror_r(-524)=22 ++kprobe_multi_test/attach_api_addrs # bpf_program__attach_kprobe_multi_opts unexpected error: -95 ++kprobe_multi_test/attach_api_pattern # bpf_program__attach_kprobe_multi_opts unexpected error: -95 ++kprobe_multi_test/attach_api_syms # bpf_program__attach_kprobe_multi_opts unexpected error: -95 ++kprobe_multi_test/bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 ++kprobe_multi_test/link_api_addrs # link_fd unexpected link_fd: actual -95 < expected 0 ++kprobe_multi_test/link_api_syms # link_fd unexpected link_fd: actual -95 < expected 0 ++kprobe_multi_test/skel_api # kprobe_multi__attach unexpected error: -524 (errno 524) ++ksyms_module/libbpf # 'bpf_testmod_ksym_percpu': not found in kernel BTF ++ksyms_module/lskel # test_ksyms_module_lskel__open_and_load unexpected error: -2 ++libbpf_get_fd_by_id_opts # test_libbpf_get_fd_by_id_opts__attach unexpected error: -524 (errno 524) ++lookup_key # test_lookup_key__attach unexpected error: -524 (errno 524) ++lru_bug # lru_bug__attach unexpected error: -524 (errno 524) ++modify_return # modify_return__attach failed unexpected error: -524 (errno 524) ++module_attach # skel_attach skeleton attach failed: -524 ++mptcp/base # run_test mptcp unexpected error: -524 (errno 524) ++netcnt # packets unexpected packets: actual 10001 != expected 10000 ++recursion # skel_attach unexpected error: -524 (errno 524) 
++ringbuf # skel_attach skeleton attachment failed: -1 ++setget_sockopt # attach_cgroup unexpected error: -524 ++sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (errno 524) ++skc_to_unix_sock # could not attach BPF object unexpected error: -524 (errno 524) ++socket_cookie # prog_attach unexpected error: -524 ++stacktrace_build_id # compare_stack_ips stackmap vs. stack_amap err -1 errno 2 ++task_local_storage/exit_creds # skel_attach unexpected error: -524 (errno 524) ++task_local_storage/recursion # skel_attach unexpected error: -524 (errno 524) ++test_bprm_opts # attach attach failed: -524 ++test_ima # attach attach failed: -524 ++test_local_storage # attach lsm attach failed: -524 ++test_lsm # test_lsm_first_attach unexpected error: -524 (errno 524) ++test_overhead # attach_fentry unexpected error: -524 ++timer # timer unexpected error: -524 (errno 524) ++timer_crash # timer_crash__attach unexpected error: -524 (errno 524) ++timer_mim # timer_mim unexpected error: -524 (errno 524) ++trace_printk # trace_printk__attach unexpected error: -1 (errno 524) ++trace_vprintk # trace_vprintk__attach unexpected error: -1 (errno 524) ++tracing_struct # tracing_struct__attach unexpected error: -524 (errno 524) ++trampoline_count # attach_prog unexpected error: -524 ++unpriv_bpf_disabled # skel_attach unexpected error: -524 (errno 524) ++user_ringbuf/test_user_ringbuf_post_misaligned # misaligned_skel unexpected error: -524 (errno 524) ++user_ringbuf/test_user_ringbuf_post_producer_wrong_offset ++user_ringbuf/test_user_ringbuf_post_larger_than_ringbuf_sz ++user_ringbuf/test_user_ringbuf_basic # ringbuf_basic_skel unexpected error: -524 (errno 524) ++user_ringbuf/test_user_ringbuf_sample_full_ring_buffer ++user_ringbuf/test_user_ringbuf_post_alignment_autoadjust ++user_ringbuf/test_user_ringbuf_overfill ++user_ringbuf/test_user_ringbuf_discards_properly_ignored ++user_ringbuf/test_user_ringbuf_loop ++user_ringbuf/test_user_ringbuf_msg_protocol 
++user_ringbuf/test_user_ringbuf_blocking_reserve ++verify_pkcs7_sig # test_verify_pkcs7_sig__attach unexpected error: -524 (errno 524) ++vmlinux # skel_attach skeleton attach failed: -524 +-- +2.30.2 + diff --git a/ci/diffs/0001-selftests-bpf-Panic-on-hard-soft-lockup.patch b/ci/diffs/0001-selftests-bpf-Panic-on-hard-soft-lockup.patch new file mode 100644 index 0000000000000..08f2352bc1992 --- /dev/null +++ b/ci/diffs/0001-selftests-bpf-Panic-on-hard-soft-lockup.patch @@ -0,0 +1,57 @@ +From 5ed88f81511ce695692f0510ab3ca17eee68eff6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20M=C3=BCller?= +Date: Tue, 25 Oct 2022 23:15:46 +0000 +Subject: [PATCH] selftests/bpf: Panic on hard/soft lockup +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When running tests, we should probably accept any help we can get when +it comes to detecting issues early or making them more debuggable. We +have seen a few cases where a test_progs_noalu32 run, for example, +encountered a soft lockup and stopped making progress. It was only +interrupted once we hit the overall test timeout [0]. We can not and do +not want to necessarily rely on test timeouts, because those rely on +infrastructure provided by the environment we run in (and which is not +present in tools/testing/selftests/bpf/vmtest.sh, for example). +To that end, let's enable panics on soft as well as hard lockups to fail +fast should we encounter one. That's happening in the configuration +intended to be used for selftests (including when using vmtest.sh or +when running in BPF CI). 
+ +[0] https://github.com/kernel-patches/bpf/runs/7844499997 + +Signed-off-by: Daniel Müller +Link: https://lore.kernel.org/r/20221025231546.811766-1-deso@posteo.net +Signed-off-by: Alexei Starovoitov +--- + tools/testing/selftests/bpf/config | 2 ++ + tools/testing/selftests/bpf/config.x86_64 | 1 - + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config +index 921356..7a99a6 100644 +--- a/tools/testing/selftests/bpf/config ++++ b/tools/testing/selftests/bpf/config +@@ -1,4 +1,6 @@ + CONFIG_BLK_DEV_LOOP=y ++CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y ++CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y + CONFIG_BPF=y + CONFIG_BPF_EVENTS=y + CONFIG_BPF_JIT=y +diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 +index 21ce5e..dd97d6 100644 +--- a/tools/testing/selftests/bpf/config.x86_64 ++++ b/tools/testing/selftests/bpf/config.x86_64 +@@ -18,7 +18,6 @@ CONFIG_BLK_DEV_RAM=y + CONFIG_BLK_DEV_RAM_SIZE=16384 + CONFIG_BLK_DEV_THROTTLING=y + CONFIG_BONDING=y +-CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y + CONFIG_BOOTTIME_TRACING=y + CONFIG_BPF_JIT_ALWAYS_ON=y + CONFIG_BPF_KPROBE_OVERRIDE=y +-- +2.30.2 + diff --git a/ci/diffs/0001-selftests-bpf-S-iptables-iptables-legacy-in-the-bpf_.patch b/ci/diffs/0001-selftests-bpf-S-iptables-iptables-legacy-in-the-bpf_.patch new file mode 100644 index 0000000000000..e1e5f01a59930 --- /dev/null +++ b/ci/diffs/0001-selftests-bpf-S-iptables-iptables-legacy-in-the-bpf_.patch @@ -0,0 +1,77 @@ +From de9c8d848d90cf2e53aced50b350827442ca5a4f Mon Sep 17 00:00:00 2001 +From: Martin KaFai Lau +Date: Wed, 12 Oct 2022 15:12:35 -0700 +Subject: [PATCH] selftests/bpf: S/iptables/iptables-legacy/ in the bpf_nf and + xdp_synproxy test + +The recent vm image in CI has reported error in selftests that use +the iptables command. Manu Bretelle has pointed out the difference +in the recent vm image that the iptables is sym-linked to the iptables-nft. 
+With this knowledge, I can also reproduce the CI error by manually running +with the 'iptables-nft'. + +This patch is to replace the iptables command with iptables-legacy +to unblock the CI tests. + +Signed-off-by: Martin KaFai Lau +Signed-off-by: Andrii Nakryiko +Acked-by: David Vernet +Link: https://lore.kernel.org/bpf/20221012221235.3529719-1-martin.lau@linux.dev +--- + tools/testing/selftests/bpf/prog_tests/bpf_nf.c | 6 +++--- + tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c | 6 +++--- + 2 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +index 8a838ea8bdf3..c8ba4009e4ab 100644 +--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c ++++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +@@ -49,14 +49,14 @@ static int connect_to_server(int srv_fd) + + static void test_bpf_nf_ct(int mode) + { +- const char *iptables = "iptables -t raw %s PREROUTING -j CONNMARK --set-mark 42/0"; ++ const char *iptables = "iptables-legacy -t raw %s PREROUTING -j CONNMARK --set-mark 42/0"; + int srv_fd = -1, client_fd = -1, srv_client_fd = -1; + struct sockaddr_in peer_addr = {}; + struct test_bpf_nf *skel; + int prog_fd, err; + socklen_t len; + u16 srv_port; +- char cmd[64]; ++ char cmd[128]; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), +@@ -69,7 +69,7 @@ static void test_bpf_nf_ct(int mode) + + /* Enable connection tracking */ + snprintf(cmd, sizeof(cmd), iptables, "-A"); +- if (!ASSERT_OK(system(cmd), "iptables")) ++ if (!ASSERT_OK(system(cmd), cmd)) + goto end; + + srv_port = (mode == TEST_XDP) ? 
5005 : 5006; +diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c +index 75550a40e029..c72083885b6d 100644 +--- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c ++++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c +@@ -94,12 +94,12 @@ static void test_synproxy(bool xdp) + SYS("sysctl -w net.ipv4.tcp_syncookies=2"); + SYS("sysctl -w net.ipv4.tcp_timestamps=1"); + SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0"); +- SYS("iptables -t raw -I PREROUTING \ ++ SYS("iptables-legacy -t raw -I PREROUTING \ + -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack"); +- SYS("iptables -t filter -A INPUT \ ++ SYS("iptables-legacy -t filter -A INPUT \ + -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \ + -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460"); +- SYS("iptables -t filter -A INPUT \ ++ SYS("iptables-legacy -t filter -A INPUT \ + -i tmp1 -m state --state INVALID -j DROP"); + + ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \ +-- +2.30.2 + diff --git a/ci/diffs/0001-selftests-bpf-Select-CONFIG_FUNCTION_ERROR_INJECTION.patch b/ci/diffs/0001-selftests-bpf-Select-CONFIG_FUNCTION_ERROR_INJECTION.patch new file mode 100644 index 0000000000000..b4fc1bb37dbdc --- /dev/null +++ b/ci/diffs/0001-selftests-bpf-Select-CONFIG_FUNCTION_ERROR_INJECTION.patch @@ -0,0 +1,45 @@ +From e561fc8365da0215f68cfcffb6c309d1d7eb8c2b Mon Sep 17 00:00:00 2001 +From: Song Liu +Date: Tue, 13 Dec 2022 14:05:00 -0800 +Subject: [PATCH bpf-next] selftests/bpf: Select + CONFIG_FUNCTION_ERROR_INJECTION +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +BPF selftests require CONFIG_FUNCTION_ERROR_INJECTION to work. However, +CONFIG_FUNCTION_ERROR_INJECTION is no longer 'y' by default after recent +changes. 
As a result, we are seeing errors like the following from BPF CI: + + bpf_testmod_test_read() is not modifiable + __x64_sys_setdomainname is not sleepable + __x64_sys_getpgid is not sleepable + +Fix this by explicitly selecting CONFIG_FUNCTION_ERROR_INJECTION in the +selftest config. + +Fixes: a4412fdd49dc ("error-injection: Add prompt for function error injection") +Reported-by: Daniel Müller +Signed-off-by: Song Liu +Signed-off-by: Andrii Nakryiko +Acked-by: Daniel Müller +Link: https://lore.kernel.org/bpf/20221213220500.3427947-1-song@kernel.org +--- + tools/testing/selftests/bpf/config | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config +index 612f699dc4f7..63cd4ab70171 100644 +--- a/tools/testing/selftests/bpf/config ++++ b/tools/testing/selftests/bpf/config +@@ -16,6 +16,7 @@ CONFIG_CRYPTO_USER_API_HASH=y + CONFIG_DYNAMIC_FTRACE=y + CONFIG_FPROBE=y + CONFIG_FTRACE_SYSCALLS=y ++CONFIG_FUNCTION_ERROR_INJECTION=y + CONFIG_FUNCTION_TRACER=y + CONFIG_GENEVE=y + CONFIG_IKCONFIG=y +-- +2.30.2 + diff --git a/ci/diffs/0001-x86-vdso-Conditionally-export-__vdso_sgx_enter_enclave.patch b/ci/diffs/0001-x86-vdso-Conditionally-export-__vdso_sgx_enter_enclave.patch new file mode 100644 index 0000000000000..c5f90daa56d3b --- /dev/null +++ b/ci/diffs/0001-x86-vdso-Conditionally-export-__vdso_sgx_enter_enclave.patch @@ -0,0 +1,44 @@ +Recently, ld.lld moved from '--undefined-version' to +'--no-undefined-version' as the default, which breaks building the vDSO +when CONFIG_X86_SGX is not set: + + ld.lld: error: version script assignment of 'LINUX_2.6' to symbol '__vdso_sgx_enter_enclave' failed: symbol not defined + +__vdso_sgx_enter_enclave is only included in the vDSO when +CONFIG_X86_SGX is set. Only export it if it will be present in the final +object, which clears up the error. 
+ +Link: https://github.com/ClangBuiltLinux/linux/issues/1756 +Signed-off-by: Nathan Chancellor +--- + +It would be nice if this could be picked up for an -rc release but I +won't argue otherwise. + +Alternatively, we could add '--undefined-version' to the vDSO ldflags +but this does not seem unreasonable to me. + + arch/x86/entry/vdso/vdso.lds.S | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S +index 4bf48462fca7..e8c60ae7a7c8 100644 +--- a/arch/x86/entry/vdso/vdso.lds.S ++++ b/arch/x86/entry/vdso/vdso.lds.S +@@ -27,7 +27,9 @@ VERSION { + __vdso_time; + clock_getres; + __vdso_clock_getres; ++#ifdef CONFIG_X86_SGX + __vdso_sgx_enter_enclave; ++#endif + local: *; + }; + } + +base-commit: f0c4d9fc9cc9462659728d168387191387e903cc + +-- +2.38.1 + + diff --git a/ci/diffs/0002-selftests-bpf-Set-CONFIG_BOOTPARAM_HUNG_TASK_PANIC.patch b/ci/diffs/0002-selftests-bpf-Set-CONFIG_BOOTPARAM_HUNG_TASK_PANIC.patch new file mode 100644 index 0000000000000..2db04e0b9670c --- /dev/null +++ b/ci/diffs/0002-selftests-bpf-Set-CONFIG_BOOTPARAM_HUNG_TASK_PANIC.patch @@ -0,0 +1,39 @@ +From 91c614a38376374ff39c4cc678c2c5cd22cbf8fc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20M=C3=BCller?= +Date: Wed, 26 Oct 2022 13:52:28 -0700 +Subject: [PATCH] selftests/bpf: Set CONFIG_BOOTPARAM_HUNG_TASK_PANIC +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +With commit 5ed88f81511ce ("selftests/bpf: Panic on hard/soft lockup") +we enabled the means to panic test runs quickly when they are stuck +because of a hard or soft lockup. What we did not include is the means +to do the same when a hung task is detected. The reasoning there was +that virtualization effects may lead to delays and, hence, spurious +failures. 
+However, we see the occasional CI timeout when running the test_progs +selftest with internal parallelism enabled (-j) that is not caused by a +hard or soft lockup but due to a hung task. Hence, it makes sense to +enable this detection as well. But let's give it some mileage first +before upstreaming, though, and only include it in BPF CI. + +Signed-off-by: Daniel Müller +--- + tools/testing/selftests/bpf/config | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config +index 7a99a6..6c6821a 100644 +--- a/tools/testing/selftests/bpf/config ++++ b/tools/testing/selftests/bpf/config +@@ -1,5 +1,6 @@ + CONFIG_BLK_DEV_LOOP=y + CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y ++CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y + CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y + CONFIG_BPF=y + CONFIG_BPF_EVENTS=y +-- +2.30.2 + diff --git a/ci/diffs/0002-tools-headers-uapi-pull-in-stddef.h-to-fix-BPF-selft.patch b/ci/diffs/0002-tools-headers-uapi-pull-in-stddef.h-to-fix-BPF-selft.patch new file mode 100644 index 0000000000000..9070b76442dda --- /dev/null +++ b/ci/diffs/0002-tools-headers-uapi-pull-in-stddef.h-to-fix-BPF-selft.patch @@ -0,0 +1,104 @@ +From 038fafe1d1c92b8488e5e71ebea819050219dd6f Mon Sep 17 00:00:00 2001 +From: Andrii Nakryiko +Date: Wed, 2 Nov 2022 11:04:17 -0700 +Subject: [PATCH 2/2] tools headers uapi: pull in stddef.h to fix BPF selftests + build in CI + +With recent sync of linux/in.h tools/include headers are now relying on +__DECLARE_FLEX_ARRAY macro, which isn't itself defined inside +tools/include headers anywhere and is instead assumed to be present in +system-wide UAPI header. This breaks isolated environments that don't +have kernel UAPI headers installed system-wide, like BPF CI ([0]). + +To fix this, bring in include/uapi/linux/stddef.h into tools/include. We +can't just copy/paste it, though, it has to be processed with +scripts/headers_install.sh, which has a dependency on scripts/unifdef. 
+So the full command to (re-)generate stddef.h for inclusion into +tools/include directory is: + + $ make scripts_unifdef && \ + cp $KBUILD_OUTPUT/scripts/unifdef scripts/ && \ + scripts/headers_install.sh include/uapi/linux/stddef.h tools/include/uapi/linux/stddef.h + +This assumes KBUILD_OUTPUT envvar is set and used for out-of-tree builds. + + [0] https://github.com/kernel-patches/bpf/actions/runs/3379432493/jobs/5610982609 + +Cc: Jakub Kicinski +Cc: Arnaldo Carvalho de Melo +Fixes: 036b8f5b8970 ("tools headers uapi: Update linux/in.h copy") +Signed-off-by: Andrii Nakryiko +--- + tools/include/uapi/linux/in.h | 1 + + tools/include/uapi/linux/stddef.h | 47 +++++++++++++++++++++++++++++++ + 2 files changed, 48 insertions(+) + create mode 100644 tools/include/uapi/linux/stddef.h + +diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h +index f243ce665f74..07a4cb149305 100644 +--- a/tools/include/uapi/linux/in.h ++++ b/tools/include/uapi/linux/in.h +@@ -20,6 +20,7 @@ + #define _UAPI_LINUX_IN_H + + #include ++#include + #include + #include + +diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h +new file mode 100644 +index 000000000000..bb6ea517efb5 +--- /dev/null ++++ b/tools/include/uapi/linux/stddef.h +@@ -0,0 +1,47 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef _LINUX_STDDEF_H ++#define _LINUX_STDDEF_H ++ ++ ++ ++#ifndef __always_inline ++#define __always_inline __inline__ ++#endif ++ ++/** ++ * __struct_group() - Create a mirrored named and anonyomous struct ++ * ++ * @TAG: The tag name for the named sub-struct (usually empty) ++ * @NAME: The identifier name of the mirrored sub-struct ++ * @ATTRS: Any struct attributes (usually empty) ++ * @MEMBERS: The member declarations for the mirrored structs ++ * ++ * Used to create an anonymous union of two structs with identical layout ++ * and size: one anonymous and one named. 
The former's members can be used ++ * normally without sub-struct naming, and the latter can be used to ++ * reason about the start, end, and size of the group of struct members. ++ * The named struct can also be explicitly tagged for layer reuse, as well ++ * as both having struct attributes appended. ++ */ ++#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \ ++ union { \ ++ struct { MEMBERS } ATTRS; \ ++ struct TAG { MEMBERS } ATTRS NAME; \ ++ } ++ ++/** ++ * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union ++ * ++ * @TYPE: The type of each flexible array element ++ * @NAME: The name of the flexible array member ++ * ++ * In order to have a flexible array member in a union or alone in a ++ * struct, it needs to be wrapped in an anonymous struct with at least 1 ++ * named member, but that member can be empty. ++ */ ++#define __DECLARE_FLEX_ARRAY(TYPE, NAME) \ ++ struct { \ ++ struct { } __empty_ ## NAME; \ ++ TYPE NAME[]; \ ++ } ++#endif +-- +2.30.2 + diff --git a/ci/vmtest/configs/DENYLIST b/ci/vmtest/configs/DENYLIST new file mode 100644 index 0000000000000..e53b4640180e8 --- /dev/null +++ b/ci/vmtest/configs/DENYLIST @@ -0,0 +1,7 @@ +# TEMPORARY +btf_dump/btf_dump: syntax +kprobe_multi_bench_attach +core_reloc/enum64val +core_reloc/size___diff_sz +core_reloc/type_based___diff_sz +test_ima # All of CI is broken on it following 6.3-rc1 merge diff --git a/ci/vmtest/configs/DENYLIST.aarch64 b/ci/vmtest/configs/DENYLIST.aarch64 new file mode 100644 index 0000000000000..487b19ede4b61 --- /dev/null +++ b/ci/vmtest/configs/DENYLIST.aarch64 @@ -0,0 +1,4 @@ +cgrp_local_storage # libbpf: prog 'update_cookie_tracing': failed to attach: ERROR: strerror_r(-524)=22 +core_reloc_btfgen # run_core_reloc_tests:FAIL:run_btfgen unexpected error: 32512 (errno 22) +usdt/multispec # usdt_300_bad_attach unexpected pointer: 0x558c63d8f0 +xdp_bonding # whole test suite is very unstable on aarch64 diff --git a/ci/vmtest/configs/DENYLIST.s390x 
b/ci/vmtest/configs/DENYLIST.s390x new file mode 100644 index 0000000000000..e6829c94bdaae --- /dev/null +++ b/ci/vmtest/configs/DENYLIST.s390x @@ -0,0 +1,5 @@ +deny_namespace # not yet in bpf denylist +tc_redirect/tc_redirect_dtime # very flaky +lru_bug # not yet in bpf-next denylist +usdt/basic # failing verifier due to bounds check after LLVM update +usdt/multispec # same as above diff --git a/ci/vmtest/configs/DENYLIST.x86_64 b/ci/vmtest/configs/DENYLIST.x86_64 new file mode 100644 index 0000000000000..6fc3413daab9f --- /dev/null +++ b/ci/vmtest/configs/DENYLIST.x86_64 @@ -0,0 +1 @@ +netcnt # with kvm enabled, fail with packets unexpected packets: actual 10001 != expected 10000 diff --git a/ci/vmtest/helpers.sh b/ci/vmtest/helpers.sh new file mode 100755 index 0000000000000..c44d0983156d0 --- /dev/null +++ b/ci/vmtest/helpers.sh @@ -0,0 +1,38 @@ +# shellcheck shell=bash + +# $1 - start or end +# $2 - fold identifier, no spaces +# $3 - fold section description +foldable() { + local YELLOW='\033[1;33m' + local NOCOLOR='\033[0m' + if [ $1 = "start" ]; then + line="::group::$2" + if [ ! -z "${3:-}" ]; then + line="$line - ${YELLOW}$3${NOCOLOR}" + fi + else + line="::endgroup::" + fi + echo -e "$line" +} + +__print() { + local TITLE="" + if [[ -n $2 ]]; then + TITLE=" title=$2" + fi + echo "::$1${TITLE}::$3" +} + +# $1 - title +# $2 - message +print_error() { + __print error $1 $2 +} + +# $1 - title +# $2 - message +print_notice() { + __print notice $1 $2 +} diff --git a/ci/vmtest/run_selftests.sh b/ci/vmtest/run_selftests.sh new file mode 100755 index 0000000000000..0c18a331da75b --- /dev/null +++ b/ci/vmtest/run_selftests.sh @@ -0,0 +1,136 @@ +#!/bin/bash + +# run_selftests.sh will run the tests within /${PROJECT_NAME}/selftests/bpf +# If no specific test names are given, all tests will be run; otherwise, it will +# run the tests passed as parameters. +# There are 2 ways to pass test names. 
+# 1) command-line arguments to this script +# 2) a comma-separated list of test names passed as `run_tests` boot parameters. +# Test names passed by either of those methods will be run. + +set -euo pipefail + +source "$(cd "$(dirname "$0")" && pwd)/helpers.sh" + +ARCH=$(uname -m) + +STATUS_FILE=/exitstatus + +declare -a TEST_NAMES=() + +read_lists() { + (for path in "$@"; do + if [[ -s "$path" ]]; then + cat "$path" + fi; + done) | cut -d'#' -f1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | tr -s '\n' ',' +} + +read_test_names() { + foldable start read_test_names "Reading test names from boot parameters and command line arguments" + # Check if test names were passed as boot parameter. + # We expect `run_tests` to be a comma-separated list of test names. + IFS=',' read -r -a test_names_from_boot <<< \ + "$(sed -n 's/.*run_tests=\([^ ]*\).*/\1/p' /proc/cmdline)" + + echo "${#test_names_from_boot[@]} tests extracted from boot parameters: ${test_names_from_boot[*]}" + # Sort and only keep unique test names from both boot params and arguments + # TEST_NAMES will contain a sorted list of unique tests to be run. + # Only do this if any of $test_names_from_boot[@] or $@ has elements as + # "printf '%s\0'" will otherwise generate an empty element. + if [[ ${#test_names_from_boot[@]} -gt 0 || $# -gt 0 ]] + then + readarray -t TEST_NAMES < \ + <(printf '%s\0' "${test_names_from_boot[@]}" "$@" | \ + sort --zero-terminated --unique | \ + xargs --null --max-args=1) + fi + foldable end read_test_names +} + +test_progs_helper() { + local selftest="test_progs${1}" + local args="$2" + + foldable start ${selftest} "Testing ${selftest}" + # "&& true" does not change the return code (it is not executed + # if the selftest fails), but it prevents exiting on a + # failure due to the "set -e". + ./${selftest} ${args} ${DENYLIST:+-d"$DENYLIST"} ${ALLOWLIST:+-a"$ALLOWLIST"} && true + echo "${selftest}:$?" 
>>"${STATUS_FILE}" + foldable end ${selftest} +} + +test_progs() { + test_progs_helper "" "" +} + +test_progs_parallel() { + test_progs_helper "" "-j" +} + +test_progs_no_alu32() { + test_progs_helper "-no_alu32" "" +} + +test_progs_no_alu32_parallel() { + test_progs_helper "-no_alu32" "-j" +} + +test_maps() { + foldable start test_maps "Testing test_maps" + taskset 0xF ./test_maps && true + echo "test_maps:$?" >>"${STATUS_FILE}" + foldable end test_maps +} + +test_verifier() { + foldable start test_verifier "Testing test_verifier" + ./test_verifier && true + echo "test_verifier:$?" >>"${STATUS_FILE}" + foldable end test_verifier +} + +foldable end vm_init + +foldable start kernel_config "Kconfig" + +zcat /proc/config.gz + +foldable end kernel_config + +configs_path=${PROJECT_NAME}/selftests/bpf +local_configs_path=${PROJECT_NAME}/vmtest/configs +DENYLIST=$(read_lists \ + "$configs_path/DENYLIST" \ + "$configs_path/DENYLIST.${ARCH}" \ + "$local_configs_path/DENYLIST" \ + "$local_configs_path/DENYLIST.${ARCH}" \ +) +ALLOWLIST=$(read_lists \ + "$configs_path/ALLOWLIST" \ + "$configs_path/ALLOWLIST.${ARCH}" \ + "$local_configs_path/ALLOWLIST" \ + "$local_configs_path/ALLOWLIST.${ARCH}" \ +) + +echo "DENYLIST: ${DENYLIST}" +echo "ALLOWLIST: ${ALLOWLIST}" + +cd ${PROJECT_NAME}/selftests/bpf + +# populate TEST_NAMES +read_test_names "$@" +# if we don't have any test name provided to the script, we run all tests. +if [ ${#TEST_NAMES[@]} -eq 0 ]; then + test_progs + test_progs_no_alu32 + test_maps + test_verifier +else + # else we run the tests passed as command-line arguments and through boot + # parameter. 
+ for test_name in "${TEST_NAMES[@]}"; do + "${test_name}" + done +fi diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ec0df059f5620..2d8f3f639e680 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1476,6 +1476,8 @@ struct bpf_link_ops { void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq); int (*fill_link_info)(const struct bpf_link *link, struct bpf_link_info *info); + int (*update_map)(struct bpf_link *link, struct bpf_map *new_map, + struct bpf_map *old_map); }; struct bpf_tramp_link { @@ -1518,6 +1520,8 @@ struct bpf_struct_ops { void *kdata, const void *udata); int (*reg)(void *kdata); void (*unreg)(void *kdata); + int (*update)(void *kdata, void *old_kdata); + int (*validate)(void *kdata); const struct btf_type *type; const struct btf_type *value_type; const char *name; @@ -1552,6 +1556,7 @@ static inline void bpf_module_put(const void *data, struct module *owner) else module_put(owner); } +int bpf_struct_ops_link_create(union bpf_attr *attr); #ifdef CONFIG_NET /* Define it here to avoid the use of forward declaration */ @@ -1592,6 +1597,11 @@ static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, { return -EINVAL; } +static inline int bpf_struct_ops_link_create(union bpf_attr *attr) +{ + return -EOPNOTSUPP; +} + #endif #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM) @@ -1945,6 +1955,7 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); void bpf_map_inc(struct bpf_map *map); void bpf_map_inc_with_uref(struct bpf_map *map); +struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref); struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); diff --git a/include/net/tcp.h b/include/net/tcp.h index db9f828e9d1ee..2abb755e6a3a7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1117,6 +1117,9 @@ struct tcp_congestion_ops { int 
tcp_register_congestion_control(struct tcp_congestion_ops *type); void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); +int tcp_update_congestion_control(struct tcp_congestion_ops *type, + struct tcp_congestion_ops *old_type); +int tcp_validate_congestion_control(struct tcp_congestion_ops *ca); void tcp_assign_congestion_control(struct sock *sk); void tcp_init_congestion_control(struct sock *sk); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 13129df937cde..e3d3b5160d26f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1033,6 +1033,7 @@ enum bpf_attach_type { BPF_PERF_EVENT, BPF_TRACE_KPROBE_MULTI, BPF_LSM_CGROUP, + BPF_STRUCT_OPS, __MAX_BPF_ATTACH_TYPE }; @@ -1266,6 +1267,9 @@ enum { /* Create a map that is suitable to be an inner map with dynamic max entries */ BPF_F_INNER_MAP = (1U << 12), + +/* Create a map that will be registered/unregistered by the backed bpf_link */ + BPF_F_LINK = (1U << 13), }; /* Flags for BPF_PROG_QUERY. */ @@ -1507,7 +1511,10 @@ union bpf_attr { } task_fd_query; struct { /* struct used by BPF_LINK_CREATE command */ - __u32 prog_fd; /* eBPF program to attach */ + union { + __u32 prog_fd; /* eBPF program to attach */ + __u32 map_fd; /* struct_ops to attach */ + }; union { __u32 target_fd; /* object to attach to */ __u32 target_ifindex; /* target ifindex */ @@ -1548,12 +1555,23 @@ union bpf_attr { struct { /* struct used by BPF_LINK_UPDATE command */ __u32 link_fd; /* link fd */ - /* new program fd to update link with */ - __u32 new_prog_fd; + union { + /* new program fd to update link with */ + __u32 new_prog_fd; + /* new struct_ops map fd to update link with */ + __u32 new_map_fd; + }; __u32 flags; /* extra flags */ - /* expected link's program fd; is specified only if - * BPF_F_REPLACE flag is set in flags */ - __u32 old_prog_fd; + union { + /* expected link's program fd; is specified only if + * BPF_F_REPLACE flag is set in flags. 
+ */ + __u32 old_prog_fd; + /* expected link's map fd; is specified only + * if BPF_F_REPLACE flag is set. + */ + __u32 old_map_fd; + }; } link_update; struct { @@ -6379,6 +6397,9 @@ struct bpf_link_info { struct { __u32 ifindex; } xdp; + struct { + __u32 map_id; + } struct_ops; }; } __attribute__((aligned(8))); diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index ba7a94276e3b8..2b3577422bb55 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -11,11 +11,13 @@ #include #include #include +#include enum bpf_struct_ops_state { BPF_STRUCT_OPS_STATE_INIT, BPF_STRUCT_OPS_STATE_INUSE, BPF_STRUCT_OPS_STATE_TOBEFREE, + BPF_STRUCT_OPS_STATE_READY, }; #define BPF_STRUCT_OPS_COMMON_VALUE \ @@ -58,6 +60,13 @@ struct bpf_struct_ops_map { struct bpf_struct_ops_value kvalue; }; +struct bpf_struct_ops_link { + struct bpf_link link; + struct bpf_map __rcu *map; +}; + +static DEFINE_MUTEX(update_mutex); + #define VALUE_PREFIX "bpf_struct_ops_" #define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1) @@ -249,6 +258,7 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; struct bpf_struct_ops_value *uvalue, *kvalue; enum bpf_struct_ops_state state; + s64 refcnt; if (unlikely(*(u32 *)key != 0)) return -ENOENT; @@ -267,7 +277,14 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, uvalue = value; memcpy(uvalue, st_map->uvalue, map->value_size); uvalue->state = state; - refcount_set(&uvalue->refcnt, refcount_read(&kvalue->refcnt)); + + /* This value offers the user space a general estimate of how + * many sockets are still utilizing this struct_ops for TCP + * congestion control. The number might not be exact, but it + * should sufficiently meet our present goals. 
+ */ + refcnt = atomic64_read(&map->refcnt) - atomic64_read(&map->usercnt); + refcount_set(&uvalue->refcnt, max_t(s64, refcnt, 0)); return 0; } @@ -491,12 +508,29 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, *(unsigned long *)(udata + moff) = prog->aux->id; } - refcount_set(&kvalue->refcnt, 1); - bpf_map_inc(map); + if (st_map->map.map_flags & BPF_F_LINK) { + err = st_ops->validate(kdata); + if (err) + goto reset_unlock; + set_memory_rox((long)st_map->image, 1); + /* Let bpf_link handle registration & unregistration. + * + * Pair with smp_load_acquire() during lookup_elem(). + */ + smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_READY); + goto unlock; + } set_memory_rox((long)st_map->image, 1); err = st_ops->reg(kdata); if (likely(!err)) { + /* This refcnt increment on the map here after + * 'st_ops->reg()' is secure since the state of the + * map must be set to INIT at this moment, and thus + * bpf_struct_ops_map_delete_elem() can't unregister + * or transition it to TOBEFREE concurrently. + */ + bpf_map_inc(map); /* Pair with smp_load_acquire() during lookup_elem(). * It ensures the above udata updates (e.g. prog->aux->id) * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set. 
@@ -512,7 +546,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, */ set_memory_nx((long)st_map->image, 1); set_memory_rw((long)st_map->image, 1); - bpf_map_put(map); reset_unlock: bpf_struct_ops_map_put_progs(st_map); @@ -530,14 +563,16 @@ static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key) struct bpf_struct_ops_map *st_map; st_map = (struct bpf_struct_ops_map *)map; + if (st_map->map.map_flags & BPF_F_LINK) + return -EOPNOTSUPP; + prev_state = cmpxchg(&st_map->kvalue.state, BPF_STRUCT_OPS_STATE_INUSE, BPF_STRUCT_OPS_STATE_TOBEFREE); switch (prev_state) { case BPF_STRUCT_OPS_STATE_INUSE: st_map->st_ops->unreg(&st_map->kvalue.data); - if (refcount_dec_and_test(&st_map->kvalue.refcnt)) - bpf_map_put(map); + bpf_map_put(map); return 0; case BPF_STRUCT_OPS_STATE_TOBEFREE: return -EINPROGRESS; @@ -570,7 +605,7 @@ static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key, kfree(value); } -static void bpf_struct_ops_map_free(struct bpf_map *map) +static void __bpf_struct_ops_map_free(struct bpf_map *map) { struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; @@ -582,10 +617,28 @@ static void bpf_struct_ops_map_free(struct bpf_map *map) bpf_map_area_free(st_map); } +static void bpf_struct_ops_map_free(struct bpf_map *map) +{ + /* The struct_ops's function may switch to another struct_ops. + * + * For example, bpf_tcp_cc_x->init() may switch to + * another tcp_cc_y by calling + * setsockopt(TCP_CONGESTION, "tcp_cc_y"). + * During the switch, bpf_struct_ops_put(tcp_cc_x) is called + * and its refcount may reach 0 which then free its + * trampoline image while tcp_cc_x is still running. + * + * Thus, a rcu grace period is needed here. 
+ */ + synchronize_rcu_mult(call_rcu, call_rcu_tasks); + + __bpf_struct_ops_map_free(map); +} + static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr) { if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 || - attr->map_flags || !attr->btf_vmlinux_value_type_id) + (attr->map_flags & ~BPF_F_LINK) || !attr->btf_vmlinux_value_type_id) return -EINVAL; return 0; } @@ -609,6 +662,9 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) if (attr->value_size != vt->size) return ERR_PTR(-EINVAL); + if (attr->map_flags & BPF_F_LINK && (!st_ops->validate || !st_ops->update)) + return ERR_PTR(-EOPNOTSUPP); + t = st_ops->type; st_map_size = sizeof(*st_map) + @@ -630,7 +686,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) NUMA_NO_NODE); st_map->image = bpf_jit_alloc_exec(PAGE_SIZE); if (!st_map->uvalue || !st_map->links || !st_map->image) { - bpf_struct_ops_map_free(map); + __bpf_struct_ops_map_free(map); return ERR_PTR(-ENOMEM); } @@ -676,41 +732,175 @@ const struct bpf_map_ops bpf_struct_ops_map_ops = { bool bpf_struct_ops_get(const void *kdata) { struct bpf_struct_ops_value *kvalue; + struct bpf_struct_ops_map *st_map; + struct bpf_map *map; kvalue = container_of(kdata, struct bpf_struct_ops_value, data); + st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue); - return refcount_inc_not_zero(&kvalue->refcnt); + map = __bpf_map_inc_not_zero(&st_map->map, false); + return !IS_ERR(map); } -static void bpf_struct_ops_put_rcu(struct rcu_head *head) +void bpf_struct_ops_put(const void *kdata) { + struct bpf_struct_ops_value *kvalue; struct bpf_struct_ops_map *st_map; - st_map = container_of(head, struct bpf_struct_ops_map, rcu); + kvalue = container_of(kdata, struct bpf_struct_ops_value, data); + st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue); + bpf_map_put(&st_map->map); } -void bpf_struct_ops_put(const void *kdata) +static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map) { - 
struct bpf_struct_ops_value *kvalue; + struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; - kvalue = container_of(kdata, struct bpf_struct_ops_value, data); - if (refcount_dec_and_test(&kvalue->refcnt)) { - struct bpf_struct_ops_map *st_map; + return map->map_type == BPF_MAP_TYPE_STRUCT_OPS && + map->map_flags & BPF_F_LINK && + /* Pair with smp_store_release() during map_update */ + smp_load_acquire(&st_map->kvalue.state) == BPF_STRUCT_OPS_STATE_READY; +} - st_map = container_of(kvalue, struct bpf_struct_ops_map, - kvalue); - /* The struct_ops's function may switch to another struct_ops. - * - * For example, bpf_tcp_cc_x->init() may switch to - * another tcp_cc_y by calling - * setsockopt(TCP_CONGESTION, "tcp_cc_y"). - * During the switch, bpf_struct_ops_put(tcp_cc_x) is called - * and its map->refcnt may reach 0 which then free its - * trampoline image while tcp_cc_x is still running. - * - * Thus, a rcu grace period is needed here. +static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link) +{ + struct bpf_struct_ops_link *st_link; + struct bpf_struct_ops_map *st_map; + + st_link = container_of(link, struct bpf_struct_ops_link, link); + st_map = (struct bpf_struct_ops_map *) + rcu_dereference_protected(st_link->map, true); + if (st_map) { + /* st_link->map can be NULL if + * bpf_struct_ops_link_create() fails to register. 
*/ - call_rcu(&st_map->rcu, bpf_struct_ops_put_rcu); + st_map->st_ops->unreg(&st_map->kvalue.data); + bpf_map_put(&st_map->map); } + kfree(st_link); } + +static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link, + struct seq_file *seq) +{ + struct bpf_struct_ops_link *st_link; + struct bpf_map *map; + + st_link = container_of(link, struct bpf_struct_ops_link, link); + rcu_read_lock(); + map = rcu_dereference(st_link->map); + seq_printf(seq, "map_id:\t%d\n", map->id); + rcu_read_unlock(); +} + +static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + struct bpf_struct_ops_link *st_link; + struct bpf_map *map; + + st_link = container_of(link, struct bpf_struct_ops_link, link); + rcu_read_lock(); + map = rcu_dereference(st_link->map); + info->struct_ops.map_id = map->id; + rcu_read_unlock(); + return 0; +} + +static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map *new_map, + struct bpf_map *expected_old_map) +{ + struct bpf_struct_ops_map *st_map, *old_st_map; + struct bpf_map *old_map; + struct bpf_struct_ops_link *st_link; + int err = 0; + + st_link = container_of(link, struct bpf_struct_ops_link, link); + st_map = container_of(new_map, struct bpf_struct_ops_map, map); + + if (!bpf_struct_ops_valid_to_reg(new_map)) + return -EINVAL; + + mutex_lock(&update_mutex); + + old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex)); + if (expected_old_map && old_map != expected_old_map) { + err = -EPERM; + goto err_out; + } + + old_st_map = container_of(old_map, struct bpf_struct_ops_map, map); + /* The new and old struct_ops must be the same type. 
*/ + if (st_map->st_ops != old_st_map->st_ops) { + err = -EINVAL; + goto err_out; + } + + err = st_map->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data); + if (err) + goto err_out; + + bpf_map_inc(new_map); + rcu_assign_pointer(st_link->map, new_map); + bpf_map_put(old_map); + +err_out: + mutex_unlock(&update_mutex); + + return err; +} + +static const struct bpf_link_ops bpf_struct_ops_map_lops = { + .dealloc = bpf_struct_ops_map_link_dealloc, + .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo, + .fill_link_info = bpf_struct_ops_map_link_fill_link_info, + .update_map = bpf_struct_ops_map_link_update, +}; + +int bpf_struct_ops_link_create(union bpf_attr *attr) +{ + struct bpf_struct_ops_link *link = NULL; + struct bpf_link_primer link_primer; + struct bpf_struct_ops_map *st_map; + struct bpf_map *map; + int err; + + map = bpf_map_get(attr->link_create.map_fd); + if (IS_ERR(map)) + return PTR_ERR(map); /* bpf_map_get() fails with ERR_PTR(), never NULL */ + + st_map = (struct bpf_struct_ops_map *)map; + + if (!bpf_struct_ops_valid_to_reg(map)) { + err = -EINVAL; + goto err_out; + } + + link = kzalloc(sizeof(*link), GFP_USER); + if (!link) { + err = -ENOMEM; + goto err_out; + } + bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_map_lops, NULL); + + err = bpf_link_prime(&link->link, &link_primer); + if (err) + goto err_out; + + err = st_map->st_ops->reg(st_map->kvalue.data); + if (err) { + bpf_link_cleanup(&link_primer); + link = NULL; + goto err_out; + } + RCU_INIT_POINTER(link->map, map); + + return bpf_link_settle(&link_primer); + +err_out: + bpf_map_put(map); + kfree(link); + return err; +} + diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 099e9068bcdd8..b4d758fa5981d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1303,8 +1303,10 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd) return map; } -/* map_idr_lock should have been held */ -static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref) +/* map_idr_lock should have been held or the map
should have been + * protected by rcu read lock. + */ +struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref) { int refold; @@ -2823,16 +2825,19 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) const struct bpf_prog *prog = link->prog; char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; - bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); seq_printf(m, "link_type:\t%s\n" - "link_id:\t%u\n" - "prog_tag:\t%s\n" - "prog_id:\t%u\n", + "link_id:\t%u\n", bpf_link_type_strs[link->type], - link->id, - prog_tag, - prog->aux->id); + link->id); + if (prog) { + bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); + seq_printf(m, + "prog_tag:\t%s\n" + "prog_id:\t%u\n", + prog_tag, + prog->aux->id); + } if (link->ops->show_fdinfo) link->ops->show_fdinfo(link, m); } @@ -4312,7 +4317,8 @@ static int bpf_link_get_info_by_fd(struct file *file, info.type = link->type; info.id = link->id; - info.prog_id = link->prog->aux->id; + if (link->prog) + info.prog_id = link->prog->aux->id; if (link->ops->fill_link_info) { err = link->ops->fill_link_info(link, &info); @@ -4575,6 +4581,9 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) if (CHECK_ATTR(BPF_LINK_CREATE)) return -EINVAL; + if (attr->link_create.attach_type == BPF_STRUCT_OPS) + return bpf_struct_ops_link_create(attr); + prog = bpf_prog_get(attr->link_create.prog_fd); if (IS_ERR(prog)) return PTR_ERR(prog); @@ -4673,6 +4682,35 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) return ret; } +static int link_update_map(struct bpf_link *link, union bpf_attr *attr) +{ + struct bpf_map *new_map, *old_map = NULL; + int ret; + + new_map = bpf_map_get(attr->link_update.new_map_fd); + if (IS_ERR(new_map)) + return -EINVAL; + + if (attr->link_update.flags & BPF_F_REPLACE) { + old_map = bpf_map_get(attr->link_update.old_map_fd); + if (IS_ERR(old_map)) { + ret = -EINVAL; + goto out_put; + } + } else if (attr->link_update.old_map_fd) { + ret = -EINVAL; + goto out_put; + } + + ret = 
link->ops->update_map(link, new_map, old_map); + + if (old_map) + bpf_map_put(old_map); +out_put: + bpf_map_put(new_map); + return ret; +} + #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd static int link_update(union bpf_attr *attr) @@ -4693,6 +4731,11 @@ static int link_update(union bpf_attr *attr) if (IS_ERR(link)) return PTR_ERR(link); + if (link->ops->update_map) { + ret = link_update_map(link, attr); + goto out_put_link; + } + new_prog = bpf_prog_get(attr->link_update.new_prog_fd); if (IS_ERR(new_prog)) { ret = PTR_ERR(new_prog); diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 13fc0c185cd92..e8b27826283ea 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -239,8 +239,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t, if (bpf_obj_name_cpy(tcp_ca->name, utcp_ca->name, sizeof(tcp_ca->name)) <= 0) return -EINVAL; - if (tcp_ca_find(utcp_ca->name)) - return -EEXIST; return 1; } @@ -266,13 +264,25 @@ static void bpf_tcp_ca_unreg(void *kdata) tcp_unregister_congestion_control(kdata); } +static int bpf_tcp_ca_update(void *kdata, void *old_kdata) +{ + return tcp_update_congestion_control(kdata, old_kdata); +} + +static int bpf_tcp_ca_validate(void *kdata) +{ + return tcp_validate_congestion_control(kdata); +} + struct bpf_struct_ops bpf_tcp_congestion_ops = { .verifier_ops = &bpf_tcp_ca_verifier_ops, .reg = bpf_tcp_ca_reg, .unreg = bpf_tcp_ca_unreg, + .update = bpf_tcp_ca_update, .check_member = bpf_tcp_ca_check_member, .init_member = bpf_tcp_ca_init_member, .init = bpf_tcp_ca_init, + .validate = bpf_tcp_ca_validate, .name = "tcp_congestion_ops", }; diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index db8b4b488c314..e677d0bc12add 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -75,14 +75,8 @@ struct tcp_congestion_ops *tcp_ca_find_key(u32 key) return NULL; } -/* - * Attach new congestion control algorithm to the list - * of available options. 
- */ -int tcp_register_congestion_control(struct tcp_congestion_ops *ca) +int tcp_validate_congestion_control(struct tcp_congestion_ops *ca) { - int ret = 0; - /* all algorithms must implement these */ if (!ca->ssthresh || !ca->undo_cwnd || !(ca->cong_avoid || ca->cong_control)) { @@ -90,6 +84,20 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) return -EINVAL; } + return 0; +} + +/* Attach new congestion control algorithm to the list + * of available options. + */ +int tcp_register_congestion_control(struct tcp_congestion_ops *ca) +{ + int ret; + + ret = tcp_validate_congestion_control(ca); + if (ret) + return ret; + ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); spin_lock(&tcp_cong_list_lock); @@ -130,6 +138,49 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) } EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); +/* Replace a registered old ca with a new one. + * + * The new ca must have the same name as the old one, that has been + * registered. + */ +int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_congestion_ops *old_ca) +{ + struct tcp_congestion_ops *existing; + int ret; + + ret = tcp_validate_congestion_control(ca); + if (ret) + return ret; + + ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); + + spin_lock(&tcp_cong_list_lock); + existing = tcp_ca_find_key(old_ca->key); + if (ca->key == TCP_CA_UNSPEC || !existing || strcmp(existing->name, ca->name)) { + pr_notice("%s not registered or non-unique key\n", + ca->name); + ret = -EINVAL; + } else if (existing != old_ca) { + pr_notice("invalid old congestion control algorithm to replace\n"); + ret = -EINVAL; + } else { + /* Add the new one before removing the old one to keep + * one implementation available all the time. 
+ */ + list_add_tail_rcu(&ca->list, &tcp_cong_list); + list_del_rcu(&existing->list); + pr_debug("%s updated\n", ca->name); + } + spin_unlock(&tcp_cong_list_lock); + + /* Wait for outstanding readers to complete before the + * module or struct_ops gets removed entirely. + */ + synchronize_rcu(); + + return ret; +} + u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca) { const struct tcp_congestion_ops *ca; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 13129df937cde..d6c5a022ae28d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1033,6 +1033,7 @@ enum bpf_attach_type { BPF_PERF_EVENT, BPF_TRACE_KPROBE_MULTI, BPF_LSM_CGROUP, + BPF_STRUCT_OPS, __MAX_BPF_ATTACH_TYPE }; @@ -1266,6 +1267,9 @@ enum { /* Create a map that is suitable to be an inner map with dynamic max entries */ BPF_F_INNER_MAP = (1U << 12), + +/* Create a map that will be registered/unregesitered by the backed bpf_link */ + BPF_F_LINK = (1U << 13), }; /* Flags for BPF_PROG_QUERY. 
*/ @@ -1507,7 +1511,10 @@ union bpf_attr { } task_fd_query; struct { /* struct used by BPF_LINK_CREATE command */ - __u32 prog_fd; /* eBPF program to attach */ + union { + __u32 prog_fd; /* eBPF program to attach */ + __u32 map_fd; /* eBPF struct_ops to attach */ + }; union { __u32 target_fd; /* object to attach to */ __u32 target_ifindex; /* target ifindex */ @@ -1548,12 +1555,23 @@ union bpf_attr { struct { /* struct used by BPF_LINK_UPDATE command */ __u32 link_fd; /* link fd */ - /* new program fd to update link with */ - __u32 new_prog_fd; + union { + /* new program fd to update link with */ + __u32 new_prog_fd; + /* new struct_ops map fd to update link with */ + __u32 new_map_fd; + }; __u32 flags; /* extra flags */ - /* expected link's program fd; is specified only if - * BPF_F_REPLACE flag is set in flags */ - __u32 old_prog_fd; + union { + /* expected link's program fd; is specified only if + * BPF_F_REPLACE flag is set in flags. + */ + __u32 old_prog_fd; + /* expected link's map fd; is specified only + * if BPF_F_REPLACE flag is set. 
+ */ + __u32 old_map_fd; + }; } link_update; struct { @@ -6379,6 +6397,9 @@ struct bpf_link_info { struct { __u32 ifindex; } xdp; + struct { + __u32 map_id; + } struct_ops; }; } __attribute__((aligned(8))); diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index e750b6f5fcc36..767035900354d 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -794,11 +794,17 @@ int bpf_link_update(int link_fd, int new_prog_fd, if (!OPTS_VALID(opts, bpf_link_update_opts)) return libbpf_err(-EINVAL); + if (OPTS_GET(opts, old_prog_fd, 0) && OPTS_GET(opts, old_map_fd, 0)) + return libbpf_err(-EINVAL); + memset(&attr, 0, attr_sz); attr.link_update.link_fd = link_fd; attr.link_update.new_prog_fd = new_prog_fd; attr.link_update.flags = OPTS_GET(opts, flags, 0); - attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0); + if (OPTS_GET(opts, old_prog_fd, 0)) + attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0); + else if (OPTS_GET(opts, old_map_fd, 0)) + attr.link_update.old_map_fd = OPTS_GET(opts, old_map_fd, 0); ret = sys_bpf(BPF_LINK_UPDATE, &attr, attr_sz); return libbpf_err_errno(ret); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index f0f7863732381..b073e73439efd 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -336,8 +336,9 @@ struct bpf_link_update_opts { size_t sz; /* size of this struct for forward/backward compatibility */ __u32 flags; /* extra flags */ __u32 old_prog_fd; /* expected old program FD */ + __u32 old_map_fd; /* expected old map FD */ }; -#define bpf_link_update_opts__last_field old_prog_fd +#define bpf_link_update_opts__last_field old_map_fd LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd, const struct bpf_link_update_opts *opts); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5d32aa8ea38ab..f6a071db5c6e5 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -116,6 +116,7 @@ static const char * const attach_type_name[] = { [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = 
"sk_reuseport_select_or_migrate", [BPF_PERF_EVENT] = "perf_event", [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi", + [BPF_STRUCT_OPS] = "struct_ops", }; static const char * const link_type_name[] = { @@ -467,6 +468,7 @@ struct bpf_struct_ops { #define KCONFIG_SEC ".kconfig" #define KSYMS_SEC ".ksyms" #define STRUCT_OPS_SEC ".struct_ops" +#define STRUCT_OPS_LINK_SEC ".struct_ops.link" enum libbpf_map_type { LIBBPF_MAP_UNSPEC, @@ -596,6 +598,7 @@ struct elf_state { Elf64_Ehdr *ehdr; Elf_Data *symbols; Elf_Data *st_ops_data; + Elf_Data *st_ops_link_data; size_t shstrndx; /* section index for section name strings */ size_t strtabidx; struct elf_sec_desc *secs; @@ -605,6 +608,7 @@ struct elf_state { int text_shndx; int symbols_shndx; int st_ops_shndx; + int st_ops_link_shndx; }; struct usdt_manager; @@ -1118,7 +1122,8 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) return 0; } -static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) +static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, + int shndx, Elf_Data *data, __u32 map_flags) { const struct btf_type *type, *datasec; const struct btf_var_secinfo *vsi; @@ -1129,15 +1134,15 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) struct bpf_map *map; __u32 i; - if (obj->efile.st_ops_shndx == -1) + if (shndx == -1) return 0; btf = obj->btf; - datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC, + datasec_id = btf__find_by_name_kind(btf, sec_name, BTF_KIND_DATASEC); if (datasec_id < 0) { pr_warn("struct_ops init: DATASEC %s not found\n", - STRUCT_OPS_SEC); + sec_name); return -EINVAL; } @@ -1150,7 +1155,7 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) type_id = btf__resolve_type(obj->btf, vsi->type); if (type_id < 0) { pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n", - vsi->type, STRUCT_OPS_SEC); + vsi->type, sec_name); return -EINVAL; } @@ -1169,7 +1174,7 @@ static int 
bpf_object__init_struct_ops_maps(struct bpf_object *obj) if (IS_ERR(map)) return PTR_ERR(map); - map->sec_idx = obj->efile.st_ops_shndx; + map->sec_idx = shndx; map->sec_offset = vsi->offset; map->name = strdup(var_name); if (!map->name) @@ -1179,6 +1184,7 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) map->def.key_size = sizeof(int); map->def.value_size = type->size; map->def.max_entries = 1; + map->def.map_flags = map_flags; map->st_ops = calloc(1, sizeof(*map->st_ops)); if (!map->st_ops) @@ -1191,14 +1197,14 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off) return -ENOMEM; - if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) { + if (vsi->offset + type->size > data->d_size) { pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n", - var_name, STRUCT_OPS_SEC); + var_name, sec_name); return -EINVAL; } memcpy(st_ops->data, - obj->efile.st_ops_data->d_buf + vsi->offset, + data->d_buf + vsi->offset, type->size); st_ops->tname = tname; st_ops->type = type; @@ -1211,6 +1217,19 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) return 0; } +static int bpf_object_init_struct_ops(struct bpf_object *obj) +{ + int err; + + err = init_struct_ops_maps(obj, STRUCT_OPS_SEC, obj->efile.st_ops_shndx, + obj->efile.st_ops_data, 0); + err = err ?: init_struct_ops_maps(obj, STRUCT_OPS_LINK_SEC, + obj->efile.st_ops_link_shndx, + obj->efile.st_ops_link_data, + BPF_F_LINK); + return err; +} + static struct bpf_object *bpf_object__new(const char *path, const void *obj_buf, size_t obj_buf_sz, @@ -1247,6 +1266,7 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.obj_buf_sz = obj_buf_sz; obj->efile.btf_maps_shndx = -1; obj->efile.st_ops_shndx = -1; + obj->efile.st_ops_link_shndx = -1; obj->kconfig_map_idx = -1; obj->kern_version = get_kernel_version(); @@ -1264,6 +1284,7 @@ static void bpf_object__elf_finish(struct 
bpf_object *obj) obj->efile.elf = NULL; obj->efile.symbols = NULL; obj->efile.st_ops_data = NULL; + obj->efile.st_ops_link_data = NULL; zfree(&obj->efile.secs); obj->efile.sec_cnt = 0; @@ -2618,7 +2639,7 @@ static int bpf_object__init_maps(struct bpf_object *obj, err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); err = err ?: bpf_object__init_global_data_maps(obj); err = err ?: bpf_object__init_kconfig_map(obj); - err = err ?: bpf_object__init_struct_ops_maps(obj); + err = err ?: bpf_object_init_struct_ops(obj); return err; } @@ -2752,12 +2773,13 @@ static bool libbpf_needs_btf(const struct bpf_object *obj) { return obj->efile.btf_maps_shndx >= 0 || obj->efile.st_ops_shndx >= 0 || + obj->efile.st_ops_link_shndx >= 0 || obj->nr_extern > 0; } static bool kernel_needs_btf(const struct bpf_object *obj) { - return obj->efile.st_ops_shndx >= 0; + return obj->efile.st_ops_shndx >= 0 || obj->efile.st_ops_link_shndx >= 0; } static int bpf_object__init_btf(struct bpf_object *obj, @@ -3450,6 +3472,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if (strcmp(name, STRUCT_OPS_SEC) == 0) { obj->efile.st_ops_data = data; obj->efile.st_ops_shndx = idx; + } else if (strcmp(name, STRUCT_OPS_LINK_SEC) == 0) { + obj->efile.st_ops_link_data = data; + obj->efile.st_ops_link_shndx = idx; } else { pr_info("elf: skipping unrecognized data section(%d) %s\n", idx, name); @@ -3464,6 +3489,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj) /* Only do relo for section with exec instructions */ if (!section_have_execinstr(obj, targ_sec_idx) && strcmp(name, ".rel" STRUCT_OPS_SEC) && + strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) && strcmp(name, ".rel" MAPS_ELF_SEC)) { pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", idx, name, targ_sec_idx, @@ -6610,7 +6636,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj) return -LIBBPF_ERRNO__INTERNAL; } - if (idx == obj->efile.st_ops_shndx) + if (idx == obj->efile.st_ops_shndx || idx 
== obj->efile.st_ops_link_shndx) err = bpf_object__collect_st_ops_relos(obj, shdr, data); else if (idx == obj->efile.btf_maps_shndx) err = bpf_object__collect_map_relos(obj, shdr, data); @@ -7686,6 +7712,37 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, return 0; } +static void bpf_map_prepare_vdata(const struct bpf_map *map) +{ + struct bpf_struct_ops *st_ops; + __u32 i; + + st_ops = map->st_ops; + for (i = 0; i < btf_vlen(st_ops->type); i++) { + struct bpf_program *prog = st_ops->progs[i]; + void *kern_data; + int prog_fd; + + if (!prog) + continue; + + prog_fd = bpf_program__fd(prog); + kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; + *(unsigned long *)kern_data = prog_fd; + } +} + +static int bpf_object_prepare_struct_ops(struct bpf_object *obj) +{ + int i; + + for (i = 0; i < obj->nr_maps; i++) + if (bpf_map__is_struct_ops(&obj->maps[i])) + bpf_map_prepare_vdata(&obj->maps[i]); + + return 0; +} + static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) { int err, i; @@ -7711,6 +7768,7 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); err = err ? : bpf_object__load_progs(obj, extra_log_level); err = err ? : bpf_object_init_prog_arrays(obj); + err = err ? 
: bpf_object_prepare_struct_ops(obj); if (obj->gen_loader) { /* reset FDs */ @@ -8820,6 +8878,7 @@ const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t) } static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, + int sec_idx, size_t offset) { struct bpf_map *map; @@ -8829,7 +8888,8 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, map = &obj->maps[i]; if (!bpf_map__is_struct_ops(map)) continue; - if (map->sec_offset <= offset && + if (map->sec_idx == sec_idx && + map->sec_offset <= offset && offset - map->sec_offset < map->def.value_size) return map; } @@ -8871,7 +8931,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, } name = elf_sym_str(obj, sym->st_name) ?: ""; - map = find_struct_ops_map_by_offset(obj, rel->r_offset); + map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset); if (!map) { pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n", (size_t)rel->r_offset); @@ -8938,8 +8998,9 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, } /* struct_ops BPF prog can be re-used between multiple - * .struct_ops as long as it's the same struct_ops struct - * definition and the same function pointer field + * .struct_ops & .struct_ops.link as long as it's the + * same struct_ops struct definition and the same + * function pointer field */ if (prog->attach_btf_id != st_ops->type_id || prog->expected_attach_type != member_idx) { @@ -11579,22 +11640,30 @@ struct bpf_link *bpf_program__attach(const struct bpf_program *prog) return link; } +struct bpf_link_struct_ops { + struct bpf_link link; + int map_fd; +}; + static int bpf_link__detach_struct_ops(struct bpf_link *link) { + struct bpf_link_struct_ops *st_link; __u32 zero = 0; - if (bpf_map_delete_elem(link->fd, &zero)) - return -errno; + st_link = container_of(link, struct bpf_link_struct_ops, link); - return 0; + if (st_link->map_fd < 0) + /* w/o a real link */ + return 
bpf_map_delete_elem(link->fd, &zero); + + return close(link->fd); } struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) { - struct bpf_struct_ops *st_ops; - struct bpf_link *link; - __u32 i, zero = 0; - int err; + struct bpf_link_struct_ops *link; + __u32 zero = 0; + int err, fd; if (!bpf_map__is_struct_ops(map) || map->fd == -1) return libbpf_err_ptr(-EINVAL); @@ -11603,31 +11672,72 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) if (!link) return libbpf_err_ptr(-EINVAL); - st_ops = map->st_ops; - for (i = 0; i < btf_vlen(st_ops->type); i++) { - struct bpf_program *prog = st_ops->progs[i]; - void *kern_data; - int prog_fd; + /* kern_vdata should be prepared during the loading phase. */ + err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0); + /* It can be EBUSY if the map has been used to create or + * update a link before. We don't allow updating the value of + * a struct_ops once it is set. That ensures that the value + * never changed. So, it is safe to skip EBUSY. + */ + if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) { + free(link); + return libbpf_err_ptr(err); + } - if (!prog) - continue; + link->link.detach = bpf_link__detach_struct_ops; - prog_fd = bpf_program__fd(prog); - kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; - *(unsigned long *)kern_data = prog_fd; + if (!(map->def.map_flags & BPF_F_LINK)) { + /* w/o a real link */ + link->link.fd = map->fd; + link->map_fd = -1; + return &link->link; } - err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0); - if (err) { - err = -errno; + fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL); + if (fd < 0) { free(link); - return libbpf_err_ptr(err); + return libbpf_err_ptr(fd); } - link->detach = bpf_link__detach_struct_ops; - link->fd = map->fd; + link->link.fd = fd; + link->map_fd = map->fd; - return link; + return &link->link; +} + +/* + * Swap the back struct_ops of a link with a new struct_ops map. 
+ */ +int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map) +{ + struct bpf_link_struct_ops *st_ops_link; + __u32 zero = 0; + int err; + + if (!bpf_map__is_struct_ops(map) || map->fd < 0) + return -EINVAL; + + st_ops_link = container_of(link, struct bpf_link_struct_ops, link); + /* Ensure the type of a link is correct */ + if (st_ops_link->map_fd < 0) + return -EINVAL; + + err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0); + /* It can be EBUSY if the map has been used to create or + * update a link before. We don't allow updating the value of + * a struct_ops once it is set. That ensures that the value + * never changed. So, it is safe to skip EBUSY. + */ + if (err && err != -EBUSY) + return err; + + err = bpf_link_update(link->fd, map->fd, NULL); + if (err < 0) + return err; + + st_ops_link->map_fd = map->fd; + + return 0; } typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr, diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index db4992a036f8b..1615e55e2e790 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -719,6 +719,7 @@ bpf_program__attach_freplace(const struct bpf_program *prog, struct bpf_map; LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map); +LIBBPF_API int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map); struct bpf_iter_attach_opts { size_t sz; /* size of this struct for forward/backward compatibility */ diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 50dde1f6521ef..a5aa3a383d694 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -386,6 +386,7 @@ LIBBPF_1.1.0 { LIBBPF_1.2.0 { global: bpf_btf_get_info_by_fd; + bpf_link__update_map; bpf_link_get_info_by_fd; bpf_map_get_info_by_fd; bpf_prog_get_info_by_fd; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 
e980188d41246..2c80f9291ceda 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -8,6 +8,7 @@
 #include "bpf_dctcp.skel.h"
 #include "bpf_cubic.skel.h"
 #include "bpf_tcp_nogpl.skel.h"
+#include "tcp_ca_update.skel.h"
 #include "bpf_dctcp_release.skel.h"
 #include "tcp_ca_write_sk_pacing.skel.h"
 #include "tcp_ca_incompl_cong_ops.skel.h"
@@ -381,6 +382,179 @@ static void test_unsupp_cong_op(void)
 	libbpf_set_print(old_print_fn);
 }
 
+/* Attach ca_update_1 via a link, switch the link to ca_update_2 and
+ * check that the next do_test() run bumps ca2_cnt while ca1_cnt stays
+ * at its saved value.
+ */
+static void test_update_ca(void)
+{
+	struct tcp_ca_update *skel;
+	struct bpf_link *link;
+	int saved_ca1_cnt;
+	int err;
+
+	skel = tcp_ca_update__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open"))
+		return;
+
+	link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+	if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
+		goto out;
+
+	do_test("tcp_ca_update", NULL);
+	saved_ca1_cnt = skel->bss->ca1_cnt;
+	ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt");
+
+	err = bpf_link__update_map(link, skel->maps.ca_update_2);
+	ASSERT_OK(err, "update_map");
+
+	do_test("tcp_ca_update", NULL);
+	ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
+	ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt");
+
+	bpf_link__destroy(link);
+out:
+	tcp_ca_update__destroy(skel);
+}
+
+/* Updating the link to ca_wrong is expected to fail; afterwards the
+ * next do_test() run must still bump ca1_cnt past its saved value.
+ */
+static void test_update_wrong(void)
+{
+	struct tcp_ca_update *skel;
+	struct bpf_link *link;
+	int saved_ca1_cnt;
+	int err;
+
+	skel = tcp_ca_update__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open"))
+		return;
+
+	link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+	if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
+		goto out;
+
+	do_test("tcp_ca_update", NULL);
+	saved_ca1_cnt = skel->bss->ca1_cnt;
+	ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt");
+
+	err = bpf_link__update_map(link, skel->maps.ca_wrong);
+	ASSERT_ERR(err, "update_map");
+
+	do_test("tcp_ca_update", NULL);
+	ASSERT_GT(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
+
+	bpf_link__destroy(link);
+out:
+	tcp_ca_update__destroy(skel);
+}
+
+/* With both ca_no_link and ca_update_1 attached, updating the
+ * ca_update_1 link to the ca_no_link map is expected to fail.
+ */
+static void test_mixed_links(void)
+{
+	struct tcp_ca_update *skel;
+	struct bpf_link *link, *link_nl;
+	int err;
+
+	skel = tcp_ca_update__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open"))
+		return;
+
+	link_nl = bpf_map__attach_struct_ops(skel->maps.ca_no_link);
+	if (!ASSERT_OK_PTR(link_nl, "attach_struct_ops_nl"))
+		goto out;
+
+	link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+	if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
+		goto out_nl;
+
+	do_test("tcp_ca_update", NULL);
+	ASSERT_GT(skel->bss->ca1_cnt, 0, "ca1_ca1_cnt");
+
+	err = bpf_link__update_map(link, skel->maps.ca_no_link);
+	ASSERT_ERR(err, "update_map");
+
+	bpf_link__destroy(link);
+out_nl:
+	bpf_link__destroy(link_nl);
+out:
+	tcp_ca_update__destroy(skel);
+}
+
+/* Attach and destroy a link on the same struct_ops map twice in a row. */
+static void test_multi_links(void)
+{
+	struct tcp_ca_update *skel;
+	struct bpf_link *link;
+
+	skel = tcp_ca_update__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open"))
+		return;
+
+	link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+	ASSERT_OK_PTR(link, "attach_struct_ops_1st");
+	bpf_link__destroy(link);
+
+	/* A map should be able to be used to create links multiple
+	 * times.
+	 */
+	link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+	ASSERT_OK_PTR(link, "attach_struct_ops_2nd");
+	bpf_link__destroy(link);
+
+	tcp_ca_update__destroy(skel);
+}
+
+/* Exercise bpf_link_update() with BPF_F_REPLACE on a ca_update_2 link:
+ * first with old_map_fd set to ca_update_1 (expected to fail), then
+ * with old_map_fd set to ca_update_2 (expected to succeed).
+ */
+static void test_link_replace(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, opts);
+	struct tcp_ca_update *skel;
+	struct bpf_link *link;
+	int err;
+
+	skel = tcp_ca_update__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open"))
+		return;
+
+	link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+	ASSERT_OK_PTR(link, "attach_struct_ops_1st");
+	bpf_link__destroy(link);
+
+	link = bpf_map__attach_struct_ops(skel->maps.ca_update_2);
+	if (!ASSERT_OK_PTR(link, "attach_struct_ops_2nd"))
+		goto out;
+
+	/* BPF_F_REPLACE with a wrong old map fd. It should fail!
+	 *
+	 * With BPF_F_REPLACE, the link should be updated only if the
+	 * old map fd given here matches the map backing the link.
+	 */
+	opts.old_map_fd = bpf_map__fd(skel->maps.ca_update_1);
+	opts.flags = BPF_F_REPLACE;
+	err = bpf_link_update(bpf_link__fd(link),
+			      bpf_map__fd(skel->maps.ca_update_1),
+			      &opts);
+	ASSERT_ERR(err, "bpf_link_update_fail");
+
+	/* BPF_F_REPLACE with a correct old map fd. It should succeed! */
+	opts.old_map_fd = bpf_map__fd(skel->maps.ca_update_2);
+	err = bpf_link_update(bpf_link__fd(link),
+			      bpf_map__fd(skel->maps.ca_update_1),
+			      &opts);
+	ASSERT_OK(err, "bpf_link_update_success");
+
+	bpf_link__destroy(link);
+out:
+	tcp_ca_update__destroy(skel);
+}
+
 void test_bpf_tcp_ca(void)
 {
 	if (test__start_subtest("dctcp"))
@@ -399,4 +573,14 @@ void test_bpf_tcp_ca(void)
 		test_incompl_cong_ops();
 	if (test__start_subtest("unsupp_cong_op"))
 		test_unsupp_cong_op();
+	if (test__start_subtest("update_ca"))
+		test_update_ca();
+	if (test__start_subtest("update_wrong"))
+		test_update_wrong();
+	if (test__start_subtest("mixed_links"))
+		test_mixed_links();
+	if (test__start_subtest("multi_links"))
+		test_multi_links();
+	if (test__start_subtest("link_replace"))
+		test_link_replace();
 }
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
new file mode 100644
index 0000000000000..b93a0ed330578
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* Number of times each init callback ran; checked from user space. */
+int ca1_cnt = 0;
+int ca2_cnt = 0;
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+	return (struct tcp_sock *)sk;
+}
+
+SEC("struct_ops/ca_update_1_init")
+void BPF_PROG(ca_update_1_init, struct sock *sk)
+{
+	ca1_cnt++;
+}
+
+SEC("struct_ops/ca_update_2_init")
+void BPF_PROG(ca_update_2_init, struct sock *sk)
+{
+	ca2_cnt++;
+}
+
+/* Empty callback shared by every struct_ops map in this file. */
+SEC("struct_ops/ca_update_cong_control")
+void BPF_PROG(ca_update_cong_control, struct sock *sk,
+	      const struct rate_sample *rs)
+{
+}
+
+SEC("struct_ops/ca_update_ssthresh")
+__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk)
+{
+	return tcp_sk(sk)->snd_ssthresh;	/* current value, unchanged */
+}
+
+SEC("struct_ops/ca_update_undo_cwnd")
+__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk)
+{
+	return tcp_sk(sk)->snd_cwnd;	/* current value, unchanged */
+}
+
+SEC(".struct_ops.link")	/* ca_update_1: .init bumps ca1_cnt */
+struct tcp_congestion_ops ca_update_1 = {
+	.init = (void *)ca_update_1_init,
+	.cong_control = (void *)ca_update_cong_control,
+	.ssthresh = (void *)ca_update_ssthresh,
+	.undo_cwnd = (void *)ca_update_undo_cwnd,
+	.name = "tcp_ca_update",	/* same name as ca_update_2 */
+};
+
+SEC(".struct_ops.link")	/* ca_update_2: .init bumps ca2_cnt */
+struct tcp_congestion_ops ca_update_2 = {
+	.init = (void *)ca_update_2_init,
+	.cong_control = (void *)ca_update_cong_control,
+	.ssthresh = (void *)ca_update_ssthresh,
+	.undo_cwnd = (void *)ca_update_undo_cwnd,
+	.name = "tcp_ca_update",	/* same name as ca_update_1 */
+};
+
+SEC(".struct_ops.link")	/* ca_wrong: no .init, different .name */
+struct tcp_congestion_ops ca_wrong = {
+	.cong_control = (void *)ca_update_cong_control,
+	.ssthresh = (void *)ca_update_ssthresh,
+	.undo_cwnd = (void *)ca_update_undo_cwnd,
+	.name = "tcp_ca_wrong",
+};
+
+SEC(".struct_ops")	/* ca_no_link: the only map not in ".struct_ops.link" */
+struct tcp_congestion_ops ca_no_link = {
+	.cong_control = (void *)ca_update_cong_control,
+	.ssthresh = (void *)ca_update_ssthresh,
+	.undo_cwnd = (void *)ca_update_undo_cwnd,
+	.name = "tcp_ca_no_link",
+};