From 5d3848fba665bda2e7140f6fbe4f170cd4c5aec4 Mon Sep 17 00:00:00 2001
From: Kernel Patches Daemon <kernel-patches-bot@fb.com>
Date: Wed, 22 Mar 2023 17:35:56 -0700
Subject: [PATCH 1/9] adding ci files

---
 .github/workflows/lint.yml                    |  22 ++
 .github/workflows/test.yml                    | 356 ++++++++++++++++++
 README                                        |  18 -
 ci/diffs/.keep                                |   0
 ...x-broken-BuildID-for-arm64-and-riscv.patch |  30 ++
 ...btf_put-to-register_btf_id_dtor_kfun.patch |  41 ++
 ...issing-nospec.h-to-avoid-build-error.patch |  45 +++
 ...-pointer-dereference-when-pin-PROG-M.patch |  45 +++
 ...001-selftests-bpf-Add-config.aarch64.patch | 207 ++++++++++
 ...just-expected-error-message-for-test.patch |  43 +++
 ...x-compilation-errors-Assign-a-value-.patch |  50 +++
 ...ests-bpf-Fix-decap_sanity_ns-cleanup.patch |  36 ++
 ...sts-bpf-Initial-DENYLIST-for-aarch64.patch | 118 ++++++
 ...ftests-bpf-Panic-on-hard-soft-lockup.patch |  57 +++
 ...iptables-iptables-legacy-in-the-bpf_.patch |  77 ++++
 ...lect-CONFIG_FUNCTION_ERROR_INJECTION.patch |  45 +++
 ...ally-export-__vdso_sgx_enter_enclave.patch |  44 +++
 ...Set-CONFIG_BOOTPARAM_HUNG_TASK_PANIC.patch |  39 ++
 ...pi-pull-in-stddef.h-to-fix-BPF-selft.patch | 104 +++++
 ci/vmtest/configs/DENYLIST                    |   7 +
 ci/vmtest/configs/DENYLIST.aarch64            |   4 +
 ci/vmtest/configs/DENYLIST.s390x              |   5 +
 ci/vmtest/configs/DENYLIST.x86_64             |   1 +
 ci/vmtest/helpers.sh                          |  38 ++
 ci/vmtest/run_selftests.sh                    | 136 +++++++
 25 files changed, 1550 insertions(+), 18 deletions(-)
 create mode 100644 .github/workflows/lint.yml
 create mode 100644 .github/workflows/test.yml
 create mode 100644 ci/diffs/.keep
 create mode 100644 ci/diffs/0001-Revert-arch-fix-broken-BuildID-for-arm64-and-riscv.patch
 create mode 100644 ci/diffs/0001-bpf-Add-missing-btf_put-to-register_btf_id_dtor_kfun.patch
 create mode 100644 ci/diffs/0001-bpf-Include-missing-nospec.h-to-avoid-build-error.patch
 create mode 100644 ci/diffs/0001-bpftool-Fix-NULL-pointer-dereference-when-pin-PROG-M.patch
 create mode 100644 ci/diffs/0001-selftests-bpf-Add-config.aarch64.patch
 create mode 100644 ci/diffs/0001-selftests-bpf-Adjust-expected-error-message-for-test.patch
 create mode 100644 ci/diffs/0001-selftests-bpf-Fix-compilation-errors-Assign-a-value-.patch
 create mode 100644 ci/diffs/0001-selftests-bpf-Fix-decap_sanity_ns-cleanup.patch
 create mode 100644 ci/diffs/0001-selftests-bpf-Initial-DENYLIST-for-aarch64.patch
 create mode 100644 ci/diffs/0001-selftests-bpf-Panic-on-hard-soft-lockup.patch
 create mode 100644 ci/diffs/0001-selftests-bpf-S-iptables-iptables-legacy-in-the-bpf_.patch
 create mode 100644 ci/diffs/0001-selftests-bpf-Select-CONFIG_FUNCTION_ERROR_INJECTION.patch
 create mode 100644 ci/diffs/0001-x86-vdso-Conditionally-export-__vdso_sgx_enter_enclave.patch
 create mode 100644 ci/diffs/0002-selftests-bpf-Set-CONFIG_BOOTPARAM_HUNG_TASK_PANIC.patch
 create mode 100644 ci/diffs/0002-tools-headers-uapi-pull-in-stddef.h-to-fix-BPF-selft.patch
 create mode 100644 ci/vmtest/configs/DENYLIST
 create mode 100644 ci/vmtest/configs/DENYLIST.aarch64
 create mode 100644 ci/vmtest/configs/DENYLIST.s390x
 create mode 100644 ci/vmtest/configs/DENYLIST.x86_64
 create mode 100755 ci/vmtest/helpers.sh
 create mode 100755 ci/vmtest/run_selftests.sh

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 0000000000000..8805283a42271
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,22 @@
+name: "lint"
+
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+
+jobs:
+  shellcheck:
+    # This workflow gets injected into other Linux repositories, but we don't
+    # want it to run there.
+    if: ${{ github.repository == 'kernel-patches/vmtest' }}
+    name: ShellCheck
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+      - name: Run ShellCheck
+        uses: ludeeus/action-shellcheck@master
+        env:
+          SHELLCHECK_OPTS: --severity=warning --exclude=SC1091
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000000000..2b3d642ee7c96
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,356 @@
+name: bpf-ci
+
+on:
+  pull_request:
+  push:
+    branches:
+      - bpf_base
+      - bpf-next_base
+
+concurrency:
+  group: ci-test-${{ github.ref_name }}
+  cancel-in-progress: true
+
+jobs:
+  set-matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      build-matrix: ${{ steps.set-matrix-impl.outputs.build_matrix }}
+      test-matrix: ${{ steps.set-matrix-impl.outputs.test_matrix }}
+    steps:
+      - id: set-matrix-impl
+        shell: python3 -I {0}
+        run: |
+          from json import dumps
+          from enum import Enum
+          import os
+
+          class Arch(Enum):
+            """
+            CPU architecture supported by CI.
+            """
+            aarch64 = "aarch64"
+            s390x = "s390x"
+            x86_64 = "x86_64"
+
+          def set_output(name, value):
+            """Append a `name=value` line to the file named by $GITHUB_OUTPUT."""
+            with open(os.getenv("GITHUB_OUTPUT"), "a") as output_file:
+              print(f"{name}={value}", file=output_file)
+
+          def generate_test_config(test):
+            """Build the per-test matrix settings for the test named `test`."""
+            is_experimental = test.endswith("_parallel")
+            # While in experimental mode, parallel jobs may get stuck
+            # anywhere, including in user space where the kernel won't detect
+            # a problem and panic. We add a second layer of (smaller) timeouts
+            # here such that if we get stuck in a parallel run, we hit this
+            # timeout and fail without affecting the overall job success (as
+            # would be the case if we hit the job-wide timeout). For
+            # non-experimental jobs, 360 is the default which will be
+            # superseded by the overall workflow timeout (but we need to
+            # specify something).
+            timeout_minutes = 30 if is_experimental else 360
+            return {
+              "test": test,
+              "continue_on_error": is_experimental,
+              "timeout_minutes": timeout_minutes,
+            }
+
+          matrix = [
+            {"kernel": "LATEST", "runs_on": [], "arch": Arch.x86_64.value, "toolchain": "gcc", "llvm-version": "16"},
+            {"kernel": "LATEST", "runs_on": [], "arch": Arch.x86_64.value, "toolchain": "llvm", "llvm-version": "15"},
+            {"kernel": "LATEST", "runs_on": [], "arch": Arch.x86_64.value, "toolchain": "llvm", "llvm-version": "16"},
+            {"kernel": "LATEST", "runs_on": [], "arch": Arch.aarch64.value, "toolchain": "gcc", "llvm-version": "16"},
+            {"kernel": "LATEST", "runs_on": [], "arch": Arch.aarch64.value, "toolchain": "llvm", "llvm-version": "15"},
+            {"kernel": "LATEST", "runs_on": [], "arch": Arch.aarch64.value, "toolchain": "llvm", "llvm-version": "16"},
+            {"kernel": "LATEST", "runs_on": [], "arch": Arch.s390x.value, "toolchain": "gcc", "llvm-version": "16", "parallel_tests": False},
+          ]
+          self_hosted_repos = [
+            "kernel-patches/bpf",
+            "kernel-patches/vmtest",
+          ]
+
+          for idx in range(len(matrix) - 1, -1, -1):
+            if matrix[idx]['toolchain'] == 'gcc':
+              matrix[idx]['toolchain_full'] = 'gcc'
+            else:
+              matrix[idx]['toolchain_full'] = 'llvm-' + matrix[idx]['llvm-version']
+          # Only a few repositories within "kernel-patches" use self-hosted runners.
+          if "${{ github.repository_owner }}" != "kernel-patches" or "${{ github.repository }}" not in self_hosted_repos:
+            # Outside of those repositories, we only run on x86_64 GH hosted runners (ubuntu-latest)
+            for idx in range(len(matrix) - 1, -1, -1):
+              if matrix[idx]["arch"] != Arch.x86_64.value:
+                del matrix[idx]
+              else:
+                matrix[idx]["runs_on"] = ["ubuntu-latest"]
+          else:
+            # Otherwise, run on (self-hosted, arch) runners
+            for idx in range(len(matrix) - 1, -1, -1):
+              matrix[idx]["runs_on"].extend(["self-hosted", matrix[idx]["arch"]])
+
+          build_matrix = {"include": matrix}
+          set_output("build_matrix", dumps(build_matrix))
+
+          def get_tests(config):
+            """Return the test names to run for `config` (a build matrix entry)."""
+            tests = [
+              "test_progs",
+              "test_progs_parallel",
+              "test_progs_no_alu32",
+              "test_progs_no_alu32_parallel",
+              "test_maps",
+              "test_verifier",
+            ]
+            run_parallel = config.get("parallel_tests", True)
+            return [t for t in tests if run_parallel or not t.endswith("_parallel")]
+
+          test_matrix = {"include": [{**config, **generate_test_config(test)}
+                                      for config in matrix
+                                      for test in get_tests(config)
+                                    ]}
+          set_output("test_matrix", dumps(test_matrix))
+  build:
+    name: build for ${{ matrix.arch }} with ${{ matrix.toolchain_full }}
+    needs: set-matrix
+    runs-on: ${{ matrix.runs_on }}
+    timeout-minutes: 100
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJSON(needs.set-matrix.outputs.build-matrix) }}
+    env:
+      KERNEL: ${{ matrix.kernel }}
+      REPO_ROOT: ${{ github.workspace }}
+      REPO_PATH: ""
+      KBUILD_OUTPUT: kbuild-output/
+    steps:
+      - uses: actions/checkout@v3
+        # We fetch an actual bit of history here to facilitate incremental
+        # builds (which may check out some earlier upstream change).
+        with:
+          fetch-depth: 50
+      - if: ${{ github.repository == 'kernel-patches/vmtest' }}
+        name: Download bpf-next tree
+        uses: libbpf/ci/get-linux-source@master
+        with:
+          dest: '.kernel'
+      - if: ${{ github.repository == 'kernel-patches/vmtest' }}
+        name: Move linux source in place
+        shell: bash
+        run: |
+          rm -rf .kernel/.git
+          cp -rf .kernel/. .
+          rm -rf .kernel
+      - name: Get commit meta-data
+        id: get-commit-metadata
+        shell: bash
+        run: |
+          if [ "${{ github.event_name }}" = 'push' ]; then
+            branch="${{ github.ref_name }}"
+          else
+            branch="${{ github.base_ref }}"
+          fi
+          echo "branch=${branch}" >> "${GITHUB_OUTPUT}"
+
+          upstream=$(echo "${branch}" | sed 's@_base$@@')
+          # --verify --quiet prints the SHA if the ref exists and nothing otherwise.
+          commit="$(
+            git rev-parse --verify --quiet "origin/${upstream}" \
+              || (
+                git fetch --quiet --prune --no-tags --depth=1 --no-recurse-submodules origin "+refs/heads/${upstream}:refs/remotes/origin/${upstream}" \
+                  && git rev-parse "origin/${upstream}"
+              )
+          )"
+
+          echo "timestamp=$(TZ=utc git show --format='%cd' --no-patch --date=iso-strict-local "${commit}")" >> "${GITHUB_OUTPUT}"
+          echo "commit=${commit}" >> "${GITHUB_OUTPUT}"
+          echo "Most recent upstream commit is ${commit}"
+      - name: Pull recent KBUILD_OUTPUT contents
+        uses: actions/cache@v3
+        with:
+          path: ${{ env.KBUILD_OUTPUT }}
+          key: kbuild-output-${{ matrix.arch }}-${{ matrix.toolchain_full }}-${{ steps.get-commit-metadata.outputs.branch }}-${{ steps.get-commit-metadata.outputs.timestamp }}-${{ steps.get-commit-metadata.outputs.commit }}
+          restore-keys: |
+            kbuild-output-${{ matrix.arch }}-${{ matrix.toolchain_full }}-${{ steps.get-commit-metadata.outputs.branch }}-${{ steps.get-commit-metadata.outputs.timestamp }}-
+            kbuild-output-${{ matrix.arch }}-${{ matrix.toolchain_full }}-${{ steps.get-commit-metadata.outputs.branch }}-
+            kbuild-output-${{ matrix.arch }}-${{ matrix.toolchain_full }}-
+      - name: Prepare incremental build
+        shell: bash
+        run: |
+          set -e -u
+
+          # $1 - the SHA-1 to fetch and check out
+          fetch_and_checkout() {
+            local build_base_sha="${1}"
+
+            # If cached artifacts became stale for one reason or another, we
+            # may not have the build base SHA available. Fetch it and retry.
+            git fetch origin "${build_base_sha}" && git checkout --quiet "${build_base_sha}"
+          }
+
+          # $1 - value of KBUILD_OUTPUT
+          clear_cache_artifacts() {
+            local kbuild_output="${1}"
+            echo "Unable to find earlier upstream ref. Discarding KBUILD_OUTPUT contents..."
+            rm --recursive --force "${kbuild_output}"
+            mkdir "${kbuild_output}"
+            false
+          }
+
+          # Make sources older than cached objects, then switch back to the prior ref.
+          # $1 - value of KBUILD_OUTPUT; $2 - current time in ISO 8601 format
+          restore_source_code_times() {
+            local kbuild_output="${1}"
+            local current_time="${2}"
+            local src_time obj_time
+            src_time="$(date --iso-8601=ns --date="${current_time} - 2 minutes")"
+            obj_time="$(date --iso-8601=ns --date="${current_time} - 1 minute")"
+            git ls-files -z | xargs --null --max-args=10000 touch -m --no-create --date="${src_time}"
+            find "${kbuild_output}" -type f -print0 | xargs --null --max-args=10000 touch -m --no-create --date="${obj_time}"
+            git checkout --quiet -
+            echo "Adjusted src and obj time stamps relative to system time"
+          }
+
+          mkdir --parents "${KBUILD_OUTPUT}"
+          current_time="$(date --iso-8601=ns)"
+
+          if [ -f "${KBUILD_OUTPUT}/.build-base-sha" ]; then
+            build_base_sha="$(cat "${KBUILD_OUTPUT}/.build-base-sha")"
+            echo "Setting up base build state for ${build_base_sha}"
+
+            (
+              git checkout --quiet "${build_base_sha}" \
+                || fetch_and_checkout "${build_base_sha}" \
+                || clear_cache_artifacts "${KBUILD_OUTPUT}"
+            ) && restore_source_code_times "${KBUILD_OUTPUT}" "${current_time}"
+          else
+            echo "No previous build data found"
+          fi
+
+          echo -n "${{ steps.get-commit-metadata.outputs.commit }}" > "${KBUILD_OUTPUT}/.build-base-sha"
+      - uses: libbpf/ci/patch-kernel@master
+        with:
+          patches-root: '${{ github.workspace }}/ci/diffs'
+          repo-root: '${{ github.workspace }}'
+      - name: Setup build environment
+        uses: libbpf/ci/setup-build-env@llvm-version
+        with:
+          llvm-version: ${{ matrix.llvm-version }}
+      - name: Build kernel image
+        uses: libbpf/ci/build-linux@llvm-version
+        with:
+          arch: ${{ matrix.arch }}
+          toolchain: ${{ matrix.toolchain }}
+          kbuild-output: ${{ env.KBUILD_OUTPUT }}
+          max-make-jobs: 32
+          llvm-version: ${{ matrix.llvm-version }}
+      - if: ${{ github.event_name != 'push' }}
+        name: Build selftests
+        uses: libbpf/ci/build-selftests@llvm-version
+        with:
+          toolchain: ${{ matrix.toolchain }}
+          kbuild-output: ${{ env.KBUILD_OUTPUT }}
+          max-make-jobs: 32
+          llvm-version: ${{ matrix.llvm-version }}
+      - if: ${{ github.event_name != 'push' }}
+        name: Build samples
+        uses: libbpf/ci/build-samples@llvm-version
+        with:
+          toolchain: ${{ matrix.toolchain }}
+          kbuild-output: ${{ env.KBUILD_OUTPUT }}
+          max-make-jobs: 32
+          llvm-version: ${{ matrix.llvm-version }}
+      - if: ${{ github.event_name != 'push' }}
+        name: Tar artifacts
+        run: |
+          # Remove intermediate object files that we have no use for. Ideally
+          # we'd just exclude them from tar below, but it does not provide
+          # options to express the precise constraints.
+          find selftests/ -name "*.o" -a ! -name "*.bpf.o" -print0 | \
+            xargs --null --max-args=10000 rm
+
+          # Strip debug information, which is excessively large (consuming
+          # bandwidth) while not actually being used (the kernel does not use
+          # DWARF to symbolize stacktraces).
+          strip --strip-debug "${KBUILD_OUTPUT}"/vmlinux
+
+          file_list=""
+          if [ "${{ github.repository }}" == "kernel-patches/vmtest" ]; then
+            # Package up a bunch of additional infrastructure to support running
+            # 'make kernelrelease' and bpf tool checks later on.
+            file_list="$(find . -iname Makefile | xargs) \
+              scripts/ \
+              tools/testing/selftests/bpf/ \
+              tools/include/ \
+              tools/bpf/bpftool/";
+          fi
+          # zstd is installed by default in the runner images.
+          tar -cf - \
+            "${KBUILD_OUTPUT}"/.config \
+            "${KBUILD_OUTPUT}"/$(KBUILD_OUTPUT="${KBUILD_OUTPUT}" make -s image_name) \
+            "${KBUILD_OUTPUT}"/include/config/auto.conf \
+            "${KBUILD_OUTPUT}"/include/generated/autoconf.h \
+            "${KBUILD_OUTPUT}"/vmlinux \
+            ${file_list} \
+            --exclude '*.cmd' \
+            --exclude '*.d' \
+            --exclude '*.h' \
+            --exclude '*.output' \
+            selftests/bpf/ | zstd -T0 -19 -o vmlinux-${{ matrix.arch }}-${{ matrix.toolchain_full }}.tar.zst
+      - if: ${{ github.event_name != 'push' }}
+        name: Remove KBUILD_OUTPUT contents
+        shell: bash
+        run: |
+          # Remove $KBUILD_OUTPUT to prevent cache creation for pull requests.
+          # Only on pushed changes are build artifacts actually cached, because
+          # of github.com/actions/cache's cache isolation logic.
+          rm -rf "${KBUILD_OUTPUT}"
+      - if: ${{ github.event_name != 'push' }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: vmlinux-${{ matrix.arch }}-${{ matrix.toolchain_full }}
+          if-no-files-found: error
+          path: vmlinux-${{ matrix.arch }}-${{ matrix.toolchain_full }}.tar.zst
+  test:
+    if: ${{ github.event_name != 'push' }}
+    name: ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain_full }}
+    needs: [set-matrix, build]
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJSON(needs.set-matrix.outputs.test-matrix) }}
+    runs-on: ${{ matrix.runs_on }}
+    timeout-minutes: 100
+    env:
+      KERNEL: ${{ matrix.kernel }}
+      REPO_ROOT: ${{ github.workspace }}
+      REPO_PATH: ""
+      KBUILD_OUTPUT: kbuild-output/
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/download-artifact@v3
+        with:
+          name: vmlinux-${{ matrix.arch }}-${{ matrix.toolchain_full }}
+          path: .
+      - name: Untar artifacts
+        # zstd is installed by default in the runner images.
+        run: zstd -d -T0  vmlinux-${{ matrix.arch }}-${{ matrix.toolchain_full }}.tar.zst --stdout | tar -xf -
+      - name: Prepare rootfs
+        uses: libbpf/ci/prepare-rootfs@master
+        with:
+          project-name: 'libbpf'
+          arch: ${{ matrix.arch }}
+          kernel: ${{ matrix.kernel }}
+          kernel-root: '.'
+          kbuild-output: ${{ env.KBUILD_OUTPUT }}
+          image-output: '/tmp/root.img'
+          test: ${{ matrix.test }}
+      - name: Run selftests
+        uses: libbpf/ci/run-qemu@master
+        continue-on-error: ${{ matrix.continue_on_error }}
+        timeout-minutes: ${{ matrix.timeout_minutes }}
+        with:
+          arch: ${{ matrix.arch}}
+          img: '/tmp/root.img'
+          vmlinuz: '${{ github.workspace }}/vmlinuz'
+          kernel-root: '.'
+          max-cpu: 8
diff --git a/README b/README
index 669ac7c322927..e69de29bb2d1d 100644
--- a/README
+++ b/README
@@ -1,18 +0,0 @@
-Linux kernel
-============
-
-There are several guides for kernel developers and users. These guides can
-be rendered in a number of formats, like HTML and PDF. Please read
-Documentation/admin-guide/README.rst first.
-
-In order to build the documentation, use ``make htmldocs`` or
-``make pdfdocs``.  The formatted documentation can also be read online at:
-
-    https://www.kernel.org/doc/html/latest/
-
-There are various text files in the Documentation/ subdirectory,
-several of them using the Restructured Text markup notation.
-
-Please read the Documentation/process/changes.rst file, as it contains the
-requirements for building and running the kernel, and information about
-the problems which may result by upgrading your kernel.
diff --git a/ci/diffs/.keep b/ci/diffs/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/ci/diffs/0001-Revert-arch-fix-broken-BuildID-for-arm64-and-riscv.patch b/ci/diffs/0001-Revert-arch-fix-broken-BuildID-for-arm64-and-riscv.patch
new file mode 100644
index 0000000000000..3d8ea87a1dbda
--- /dev/null
+++ b/ci/diffs/0001-Revert-arch-fix-broken-BuildID-for-arm64-and-riscv.patch
@@ -0,0 +1,30 @@
+From cb50dac513235c6996b9d26f959886ba1d7be607 Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman <eddyz87@gmail.com>
+Date: Fri, 6 Jan 2023 13:59:26 +0200
+Subject: [PATCH] Revert "arch: fix broken BuildID for arm64 and riscv"
+
+This reverts commit 99cb0d917ffa1ab628bb67364ca9b162c07699b1.
+---
+ include/asm-generic/vmlinux.lds.h | 5 -----
+ 1 file changed, 5 deletions(-)
+
+diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
+index 659bf3b31c91..a94219e9916f 100644
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -891,12 +891,7 @@
+ #define PRINTK_INDEX
+ #endif
+ 
+-/*
+- * Discard .note.GNU-stack, which is emitted as PROGBITS by the compiler.
+- * Otherwise, the type of .notes section would become PROGBITS instead of NOTES.
+- */
+ #define NOTES								\
+-	/DISCARD/ : { *(.note.GNU-stack) }				\
+ 	.notes : AT(ADDR(.notes) - LOAD_OFFSET) {			\
+ 		BOUNDED_SECTION_BY(.note.*, _notes)			\
+ 	} NOTES_HEADERS							\
+-- 
+2.39.0
+
diff --git a/ci/diffs/0001-bpf-Add-missing-btf_put-to-register_btf_id_dtor_kfun.patch b/ci/diffs/0001-bpf-Add-missing-btf_put-to-register_btf_id_dtor_kfun.patch
new file mode 100644
index 0000000000000..4fcc0146effc3
--- /dev/null
+++ b/ci/diffs/0001-bpf-Add-missing-btf_put-to-register_btf_id_dtor_kfun.patch
@@ -0,0 +1,41 @@
+From 74bc3a5acc82f020d2e126f56c535d02d1e74e37 Mon Sep 17 00:00:00 2001
+From: Jiri Olsa <jolsa@kernel.org>
+Date: Fri, 20 Jan 2023 13:21:48 +0100
+Subject: [PATCH] bpf: Add missing btf_put to register_btf_id_dtor_kfuncs
+
+We take the BTF reference before we register dtors and we need
+to put it back when it's done.
+
+We probably won't see a problem with kernel BTF, but module BTF
+would stay loaded (because of the extra ref) even when its module
+is removed.
+
+Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+Fixes: 5ce937d613a4 ("bpf: Populate pairs of btf_id and destructor kfunc in btf")
+Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+Signed-off-by: Jiri Olsa <jolsa@kernel.org>
+Link: https://lore.kernel.org/r/20230120122148.1522359-1-jolsa@kernel.org
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+---
+ kernel/bpf/btf.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
+index f7dd8af06413..b7017cae6fd1 100644
+--- a/kernel/bpf/btf.c
++++ b/kernel/bpf/btf.c
+@@ -7782,9 +7782,9 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c
+ 
+ 	sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL);
+ 
+-	return 0;
+ end:
+-	btf_free_dtor_kfunc_tab(btf);
++	if (ret)
++		btf_free_dtor_kfunc_tab(btf);
+ 	btf_put(btf);
+ 	return ret;
+ }
+-- 
+2.39.1
+
diff --git a/ci/diffs/0001-bpf-Include-missing-nospec.h-to-avoid-build-error.patch b/ci/diffs/0001-bpf-Include-missing-nospec.h-to-avoid-build-error.patch
new file mode 100644
index 0000000000000..669bde57d04f0
--- /dev/null
+++ b/ci/diffs/0001-bpf-Include-missing-nospec.h-to-avoid-build-error.patch
@@ -0,0 +1,45 @@
+From 345d24a91c79f408e355c8b7e873ccde0f097eea Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Wed, 22 Feb 2023 10:50:48 +0800
+Subject: [PATCH] bpf: Include missing nospec.h to avoid build error.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Commit 74e19ef0ff80 ("uaccess: Add speculation barrier to copy_from_user()")
+defines a default barrier_nospec() and removes the
+such a build error:
+
+  CC      kernel/bpf/core.o
+kernel/bpf/core.c: In function ‘___bpf_prog_run’:
+kernel/bpf/core.c:1913:3: error: implicit declaration of function ‘barrier_nospec’; did you mean ‘barrier_data’? [-Werror=implicit-function-declaration]
+   barrier_nospec();
+   ^~~~~~~~~~~~~~
+   barrier_data
+cc1: some warnings being treated as errors
+
+So include nospec.h to avoid the build error.
+
+Fixes: 74e19ef0ff80 ("uaccess: Add speculation barrier to copy_from_user()")
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Link: https://lore.kernel.org/r/20230222025048.3677315-1-chenhuacai@loongson.cn
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+---
+ kernel/bpf/core.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
+index 933869983e2a..b297e9f60ca1 100644
+--- a/kernel/bpf/core.c
++++ b/kernel/bpf/core.c
+@@ -34,6 +34,7 @@
+ #include <linux/log2.h>
+ #include <linux/bpf_verifier.h>
+ #include <linux/nodemask.h>
++#include <linux/nospec.h>
+ #include <linux/bpf_mem_alloc.h>
+ #include <linux/memcontrol.h>
+ 
+-- 
+2.30.2
+
diff --git a/ci/diffs/0001-bpftool-Fix-NULL-pointer-dereference-when-pin-PROG-M.patch b/ci/diffs/0001-bpftool-Fix-NULL-pointer-dereference-when-pin-PROG-M.patch
new file mode 100644
index 0000000000000..bfb7de10b4793
--- /dev/null
+++ b/ci/diffs/0001-bpftool-Fix-NULL-pointer-dereference-when-pin-PROG-M.patch
@@ -0,0 +1,45 @@
+From 0dd340f3549863e1289a872057743c9a177d1e3f Mon Sep 17 00:00:00 2001
+From: Pu Lehui <pulehui@huawei.com>
+Date: Wed, 2 Nov 2022 16:40:34 +0800
+Subject: [PATCH 1/2] bpftool: Fix NULL pointer dereference when pin {PROG,
+ MAP, LINK} without FILE
+
+When using bpftool to pin {PROG, MAP, LINK} without FILE,
+segmentation fault will occur. The reason is that the lack
+of FILE will cause strlen to trigger NULL pointer dereference.
+The corresponding stacktrace is shown below:
+
+do_pin
+  do_pin_any
+    do_pin_fd
+      mount_bpffs_for_pin
+        strlen(name) <- NULL pointer dereference
+
+Fix it by adding validation to the common process.
+
+Fixes: 75a1e792c335 ("tools: bpftool: Allow all prog/map handles for pinning objects")
+Signed-off-by: Pu Lehui <pulehui@huawei.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Quentin Monnet <quentin@isovalent.com>
+Link: https://lore.kernel.org/bpf/20221102084034.3342995-1-pulehui@huaweicloud.com
+---
+ tools/bpf/bpftool/common.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
+index e4d33bc8bbbf..653c130a0aaa 100644
+--- a/tools/bpf/bpftool/common.c
++++ b/tools/bpf/bpftool/common.c
+@@ -302,6 +302,9 @@ int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***))
+ 	int err;
+ 	int fd;
+ 
++	if (!REQ_ARGS(3))
++		return -EINVAL;
++
+ 	fd = get_fd(&argc, &argv);
+ 	if (fd < 0)
+ 		return fd;
+-- 
+2.30.2
+
diff --git a/ci/diffs/0001-selftests-bpf-Add-config.aarch64.patch b/ci/diffs/0001-selftests-bpf-Add-config.aarch64.patch
new file mode 100644
index 0000000000000..1797384c1b5c8
--- /dev/null
+++ b/ci/diffs/0001-selftests-bpf-Add-config.aarch64.patch
@@ -0,0 +1,207 @@
+From ec99451f0a488e50aaf0ce467db8771411edc407 Mon Sep 17 00:00:00 2001
+From: Manu Bretelle <chantr4@gmail.com>
+Date: Fri, 21 Oct 2022 14:06:59 -0700
+Subject: [PATCH] selftests/bpf: Add config.aarch64
+
+config.aarch64, similarly to config.{s390x,x86_64} is a config enabling
+building a kernel on aarch64 to be used in bpf's
+selftests/kernel-patches CI.
+
+Signed-off-by: Manu Bretelle <chantr4@gmail.com>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/bpf/20221021210701.728135-3-chantr4@gmail.com
+---
+ tools/testing/selftests/bpf/config.aarch64 | 181 +++++++++++++++++++++
+ 1 file changed, 181 insertions(+)
+ create mode 100644 tools/testing/selftests/bpf/config.aarch64
+
+diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64
+new file mode 100644
+index 000000000000..1f0437644186
+--- /dev/null
++++ b/tools/testing/selftests/bpf/config.aarch64
+@@ -0,0 +1,181 @@
++CONFIG_9P_FS=y
++CONFIG_ARCH_VEXPRESS=y
++CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
++CONFIG_ARM_SMMU_V3=y
++CONFIG_ATA=y
++CONFIG_AUDIT=y
++CONFIG_BINFMT_MISC=y
++CONFIG_BLK_CGROUP=y
++CONFIG_BLK_DEV_BSGLIB=y
++CONFIG_BLK_DEV_INITRD=y
++CONFIG_BLK_DEV_IO_TRACE=y
++CONFIG_BLK_DEV_RAM=y
++CONFIG_BLK_DEV_SD=y
++CONFIG_BONDING=y
++CONFIG_BPFILTER=y
++CONFIG_BPF_JIT_ALWAYS_ON=y
++CONFIG_BPF_JIT_DEFAULT_ON=y
++CONFIG_BPF_PRELOAD_UMD=y
++CONFIG_BPF_PRELOAD=y
++CONFIG_BRIDGE=m
++CONFIG_CGROUP_CPUACCT=y
++CONFIG_CGROUP_DEVICE=y
++CONFIG_CGROUP_FREEZER=y
++CONFIG_CGROUP_HUGETLB=y
++CONFIG_CGROUP_NET_CLASSID=y
++CONFIG_CGROUP_PERF=y
++CONFIG_CGROUP_PIDS=y
++CONFIG_CGROUP_SCHED=y
++CONFIG_CGROUPS=y
++CONFIG_CHECKPOINT_RESTORE=y
++CONFIG_CHR_DEV_SG=y
++CONFIG_COMPAT=y
++CONFIG_CPUSETS=y
++CONFIG_CRASH_DUMP=y
++CONFIG_CRYPTO_USER_API_RNG=y
++CONFIG_CRYPTO_USER_API_SKCIPHER=y
++CONFIG_DEBUG_ATOMIC_SLEEP=y
++CONFIG_DEBUG_INFO_BTF=y
++CONFIG_DEBUG_INFO_DWARF4=y
++CONFIG_DEBUG_LIST=y
++CONFIG_DEBUG_LOCKDEP=y
++CONFIG_DEBUG_NOTIFIERS=y
++CONFIG_DEBUG_PAGEALLOC=y
++CONFIG_DEBUG_SECTION_MISMATCH=y
++CONFIG_DEBUG_SG=y
++CONFIG_DETECT_HUNG_TASK=y
++CONFIG_DEVTMPFS_MOUNT=y
++CONFIG_DEVTMPFS=y
++CONFIG_DRM_VIRTIO_GPU=y
++CONFIG_DRM=y
++CONFIG_DUMMY=y
++CONFIG_EXPERT=y
++CONFIG_EXT4_FS_POSIX_ACL=y
++CONFIG_EXT4_FS_SECURITY=y
++CONFIG_EXT4_FS=y
++CONFIG_FANOTIFY=y
++CONFIG_FB=y
++CONFIG_FUNCTION_PROFILER=y
++CONFIG_FUSE_FS=y
++CONFIG_FW_CFG_SYSFS_CMDLINE=y
++CONFIG_FW_CFG_SYSFS=y
++CONFIG_GDB_SCRIPTS=y
++CONFIG_HAVE_EBPF_JIT=y
++CONFIG_HAVE_KPROBES_ON_FTRACE=y
++CONFIG_HAVE_KPROBES=y
++CONFIG_HAVE_KRETPROBES=y
++CONFIG_HEADERS_INSTALL=y
++CONFIG_HIGH_RES_TIMERS=y
++CONFIG_HUGETLBFS=y
++CONFIG_HW_RANDOM_VIRTIO=y
++CONFIG_HW_RANDOM=y
++CONFIG_HZ_100=y
++CONFIG_IDLE_PAGE_TRACKING=y
++CONFIG_IKHEADERS=y
++CONFIG_INET6_ESP=y
++CONFIG_INET_ESP=y
++CONFIG_INET=y
++CONFIG_INPUT_EVDEV=y
++CONFIG_IP_ADVANCED_ROUTER=y
++CONFIG_IP_MULTICAST=y
++CONFIG_IP_MULTIPLE_TABLES=y
++CONFIG_IP_NF_IPTABLES=y
++CONFIG_IPV6_SEG6_LWTUNNEL=y
++CONFIG_IPVLAN=y
++CONFIG_JUMP_LABEL=y
++CONFIG_KERNEL_UNCOMPRESSED=y
++CONFIG_KPROBES_ON_FTRACE=y
++CONFIG_KPROBES=y
++CONFIG_KRETPROBES=y
++CONFIG_KSM=y
++CONFIG_LATENCYTOP=y
++CONFIG_LIVEPATCH=y
++CONFIG_LOCK_STAT=y
++CONFIG_MACVLAN=y
++CONFIG_MACVTAP=y
++CONFIG_MAGIC_SYSRQ=y
++CONFIG_MAILBOX=y
++CONFIG_MEMCG=y
++CONFIG_MEMORY_HOTPLUG=y
++CONFIG_MEMORY_HOTREMOVE=y
++CONFIG_NAMESPACES=y
++CONFIG_NET_9P_VIRTIO=y
++CONFIG_NET_9P=y
++CONFIG_NET_ACT_BPF=y
++CONFIG_NET_ACT_GACT=y
++CONFIG_NETDEVICES=y
++CONFIG_NETFILTER_XT_MATCH_BPF=y
++CONFIG_NETFILTER_XT_TARGET_MARK=y
++CONFIG_NET_KEY=y
++CONFIG_NET_SCH_FQ=y
++CONFIG_NET_VRF=y
++CONFIG_NET=y
++CONFIG_NF_TABLES=y
++CONFIG_NLMON=y
++CONFIG_NO_HZ_IDLE=y
++CONFIG_NR_CPUS=256
++CONFIG_NUMA=y
++CONFIG_OVERLAY_FS=y
++CONFIG_PACKET_DIAG=y
++CONFIG_PACKET=y
++CONFIG_PANIC_ON_OOPS=y
++CONFIG_PARTITION_ADVANCED=y
++CONFIG_PCI_HOST_GENERIC=y
++CONFIG_PCI=y
++CONFIG_PL320_MBOX=y
++CONFIG_POSIX_MQUEUE=y
++CONFIG_PROC_KCORE=y
++CONFIG_PROFILING=y
++CONFIG_PROVE_LOCKING=y
++CONFIG_PTDUMP_DEBUGFS=y
++CONFIG_RC_DEVICES=y
++CONFIG_RC_LOOPBACK=y
++CONFIG_RTC_CLASS=y
++CONFIG_RTC_DRV_PL031=y
++CONFIG_RT_GROUP_SCHED=y
++CONFIG_SAMPLE_SECCOMP=y
++CONFIG_SAMPLES=y
++CONFIG_SCHED_AUTOGROUP=y
++CONFIG_SCHED_TRACER=y
++CONFIG_SCSI_CONSTANTS=y
++CONFIG_SCSI_LOGGING=y
++CONFIG_SCSI_SCAN_ASYNC=y
++CONFIG_SCSI_VIRTIO=y
++CONFIG_SCSI=y
++CONFIG_SECURITY_NETWORK=y
++CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
++CONFIG_SERIAL_AMBA_PL011=y
++CONFIG_STACK_TRACER=y
++CONFIG_STATIC_KEYS_SELFTEST=y
++CONFIG_SYSVIPC=y
++CONFIG_TASK_DELAY_ACCT=y
++CONFIG_TASK_IO_ACCOUNTING=y
++CONFIG_TASKSTATS=y
++CONFIG_TASK_XACCT=y
++CONFIG_TCG_TIS=y
++CONFIG_TCG_TPM=y
++CONFIG_TCP_CONG_ADVANCED=y
++CONFIG_TCP_CONG_DCTCP=y
++CONFIG_TLS=y
++CONFIG_TMPFS_POSIX_ACL=y
++CONFIG_TMPFS=y
++CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
++CONFIG_TRANSPARENT_HUGEPAGE=y
++CONFIG_TUN=y
++CONFIG_UNIX=y
++CONFIG_UPROBES=y
++CONFIG_USELIB=y
++CONFIG_USER_NS=y
++CONFIG_VETH=y
++CONFIG_VIRTIO_BALLOON=y
++CONFIG_VIRTIO_BLK=y
++CONFIG_VIRTIO_CONSOLE=y
++CONFIG_VIRTIO_FS=y
++CONFIG_VIRTIO_INPUT=y
++CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
++CONFIG_VIRTIO_MMIO=y
++CONFIG_VIRTIO_NET=y
++CONFIG_VIRTIO_PCI=y
++CONFIG_VLAN_8021Q=y
++CONFIG_VSOCKETS=y
++CONFIG_XFRM_USER=y
+-- 
+2.38.1
+
diff --git a/ci/diffs/0001-selftests-bpf-Adjust-expected-error-message-for-test.patch b/ci/diffs/0001-selftests-bpf-Adjust-expected-error-message-for-test.patch
new file mode 100644
index 0000000000000..11d5233552b07
--- /dev/null
+++ b/ci/diffs/0001-selftests-bpf-Adjust-expected-error-message-for-test.patch
@@ -0,0 +1,43 @@
+From fa95252a62bc120fb1f939c46991280ba1375196 Mon Sep 17 00:00:00 2001
+From: Song Liu <song@kernel.org>
+Date: Thu, 2 Mar 2023 13:49:44 -0800
+Subject: [PATCH] selftests/bpf: Adjust expected error message for
+ test_global_func10.c
+
+For test programs that are expected to fail the verifier, we use
+__failure __msg(...) to specify the expected error message. However, the
+error message may change slightly among different versions of llvm. For
+example, in [1], the program compiled by llvm-17 gets
+
+  "invalid indirect access to stack ..."
+
+but the same program compiled by llvm-16 gets
+
+  "invalid indirect read from stack ..."
+
+To avoid such issues, only compare the "invalid indirect" part of the error
+message for test_global_func10.c.
+
+[1] https://github.com/kernel-patches/bpf/actions/runs/4288572350/jobs/7533052993
+
+Signed-off-by: Song Liu <song@kernel.org>
+---
+ tools/testing/selftests/bpf/progs/test_global_func10.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c
+index 98327bdbbfd2..7a591d946027 100644
+--- a/tools/testing/selftests/bpf/progs/test_global_func10.c
++++ b/tools/testing/selftests/bpf/progs/test_global_func10.c
+@@ -22,7 +22,7 @@ __noinline int foo(const struct Big *big)
+ }
+ 
+ SEC("cgroup_skb/ingress")
+-__failure __msg("invalid indirect read from stack")
++__failure __msg("invalid indirect")
+ int global_func10(struct __sk_buff *skb)
+ {
+ 	const struct Small small = {.x = skb->len };
+-- 
+2.30.2
+
diff --git a/ci/diffs/0001-selftests-bpf-Fix-compilation-errors-Assign-a-value-.patch b/ci/diffs/0001-selftests-bpf-Fix-compilation-errors-Assign-a-value-.patch
new file mode 100644
index 0000000000000..14a62c2d5d6c8
--- /dev/null
+++ b/ci/diffs/0001-selftests-bpf-Fix-compilation-errors-Assign-a-value-.patch
@@ -0,0 +1,50 @@
+From 11e456cae91e9044cb12c2b037b52c9b268925f7 Mon Sep 17 00:00:00 2001
+From: Rong Tao <rongtao@cestc.cn>
+Date: Fri, 24 Feb 2023 23:10:02 +0800
+Subject: [PATCH bpf] selftests/bpf: Fix compilation errors: Assign a value to
+ a constant
+
+Commit bc292ab00f6c("mm: introduce vma->vm_flags wrapper functions")
+turns the vm_flags into a const variable.
+
+Added bpf_find_vma test in commit f108662b27c9("selftests/bpf: Add tests
+for bpf_find_vma") to assign values to variables that declare const in
+find_vma_fail1.c programs, which is an error to the compiler and does not
+test BPF verifiers. It is better to replace 'const vm_flags_t vm_flags'
+with 'unsigned long vm_start' for testing.
+
+    $ make -C tools/testing/selftests/bpf/ -j8
+    ...
+    progs/find_vma_fail1.c:16:16: error: cannot assign to non-static data
+    member 'vm_flags' with const-qualified type 'const vm_flags_t' (aka
+    'const unsigned long')
+            vma->vm_flags |= 0x55;
+            ~~~~~~~~~~~~~ ^
+    ../tools/testing/selftests/bpf/tools/include/vmlinux.h:1898:20:
+    note: non-static data member 'vm_flags' declared const here
+                    const vm_flags_t vm_flags;
+                    ~~~~~~~~~~~`~~~~~~^~~~~~~~
+
+Signed-off-by: Rong Tao <rongtao@cestc.cn>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/bpf/tencent_CB281722B3C1BD504C16CDE586CACC2BE706@qq.com
+---
+ tools/testing/selftests/bpf/progs/find_vma_fail1.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail1.c b/tools/testing/selftests/bpf/progs/find_vma_fail1.c
+index b3b326b8e2d1..47d5dedff554 100644
+--- a/tools/testing/selftests/bpf/progs/find_vma_fail1.c
++++ b/tools/testing/selftests/bpf/progs/find_vma_fail1.c
+@@ -13,7 +13,7 @@ static long write_vma(struct task_struct *task, struct vm_area_struct *vma,
+ 		      struct callback_ctx *data)
+ {
+ 	/* writing to vma, which is illegal */
+-	vma->vm_flags |= 0x55;
++	vma->vm_start = 0xffffffffff600000;
+ 
+ 	return 0;
+ }
+-- 
+2.39.0
+
diff --git a/ci/diffs/0001-selftests-bpf-Fix-decap_sanity_ns-cleanup.patch b/ci/diffs/0001-selftests-bpf-Fix-decap_sanity_ns-cleanup.patch
new file mode 100644
index 0000000000000..41fd6e38e8678
--- /dev/null
+++ b/ci/diffs/0001-selftests-bpf-Fix-decap_sanity_ns-cleanup.patch
@@ -0,0 +1,36 @@
+From:   Ilya Leoshkevich <iii@linux.ibm.com>
+Subject: [PATCH bpf-next 07/24] selftests/bpf: Fix decap_sanity_ns cleanup
+Date:   Wed, 25 Jan 2023 22:38:00 +0100
+
+decap_sanity prints the following on the 1st run:
+
+    decap_sanity: sh: 1: Syntax error: Bad fd number
+
+and the following on the 2nd run:
+
+    Cannot create namespace file "/run/netns/decap_sanity_ns": File exists
+
+The problem is that the cleanup command has a typo and does nothing.
+Fix the typo.
+
+Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+---
+ tools/testing/selftests/bpf/prog_tests/decap_sanity.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
+index 0b2f73b88c53..2853883b7cbb 100644
+--- a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
++++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
+@@ -80,6 +80,6 @@ void test_decap_sanity(void)
+ 		bpf_tc_hook_destroy(&qdisc_hook);
+ 		close_netns(nstoken);
+ 	}
+-	system("ip netns del " NS_TEST " >& /dev/null");
++	system("ip netns del " NS_TEST " &> /dev/null");
+ 	decap_sanity__destroy(skel);
+ }
+-- 
+2.39.1
+
+
diff --git a/ci/diffs/0001-selftests-bpf-Initial-DENYLIST-for-aarch64.patch b/ci/diffs/0001-selftests-bpf-Initial-DENYLIST-for-aarch64.patch
new file mode 100644
index 0000000000000..7d3a35de2a636
--- /dev/null
+++ b/ci/diffs/0001-selftests-bpf-Initial-DENYLIST-for-aarch64.patch
@@ -0,0 +1,118 @@
+From 94d52a19180726ee8ddc70bea75d6605e1dd6029 Mon Sep 17 00:00:00 2001
+From: Manu Bretelle <chantr4@gmail.com>
+Date: Fri, 21 Oct 2022 14:07:01 -0700
+Subject: [PATCH] selftests/bpf: Initial DENYLIST for aarch64
+
+Those tests are currently failing on aarch64, ignore them until they are
+individually addressed.
+
+Using this deny list, vmtest.sh ran successfully using
+
+LLVM_STRIP=llvm-strip-16 CLANG=clang-16 \
+    tools/testing/selftests/bpf/vmtest.sh  -- \
+        ./test_progs -d \
+            \"$(cat tools/testing/selftests/bpf/DENYLIST{,.aarch64} \
+                | cut -d'#' -f1 \
+                | sed -e 's/^[[:space:]]*//' \
+                      -e 's/[[:space:]]*$//' \
+                | tr -s '\n' ','\
+            )\"
+
+Signed-off-by: Manu Bretelle <chantr4@gmail.com>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/bpf/20221021210701.728135-5-chantr4@gmail.com
+---
+ tools/testing/selftests/bpf/DENYLIST.aarch64 | 81 ++++++++++++++++++++
+ 1 file changed, 81 insertions(+)
+ create mode 100644 tools/testing/selftests/bpf/DENYLIST.aarch64
+
+diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
+new file mode 100644
+index 000000000000..09416d5d2e33
+--- /dev/null
++++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
+@@ -0,0 +1,81 @@
++bloom_filter_map                                 # libbpf: prog 'check_bloom': failed to attach: ERROR: strerror_r(-524)=22
++bpf_cookie/lsm
++bpf_cookie/multi_kprobe_attach_api
++bpf_cookie/multi_kprobe_link_api
++bpf_cookie/trampoline
++bpf_loop/check_callback_fn_stop                  # link unexpected error: -524
++bpf_loop/check_invalid_flags
++bpf_loop/check_nested_calls
++bpf_loop/check_non_constant_callback
++bpf_loop/check_nr_loops
++bpf_loop/check_null_callback_ctx
++bpf_loop/check_stack
++bpf_mod_race                                     # bpf_mod_kfunc_race__attach unexpected error: -524 (errno 524)
++bpf_tcp_ca/dctcp_fallback
++btf_dump/btf_dump: var_data                      # find type id unexpected find type id: actual -2 < expected 0
++cgroup_hierarchical_stats                        # attach unexpected error: -524 (errno 524)
++d_path/basic                                     # setup attach failed: -524
++deny_namespace                                   # attach unexpected error: -524 (errno 524)
++fentry_fexit                                     # fentry_attach unexpected error: -1 (errno 524)
++fentry_test                                      # fentry_attach unexpected error: -1 (errno 524)
++fexit_sleep                                      # fexit_attach fexit attach failed: -1
++fexit_stress                                     # fexit attach unexpected fexit attach: actual -524 < expected 0
++fexit_test                                       # fexit_attach unexpected error: -1 (errno 524)
++get_func_args_test                               # get_func_args_test__attach unexpected error: -524 (errno 524) (trampoline)
++get_func_ip_test                                 # get_func_ip_test__attach unexpected error: -524 (errno 524) (trampoline)
++htab_update/reenter_update
++kfree_skb                                        # attach fentry unexpected error: -524 (trampoline)
++kfunc_call/subprog                               # extern (var ksym) 'bpf_prog_active': not found in kernel BTF
++kfunc_call/subprog_lskel                         # skel unexpected error: -2
++kfunc_dynptr_param/dynptr_data_null              # libbpf: prog 'dynptr_data_null': failed to attach: ERROR: strerror_r(-524)=22
++kprobe_multi_test/attach_api_addrs               # bpf_program__attach_kprobe_multi_opts unexpected error: -95
++kprobe_multi_test/attach_api_pattern             # bpf_program__attach_kprobe_multi_opts unexpected error: -95
++kprobe_multi_test/attach_api_syms                # bpf_program__attach_kprobe_multi_opts unexpected error: -95
++kprobe_multi_test/bench_attach                   # bpf_program__attach_kprobe_multi_opts unexpected error: -95
++kprobe_multi_test/link_api_addrs                 # link_fd unexpected link_fd: actual -95 < expected 0
++kprobe_multi_test/link_api_syms                  # link_fd unexpected link_fd: actual -95 < expected 0
++kprobe_multi_test/skel_api                       # kprobe_multi__attach unexpected error: -524 (errno 524)
++ksyms_module/libbpf                              # 'bpf_testmod_ksym_percpu': not found in kernel BTF
++ksyms_module/lskel                               # test_ksyms_module_lskel__open_and_load unexpected error: -2
++libbpf_get_fd_by_id_opts                         # test_libbpf_get_fd_by_id_opts__attach unexpected error: -524 (errno 524)
++lookup_key                                       # test_lookup_key__attach unexpected error: -524 (errno 524)
++lru_bug                                          # lru_bug__attach unexpected error: -524 (errno 524)
++modify_return                                    # modify_return__attach failed unexpected error: -524 (errno 524)
++module_attach                                    # skel_attach skeleton attach failed: -524
++mptcp/base                                       # run_test mptcp unexpected error: -524 (errno 524)
++netcnt                                           # packets unexpected packets: actual 10001 != expected 10000
++recursion                                        # skel_attach unexpected error: -524 (errno 524)
++ringbuf                                          # skel_attach skeleton attachment failed: -1
++setget_sockopt                                   # attach_cgroup unexpected error: -524
++sk_storage_tracing                               # test_sk_storage_tracing__attach unexpected error: -524 (errno 524)
++skc_to_unix_sock                                 # could not attach BPF object unexpected error: -524 (errno 524)
++socket_cookie                                    # prog_attach unexpected error: -524
++stacktrace_build_id                              # compare_stack_ips stackmap vs. stack_amap err -1 errno 2
++task_local_storage/exit_creds                    # skel_attach unexpected error: -524 (errno 524)
++task_local_storage/recursion                     # skel_attach unexpected error: -524 (errno 524)
++test_bprm_opts                                   # attach attach failed: -524
++test_ima                                         # attach attach failed: -524
++test_local_storage                               # attach lsm attach failed: -524
++test_lsm                                         # test_lsm_first_attach unexpected error: -524 (errno 524)
++test_overhead                                    # attach_fentry unexpected error: -524
++timer                                            # timer unexpected error: -524 (errno 524)
++timer_crash                                      # timer_crash__attach unexpected error: -524 (errno 524)
++timer_mim                                        # timer_mim unexpected error: -524 (errno 524)
++trace_printk                                     # trace_printk__attach unexpected error: -1 (errno 524)
++trace_vprintk                                    # trace_vprintk__attach unexpected error: -1 (errno 524)
++tracing_struct                                   # tracing_struct__attach unexpected error: -524 (errno 524)
++trampoline_count                                 # attach_prog unexpected error: -524
++unpriv_bpf_disabled                              # skel_attach unexpected error: -524 (errno 524)
++user_ringbuf/test_user_ringbuf_post_misaligned   # misaligned_skel unexpected error: -524 (errno 524)
++user_ringbuf/test_user_ringbuf_post_producer_wrong_offset
++user_ringbuf/test_user_ringbuf_post_larger_than_ringbuf_sz
++user_ringbuf/test_user_ringbuf_basic             # ringbuf_basic_skel unexpected error: -524 (errno 524)
++user_ringbuf/test_user_ringbuf_sample_full_ring_buffer
++user_ringbuf/test_user_ringbuf_post_alignment_autoadjust
++user_ringbuf/test_user_ringbuf_overfill
++user_ringbuf/test_user_ringbuf_discards_properly_ignored
++user_ringbuf/test_user_ringbuf_loop
++user_ringbuf/test_user_ringbuf_msg_protocol
++user_ringbuf/test_user_ringbuf_blocking_reserve
++verify_pkcs7_sig                                 # test_verify_pkcs7_sig__attach unexpected error: -524 (errno 524)
++vmlinux                                          # skel_attach skeleton attach failed: -524
+-- 
+2.30.2
+
diff --git a/ci/diffs/0001-selftests-bpf-Panic-on-hard-soft-lockup.patch b/ci/diffs/0001-selftests-bpf-Panic-on-hard-soft-lockup.patch
new file mode 100644
index 0000000000000..08f2352bc1992
--- /dev/null
+++ b/ci/diffs/0001-selftests-bpf-Panic-on-hard-soft-lockup.patch
@@ -0,0 +1,57 @@
+From 5ed88f81511ce695692f0510ab3ca17eee68eff6 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Daniel=20M=C3=BCller?= <deso@posteo.net>
+Date: Tue, 25 Oct 2022 23:15:46 +0000
+Subject: [PATCH] selftests/bpf: Panic on hard/soft lockup
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When running tests, we should probably accept any help we can get when
+it comes to detecting issues early or making them more debuggable. We
+have seen a few cases where a test_progs_noalu32 run, for example,
+encountered a soft lockup and stopped making progress. It was only
+interrupted once we hit the overall test timeout [0]. We can not and do
+not want to necessarily rely on test timeouts, because those rely on
+infrastructure provided by the environment we run in (and which is not
+present in tools/testing/selftests/bpf/vmtest.sh, for example).
+To that end, let's enable panics on soft as well as hard lockups to fail
+fast should we encounter one. That's happening in the configuration
+intended to be used for selftests (including when using vmtest.sh or
+when running in BPF CI).
+
+[0] https://github.com/kernel-patches/bpf/runs/7844499997
+
+Signed-off-by: Daniel Müller <deso@posteo.net>
+Link: https://lore.kernel.org/r/20221025231546.811766-1-deso@posteo.net
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+---
+ tools/testing/selftests/bpf/config        | 2 ++
+ tools/testing/selftests/bpf/config.x86_64 | 1 -
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
+index 921356..7a99a6 100644
+--- a/tools/testing/selftests/bpf/config
++++ b/tools/testing/selftests/bpf/config
+@@ -1,4 +1,6 @@
+ CONFIG_BLK_DEV_LOOP=y
++CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
++CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+ CONFIG_BPF=y
+ CONFIG_BPF_EVENTS=y
+ CONFIG_BPF_JIT=y
+diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64
+index 21ce5e..dd97d6 100644
+--- a/tools/testing/selftests/bpf/config.x86_64
++++ b/tools/testing/selftests/bpf/config.x86_64
+@@ -18,7 +18,6 @@ CONFIG_BLK_DEV_RAM=y
+ CONFIG_BLK_DEV_RAM_SIZE=16384
+ CONFIG_BLK_DEV_THROTTLING=y
+ CONFIG_BONDING=y
+-CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
+ CONFIG_BOOTTIME_TRACING=y
+ CONFIG_BPF_JIT_ALWAYS_ON=y
+ CONFIG_BPF_KPROBE_OVERRIDE=y
+-- 
+2.30.2
+
diff --git a/ci/diffs/0001-selftests-bpf-S-iptables-iptables-legacy-in-the-bpf_.patch b/ci/diffs/0001-selftests-bpf-S-iptables-iptables-legacy-in-the-bpf_.patch
new file mode 100644
index 0000000000000..e1e5f01a59930
--- /dev/null
+++ b/ci/diffs/0001-selftests-bpf-S-iptables-iptables-legacy-in-the-bpf_.patch
@@ -0,0 +1,77 @@
+From de9c8d848d90cf2e53aced50b350827442ca5a4f Mon Sep 17 00:00:00 2001
+From: Martin KaFai Lau <martin.lau@kernel.org>
+Date: Wed, 12 Oct 2022 15:12:35 -0700
+Subject: [PATCH] selftests/bpf: S/iptables/iptables-legacy/ in the bpf_nf and
+ xdp_synproxy test
+
+The recent vm image in CI has reported error in selftests that use
+the iptables command.  Manu Bretelle has pointed out the difference
+in the recent vm image that the iptables is sym-linked to the iptables-nft.
+With this knowledge,  I can also reproduce the CI error by manually running
+with the 'iptables-nft'.
+
+This patch is to replace the iptables command with iptables-legacy
+to unblock the CI tests.
+
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Acked-by: David Vernet <void@manifault.com>
+Link: https://lore.kernel.org/bpf/20221012221235.3529719-1-martin.lau@linux.dev
+---
+ tools/testing/selftests/bpf/prog_tests/bpf_nf.c       | 6 +++---
+ tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c | 6 +++---
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+index 8a838ea8bdf3..c8ba4009e4ab 100644
+--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
++++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+@@ -49,14 +49,14 @@ static int connect_to_server(int srv_fd)
+ 
+ static void test_bpf_nf_ct(int mode)
+ {
+-	const char *iptables = "iptables -t raw %s PREROUTING -j CONNMARK --set-mark 42/0";
++	const char *iptables = "iptables-legacy -t raw %s PREROUTING -j CONNMARK --set-mark 42/0";
+ 	int srv_fd = -1, client_fd = -1, srv_client_fd = -1;
+ 	struct sockaddr_in peer_addr = {};
+ 	struct test_bpf_nf *skel;
+ 	int prog_fd, err;
+ 	socklen_t len;
+ 	u16 srv_port;
+-	char cmd[64];
++	char cmd[128];
+ 	LIBBPF_OPTS(bpf_test_run_opts, topts,
+ 		.data_in = &pkt_v4,
+ 		.data_size_in = sizeof(pkt_v4),
+@@ -69,7 +69,7 @@ static void test_bpf_nf_ct(int mode)
+ 
+ 	/* Enable connection tracking */
+ 	snprintf(cmd, sizeof(cmd), iptables, "-A");
+-	if (!ASSERT_OK(system(cmd), "iptables"))
++	if (!ASSERT_OK(system(cmd), cmd))
+ 		goto end;
+ 
+ 	srv_port = (mode == TEST_XDP) ? 5005 : 5006;
+diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
+index 75550a40e029..c72083885b6d 100644
+--- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
++++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
+@@ -94,12 +94,12 @@ static void test_synproxy(bool xdp)
+ 	SYS("sysctl -w net.ipv4.tcp_syncookies=2");
+ 	SYS("sysctl -w net.ipv4.tcp_timestamps=1");
+ 	SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0");
+-	SYS("iptables -t raw -I PREROUTING \
++	SYS("iptables-legacy -t raw -I PREROUTING \
+ 	    -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack");
+-	SYS("iptables -t filter -A INPUT \
++	SYS("iptables-legacy -t filter -A INPUT \
+ 	    -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \
+ 	    -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460");
+-	SYS("iptables -t filter -A INPUT \
++	SYS("iptables-legacy -t filter -A INPUT \
+ 	    -i tmp1 -m state --state INVALID -j DROP");
+ 
+ 	ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \
+-- 
+2.30.2
+
diff --git a/ci/diffs/0001-selftests-bpf-Select-CONFIG_FUNCTION_ERROR_INJECTION.patch b/ci/diffs/0001-selftests-bpf-Select-CONFIG_FUNCTION_ERROR_INJECTION.patch
new file mode 100644
index 0000000000000..b4fc1bb37dbdc
--- /dev/null
+++ b/ci/diffs/0001-selftests-bpf-Select-CONFIG_FUNCTION_ERROR_INJECTION.patch
@@ -0,0 +1,45 @@
+From e561fc8365da0215f68cfcffb6c309d1d7eb8c2b Mon Sep 17 00:00:00 2001
+From: Song Liu <song@kernel.org>
+Date: Tue, 13 Dec 2022 14:05:00 -0800
+Subject: [PATCH bpf-next] selftests/bpf: Select
+ CONFIG_FUNCTION_ERROR_INJECTION
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+BPF selftests require CONFIG_FUNCTION_ERROR_INJECTION to work. However,
+CONFIG_FUNCTION_ERROR_INJECTION is no longer 'y' by default after recent
+changes. As a result, we are seeing errors like the following from BPF CI:
+
+   bpf_testmod_test_read() is not modifiable
+   __x64_sys_setdomainname is not sleepable
+   __x64_sys_getpgid is not sleepable
+
+Fix this by explicitly selecting CONFIG_FUNCTION_ERROR_INJECTION in the
+selftest config.
+
+Fixes: a4412fdd49dc ("error-injection: Add prompt for function error injection")
+Reported-by: Daniel Müller <deso@posteo.net>
+Signed-off-by: Song Liu <song@kernel.org>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Acked-by: Daniel Müller <deso@posteo.net>
+Link: https://lore.kernel.org/bpf/20221213220500.3427947-1-song@kernel.org
+---
+ tools/testing/selftests/bpf/config | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
+index 612f699dc4f7..63cd4ab70171 100644
+--- a/tools/testing/selftests/bpf/config
++++ b/tools/testing/selftests/bpf/config
+@@ -16,6 +16,7 @@ CONFIG_CRYPTO_USER_API_HASH=y
+ CONFIG_DYNAMIC_FTRACE=y
+ CONFIG_FPROBE=y
+ CONFIG_FTRACE_SYSCALLS=y
++CONFIG_FUNCTION_ERROR_INJECTION=y
+ CONFIG_FUNCTION_TRACER=y
+ CONFIG_GENEVE=y
+ CONFIG_IKCONFIG=y
+-- 
+2.30.2
+
diff --git a/ci/diffs/0001-x86-vdso-Conditionally-export-__vdso_sgx_enter_enclave.patch b/ci/diffs/0001-x86-vdso-Conditionally-export-__vdso_sgx_enter_enclave.patch
new file mode 100644
index 0000000000000..c5f90daa56d3b
--- /dev/null
+++ b/ci/diffs/0001-x86-vdso-Conditionally-export-__vdso_sgx_enter_enclave.patch
@@ -0,0 +1,44 @@
+Recently, ld.lld moved from '--undefined-version' to
+'--no-undefined-version' as the default, which breaks building the vDSO
+when CONFIG_X86_SGX is not set:
+
+  ld.lld: error: version script assignment of 'LINUX_2.6' to symbol '__vdso_sgx_enter_enclave' failed: symbol not defined
+
+__vdso_sgx_enter_enclave is only included in the vDSO when
+CONFIG_X86_SGX is set. Only export it if it will be present in the final
+object, which clears up the error.
+
+Link: https://github.com/ClangBuiltLinux/linux/issues/1756
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+---
+
+It would be nice if this could be picked up for an -rc release but I
+won't argue otherwise.
+
+Alternatively, we could add '--undefined-version' to the vDSO ldflags
+but this does not seem unreasonable to me.
+
+ arch/x86/entry/vdso/vdso.lds.S | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
+index 4bf48462fca7..e8c60ae7a7c8 100644
+--- a/arch/x86/entry/vdso/vdso.lds.S
++++ b/arch/x86/entry/vdso/vdso.lds.S
+@@ -27,7 +27,9 @@ VERSION {
+ 		__vdso_time;
+ 		clock_getres;
+ 		__vdso_clock_getres;
++#ifdef CONFIG_X86_SGX
+ 		__vdso_sgx_enter_enclave;
++#endif
+ 	local: *;
+ 	};
+ }
+
+base-commit: f0c4d9fc9cc9462659728d168387191387e903cc
+
+-- 
+2.38.1
+
+
diff --git a/ci/diffs/0002-selftests-bpf-Set-CONFIG_BOOTPARAM_HUNG_TASK_PANIC.patch b/ci/diffs/0002-selftests-bpf-Set-CONFIG_BOOTPARAM_HUNG_TASK_PANIC.patch
new file mode 100644
index 0000000000000..2db04e0b9670c
--- /dev/null
+++ b/ci/diffs/0002-selftests-bpf-Set-CONFIG_BOOTPARAM_HUNG_TASK_PANIC.patch
@@ -0,0 +1,39 @@
+From 91c614a38376374ff39c4cc678c2c5cd22cbf8fc Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Daniel=20M=C3=BCller?= <deso@posteo.net>
+Date: Wed, 26 Oct 2022 13:52:28 -0700
+Subject: [PATCH] selftests/bpf: Set CONFIG_BOOTPARAM_HUNG_TASK_PANIC
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+With commit 5ed88f81511ce ("selftests/bpf: Panic on hard/soft lockup")
+we enabled the means to panic test runs quickly when they are stuck
+because of a hard or soft lockup. What we did not include is the means
+to do the same when a hung task is detected. The reasoning there was
+that virtualization effects may lead to delays and, hence, spurious
+failures.
+However, we see the occasional CI timeout when running the test_progs
+selftest with internal parallelism enabled (-j) that is not caused by a
+hard or soft lockup but due to a hung task. Hence, it makes sense to
+enable this detection as well. But let's give it some mileage first
+before upstreaming, though, and only include it in BPF CI.
+
+Signed-off-by: Daniel Müller <deso@posteo.net>
+---
+ tools/testing/selftests/bpf/config | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
+index 7a99a6..6c6821a 100644
+--- a/tools/testing/selftests/bpf/config
++++ b/tools/testing/selftests/bpf/config
+@@ -1,5 +1,6 @@
+ CONFIG_BLK_DEV_LOOP=y
+ CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
++CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y
+ CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+ CONFIG_BPF=y
+ CONFIG_BPF_EVENTS=y
+-- 
+2.30.2
+
diff --git a/ci/diffs/0002-tools-headers-uapi-pull-in-stddef.h-to-fix-BPF-selft.patch b/ci/diffs/0002-tools-headers-uapi-pull-in-stddef.h-to-fix-BPF-selft.patch
new file mode 100644
index 0000000000000..9070b76442dda
--- /dev/null
+++ b/ci/diffs/0002-tools-headers-uapi-pull-in-stddef.h-to-fix-BPF-selft.patch
@@ -0,0 +1,104 @@
+From 038fafe1d1c92b8488e5e71ebea819050219dd6f Mon Sep 17 00:00:00 2001
+From: Andrii Nakryiko <andrii@kernel.org>
+Date: Wed, 2 Nov 2022 11:04:17 -0700
+Subject: [PATCH 2/2] tools headers uapi: pull in stddef.h to fix BPF selftests
+ build in CI
+
+With recent sync of linux/in.h tools/include headers are now relying on
+__DECLARE_FLEX_ARRAY macro, which isn't itself defined inside
+tools/include headers anywhere and is instead assumed to be present in
+system-wide UAPI header. This breaks isolated environments that don't
+have kernel UAPI headers installed system-wide, like BPF CI ([0]).
+
+To fix this, bring in include/uapi/linux/stddef.h into tools/include. We
+can't just copy/paste it, though, it has to be processed with
+scripts/headers_install.sh, which has a dependency on scripts/unifdef.
+So the full command to (re-)generate stddef.h for inclusion into
+tools/include directory is:
+
+  $ make scripts_unifdef && \
+    cp $KBUILD_OUTPUT/scripts/unifdef scripts/ && \
+    scripts/headers_install.sh include/uapi/linux/stddef.h tools/include/uapi/linux/stddef.h
+
+This assumes KBUILD_OUTPUT envvar is set and used for out-of-tree builds.
+
+  [0] https://github.com/kernel-patches/bpf/actions/runs/3379432493/jobs/5610982609
+
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Fixes: 036b8f5b8970 ("tools headers uapi: Update linux/in.h copy")
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+---
+ tools/include/uapi/linux/in.h     |  1 +
+ tools/include/uapi/linux/stddef.h | 47 +++++++++++++++++++++++++++++++
+ 2 files changed, 48 insertions(+)
+ create mode 100644 tools/include/uapi/linux/stddef.h
+
+diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
+index f243ce665f74..07a4cb149305 100644
+--- a/tools/include/uapi/linux/in.h
++++ b/tools/include/uapi/linux/in.h
+@@ -20,6 +20,7 @@
+ #define _UAPI_LINUX_IN_H
+ 
+ #include <linux/types.h>
++#include <linux/stddef.h>
+ #include <linux/libc-compat.h>
+ #include <linux/socket.h>
+ 
+diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h
+new file mode 100644
+index 000000000000..bb6ea517efb5
+--- /dev/null
++++ b/tools/include/uapi/linux/stddef.h
+@@ -0,0 +1,47 @@
++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
++#ifndef _LINUX_STDDEF_H
++#define _LINUX_STDDEF_H
++
++
++
++#ifndef __always_inline
++#define __always_inline __inline__
++#endif
++
++/**
++ * __struct_group() - Create a mirrored named and anonymous struct
++ *
++ * @TAG: The tag name for the named sub-struct (usually empty)
++ * @NAME: The identifier name of the mirrored sub-struct
++ * @ATTRS: Any struct attributes (usually empty)
++ * @MEMBERS: The member declarations for the mirrored structs
++ *
++ * Used to create an anonymous union of two structs with identical layout
++ * and size: one anonymous and one named. The former's members can be used
++ * normally without sub-struct naming, and the latter can be used to
++ * reason about the start, end, and size of the group of struct members.
++ * The named struct can also be explicitly tagged for layer reuse, as well
++ * as both having struct attributes appended.
++ */
++#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \
++	union { \
++		struct { MEMBERS } ATTRS; \
++		struct TAG { MEMBERS } ATTRS NAME; \
++	}
++
++/**
++ * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union
++ *
++ * @TYPE: The type of each flexible array element
++ * @NAME: The name of the flexible array member
++ *
++ * In order to have a flexible array member in a union or alone in a
++ * struct, it needs to be wrapped in an anonymous struct with at least 1
++ * named member, but that member can be empty.
++ */
++#define __DECLARE_FLEX_ARRAY(TYPE, NAME)	\
++	struct { \
++		struct { } __empty_ ## NAME; \
++		TYPE NAME[]; \
++	}
++#endif
+-- 
+2.30.2
+
diff --git a/ci/vmtest/configs/DENYLIST b/ci/vmtest/configs/DENYLIST
new file mode 100644
index 0000000000000..e53b4640180e8
--- /dev/null
+++ b/ci/vmtest/configs/DENYLIST
@@ -0,0 +1,7 @@
+# TEMPORARY
+btf_dump/btf_dump: syntax
+kprobe_multi_bench_attach
+core_reloc/enum64val
+core_reloc/size___diff_sz
+core_reloc/type_based___diff_sz
+test_ima	# All of CI is broken on it following 6.3-rc1 merge
diff --git a/ci/vmtest/configs/DENYLIST.aarch64 b/ci/vmtest/configs/DENYLIST.aarch64
new file mode 100644
index 0000000000000..487b19ede4b61
--- /dev/null
+++ b/ci/vmtest/configs/DENYLIST.aarch64
@@ -0,0 +1,4 @@
+cgrp_local_storage                  # libbpf: prog 'update_cookie_tracing': failed to attach: ERROR: strerror_r(-524)=22
+core_reloc_btfgen                   # run_core_reloc_tests:FAIL:run_btfgen unexpected error: 32512 (errno 22)
+usdt/multispec                      # usdt_300_bad_attach unexpected pointer: 0x558c63d8f0
+xdp_bonding                         # whole test suite is very unstable on aarch64
diff --git a/ci/vmtest/configs/DENYLIST.s390x b/ci/vmtest/configs/DENYLIST.s390x
new file mode 100644
index 0000000000000..e6829c94bdaae
--- /dev/null
+++ b/ci/vmtest/configs/DENYLIST.s390x
@@ -0,0 +1,5 @@
+deny_namespace                           # not yet in bpf denylist
+tc_redirect/tc_redirect_dtime            # very flaky
+lru_bug                                  # not yet in bpf-next denylist
+usdt/basic                               # failing verifier due to bounds check after LLVM update
+usdt/multispec                           # same as above
diff --git a/ci/vmtest/configs/DENYLIST.x86_64 b/ci/vmtest/configs/DENYLIST.x86_64
new file mode 100644
index 0000000000000..6fc3413daab9f
--- /dev/null
+++ b/ci/vmtest/configs/DENYLIST.x86_64
@@ -0,0 +1 @@
+netcnt              # with kvm enabled, fail with packets unexpected packets: actual 10001 != expected 10000
diff --git a/ci/vmtest/helpers.sh b/ci/vmtest/helpers.sh
new file mode 100755
index 0000000000000..c44d0983156d0
--- /dev/null
+++ b/ci/vmtest/helpers.sh
@@ -0,0 +1,38 @@
+# shellcheck shell=bash
+
+# $1 - start or end
+# $2 - fold identifier, no spaces
+# $3 - fold section description
+foldable() {
+  local YELLOW='\033[1;33m'
+  local NOCOLOR='\033[0m'
+  if [ "$1" = "start" ]; then  # quoted so an empty or multi-word $1 cannot break the test
+    line="::group::$2"
+    if [ -n "${3:-}" ]; then  # the description is optional
+      line="$line - ${YELLOW}$3${NOCOLOR}"
+    fi
+  else
+    line="::endgroup::"
+  fi
+  echo -e "$line"
+}
+
+__print() {
+  local TITLE=""
+  if [[ -n $2 ]]; then
+      TITLE=" title=$2"
+  fi
+  echo "::$1${TITLE}::$3"
+}
+
+# $1 - title
+# $2 - message
+print_error() {
+  __print error "$1" "$2"  # quoted so a title/message containing spaces stays one argument
+}
+
+# $1 - title
+# $2 - message
+print_notice() {
+  __print notice "$1" "$2"  # quoted so a title/message containing spaces stays one argument
+}
diff --git a/ci/vmtest/run_selftests.sh b/ci/vmtest/run_selftests.sh
new file mode 100755
index 0000000000000..0c18a331da75b
--- /dev/null
+++ b/ci/vmtest/run_selftests.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+
+# run_selftests.sh will run the tests within /${PROJECT_NAME}/selftests/bpf
+# If no specific test names are given, all tests will be run; otherwise, it
+# will run the tests passed as parameters.
+# There are 2 ways to pass test names:
+# 1) command-line arguments to this script
+# 2) a comma-separated list of test names passed as the `run_tests` boot parameter.
+# Test names passed by either of those methods will be run.
+
+set -euo pipefail
+
+source "$(cd "$(dirname "$0")" && pwd)/helpers.sh"
+
+ARCH=$(uname -m)
+
+STATUS_FILE=/exitstatus
+
+declare -a TEST_NAMES=()
+
+read_lists() {
+	(for path in "$@"; do
+		if [[ -s "$path" ]]; then
+			cat "$path"
+		fi;
+	done) | cut -d'#' -f1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | tr -s '\n' ','
+}
+
+read_test_names() {
+    foldable start read_test_names "Reading test names from boot parameters and command line arguments"
+    # Check if test names were passed as boot parameter.
+    # We expect `run_tests` to be a comma-separated list of test names.
+    IFS=',' read -r -a test_names_from_boot <<< \
+        "$(sed -n 's/.*run_tests=\([^ ]*\).*/\1/p' /proc/cmdline)"
+
+    echo "${#test_names_from_boot[@]} tests extracted from boot parameters: ${test_names_from_boot[*]}"
+    # Sort and only keep unique test names from both boot params and arguments
+    # TEST_NAMES will contain a sorted list of uniq tests to be ran.
+    # Only do this if any of $test_names_from_boot[@] or $@ has elements as
+    # "printf '%s\0'" will otherwise generate an empty element.
+    if [[ ${#test_names_from_boot[@]} -gt 0 || $# -gt 0 ]]
+    then
+        readarray -t TEST_NAMES < \
+            <(printf '%s\0' "${test_names_from_boot[@]}" "$@" | \
+                sort --zero-terminated --unique | \
+                xargs --null --max-args=1)
+    fi
+    foldable end read_test_names
+}
+
+test_progs_helper() {
+  local selftest="test_progs${1}"
+  local args="$2"
+
+  foldable start ${selftest} "Testing ${selftest}"
+  # "&& true" does not change the return code (it is not executed
+  # if the test binary fails), but it prevents exiting on a
+  # failure due to the "set -e".
+  ./${selftest} ${args} ${DENYLIST:+-d"$DENYLIST"} ${ALLOWLIST:+-a"$ALLOWLIST"} && true
+  echo "${selftest}:$?" >>"${STATUS_FILE}"
+  foldable end ${selftest}
+}
+
+test_progs() {
+  test_progs_helper "" ""
+}
+
+test_progs_parallel() {
+  test_progs_helper "" "-j"
+}
+
+test_progs_no_alu32() {
+  test_progs_helper "-no_alu32" ""
+}
+
+test_progs_no_alu32_parallel() {
+  test_progs_helper "-no_alu32" "-j"
+}
+
+test_maps() {
+  foldable start test_maps "Testing test_maps"
+  taskset 0xF ./test_maps && true
+  echo "test_maps:$?" >>"${STATUS_FILE}"
+  foldable end test_maps
+}
+
+test_verifier() {
+  foldable start test_verifier "Testing test_verifier"
+  ./test_verifier && true
+  echo "test_verifier:$?" >>"${STATUS_FILE}"
+  foldable end test_verifier
+}
+
+foldable end vm_init
+
+foldable start kernel_config "Kconfig"
+
+zcat /proc/config.gz
+
+foldable end kernel_config
+
+configs_path=${PROJECT_NAME}/selftests/bpf
+local_configs_path=${PROJECT_NAME}/vmtest/configs
+DENYLIST=$(read_lists \
+	"$configs_path/DENYLIST" \
+	"$configs_path/DENYLIST.${ARCH}" \
+	"$local_configs_path/DENYLIST" \
+	"$local_configs_path/DENYLIST.${ARCH}" \
+)
+ALLOWLIST=$(read_lists \
+	"$configs_path/ALLOWLIST" \
+	"$configs_path/ALLOWLIST.${ARCH}" \
+	"$local_configs_path/ALLOWLIST" \
+	"$local_configs_path/ALLOWLIST.${ARCH}" \
+)
+
+echo "DENYLIST: ${DENYLIST}"
+echo "ALLOWLIST: ${ALLOWLIST}"
+
+cd ${PROJECT_NAME}/selftests/bpf
+
+# populate TEST_NAMES
+read_test_names "$@"
+# if we don't have any test name provided to the script, we run all tests.
+if [ ${#TEST_NAMES[@]} -eq 0 ]; then
+	test_progs
+	test_progs_no_alu32
+	test_maps
+	test_verifier
+else
+	# else we run the tests passed as command-line arguments and through boot
+	# parameter.
+	for test_name in "${TEST_NAMES[@]}"; do
+		"${test_name}"
+	done
+fi

From ba82d044b2f1677aa728360eb528ae7a180de405 Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <kuifeng@meta.com>
Date: Wed, 22 Mar 2023 20:23:58 -0700
Subject: [PATCH 2/9] bpf: Retire the struct_ops map kvalue->refcnt.

We have replaced kvalue->refcnt with synchronize_rcu() to wait for an
RCU grace period.

Maintenance of kvalue->refcnt was a complicated task, as we had to
simultaneously keep track of two reference counts: kvalue->refcnt
itself and the reference count of bpf_map. When kvalue->refcnt reaches
zero, we also have to reduce the reference count on bpf_map - yet these
steps are not performed in an atomic manner and require us to be
vigilant when managing them. By eliminating kvalue->refcnt, we can make
our maintenance more straightforward, as only the refcount of bpf_map
has to be managed now.

To prevent the trampoline image of a struct_ops from being released
while it is still in use, we wait for an RCU grace period. The
setsockopt(TCP_CONGESTION, "...") command allows you to change your
socket's congestion control algorithm and can result in releasing the
old struct_ops implementation. That is fine by itself. However, this
function is also exposed through bpf_setsockopt(), so it may be called
by BPF programs as well. To ensure that the trampoline image belonging
to a struct_ops can be safely called while its method is in use, the
trampoline safeguards the BPF program with rcu_read_lock(). Doing so
prevents any destruction of the associated images before returning
from a trampoline and requires us to wait for an RCU grace period.

Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
---
 include/linux/bpf.h         |  1 +
 kernel/bpf/bpf_struct_ops.c | 73 ++++++++++++++++++++-----------------
 kernel/bpf/syscall.c        |  6 ++-
 3 files changed, 45 insertions(+), 35 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ec0df059f5620..f04098468d7aa 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1945,6 +1945,7 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd);
 struct bpf_map *__bpf_map_get(struct fd f);
 void bpf_map_inc(struct bpf_map *map);
 void bpf_map_inc_with_uref(struct bpf_map *map);
+struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref);
 struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index ba7a94276e3b8..13d373f65dfa2 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -11,6 +11,7 @@
 #include <linux/refcount.h>
 #include <linux/mutex.h>
 #include <linux/btf_ids.h>
+#include <linux/rcupdate_wait.h>
 
 enum bpf_struct_ops_state {
 	BPF_STRUCT_OPS_STATE_INIT,
@@ -249,6 +250,7 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
 	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
 	struct bpf_struct_ops_value *uvalue, *kvalue;
 	enum bpf_struct_ops_state state;
+	s64 refcnt;
 
 	if (unlikely(*(u32 *)key != 0))
 		return -ENOENT;
@@ -267,7 +269,14 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
 	uvalue = value;
 	memcpy(uvalue, st_map->uvalue, map->value_size);
 	uvalue->state = state;
-	refcount_set(&uvalue->refcnt, refcount_read(&kvalue->refcnt));
+
+	/* This value offers the user space a general estimate of how
+	 * many sockets are still utilizing this struct_ops for TCP
+	 * congestion control. The number might not be exact, but it
+	 * should sufficiently meet our present goals.
+	 */
+	refcnt = atomic64_read(&map->refcnt) - atomic64_read(&map->usercnt);
+	refcount_set(&uvalue->refcnt, max_t(s64, refcnt, 0));
 
 	return 0;
 }
@@ -491,7 +500,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 		*(unsigned long *)(udata + moff) = prog->aux->id;
 	}
 
-	refcount_set(&kvalue->refcnt, 1);
 	bpf_map_inc(map);
 
 	set_memory_rox((long)st_map->image, 1);
@@ -536,8 +544,7 @@ static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
 	switch (prev_state) {
 	case BPF_STRUCT_OPS_STATE_INUSE:
 		st_map->st_ops->unreg(&st_map->kvalue.data);
-		if (refcount_dec_and_test(&st_map->kvalue.refcnt))
-			bpf_map_put(map);
+		bpf_map_put(map);
 		return 0;
 	case BPF_STRUCT_OPS_STATE_TOBEFREE:
 		return -EINPROGRESS;
@@ -570,7 +577,7 @@ static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
 	kfree(value);
 }
 
-static void bpf_struct_ops_map_free(struct bpf_map *map)
+static void __bpf_struct_ops_map_free(struct bpf_map *map)
 {
 	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
 
@@ -582,6 +589,24 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
 	bpf_map_area_free(st_map);
 }
 
+static void bpf_struct_ops_map_free(struct bpf_map *map)
+{
+	/* The struct_ops's function may switch to another struct_ops.
+	 *
+	 * For example, bpf_tcp_cc_x->init() may switch to
+	 * another tcp_cc_y by calling
+	 * setsockopt(TCP_CONGESTION, "tcp_cc_y").
+	 * During the switch,  bpf_struct_ops_put(tcp_cc_x) is called
+	 * and its refcount may reach 0 which then free its
+	 * trampoline image while tcp_cc_x is still running.
+	 *
+	 * Thus, a rcu grace period is needed here.
+	 */
+	synchronize_rcu_mult(call_rcu, call_rcu_tasks);
+
+	__bpf_struct_ops_map_free(map);
+}
+
 static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
 {
 	if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 ||
@@ -630,7 +655,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
 				   NUMA_NO_NODE);
 	st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
 	if (!st_map->uvalue || !st_map->links || !st_map->image) {
-		bpf_struct_ops_map_free(map);
+		__bpf_struct_ops_map_free(map);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -676,41 +701,23 @@ const struct bpf_map_ops bpf_struct_ops_map_ops = {
 bool bpf_struct_ops_get(const void *kdata)
 {
 	struct bpf_struct_ops_value *kvalue;
+	struct bpf_struct_ops_map *st_map;
+	struct bpf_map *map;
 
 	kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
+	st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue);
 
-	return refcount_inc_not_zero(&kvalue->refcnt);
-}
-
-static void bpf_struct_ops_put_rcu(struct rcu_head *head)
-{
-	struct bpf_struct_ops_map *st_map;
-
-	st_map = container_of(head, struct bpf_struct_ops_map, rcu);
-	bpf_map_put(&st_map->map);
+	map = __bpf_map_inc_not_zero(&st_map->map, false);
+	return !IS_ERR(map);
 }
 
 void bpf_struct_ops_put(const void *kdata)
 {
 	struct bpf_struct_ops_value *kvalue;
+	struct bpf_struct_ops_map *st_map;
 
 	kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
-	if (refcount_dec_and_test(&kvalue->refcnt)) {
-		struct bpf_struct_ops_map *st_map;
-
-		st_map = container_of(kvalue, struct bpf_struct_ops_map,
-				      kvalue);
-		/* The struct_ops's function may switch to another struct_ops.
-		 *
-		 * For example, bpf_tcp_cc_x->init() may switch to
-		 * another tcp_cc_y by calling
-		 * setsockopt(TCP_CONGESTION, "tcp_cc_y").
-		 * During the switch,  bpf_struct_ops_put(tcp_cc_x) is called
-		 * and its map->refcnt may reach 0 which then free its
-		 * trampoline image while tcp_cc_x is still running.
-		 *
-		 * Thus, a rcu grace period is needed here.
-		 */
-		call_rcu(&st_map->rcu, bpf_struct_ops_put_rcu);
-	}
+	st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue);
+
+	bpf_map_put(&st_map->map);
 }
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 099e9068bcdd8..cff0348a2871b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1303,8 +1303,10 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
 	return map;
 }
 
-/* map_idr_lock should have been held */
-static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
+/* map_idr_lock should have been held or the map should have been
+ * protected by rcu read lock.
+ */
+struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
 {
 	int refold;
 

From df8a566e2a33b48435a904848e96d8e3fb01edd1 Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <kuifeng@meta.com>
Date: Wed, 22 Mar 2023 20:23:59 -0700
Subject: [PATCH 3/9] net: Update an existing TCP congestion control algorithm.

This feature lets you immediately transition to another congestion
control algorithm or implementation with the same name.  Once a name
is updated, new connections will apply this new algorithm.

The purpose is to update a customized algorithm implemented in BPF
struct_ops with a new version on the fly.  The following is an
example of using the userspace API implemented in later BPF patches.

   link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
   .......
   err = bpf_link__update_map(link, skel->maps.ca_update_2);

We first load and register an algorithm implemented in BPF struct_ops,
then swap it out with a new one using the same name. After that, newly
created connections will apply the updated algorithm, while older ones
retain the previous version already applied.

This patch also takes this chance to refactor the ca validation into
the new tcp_validate_congestion_control() function.

Cc: netdev@vger.kernel.org, Eric Dumazet <edumazet@google.com>
Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
---
 include/net/tcp.h   |  3 +++
 net/ipv4/tcp_cong.c | 65 ++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index db9f828e9d1ee..2abb755e6a3a7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1117,6 +1117,9 @@ struct tcp_congestion_ops {
 
 int tcp_register_congestion_control(struct tcp_congestion_ops *type);
 void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
+int tcp_update_congestion_control(struct tcp_congestion_ops *type,
+				  struct tcp_congestion_ops *old_type);
+int tcp_validate_congestion_control(struct tcp_congestion_ops *ca);
 
 void tcp_assign_congestion_control(struct sock *sk);
 void tcp_init_congestion_control(struct sock *sk);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index db8b4b488c314..e677d0bc12add 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -75,14 +75,8 @@ struct tcp_congestion_ops *tcp_ca_find_key(u32 key)
 	return NULL;
 }
 
-/*
- * Attach new congestion control algorithm to the list
- * of available options.
- */
-int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
+int tcp_validate_congestion_control(struct tcp_congestion_ops *ca)
 {
-	int ret = 0;
-
 	/* all algorithms must implement these */
 	if (!ca->ssthresh || !ca->undo_cwnd ||
 	    !(ca->cong_avoid || ca->cong_control)) {
@@ -90,6 +84,20 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
 		return -EINVAL;
 	}
 
+	return 0;
+}
+
+/* Attach new congestion control algorithm to the list
+ * of available options.
+ */
+int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
+{
+	int ret;
+
+	ret = tcp_validate_congestion_control(ca);
+	if (ret)
+		return ret;
+
 	ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
 
 	spin_lock(&tcp_cong_list_lock);
@@ -130,6 +138,49 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
 }
 EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
 
+/* Replace a registered old ca with a new one.
+ *
+ * The new ca must have the same name as the old one, that has been
+ * registered.
+ */
+int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_congestion_ops *old_ca)
+{
+	struct tcp_congestion_ops *existing;
+	int ret;
+
+	ret = tcp_validate_congestion_control(ca);
+	if (ret)
+		return ret;
+
+	ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
+
+	spin_lock(&tcp_cong_list_lock);
+	existing = tcp_ca_find_key(old_ca->key);
+	if (ca->key == TCP_CA_UNSPEC || !existing || strcmp(existing->name, ca->name)) {
+		pr_notice("%s not registered or non-unique key\n", ca->name);
+		ret = -EINVAL;
+	} else if (existing != old_ca) {
+		pr_notice("invalid old congestion control algorithm to replace\n");
+		ret = -EINVAL;
+	} else {
+		/* Add the new one before removing the old one to keep
+		 * one implementation available all the time.
+		 */
+		list_add_tail_rcu(&ca->list, &tcp_cong_list);
+		list_del_rcu(&existing->list);
+		pr_debug("%s updated\n", ca->name);
+	}
+	spin_unlock(&tcp_cong_list_lock);
+
+	/* Wait for outstanding readers to complete before the module or
+	 * struct_ops gets removed entirely. Nothing was unlinked on error.
+	 */
+	if (!ret)
+		synchronize_rcu();
+
+	return ret;
+}
+
 u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca)
 {
 	const struct tcp_congestion_ops *ca;

From b6f06fa61f3fe310622ba66da380b3ce136fb0f9 Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <kuifeng@meta.com>
Date: Wed, 22 Mar 2023 20:24:00 -0700
Subject: [PATCH 4/9] bpf: Create links for BPF struct_ops maps.

Make bpf_link support struct_ops.  Previously, struct_ops were always
used alone without any associated links. Upon updating its value, a
struct_ops would be activated automatically. Yet other BPF program
types require creating a bpf_link for their instances before they
can become active. Now, however, you can create an inactive
struct_ops, and create a link to activate it later.

With bpf_links, struct_ops has a behavior similar to other BPF program
types. You can pin/unpin them from their links and the struct_ops will
be deactivated when its link is removed, whereas previously someone
had to delete the value for it to be deactivated.

bpf_links are responsible for registering their associated
struct_ops. You can only use a struct_ops that has the BPF_F_LINK flag
set to create a bpf_link, while a struct_ops without this flag behaves
in the same manner as before and is registered upon updating its value.

The BPF_LINK_TYPE_STRUCT_OPS serves a dual purpose. Not only is it
used to craft the links for BPF struct_ops programs, but also to
create links for BPF struct_ops themselves.  Since the links of BPF
struct_ops programs are only used to create trampolines internally,
they are never seen in other contexts. Thus, they can be reused for
struct_ops themselves.

To maintain a reference to the map supporting this link, we add
bpf_struct_ops_link as an additional type. The pointer to the map is
RCU-protected; that protection won't be necessary until later in the
patchset.

Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
---
 include/linux/bpf.h            |   7 ++
 include/uapi/linux/bpf.h       |  12 ++-
 kernel/bpf/bpf_struct_ops.c    | 143 ++++++++++++++++++++++++++++++++-
 kernel/bpf/syscall.c           |  23 ++++--
 net/ipv4/bpf_tcp_ca.c          |   8 +-
 tools/include/uapi/linux/bpf.h |  12 ++-
 6 files changed, 190 insertions(+), 15 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f04098468d7aa..8552279efe46b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1518,6 +1518,7 @@ struct bpf_struct_ops {
 			   void *kdata, const void *udata);
 	int (*reg)(void *kdata);
 	void (*unreg)(void *kdata);
+	int (*validate)(void *kdata);
 	const struct btf_type *type;
 	const struct btf_type *value_type;
 	const char *name;
@@ -1552,6 +1553,7 @@ static inline void bpf_module_put(const void *data, struct module *owner)
 	else
 		module_put(owner);
 }
+int bpf_struct_ops_link_create(union bpf_attr *attr);
 
 #ifdef CONFIG_NET
 /* Define it here to avoid the use of forward declaration */
@@ -1592,6 +1594,11 @@ static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map,
 {
 	return -EINVAL;
 }
+static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif
 
 #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 13129df937cde..42f40ee083bf9 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1033,6 +1033,7 @@ enum bpf_attach_type {
 	BPF_PERF_EVENT,
 	BPF_TRACE_KPROBE_MULTI,
 	BPF_LSM_CGROUP,
+	BPF_STRUCT_OPS,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -1266,6 +1267,9 @@ enum {
 
 /* Create a map that is suitable to be an inner map with dynamic max entries */
 	BPF_F_INNER_MAP		= (1U << 12),
+
+/* Create a map that will be registered/unregesitered by the backed bpf_link */
+	BPF_F_LINK		= (1U << 13),
 };
 
 /* Flags for BPF_PROG_QUERY. */
@@ -1507,7 +1511,10 @@ union bpf_attr {
 	} task_fd_query;
 
 	struct { /* struct used by BPF_LINK_CREATE command */
-		__u32		prog_fd;	/* eBPF program to attach */
+		union {
+			__u32		prog_fd;	/* eBPF program to attach */
+			__u32		map_fd;		/* struct_ops to attach */
+		};
 		union {
 			__u32		target_fd;	/* object to attach to */
 			__u32		target_ifindex; /* target ifindex */
@@ -6379,6 +6386,9 @@ struct bpf_link_info {
 		struct {
 			__u32 ifindex;
 		} xdp;
+		struct {
+			__u32 map_id;
+		} struct_ops;
 	};
 } __attribute__((aligned(8)));
 
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 13d373f65dfa2..3c77abfdb70ce 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -17,6 +17,7 @@ enum bpf_struct_ops_state {
 	BPF_STRUCT_OPS_STATE_INIT,
 	BPF_STRUCT_OPS_STATE_INUSE,
 	BPF_STRUCT_OPS_STATE_TOBEFREE,
+	BPF_STRUCT_OPS_STATE_READY,
 };
 
 #define BPF_STRUCT_OPS_COMMON_VALUE			\
@@ -59,6 +60,11 @@ struct bpf_struct_ops_map {
 	struct bpf_struct_ops_value kvalue;
 };
 
+struct bpf_struct_ops_link {
+	struct bpf_link link;
+	struct bpf_map __rcu *map;
+};
+
 #define VALUE_PREFIX "bpf_struct_ops_"
 #define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1)
 
@@ -500,11 +506,29 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 		*(unsigned long *)(udata + moff) = prog->aux->id;
 	}
 
-	bpf_map_inc(map);
+	if (st_map->map.map_flags & BPF_F_LINK) {
+		err = st_ops->validate(kdata);
+		if (err)
+			goto reset_unlock;
+		set_memory_rox((long)st_map->image, 1);
+		/* Let bpf_link handle registration & unregistration.
+		 *
+		 * Pair with smp_load_acquire() during lookup_elem().
+		 */
+		smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_READY);
+		goto unlock;
+	}
 
 	set_memory_rox((long)st_map->image, 1);
 	err = st_ops->reg(kdata);
 	if (likely(!err)) {
+		/* This refcnt increment on the map here after
+		 * 'st_ops->reg()' is secure since the state of the
+		 * map must be set to INIT at this moment, and thus
+		 * bpf_struct_ops_map_delete_elem() can't unregister
+		 * or transition it to TOBEFREE concurrently.
+		 */
+		bpf_map_inc(map);
 		/* Pair with smp_load_acquire() during lookup_elem().
 		 * It ensures the above udata updates (e.g. prog->aux->id)
 		 * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set.
@@ -520,7 +544,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 	 */
 	set_memory_nx((long)st_map->image, 1);
 	set_memory_rw((long)st_map->image, 1);
-	bpf_map_put(map);
 
 reset_unlock:
 	bpf_struct_ops_map_put_progs(st_map);
@@ -538,6 +561,9 @@ static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
 	struct bpf_struct_ops_map *st_map;
 
 	st_map = (struct bpf_struct_ops_map *)map;
+	if (st_map->map.map_flags & BPF_F_LINK)
+		return -EOPNOTSUPP;
+
 	prev_state = cmpxchg(&st_map->kvalue.state,
 			     BPF_STRUCT_OPS_STATE_INUSE,
 			     BPF_STRUCT_OPS_STATE_TOBEFREE);
@@ -610,7 +636,7 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
 static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
 {
 	if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 ||
-	    attr->map_flags || !attr->btf_vmlinux_value_type_id)
+	    (attr->map_flags & ~BPF_F_LINK) || !attr->btf_vmlinux_value_type_id)
 		return -EINVAL;
 	return 0;
 }
@@ -634,6 +660,9 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
 	if (attr->value_size != vt->size)
 		return ERR_PTR(-EINVAL);
 
+	if (attr->map_flags & BPF_F_LINK && !st_ops->validate)
+		return ERR_PTR(-EOPNOTSUPP);
+
 	t = st_ops->type;
 
 	st_map_size = sizeof(*st_map) +
@@ -721,3 +750,111 @@ void bpf_struct_ops_put(const void *kdata)
 
 	bpf_map_put(&st_map->map);
 }
+
+static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map)
+{
+	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
+
+	return map->map_type == BPF_MAP_TYPE_STRUCT_OPS &&
+		map->map_flags & BPF_F_LINK &&
+		/* Pair with smp_store_release() during map_update */
+		smp_load_acquire(&st_map->kvalue.state) == BPF_STRUCT_OPS_STATE_READY;
+}
+
+static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
+{
+	struct bpf_struct_ops_link *st_link;
+	struct bpf_struct_ops_map *st_map;
+
+	st_link = container_of(link, struct bpf_struct_ops_link, link);
+	st_map = (struct bpf_struct_ops_map *)
+		rcu_dereference_protected(st_link->map, true);
+	if (st_map) {
+		/* st_link->map can be NULL if
+		 * bpf_struct_ops_link_create() fails to register.
+		 */
+		st_map->st_ops->unreg(&st_map->kvalue.data);
+		bpf_map_put(&st_map->map);
+	}
+	kfree(st_link);
+}
+
+static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
+					    struct seq_file *seq)
+{
+	struct bpf_struct_ops_link *st_link;
+	struct bpf_map *map;
+
+	st_link = container_of(link, struct bpf_struct_ops_link, link);
+	rcu_read_lock();
+	map = rcu_dereference(st_link->map);
+	seq_printf(seq, "map_id:\t%d\n", map->id);
+	rcu_read_unlock();
+}
+
+static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
+					       struct bpf_link_info *info)
+{
+	struct bpf_struct_ops_link *st_link;
+	struct bpf_map *map;
+
+	st_link = container_of(link, struct bpf_struct_ops_link, link);
+	rcu_read_lock();
+	map = rcu_dereference(st_link->map);
+	info->struct_ops.map_id = map->id;
+	rcu_read_unlock();
+	return 0;
+}
+
+static const struct bpf_link_ops bpf_struct_ops_map_lops = {
+	.dealloc = bpf_struct_ops_map_link_dealloc,
+	.show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
+	.fill_link_info = bpf_struct_ops_map_link_fill_link_info,
+};
+
+int bpf_struct_ops_link_create(union bpf_attr *attr)
+{
+	struct bpf_struct_ops_link *link = NULL;
+	struct bpf_link_primer link_primer;
+	struct bpf_struct_ops_map *st_map;
+	struct bpf_map *map;
+	int err;
+
+	map = bpf_map_get(attr->link_create.map_fd);
+	if (IS_ERR(map))	/* failure is an ERR_PTR(), never NULL */
+		return PTR_ERR(map);
+
+	st_map = (struct bpf_struct_ops_map *)map;
+
+	if (!bpf_struct_ops_valid_to_reg(map)) {
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	link = kzalloc(sizeof(*link), GFP_USER);
+	if (!link) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_map_lops, NULL);
+
+	err = bpf_link_prime(&link->link, &link_primer);
+	if (err)
+		goto err_out;
+
+	err = st_map->st_ops->reg(st_map->kvalue.data);
+	if (err) {
+		bpf_link_cleanup(&link_primer);
+		link = NULL;
+		goto err_out;
+	}
+	RCU_INIT_POINTER(link->map, map);
+
+	return bpf_link_settle(&link_primer);
+
+err_out:
+	bpf_map_put(map);
+	kfree(link);
+	return err;
+}
+
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cff0348a2871b..21f76698875c6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2825,16 +2825,19 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
 	const struct bpf_prog *prog = link->prog;
 	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
 
-	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
 	seq_printf(m,
 		   "link_type:\t%s\n"
-		   "link_id:\t%u\n"
-		   "prog_tag:\t%s\n"
-		   "prog_id:\t%u\n",
+		   "link_id:\t%u\n",
 		   bpf_link_type_strs[link->type],
-		   link->id,
-		   prog_tag,
-		   prog->aux->id);
+		   link->id);
+	if (prog) {
+		bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
+		seq_printf(m,
+			   "prog_tag:\t%s\n"
+			   "prog_id:\t%u\n",
+			   prog_tag,
+			   prog->aux->id);
+	}
 	if (link->ops->show_fdinfo)
 		link->ops->show_fdinfo(link, m);
 }
@@ -4314,7 +4317,8 @@ static int bpf_link_get_info_by_fd(struct file *file,
 
 	info.type = link->type;
 	info.id = link->id;
-	info.prog_id = link->prog->aux->id;
+	if (link->prog)
+		info.prog_id = link->prog->aux->id;
 
 	if (link->ops->fill_link_info) {
 		err = link->ops->fill_link_info(link, &info);
@@ -4577,6 +4581,9 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
 	if (CHECK_ATTR(BPF_LINK_CREATE))
 		return -EINVAL;
 
+	if (attr->link_create.attach_type == BPF_STRUCT_OPS)
+		return bpf_struct_ops_link_create(attr);
+
 	prog = bpf_prog_get(attr->link_create.prog_fd);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 13fc0c185cd92..bbbd5eb94db20 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -239,8 +239,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
 		if (bpf_obj_name_cpy(tcp_ca->name, utcp_ca->name,
 				     sizeof(tcp_ca->name)) <= 0)
 			return -EINVAL;
-		if (tcp_ca_find(utcp_ca->name))
-			return -EEXIST;
 		return 1;
 	}
 
@@ -266,6 +264,11 @@ static void bpf_tcp_ca_unreg(void *kdata)
 	tcp_unregister_congestion_control(kdata);
 }
 
+static int bpf_tcp_ca_validate(void *kdata)
+{
+	return tcp_validate_congestion_control(kdata);
+}
+
 struct bpf_struct_ops bpf_tcp_congestion_ops = {
 	.verifier_ops = &bpf_tcp_ca_verifier_ops,
 	.reg = bpf_tcp_ca_reg,
@@ -273,6 +276,7 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
 	.check_member = bpf_tcp_ca_check_member,
 	.init_member = bpf_tcp_ca_init_member,
 	.init = bpf_tcp_ca_init,
+	.validate = bpf_tcp_ca_validate,
 	.name = "tcp_congestion_ops",
 };
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 13129df937cde..9cf1deaf21f25 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1033,6 +1033,7 @@ enum bpf_attach_type {
 	BPF_PERF_EVENT,
 	BPF_TRACE_KPROBE_MULTI,
 	BPF_LSM_CGROUP,
+	BPF_STRUCT_OPS,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -1266,6 +1267,9 @@ enum {
 
 /* Create a map that is suitable to be an inner map with dynamic max entries */
 	BPF_F_INNER_MAP		= (1U << 12),
+
+/* Create a map that will be registered/unregesitered by the backed bpf_link */
+	BPF_F_LINK		= (1U << 13),
 };
 
 /* Flags for BPF_PROG_QUERY. */
@@ -1507,7 +1511,10 @@ union bpf_attr {
 	} task_fd_query;
 
 	struct { /* struct used by BPF_LINK_CREATE command */
-		__u32		prog_fd;	/* eBPF program to attach */
+		union {
+			__u32		prog_fd;	/* eBPF program to attach */
+			__u32		map_fd;		/* struct_ops to attach */
+		};
 		union {
 			__u32		target_fd;	/* object to attach to */
 			__u32		target_ifindex; /* target ifindex */
@@ -6379,6 +6386,9 @@ struct bpf_link_info {
 		struct {
 			__u32 ifindex;
 		} xdp;
+		struct {
+			__u32 map_id;
+		} struct_ops;
 	};
 } __attribute__((aligned(8)));
 

From fedaea5f4f1fd2c4dfbaab333927f0c740c4e4c9 Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <kuifeng@meta.com>
Date: Wed, 22 Mar 2023 20:24:01 -0700
Subject: [PATCH 5/9] libbpf: Create a bpf_link in
 bpf_map__attach_struct_ops().

bpf_map__attach_struct_ops() was creating a dummy bpf_link as a
placeholder, but now it is constructing an authentic one by calling
bpf_link_create() if the map has the BPF_F_LINK flag.

You can flag a struct_ops map with BPF_F_LINK by calling
bpf_map__set_map_flags().

Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
---
 tools/lib/bpf/libbpf.c | 95 +++++++++++++++++++++++++++++++-----------
 1 file changed, 71 insertions(+), 24 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5d32aa8ea38ab..4ff82baba0ea8 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -116,6 +116,7 @@ static const char * const attach_type_name[] = {
 	[BPF_SK_REUSEPORT_SELECT_OR_MIGRATE]	= "sk_reuseport_select_or_migrate",
 	[BPF_PERF_EVENT]		= "perf_event",
 	[BPF_TRACE_KPROBE_MULTI]	= "trace_kprobe_multi",
+	[BPF_STRUCT_OPS]		= "struct_ops",
 };
 
 static const char * const link_type_name[] = {
@@ -7686,6 +7687,37 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
 	return 0;
 }
 
+static void bpf_map_prepare_vdata(const struct bpf_map *map)
+{
+	struct bpf_struct_ops *st_ops;
+	__u32 i;
+
+	st_ops = map->st_ops;
+	for (i = 0; i < btf_vlen(st_ops->type); i++) {
+		struct bpf_program *prog = st_ops->progs[i];
+		void *kern_data;
+		int prog_fd;
+
+		if (!prog)
+			continue;
+
+		prog_fd = bpf_program__fd(prog);
+		kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
+		*(unsigned long *)kern_data = prog_fd;
+	}
+}
+
+static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
+{
+	int i;
+
+	for (i = 0; i < obj->nr_maps; i++)
+		if (bpf_map__is_struct_ops(&obj->maps[i]))
+			bpf_map_prepare_vdata(&obj->maps[i]);
+
+	return 0;
+}
+
 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
 {
 	int err, i;
@@ -7711,6 +7743,7 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
 	err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
 	err = err ? : bpf_object__load_progs(obj, extra_log_level);
 	err = err ? : bpf_object_init_prog_arrays(obj);
+	err = err ? : bpf_object_prepare_struct_ops(obj);
 
 	if (obj->gen_loader) {
 		/* reset FDs */
@@ -11579,22 +11612,30 @@ struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
 	return link;
 }
 
+struct bpf_link_struct_ops {
+	struct bpf_link link;
+	int map_fd;
+};
+
 static int bpf_link__detach_struct_ops(struct bpf_link *link)
 {
+	struct bpf_link_struct_ops *st_link;
 	__u32 zero = 0;
 
-	if (bpf_map_delete_elem(link->fd, &zero))
-		return -errno;
+	st_link = container_of(link, struct bpf_link_struct_ops, link);
 
-	return 0;
+	if (st_link->map_fd < 0)
+		/* w/o a real link */
+		return bpf_map_delete_elem(link->fd, &zero);
+
+	return close(link->fd);
 }
 
 struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
 {
-	struct bpf_struct_ops *st_ops;
-	struct bpf_link *link;
-	__u32 i, zero = 0;
-	int err;
+	struct bpf_link_struct_ops *link;
+	__u32 zero = 0;
+	int err, fd;
 
 	if (!bpf_map__is_struct_ops(map) || map->fd == -1)
 		return libbpf_err_ptr(-EINVAL);
@@ -11603,31 +11644,37 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
 	if (!link)
 		return libbpf_err_ptr(-EINVAL);
 
-	st_ops = map->st_ops;
-	for (i = 0; i < btf_vlen(st_ops->type); i++) {
-		struct bpf_program *prog = st_ops->progs[i];
-		void *kern_data;
-		int prog_fd;
+	/* kern_vdata should be prepared during the loading phase. */
+	err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
+	/* It can be EBUSY if the map has been used to create or
+	 * update a link before.  We don't allow updating the value of
+	 * a struct_ops once it is set.  That ensures that the value
+	 * never changes.  So, it is safe to skip EBUSY.
+	 */
+	if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
+		free(link);
+		return libbpf_err_ptr(err);
+	}
 
-		if (!prog)
-			continue;
+	link->link.detach = bpf_link__detach_struct_ops;
 
-		prog_fd = bpf_program__fd(prog);
-		kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
-		*(unsigned long *)kern_data = prog_fd;
+	if (!(map->def.map_flags & BPF_F_LINK)) {
+		/* w/o a real link */
+		link->link.fd = map->fd;
+		link->map_fd = -1;
+		return &link->link;
 	}
 
-	err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0);
-	if (err) {
-		err = -errno;
+	fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
+	if (fd < 0) {
 		free(link);
-		return libbpf_err_ptr(err);
+		return libbpf_err_ptr(fd);
 	}
 
-	link->detach = bpf_link__detach_struct_ops;
-	link->fd = map->fd;
+	link->link.fd = fd;
+	link->map_fd = map->fd;
 
-	return link;
+	return &link->link;
 }
 
 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,

From c7e34c02b1a349820339d79585098aa7402711db Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <kuifeng@meta.com>
Date: Wed, 22 Mar 2023 20:24:02 -0700
Subject: [PATCH 6/9] bpf: Update the struct_ops of a bpf_link.

Extend the BPF_LINK_UPDATE command of bpf() so that the struct_ops map
backing a bpf_link can be replaced with another one. This enables
smoother transitions from one struct_ops to another on a single
bpf_link.

The struct_ops maps passed along with BPF_LINK_UPDATE must have the
BPF_F_LINK flag.

Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
---
 include/linux/bpf.h            |  3 +++
 include/uapi/linux/bpf.h       | 21 +++++++++++----
 kernel/bpf/bpf_struct_ops.c    | 48 +++++++++++++++++++++++++++++++++-
 kernel/bpf/syscall.c           | 34 ++++++++++++++++++++++++
 net/ipv4/bpf_tcp_ca.c          |  6 +++++
 tools/include/uapi/linux/bpf.h | 21 +++++++++++----
 6 files changed, 122 insertions(+), 11 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8552279efe46b..2d8f3f639e680 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1476,6 +1476,8 @@ struct bpf_link_ops {
 	void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq);
 	int (*fill_link_info)(const struct bpf_link *link,
 			      struct bpf_link_info *info);
+	int (*update_map)(struct bpf_link *link, struct bpf_map *new_map,
+			  struct bpf_map *old_map);
 };
 
 struct bpf_tramp_link {
@@ -1518,6 +1520,7 @@ struct bpf_struct_ops {
 			   void *kdata, const void *udata);
 	int (*reg)(void *kdata);
 	void (*unreg)(void *kdata);
+	int (*update)(void *kdata, void *old_kdata);
 	int (*validate)(void *kdata);
 	const struct btf_type *type;
 	const struct btf_type *value_type;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 42f40ee083bf9..e3d3b5160d26f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1555,12 +1555,23 @@ union bpf_attr {
 
 	struct { /* struct used by BPF_LINK_UPDATE command */
 		__u32		link_fd;	/* link fd */
-		/* new program fd to update link with */
-		__u32		new_prog_fd;
+		union {
+			/* new program fd to update link with */
+			__u32		new_prog_fd;
+			/* new struct_ops map fd to update link with */
+			__u32           new_map_fd;
+		};
 		__u32		flags;		/* extra flags */
-		/* expected link's program fd; is specified only if
-		 * BPF_F_REPLACE flag is set in flags */
-		__u32		old_prog_fd;
+		union {
+			/* expected link's program fd; is specified only if
+			 * BPF_F_REPLACE flag is set in flags.
+			 */
+			__u32		old_prog_fd;
+			/* expected link's map fd; is specified only
+			 * if BPF_F_REPLACE flag is set.
+			 */
+			__u32           old_map_fd;
+		};
 	} link_update;
 
 	struct {
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 3c77abfdb70ce..2b3577422bb55 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -65,6 +65,8 @@ struct bpf_struct_ops_link {
 	struct bpf_map __rcu *map;
 };
 
+static DEFINE_MUTEX(update_mutex);
+
 #define VALUE_PREFIX "bpf_struct_ops_"
 #define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1)
 
@@ -660,7 +662,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
 	if (attr->value_size != vt->size)
 		return ERR_PTR(-EINVAL);
 
-	if (attr->map_flags & BPF_F_LINK && !st_ops->validate)
+	if (attr->map_flags & BPF_F_LINK && (!st_ops->validate || !st_ops->update))
 		return ERR_PTR(-EOPNOTSUPP);
 
 	t = st_ops->type;
@@ -806,10 +808,54 @@ static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
 	return 0;
 }
 
+static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map *new_map,
+					  struct bpf_map *expected_old_map)
+{
+	struct bpf_struct_ops_map *st_map, *old_st_map;
+	struct bpf_map *old_map;
+	struct bpf_struct_ops_link *st_link;
+	int err = 0;
+
+	st_link = container_of(link, struct bpf_struct_ops_link, link);
+	st_map = container_of(new_map, struct bpf_struct_ops_map, map);
+
+	if (!bpf_struct_ops_valid_to_reg(new_map))
+		return -EINVAL;
+
+	mutex_lock(&update_mutex);
+
+	old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
+	if (expected_old_map && old_map != expected_old_map) {
+		err = -EPERM;
+		goto err_out;
+	}
+
+	old_st_map = container_of(old_map, struct bpf_struct_ops_map, map);
+	/* The new and old struct_ops must be the same type. */
+	if (st_map->st_ops != old_st_map->st_ops) {
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	err = st_map->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data);
+	if (err)
+		goto err_out;
+
+	bpf_map_inc(new_map);
+	rcu_assign_pointer(st_link->map, new_map);
+	bpf_map_put(old_map);
+
+err_out:
+	mutex_unlock(&update_mutex);
+
+	return err;
+}
+
 static const struct bpf_link_ops bpf_struct_ops_map_lops = {
 	.dealloc = bpf_struct_ops_map_link_dealloc,
 	.show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
 	.fill_link_info = bpf_struct_ops_map_link_fill_link_info,
+	.update_map = bpf_struct_ops_map_link_update,
 };
 
 int bpf_struct_ops_link_create(union bpf_attr *attr)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 21f76698875c6..b4d758fa5981d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4682,6 +4682,35 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
 	return ret;
 }
 
+static int link_update_map(struct bpf_link *link, union bpf_attr *attr)
+{
+	struct bpf_map *new_map, *old_map = NULL;
+	int ret;
+
+	new_map = bpf_map_get(attr->link_update.new_map_fd);
+	if (IS_ERR(new_map))
+		return -EINVAL;
+
+	if (attr->link_update.flags & BPF_F_REPLACE) {
+		old_map = bpf_map_get(attr->link_update.old_map_fd);
+		if (IS_ERR(old_map)) {
+			ret = -EINVAL;
+			goto out_put;
+		}
+	} else if (attr->link_update.old_map_fd) {
+		ret = -EINVAL;
+		goto out_put;
+	}
+
+	ret = link->ops->update_map(link, new_map, old_map);
+
+	if (old_map)
+		bpf_map_put(old_map);
+out_put:
+	bpf_map_put(new_map);
+	return ret;
+}
+
 #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd
 
 static int link_update(union bpf_attr *attr)
@@ -4702,6 +4731,11 @@ static int link_update(union bpf_attr *attr)
 	if (IS_ERR(link))
 		return PTR_ERR(link);
 
+	if (link->ops->update_map) {
+		ret = link_update_map(link, attr);
+		goto out_put_link;
+	}
+
 	new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
 	if (IS_ERR(new_prog)) {
 		ret = PTR_ERR(new_prog);
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index bbbd5eb94db20..e8b27826283ea 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -264,6 +264,11 @@ static void bpf_tcp_ca_unreg(void *kdata)
 	tcp_unregister_congestion_control(kdata);
 }
 
+static int bpf_tcp_ca_update(void *kdata, void *old_kdata)
+{
+	return tcp_update_congestion_control(kdata, old_kdata);
+}
+
 static int bpf_tcp_ca_validate(void *kdata)
 {
 	return tcp_validate_congestion_control(kdata);
@@ -273,6 +278,7 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
 	.verifier_ops = &bpf_tcp_ca_verifier_ops,
 	.reg = bpf_tcp_ca_reg,
 	.unreg = bpf_tcp_ca_unreg,
+	.update = bpf_tcp_ca_update,
 	.check_member = bpf_tcp_ca_check_member,
 	.init_member = bpf_tcp_ca_init_member,
 	.init = bpf_tcp_ca_init,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9cf1deaf21f25..d6c5a022ae28d 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1555,12 +1555,23 @@ union bpf_attr {
 
 	struct { /* struct used by BPF_LINK_UPDATE command */
 		__u32		link_fd;	/* link fd */
-		/* new program fd to update link with */
-		__u32		new_prog_fd;
+		union {
+			/* new program fd to update link with */
+			__u32		new_prog_fd;
+			/* new struct_ops map fd to update link with */
+			__u32           new_map_fd;
+		};
 		__u32		flags;		/* extra flags */
-		/* expected link's program fd; is specified only if
-		 * BPF_F_REPLACE flag is set in flags */
-		__u32		old_prog_fd;
+		union {
+			/* expected link's program fd; is specified only if
+			 * BPF_F_REPLACE flag is set in flags.
+			 */
+			__u32		old_prog_fd;
+			/* expected link's map fd; is specified only
+			 * if BPF_F_REPLACE flag is set.
+			 */
+			__u32           old_map_fd;
+		};
 	} link_update;
 
 	struct {

From 1d6dbcccff1c72cadca70f6e090e4ec6ca348ae1 Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <kuifeng@meta.com>
Date: Wed, 22 Mar 2023 20:24:03 -0700
Subject: [PATCH 7/9] libbpf: Update a bpf_link with another struct_ops.

Introduce bpf_link__update_map(), which allows atomically updating the
underlying struct_ops implementation of a given struct_ops BPF link.

Also add old_map_fd to struct bpf_link_update_opts to handle the
BPF_F_REPLACE feature.

Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
---
 tools/lib/bpf/bpf.c      |  8 +++++++-
 tools/lib/bpf/bpf.h      |  3 ++-
 tools/lib/bpf/libbpf.c   | 35 +++++++++++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.h   |  1 +
 tools/lib/bpf/libbpf.map |  1 +
 5 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index e750b6f5fcc36..767035900354d 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -794,11 +794,17 @@ int bpf_link_update(int link_fd, int new_prog_fd,
 	if (!OPTS_VALID(opts, bpf_link_update_opts))
 		return libbpf_err(-EINVAL);
 
+	if (OPTS_GET(opts, old_prog_fd, 0) && OPTS_GET(opts, old_map_fd, 0))
+		return libbpf_err(-EINVAL);
+
 	memset(&attr, 0, attr_sz);
 	attr.link_update.link_fd = link_fd;
 	attr.link_update.new_prog_fd = new_prog_fd;
 	attr.link_update.flags = OPTS_GET(opts, flags, 0);
-	attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+	if (OPTS_GET(opts, old_prog_fd, 0))
+		attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+	else if (OPTS_GET(opts, old_map_fd, 0))
+		attr.link_update.old_map_fd = OPTS_GET(opts, old_map_fd, 0);
 
 	ret = sys_bpf(BPF_LINK_UPDATE, &attr, attr_sz);
 	return libbpf_err_errno(ret);
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index f0f7863732381..b073e73439efd 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -336,8 +336,9 @@ struct bpf_link_update_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
 	__u32 flags;	   /* extra flags */
 	__u32 old_prog_fd; /* expected old program FD */
+	__u32 old_map_fd;  /* expected old map FD */
 };
-#define bpf_link_update_opts__last_field old_prog_fd
+#define bpf_link_update_opts__last_field old_map_fd
 
 LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd,
 			       const struct bpf_link_update_opts *opts);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 4ff82baba0ea8..2b5ea2500b804 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -11677,6 +11677,41 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
 	return &link->link;
 }
 
+/*
+ * Swap the struct_ops map backing a link with a new struct_ops map.
+ */
+int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
+{
+	struct bpf_link_struct_ops *st_ops_link;
+	__u32 zero = 0;
+	int err;
+
+	if (!bpf_map__is_struct_ops(map) || map->fd < 0)
+		return -EINVAL;
+
+	st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
+	/* Ensure the type of a link is correct */
+	if (st_ops_link->map_fd < 0)
+		return -EINVAL;
+
+	err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
+	/* It can be EBUSY if the map has been used to create or
+	 * update a link before.  We don't allow updating the value of
+	 * a struct_ops once it is set.  That ensures that the value
+	 * never changes.  So, it is safe to skip EBUSY.
+	 */
+	if (err && err != -EBUSY)
+		return err;
+
+	err = bpf_link_update(link->fd, map->fd, NULL);
+	if (err < 0)
+		return err;
+
+	st_ops_link->map_fd = map->fd;
+
+	return 0;
+}
+
 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
 							  void *private_data);
 
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index db4992a036f8b..1615e55e2e790 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -719,6 +719,7 @@ bpf_program__attach_freplace(const struct bpf_program *prog,
 struct bpf_map;
 
 LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map);
+LIBBPF_API int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map);
 
 struct bpf_iter_attach_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 50dde1f6521ef..a5aa3a383d694 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -386,6 +386,7 @@ LIBBPF_1.1.0 {
 LIBBPF_1.2.0 {
 	global:
 		bpf_btf_get_info_by_fd;
+		bpf_link__update_map;
 		bpf_link_get_info_by_fd;
 		bpf_map_get_info_by_fd;
 		bpf_prog_get_info_by_fd;

From 9c643ab315d5890c76b536223d664387938f3ffa Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <kuifeng@meta.com>
Date: Wed, 22 Mar 2023 20:24:04 -0700
Subject: [PATCH 8/9] libbpf: Use .struct_ops.link section to indicate a
 struct_ops with a link.

Flag a struct_ops map as one that backs a bpf_link by placing it in the
".struct_ops.link" section.  Once flagged, the created struct_ops map
can be used to create a bpf_link or to update a bpf_link that is
already backed by another struct_ops map.

Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
---
 tools/lib/bpf/libbpf.c | 60 +++++++++++++++++++++++++++++++-----------
 1 file changed, 44 insertions(+), 16 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 2b5ea2500b804..f6a071db5c6e5 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -468,6 +468,7 @@ struct bpf_struct_ops {
 #define KCONFIG_SEC ".kconfig"
 #define KSYMS_SEC ".ksyms"
 #define STRUCT_OPS_SEC ".struct_ops"
+#define STRUCT_OPS_LINK_SEC ".struct_ops.link"
 
 enum libbpf_map_type {
 	LIBBPF_MAP_UNSPEC,
@@ -597,6 +598,7 @@ struct elf_state {
 	Elf64_Ehdr *ehdr;
 	Elf_Data *symbols;
 	Elf_Data *st_ops_data;
+	Elf_Data *st_ops_link_data;
 	size_t shstrndx; /* section index for section name strings */
 	size_t strtabidx;
 	struct elf_sec_desc *secs;
@@ -606,6 +608,7 @@ struct elf_state {
 	int text_shndx;
 	int symbols_shndx;
 	int st_ops_shndx;
+	int st_ops_link_shndx;
 };
 
 struct usdt_manager;
@@ -1119,7 +1122,8 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
 	return 0;
 }
 
-static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
+static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
+				int shndx, Elf_Data *data, __u32 map_flags)
 {
 	const struct btf_type *type, *datasec;
 	const struct btf_var_secinfo *vsi;
@@ -1130,15 +1134,15 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
 	struct bpf_map *map;
 	__u32 i;
 
-	if (obj->efile.st_ops_shndx == -1)
+	if (shndx == -1)
 		return 0;
 
 	btf = obj->btf;
-	datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
+	datasec_id = btf__find_by_name_kind(btf, sec_name,
 					    BTF_KIND_DATASEC);
 	if (datasec_id < 0) {
 		pr_warn("struct_ops init: DATASEC %s not found\n",
-			STRUCT_OPS_SEC);
+			sec_name);
 		return -EINVAL;
 	}
 
@@ -1151,7 +1155,7 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
 		type_id = btf__resolve_type(obj->btf, vsi->type);
 		if (type_id < 0) {
 			pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
-				vsi->type, STRUCT_OPS_SEC);
+				vsi->type, sec_name);
 			return -EINVAL;
 		}
 
@@ -1170,7 +1174,7 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
 		if (IS_ERR(map))
 			return PTR_ERR(map);
 
-		map->sec_idx = obj->efile.st_ops_shndx;
+		map->sec_idx = shndx;
 		map->sec_offset = vsi->offset;
 		map->name = strdup(var_name);
 		if (!map->name)
@@ -1180,6 +1184,7 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
 		map->def.key_size = sizeof(int);
 		map->def.value_size = type->size;
 		map->def.max_entries = 1;
+		map->def.map_flags = map_flags;
 
 		map->st_ops = calloc(1, sizeof(*map->st_ops));
 		if (!map->st_ops)
@@ -1192,14 +1197,14 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
 		if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
 			return -ENOMEM;
 
-		if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) {
+		if (vsi->offset + type->size > data->d_size) {
 			pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
-				var_name, STRUCT_OPS_SEC);
+				var_name, sec_name);
 			return -EINVAL;
 		}
 
 		memcpy(st_ops->data,
-		       obj->efile.st_ops_data->d_buf + vsi->offset,
+		       data->d_buf + vsi->offset,
 		       type->size);
 		st_ops->tname = tname;
 		st_ops->type = type;
@@ -1212,6 +1217,19 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
 	return 0;
 }
 
+static int bpf_object_init_struct_ops(struct bpf_object *obj)
+{
+	int err;
+
+	err = init_struct_ops_maps(obj, STRUCT_OPS_SEC, obj->efile.st_ops_shndx,
+				   obj->efile.st_ops_data, 0);
+	err = err ?: init_struct_ops_maps(obj, STRUCT_OPS_LINK_SEC,
+					  obj->efile.st_ops_link_shndx,
+					  obj->efile.st_ops_link_data,
+					  BPF_F_LINK);
+	return err;
+}
+
 static struct bpf_object *bpf_object__new(const char *path,
 					  const void *obj_buf,
 					  size_t obj_buf_sz,
@@ -1248,6 +1266,7 @@ static struct bpf_object *bpf_object__new(const char *path,
 	obj->efile.obj_buf_sz = obj_buf_sz;
 	obj->efile.btf_maps_shndx = -1;
 	obj->efile.st_ops_shndx = -1;
+	obj->efile.st_ops_link_shndx = -1;
 	obj->kconfig_map_idx = -1;
 
 	obj->kern_version = get_kernel_version();
@@ -1265,6 +1284,7 @@ static void bpf_object__elf_finish(struct bpf_object *obj)
 	obj->efile.elf = NULL;
 	obj->efile.symbols = NULL;
 	obj->efile.st_ops_data = NULL;
+	obj->efile.st_ops_link_data = NULL;
 
 	zfree(&obj->efile.secs);
 	obj->efile.sec_cnt = 0;
@@ -2619,7 +2639,7 @@ static int bpf_object__init_maps(struct bpf_object *obj,
 	err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
 	err = err ?: bpf_object__init_global_data_maps(obj);
 	err = err ?: bpf_object__init_kconfig_map(obj);
-	err = err ?: bpf_object__init_struct_ops_maps(obj);
+	err = err ?: bpf_object_init_struct_ops(obj);
 
 	return err;
 }
@@ -2753,12 +2773,13 @@ static bool libbpf_needs_btf(const struct bpf_object *obj)
 {
 	return obj->efile.btf_maps_shndx >= 0 ||
 	       obj->efile.st_ops_shndx >= 0 ||
+	       obj->efile.st_ops_link_shndx >= 0 ||
 	       obj->nr_extern > 0;
 }
 
 static bool kernel_needs_btf(const struct bpf_object *obj)
 {
-	return obj->efile.st_ops_shndx >= 0;
+	return obj->efile.st_ops_shndx >= 0 || obj->efile.st_ops_link_shndx >= 0;
 }
 
 static int bpf_object__init_btf(struct bpf_object *obj,
@@ -3451,6 +3472,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 			} else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
 				obj->efile.st_ops_data = data;
 				obj->efile.st_ops_shndx = idx;
+			} else if (strcmp(name, STRUCT_OPS_LINK_SEC) == 0) {
+				obj->efile.st_ops_link_data = data;
+				obj->efile.st_ops_link_shndx = idx;
 			} else {
 				pr_info("elf: skipping unrecognized data section(%d) %s\n",
 					idx, name);
@@ -3465,6 +3489,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 			/* Only do relo for section with exec instructions */
 			if (!section_have_execinstr(obj, targ_sec_idx) &&
 			    strcmp(name, ".rel" STRUCT_OPS_SEC) &&
+			    strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
 			    strcmp(name, ".rel" MAPS_ELF_SEC)) {
 				pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
 					idx, name, targ_sec_idx,
@@ -6611,7 +6636,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj)
 			return -LIBBPF_ERRNO__INTERNAL;
 		}
 
-		if (idx == obj->efile.st_ops_shndx)
+		if (idx == obj->efile.st_ops_shndx || idx == obj->efile.st_ops_link_shndx)
 			err = bpf_object__collect_st_ops_relos(obj, shdr, data);
 		else if (idx == obj->efile.btf_maps_shndx)
 			err = bpf_object__collect_map_relos(obj, shdr, data);
@@ -8853,6 +8878,7 @@ const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
 }
 
 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
+						     int sec_idx,
 						     size_t offset)
 {
 	struct bpf_map *map;
@@ -8862,7 +8888,8 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
 		map = &obj->maps[i];
 		if (!bpf_map__is_struct_ops(map))
 			continue;
-		if (map->sec_offset <= offset &&
+		if (map->sec_idx == sec_idx &&
+		    map->sec_offset <= offset &&
 		    offset - map->sec_offset < map->def.value_size)
 			return map;
 	}
@@ -8904,7 +8931,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
 		}
 
 		name = elf_sym_str(obj, sym->st_name) ?: "<?>";
-		map = find_struct_ops_map_by_offset(obj, rel->r_offset);
+		map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset);
 		if (!map) {
 			pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
 				(size_t)rel->r_offset);
@@ -8971,8 +8998,9 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
 		}
 
 		/* struct_ops BPF prog can be re-used between multiple
-		 * .struct_ops as long as it's the same struct_ops struct
-		 * definition and the same function pointer field
+		 * .struct_ops & .struct_ops.link as long as it's the
+		 * same struct_ops struct definition and the same
+		 * function pointer field
 		 */
 		if (prog->attach_btf_id != st_ops->type_id ||
 		    prog->expected_attach_type != member_idx) {

From 4a8c17e9f94ed95fdb961bcc372a42090b9b74ca Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <kuifeng@meta.com>
Date: Wed, 22 Mar 2023 20:24:05 -0700
Subject: [PATCH 9/9] selftests/bpf: Test switching TCP Congestion Control
 algorithms.

Create a pair of sockets that utilize the congestion control algorithm
under a particular name. Then switch up this congestion control
algorithm to another implementation and check whether newly created
connections using the same cc name now run the new implementation.

Also, try to update a link with a struct_ops that is without
BPF_F_LINK or with a wrong or different name.  These cases should fail
due to the violation of assumptions.  To update a bpf_link of a
struct_ops, it must be replaced with another struct_ops that is
identical in type and name and has the BPF_F_LINK flag.

The other test case is to create links from the same struct_ops more
than once.  It makes sure a struct_ops can be used repeatedly.

Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
---
 .../selftests/bpf/prog_tests/bpf_tcp_ca.c     | 160 ++++++++++++++++++
 .../selftests/bpf/progs/tcp_ca_update.c       |  80 +++++++++
 2 files changed, 240 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/tcp_ca_update.c

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index e980188d41246..2c80f9291ceda 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -8,6 +8,7 @@
 #include "bpf_dctcp.skel.h"
 #include "bpf_cubic.skel.h"
 #include "bpf_tcp_nogpl.skel.h"
+#include "tcp_ca_update.skel.h"
 #include "bpf_dctcp_release.skel.h"
 #include "tcp_ca_write_sk_pacing.skel.h"
 #include "tcp_ca_incompl_cong_ops.skel.h"
@@ -381,6 +382,155 @@ static void test_unsupp_cong_op(void)
 	libbpf_set_print(old_print_fn);
 }
 
+static void test_update_ca(void)
+{
+	struct tcp_ca_update *skel;
+	struct bpf_link *link;
+	int saved_ca1_cnt;
+	int err;
+
+	skel = tcp_ca_update__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open"))
+		return;
+
+	link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+	ASSERT_OK_PTR(link, "attach_struct_ops");
+
+	do_test("tcp_ca_update", NULL);
+	saved_ca1_cnt = skel->bss->ca1_cnt;
+	ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt");
+
+	err = bpf_link__update_map(link, skel->maps.ca_update_2);
+	ASSERT_OK(err, "update_map");
+
+	do_test("tcp_ca_update", NULL);
+	ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
+	ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt");
+
+	bpf_link__destroy(link);
+	tcp_ca_update__destroy(skel);
+}
+
+static void test_update_wrong(void)
+{
+	struct tcp_ca_update *skel;
+	struct bpf_link *link;
+	int saved_ca1_cnt;
+	int err;
+
+	skel = tcp_ca_update__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open"))
+		return;
+
+	link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+	ASSERT_OK_PTR(link, "attach_struct_ops");
+
+	do_test("tcp_ca_update", NULL);
+	saved_ca1_cnt = skel->bss->ca1_cnt;
+	ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt");
+
+	err = bpf_link__update_map(link, skel->maps.ca_wrong);
+	ASSERT_ERR(err, "update_map");
+
+	do_test("tcp_ca_update", NULL);
+	ASSERT_GT(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
+
+	bpf_link__destroy(link);
+	tcp_ca_update__destroy(skel);
+}
+
+static void test_mixed_links(void)
+{
+	struct tcp_ca_update *skel;
+	struct bpf_link *link, *link_nl;
+	int err;
+
+	skel = tcp_ca_update__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open"))
+		return;
+
+	link_nl = bpf_map__attach_struct_ops(skel->maps.ca_no_link);
+	ASSERT_OK_PTR(link_nl, "attach_struct_ops_nl");
+
+	link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+	ASSERT_OK_PTR(link, "attach_struct_ops");
+
+	do_test("tcp_ca_update", NULL);
+	ASSERT_GT(skel->bss->ca1_cnt, 0, "ca1_ca1_cnt");
+
+	err = bpf_link__update_map(link, skel->maps.ca_no_link);
+	ASSERT_ERR(err, "update_map");
+
+	bpf_link__destroy(link);
+	bpf_link__destroy(link_nl);
+	tcp_ca_update__destroy(skel);
+}
+
+static void test_multi_links(void)
+{
+	struct tcp_ca_update *skel;
+	struct bpf_link *link;
+
+	skel = tcp_ca_update__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open"))
+		return;
+
+	link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+	ASSERT_OK_PTR(link, "attach_struct_ops_1st");
+	bpf_link__destroy(link);
+
+	/* A map should be able to be used to create links multiple
+	 * times.
+	 */
+	link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+	ASSERT_OK_PTR(link, "attach_struct_ops_2nd");
+	bpf_link__destroy(link);
+
+	tcp_ca_update__destroy(skel);
+}
+
+static void test_link_replace(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, opts);
+	struct tcp_ca_update *skel;
+	struct bpf_link *link;
+	int err;
+
+	skel = tcp_ca_update__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open"))
+		return;
+
+	link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+	ASSERT_OK_PTR(link, "attach_struct_ops_1st");
+	bpf_link__destroy(link);
+
+	link = bpf_map__attach_struct_ops(skel->maps.ca_update_2);
+	ASSERT_OK_PTR(link, "attach_struct_ops_1st");
+
+	/* BPF_F_REPLACE with a wrong old map FD. It should fail!
+	 *
+	 * With BPF_F_REPLACE, the link should be updated only if the
+	 * old map fd given here matches the map backing the link.
+	 */
+	opts.old_map_fd = bpf_map__fd(skel->maps.ca_update_1);
+	opts.flags = BPF_F_REPLACE;
+	err = bpf_link_update(bpf_link__fd(link),
+			      bpf_map__fd(skel->maps.ca_update_1),
+			      &opts);
+	ASSERT_ERR(err, "bpf_link_update_fail");
+
+	/* BPF_F_REPLACE with a correct old map FD. It should succeed! */
+	opts.old_map_fd = bpf_map__fd(skel->maps.ca_update_2);
+	err = bpf_link_update(bpf_link__fd(link),
+			      bpf_map__fd(skel->maps.ca_update_1),
+			      &opts);
+	ASSERT_OK(err, "bpf_link_update_success");
+
+	bpf_link__destroy(link);
+
+	tcp_ca_update__destroy(skel);
+}
+
 void test_bpf_tcp_ca(void)
 {
 	if (test__start_subtest("dctcp"))
@@ -399,4 +549,14 @@ void test_bpf_tcp_ca(void)
 		test_incompl_cong_ops();
 	if (test__start_subtest("unsupp_cong_op"))
 		test_unsupp_cong_op();
+	if (test__start_subtest("update_ca"))
+		test_update_ca();
+	if (test__start_subtest("update_wrong"))
+		test_update_wrong();
+	if (test__start_subtest("mixed_links"))
+		test_mixed_links();
+	if (test__start_subtest("multi_links"))
+		test_multi_links();
+	if (test__start_subtest("link_replace"))
+		test_link_replace();
 }
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
new file mode 100644
index 0000000000000..b93a0ed330578
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int ca1_cnt = 0;	/* incremented by ca_update_1_init */
+int ca2_cnt = 0;	/* incremented by ca_update_2_init */
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+	return (struct tcp_sock *)sk;	/* plain cast; also drops the const qualifier */
+}
+
+SEC("struct_ops/ca_update_1_init")
+void BPF_PROG(ca_update_1_init, struct sock *sk)
+{
+	ca1_cnt++;	/* only effect: count this call; sk is not used */
+}
+
+SEC("struct_ops/ca_update_2_init")
+void BPF_PROG(ca_update_2_init, struct sock *sk)
+{
+	ca2_cnt++;	/* only effect: count this call; sk is not used */
+}
+
+SEC("struct_ops/ca_update_cong_control")
+void BPF_PROG(ca_update_cong_control, struct sock *sk,
+	      const struct rate_sample *rs)
+{	/* empty body: this callback does nothing with sk or rs */
+}
+
+SEC("struct_ops/ca_update_ssthresh")
+__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk)
+{
+	return tcp_sk(sk)->snd_ssthresh;	/* hand back the socket's own value unchanged */
+}
+
+SEC("struct_ops/ca_update_undo_cwnd")
+__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk)
+{
+	return tcp_sk(sk)->snd_cwnd;	/* hand back the socket's own value unchanged */
+}
+
+SEC(".struct_ops.link")
+struct tcp_congestion_ops ca_update_1 = {
+	.init = (void *)ca_update_1_init,	/* bumps ca1_cnt */
+	.cong_control = (void *)ca_update_cong_control,
+	.ssthresh = (void *)ca_update_ssthresh,
+	.undo_cwnd = (void *)ca_update_undo_cwnd,
+	.name = "tcp_ca_update",	/* ca_update_2 uses the same name */
+};
+
+SEC(".struct_ops.link")
+struct tcp_congestion_ops ca_update_2 = {
+	.init = (void *)ca_update_2_init,	/* bumps ca2_cnt */
+	.cong_control = (void *)ca_update_cong_control,
+	.ssthresh = (void *)ca_update_ssthresh,
+	.undo_cwnd = (void *)ca_update_undo_cwnd,
+	.name = "tcp_ca_update",	/* ca_update_1 uses the same name */
+};
+
+SEC(".struct_ops.link")
+struct tcp_congestion_ops ca_wrong = {	/* no .init callback is set */
+	.cong_control = (void *)ca_update_cong_control,
+	.ssthresh = (void *)ca_update_ssthresh,
+	.undo_cwnd = (void *)ca_update_undo_cwnd,
+	.name = "tcp_ca_wrong",	/* differs from "tcp_ca_update" of ca_update_1/2 */
+};
+
+SEC(".struct_ops")	/* note: not ".struct_ops.link" like the maps above */
+struct tcp_congestion_ops ca_no_link = {	/* no .init callback is set */
+	.cong_control = (void *)ca_update_cong_control,
+	.ssthresh = (void *)ca_update_ssthresh,
+	.undo_cwnd = (void *)ca_update_undo_cwnd,
+	.name = "tcp_ca_no_link",
+};