kernel-patches · kernel-patches-daemon-bpf · Apr 3, 2024 · Apr 2, 2024 · Apr 2, 2024 · Apr 2, 2024
diff --git a/.github/actions/veristat_baseline_compare/action.yml b/.github/actions/veristat_baseline_compare/action.yml
@@ -0,0 +1,49 @@
+# Composite action that publishes the veristat results of the current run as
+# an artifact and then, depending on the triggering event, either compares
+# them against a cached baseline (pull_request) or stores them in the cache as
+# a new baseline (push).
+name: 'run-veristat'
+description: 'Run veristat benchmark'
+inputs:
+  veristat_output:
+    description: 'Veristat output filepath'
+    required: true
+  baseline_name:
+    description: 'Veristat baseline cache name'
+    required: true
+runs:
+  using: "composite"
+  steps:
+    # Always upload the results of this run; a missing file is an error.
+    - uses: actions/upload-artifact@v4
+      with:
+        name: ${{ inputs.baseline_name }}
+        if-no-files-found: error
+        path: ${{ github.workspace }}/${{ inputs.veristat_output }}
+
+    # For pull request:
+    # - get baseline log from cache
+    # - compare it to current run
+    - if: ${{ github.event_name == 'pull_request' }}
+      uses: actions/cache/restore@v4
+      with:
+        key: ${{ inputs.baseline_name }}
+        # Baselines are saved below under "<baseline_name>-<run_id>", so the
+        # prefix is what lets the restore fall back to a saved baseline.
+        restore-keys: |
+          ${{ inputs.baseline_name }}-
+        path: '${{ github.workspace }}/${{ inputs.baseline_name }}'
+
+    - if: ${{ github.event_name == 'pull_request' }}
+      name: Show veristat comparison
+      shell: bash
+      run: ./.github/scripts/compare-veristat-results.sh
+      env:
+        BASELINE_PATH: ${{ github.workspace }}/${{ inputs.baseline_name }}
+        VERISTAT_OUTPUT: ${{ inputs.veristat_output }}
+
+    # For push: just put baseline log to cache
+    - if: ${{ github.event_name == 'push' }}
+      name: Stage veristat output as baseline
+      shell: bash
+      # The paths are handed over through the environment instead of being
+      # expanded into the script text, so unusual characters in the inputs
+      # cannot alter the command that is executed.
+      run: |
+        mv -- "${SOURCE_PATH}" "${BASELINE_PATH}"
+      env:
+        SOURCE_PATH: ${{ github.workspace }}/${{ inputs.veristat_output }}
+        BASELINE_PATH: ${{ github.workspace }}/${{ inputs.baseline_name }}
+
+    - if: ${{ github.event_name == 'push' }}
+      uses: actions/cache/save@v4
+      with:
+        key: ${{ inputs.baseline_name }}-${{ github.run_id }}
+        path: '${{ github.workspace }}/${{ inputs.baseline_name }}'
diff --git a/.github/scripts/compare-veristat-results.sh b/.github/scripts/compare-veristat-results.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+# Compare the veristat results of the current run (VERISTAT_OUTPUT) against a
+# baseline (BASELINE_PATH). Without a baseline file the current results are
+# printed as-is and a note is added to the GitHub step summary.
+
+if [[ -f "${BASELINE_PATH}" ]]; then
+    # Emit the comparison as CSV and let the Python helper process it.
+    selftests/bpf/veristat \
+        --output-format csv \
+        --emit file,prog,verdict,states \
+        --compare "${BASELINE_PATH}" "${VERISTAT_OUTPUT}" > compare.csv
+
+    python3 ./.github/scripts/veristat_compare.py compare.csv
+else
+    echo "# No ${BASELINE_PATH} available" >> "${GITHUB_STEP_SUMMARY}"
+
+    printf '%s\n' \
+        "No ${BASELINE_PATH} available" \
+        "Printing veristat results"
+    cat "${VERISTAT_OUTPUT}"
+fi
diff --git a/.github/scripts/get-commit-metadata.sh b/.github/scripts/get-commit-metadata.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+# Write three values to GITHUB_OUTPUT: the branch this run relates to, and the
+# SHA and committer timestamp of the tip of the matching upstream branch.
+
+# GITHUB_BASE_REF is used unless the event is a push, in which case the pushed
+# ref name is used instead.
+branch="${GITHUB_BASE_REF}"
+
+if [ "${GITHUB_EVENT_NAME}" = 'push' ]; then
+  branch="${GITHUB_REF_NAME}"
+fi
+
+echo "branch=${branch}" >> "${GITHUB_OUTPUT}"
+
+# The upstream branch name is the branch name with every "_base" removed.
+upstream="${branch//_base/}"
+# Resolve the upstream tip. If the remote-tracking ref is already available
+# locally its SHA is printed right away; otherwise it is fetched (depth 1) and
+# resolved afterwards. Only stderr of the first attempt is silenced: its
+# stdout carries the SHA and must end up in ${commit}. `--verify --quiet`
+# keeps stdout empty when the ref does not exist.
+commit="$(
+  git rev-parse --verify --quiet "origin/${upstream}" 2> /dev/null \
+    || (
+      git fetch --quiet --prune --no-tags --depth=1 --no-recurse-submodules origin "+refs/heads/${upstream}:refs/remotes/origin/${upstream}" && \
+      git rev-parse "origin/${upstream}"
+    )
+)"
+# Committer date of that commit, rendered in strict ISO 8601 with TZ=utc.
+timestamp_utc="$(TZ=utc git show --format='%cd' --no-patch --date=iso-strict-local "${commit}")"
+
+echo "timestamp=${timestamp_utc}" >> "${GITHUB_OUTPUT}"
+echo "commit=${commit}" >> "${GITHUB_OUTPUT}"
+echo "Most recent upstream commit is ${commit}"
diff --git a/.github/scripts/matrix.py b/.github/scripts/matrix.py
@@ -0,0 +1,194 @@
+#!/usr/bin/env python3
+
+import os
+import dataclasses
+import json
+
+from enum import Enum
+from typing import Any, Dict, List, Final, Set, Union
+
+# Owner and repositories for which is_managed_repo() reports True.
+MANAGED_OWNER: Final[str] = "kernel-patches"
+MANAGED_REPOS: Final[Set[str]] = {
+    f"{MANAGED_OWNER}/bpf",
+    f"{MANAGED_OWNER}/vmtest",
+}
+# We need to run on ubuntu 20.04 because our rootfs is based on debian buster and we
+# otherwise get library versioning issues such as
+# `./test_verifier: /lib/x86_64-linux-gnu/libc.so.6: version `GLIBC_2.34' not found (required by ./test_verifier)`
+DEFAULT_RUNNER: Final[str] = "ubuntu-20.04"
+# LLVM version used by the matrix entries that do not request a specific one.
+DEFAULT_LLVM_VERSION: Final[int] = 17
+
+
+class Arch(str, Enum):
+    """
+    CPU architectures supported by CI.
+
+    Members are also ``str`` instances; ``.value`` is the architecture name
+    that ends up in runner labels and in the generated matrix.
+    """
+
+    AARCH64 = "aarch64"
+    S390X = "s390x"
+    X86_64 = "x86_64"
+
+
+class Compiler(str, Enum):
+    """
+    Compiler family of a toolchain used by a CI build configuration.
+    """
+
+    GCC = "gcc"
+    LLVM = "llvm"
+
+
+@dataclasses.dataclass
+class Toolchain:
+    """A compiler family together with the version number exported to CI."""
+
+    compiler: Compiler
+    # This is relevant ONLY for LLVM and should not be required for GCC
+    version: int
+
+    @property
+    def short_name(self) -> str:
+        """Compiler name without any version, e.g. ``"gcc"`` or ``"llvm"``."""
+        return str(self.compiler.value)
+
+    @property
+    def full_name(self) -> str:
+        """Compiler name; LLVM additionally carries a ``-<version>`` suffix."""
+        name = self.short_name
+        is_gcc = self.compiler == Compiler.GCC
+        return name if is_gcc else f"{name}-{self.version}"
+
+    def to_dict(self) -> Dict[str, Union[str, int]]:
+        """Return the JSON-serializable form used in the build matrix."""
+        return dict(
+            name=self.short_name,
+            fullname=self.full_name,
+            version=self.version,
+        )
+
+
+@dataclasses.dataclass
+class BuildConfig:
+    """One entry of the CI build matrix: architecture, toolchain and flags."""
+
+    arch: Arch
+    toolchain: Toolchain
+    kernel: str = "LATEST"
+    run_veristat: bool = False
+    parallel_tests: bool = False
+    build_release: bool = False
+
+    @property
+    def runs_on(self) -> List[str]:
+        """Runner labels for testing: self-hosted per arch in managed repos."""
+        if is_managed_repo():
+            return ["self-hosted", self.arch.value]
+        return [DEFAULT_RUNNER]
+
+    @property
+    def build_runs_on(self) -> List[str]:
+        """Runner labels for building; s390x is built on x86_64 runners."""
+        if is_managed_repo():
+            # Build s390x on x86_64
+            build_arch = Arch.X86_64 if self.arch == Arch.S390X else self.arch
+            return ["self-hosted", build_arch.value]
+        return [DEFAULT_RUNNER]
+
+    @property
+    def tests(self) -> Dict[str, Any]:
+        """Test matrix (``{"include": [...]}``) for this build configuration."""
+        tests_list = [
+            "test_progs",
+            "test_progs_parallel",
+            "test_progs_no_alu32",
+            "test_progs_no_alu32_parallel",
+            "test_maps",
+            "test_verifier",
+        ]
+
+        # The cpuv4 flavor is only added for toolchain versions 18 and up.
+        if self.toolchain.version >= 18:
+            tests_list.append("test_progs_cpuv4")
+
+        # Use the same "_parallel" suffix that generate_test_config() checks.
+        if not self.parallel_tests:
+            tests_list = [
+                test for test in tests_list if not test.endswith("_parallel")
+            ]
+
+        return {"include": [generate_test_config(test) for test in tests_list]}
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the JSON-serializable form used in the build matrix."""
+        return {
+            "arch": self.arch.value,
+            "toolchain": self.toolchain.to_dict(),
+            "kernel": self.kernel,
+            "run_veristat": self.run_veristat,
+            "parallel_tests": self.parallel_tests,
+            "build_release": self.build_release,
+            "runs_on": self.runs_on,
+            "tests": self.tests,
+            "build_runs_on": self.build_runs_on,
+        }
+
+
+def is_managed_repo() -> bool:
+    """Tell whether the workflow runs in one of the MANAGED_REPOS.
+
+    Reads GITHUB_REPOSITORY_OWNER and, only if the owner matches,
+    GITHUB_REPOSITORY from the environment.
+    """
+    if os.environ["GITHUB_REPOSITORY_OWNER"] != MANAGED_OWNER:
+        return False
+    return os.environ["GITHUB_REPOSITORY"] in MANAGED_REPOS
+
+
+def set_output(name, value):
+    """Append a ``name=value`` line to the file named by GITHUB_OUTPUT."""
+    output_path = os.getenv("GITHUB_OUTPUT")
+    with open(output_path, "a", encoding="utf-8") as output:
+        print(f"{name}={value}", file=output)
+
+
+def generate_test_config(test: str) -> Dict[str, Union[str, int]]:
+    """Build the matrix entry (name, error policy, timeout) for one test."""
+    parallel = test.endswith("_parallel")
+    # Parallel jobs are still experimental and may get stuck anywhere,
+    # including in user space where the kernel won't detect a problem and
+    # panic. They therefore get a second, smaller timeout so that a hang
+    # fails only that run instead of hitting the job-wide timeout and
+    # affecting the overall job success. Non-experimental jobs use 360,
+    # which will be superseded by the overall workflow timeout (but some
+    # value has to be specified).
+    timeout = 30 if parallel else 360
+    return {
+        "test": test,
+        "continue_on_error": parallel,
+        "timeout_minutes": timeout,
+    }
+
+
+if __name__ == "__main__":
+    # Toolchains shared by the matrix entries below. GCC entries carry
+    # DEFAULT_LLVM_VERSION as their version as well.
+    gcc = Toolchain(compiler=Compiler.GCC, version=DEFAULT_LLVM_VERSION)
+    llvm_default = Toolchain(compiler=Compiler.LLVM, version=DEFAULT_LLVM_VERSION)
+    llvm_18 = Toolchain(compiler=Compiler.LLVM, version=18)
+
+    matrix = [
+        BuildConfig(
+            arch=Arch.X86_64,
+            toolchain=gcc,
+            run_veristat=True,
+            parallel_tests=True,
+        ),
+        BuildConfig(arch=Arch.X86_64, toolchain=llvm_default, build_release=True),
+        BuildConfig(arch=Arch.X86_64, toolchain=llvm_18, build_release=True),
+        BuildConfig(arch=Arch.AARCH64, toolchain=gcc),
+        # Currently disabled:
+        # BuildConfig(arch=Arch.AARCH64, toolchain=llvm_default),
+        BuildConfig(arch=Arch.S390X, toolchain=gcc),
+    ]
+
+    # Outside of those repositories we only run on x86_64
+    if not is_managed_repo():
+        matrix = [entry for entry in matrix if entry.arch == Arch.X86_64]
+
+    # Print the matrix for the log and publish it as the "build_matrix" output.
+    include = [entry.to_dict() for entry in matrix]
+    json_matrix = json.dumps({"include": include})
+    print(json_matrix)
+    set_output("build_matrix", json_matrix)
diff --git a/.github/scripts/prepare-incremental-builds.sh b/.github/scripts/prepare-incremental-builds.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# Exit on the first failing command and on expansion of an unset variable.
+set -eu
+
+# $1 - commit SHA that is written to ${KBUILD_OUTPUT}/.build-base-sha at the
+#      end of this script
+commit_id="${1}"
+
+# Fetch a commit from origin and check it out.
+# $1 - the SHA-1 to fetch and check out
+fetch_and_checkout() {
+  # If cached artifacts became stale for one reason or another, we
+  # may not have the build base SHA available. Fetch it and retry.
+  local sha="${1}"
+
+  git fetch origin "${sha}" || return
+  git checkout --quiet "${sha}"
+}
+
+# Replace the build output directory with an empty one.
+# $1 - value of KBUILD_OUTPUT
+clear_cache_artifacts() {
+  local kbuild_dir="${1}"
+
+  echo "Unable to find earlier upstream ref. Discarding KBUILD_OUTPUT contents..."
+  rm -rf "${kbuild_dir}"
+  mkdir "${kbuild_dir}"
+  # Always report failure so that the caller does not go on to adjust
+  # time stamps after the cache was discarded.
+  return 1
+}
+
+# Rewrite modification times: every git-tracked file is set to two minutes
+# before the given time and every file below the build output directory to
+# one minute before it, so build outputs end up newer than the sources.
+# Afterwards the previously checked-out ref is restored (`git checkout -`).
+# $1 - value of KBUILD_OUTPUT
+# $2 - current time in ISO 8601 format
+restore_source_code_times() {
+  local build_output
+  local current_time
+  local src_time
+  local obj_time
+
+  build_output="${1}"
+  current_time="${2}"
+  src_time="$(date --iso-8601=ns --date="${current_time} - 2 minutes")"
+  obj_time="$(date --iso-8601=ns --date="${current_time} - 1 minute")"
+
+  # Paths are passed NUL-delimited so that names containing whitespace,
+  # quotes or backslashes reach touch unmodified; touch is not run at all
+  # when a list is empty.
+  git ls-files -z \
+    | xargs --null --no-run-if-empty --max-args=10000 \
+        touch -m --no-create --date="${src_time}" --
+  find "${build_output}" -type f -print0 \
+    | xargs --null --no-run-if-empty --max-args=10000 \
+        touch -m --no-create --date="${obj_time}" --
+  git checkout --quiet -
+  echo "Adjusted src and obj time stamps relative to system time"
+}
+
+# Make sure the output directory exists and take the reference time that the
+# adjusted time stamps are computed from.
+mkdir --parents "${KBUILD_OUTPUT}"
+current_time="$(date --iso-8601=ns)"
+
+# .build-base-sha holds the SHA that an earlier run of this script recorded.
+if [ -f "${KBUILD_OUTPUT}/.build-base-sha" ]; then
+  build_base_sha="$(cat "${KBUILD_OUTPUT}/.build-base-sha")"
+  echo "Setting up base build state for ${build_base_sha}"
+
+  # Try a plain checkout first, then fetch + checkout, and finally discard
+  # the build output. clear_cache_artifacts reports failure, so time stamps
+  # are only adjusted when one of the checkouts succeeded.
+  (
+    git checkout --quiet "${build_base_sha}" \
+      || fetch_and_checkout "${build_base_sha}" \
+      || clear_cache_artifacts "${KBUILD_OUTPUT}"
+  ) && restore_source_code_times "${KBUILD_OUTPUT}" "${current_time}"
+else
+  echo "No previous build data found"
+fi
+
+# Record the commit given on the command line (no trailing newline) as the
+# build base read by the next run.
+echo -n "${commit_id}" > "${KBUILD_OUTPUT}/.build-base-sha"