Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 93 additions & 7 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ KERNEL_EXPERIMENT=${KERNEL_EXPERIMENT:-none}
KERNEL_ORDER_FILE=${KERNEL_ORDER_FILE:-}
KERNEL_SYSCALL_TABLE=${KERNEL_SYSCALL_TABLE:-}
KERNEL_CONFIG_FRAGMENT=${KERNEL_CONFIG_FRAGMENT:-}
# DWARF policy. The default ('none') matches the production image:
# CONFIG_DEBUG_INFO_NONE=y, addr2line cannot resolve any symbol, and
# scripts/subsystem-rollup.py exits with the documented "DWARF
# missing" code. Set KERNEL_DEBUG_INFO=reduced to ship REDUCED DWARF
# so the diagnostic rollup can attribute symbols to source dirs. The
# deployed Image is stripped of debug sections before bootwrapper
# packing, so this knob only changes vmlinux artifacts and link time.
KERNEL_DEBUG_INFO=${KERNEL_DEBUG_INFO:-none}
KERNEL_REPORT_DIR=${KERNEL_REPORT_DIR:-${ROOTDIR}/profiles/kernel-pgo}
PGO_WORKLOAD_FILE=${PGO_WORKLOAD_FILE:-${ROOTDIR}/configs/pgo-workload.txt}
PGO_BASE_CONFIG_FRAGMENT=${PGO_BASE_CONFIG_FRAGMENT:-${ROOTDIR}/configs/kernel-pgo-prune.config}
Expand Down Expand Up @@ -74,6 +82,7 @@ image_fingerprint() {
printf 'KERNEL_ORDER_FILE=%s\n' "${KERNEL_ORDER_FILE}"
printf 'KERNEL_SYSCALL_TABLE=%s\n' "${KERNEL_SYSCALL_TABLE}"
printf 'KERNEL_CONFIG_FRAGMENT=%s\n' "${KERNEL_CONFIG_FRAGMENT}"
printf 'KERNEL_DEBUG_INFO=%s\n' "${KERNEL_DEBUG_INFO}"
if [ -n "${KERNEL_ORDER_FILE}" ] && [ -f "${KERNEL_ORDER_FILE}" ]; then
sha256sum "${KERNEL_ORDER_FILE}"
fi
Expand Down Expand Up @@ -759,11 +768,35 @@ build_linux() {
# default contributes ~180KB of static .data via _printk_rb_static_infos.
sed -i "/^CONFIG_LOG_BUF_SHIFT=/d" .config
echo "CONFIG_LOG_BUF_SHIFT=12" >>.config
# No DWARF in vmlinux: shortens the kernel link and shrinks build
# artifacts. CONFIG_DEBUG_INFO is a hidden bool selected by the
# DWARF4/5 choice options; once DEBUG_INFO_NONE wins, it disappears
# from .config rather than emitting an explicit "not set" line.
echo "CONFIG_DEBUG_INFO_NONE=y" >>.config
# DWARF policy. Production: CONFIG_DEBUG_INFO_NONE=y (no DWARF in
# vmlinux, fastest link, smallest build artifacts). Diagnostic:
# KERNEL_DEBUG_INFO=reduced enables CONFIG_DEBUG_INFO_REDUCED=y so
# scripts/subsystem-rollup.py can attribute every .text symbol to
# a source directory via addr2line. CONFIG_DEBUG_INFO is a hidden
# bool selected by the DWARF choice options; we never set it
# directly. The deployed Image is stripped of debug sections, so
# the diagnostic build does not change shipped image size.
case "${KERNEL_DEBUG_INFO}" in
none)
echo "CONFIG_DEBUG_INFO_NONE=y" >>.config
;;
reduced)
# The "Debug information" choice block requires exactly one
# positive selection. Pick TOOLCHAIN_DEFAULT (which selects
# the hidden CONFIG_DEBUG_INFO bool), then layer
# DEBUG_INFO_REDUCED on top -- it is a depends-on-DEBUG_INFO
# modifier, not a choice member. Stating only DEBUG_INFO_NONE
# off would leave the choice under-specified and olddefconfig
# would silently fall back to the kconfig default.
echo "# CONFIG_DEBUG_INFO_NONE is not set" >>.config
echo "CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y" >>.config
echo "CONFIG_DEBUG_INFO_REDUCED=y" >>.config
;;
*)
echo "ERROR: KERNEL_DEBUG_INFO must be 'none' or 'reduced' (got '${KERNEL_DEBUG_INFO}')"
exit 1
;;
esac
# Drop the ARM EABI unwind tables (.ARM.exidx/.ARM.extab, ~75KB).
# Requires patch 0010 to introduce UNWINDER_NONE on Thumb-2.
echo "# CONFIG_UNWINDER_ARM is not set" >>.config
Expand Down Expand Up @@ -804,6 +837,19 @@ build_linux() {
echo "# CONFIG_SHMEM is not set" >>.config
echo "# CONFIG_SECURITY is not set" >>.config

# Initrd decompressor pruning: the embedded initramfs is gzip-compressed
# (CONFIG_INITRAMFS_COMPRESSION_GZIP=y). Every other RD_* selector
# defaults to y under EXPERT and pulls a full decompressor library into
# the image -- olddefconfig silently restores them after defconfig.
# Sub-bucket rollup measured RD_ZSTD = 36,942 bytes (lib/zstd),
# RD_LZ4 = 10,972 bytes (lib/lz4), RD_XZ = 6,598 bytes (lib/xz) of
# dead .text in the production vmlinux. RD_ZSTD also pulls
# lib/xxhash.c (~3KB). Keep RD_GZIP=y as the boot-path requirement;
# explicitly disable the rest.
echo "# CONFIG_RD_ZSTD is not set" >>.config
echo "# CONFIG_RD_LZ4 is not set" >>.config
echo "# CONFIG_RD_XZ is not set" >>.config

# Serial-only target: drop the VT terminal layer and accessibility
# console support. CONFIG_TTY stays on -- the AMBA PL011 console
# driver depends on it.
Expand Down Expand Up @@ -871,7 +917,6 @@ build_linux() {
"# CONFIG_BLOCK is not set" \
"CONFIG_SLUB_TINY=y" \
"CONFIG_LOG_BUF_SHIFT=12" \
"CONFIG_DEBUG_INFO_NONE=y" \
"CONFIG_UNWINDER_NONE=y" \
"# CONFIG_IO_URING is not set" \
"# CONFIG_FUTEX is not set" \
Expand Down Expand Up @@ -905,7 +950,10 @@ build_linux() {
"# CONFIG_SECCOMP is not set" \
"# CONFIG_KEYS is not set" \
"# CONFIG_STACKPROTECTOR is not set" \
"# CONFIG_DEBUG_BUGVERBOSE is not set"; do
"# CONFIG_DEBUG_BUGVERBOSE is not set" \
"# CONFIG_RD_ZSTD is not set" \
"# CONFIG_RD_LZ4 is not set" \
"# CONFIG_RD_XZ is not set"; do
if ! grep -q "^${opt}\$" .config; then
echo "ERROR: expected '${opt}' in .config after olddefconfig"
exit 1
Expand All @@ -920,6 +968,28 @@ build_linux() {
exit 1
fi

# DWARF policy is mode-conditional: production must keep
# DEBUG_INFO_NONE=y and must not let DEBUG_INFO_REDUCED=y survive
# olddefconfig; the diagnostic build must keep DEBUG_INFO_REDUCED=y.
# The diagnostic branch does not separately test that DEBUG_INFO_NONE
# is absent.
case "${KERNEL_DEBUG_INFO}" in
none)
if ! grep -q "^CONFIG_DEBUG_INFO_NONE=y\$" .config; then
echo "ERROR: expected 'CONFIG_DEBUG_INFO_NONE=y' in .config (KERNEL_DEBUG_INFO=none)"
exit 1
fi
if grep -q "^CONFIG_DEBUG_INFO_REDUCED=y\$" .config; then
echo "ERROR: CONFIG_DEBUG_INFO_REDUCED=y survived olddefconfig despite KERNEL_DEBUG_INFO=none"
exit 1
fi
;;
reduced)
if ! grep -q "^CONFIG_DEBUG_INFO_REDUCED=y\$" .config; then
echo "ERROR: expected 'CONFIG_DEBUG_INFO_REDUCED=y' in .config (KERNEL_DEBUG_INFO=reduced)"
exit 1
fi
;;
esac

# Negative-guard for symbols whose `# CONFIG_X is not set` line gets
# stripped by olddefconfig because their `depends on` clause is unmet
# under the current build state (NOMMU, NET=n, SYSFS=n, MULTIUSER=n,
Expand All @@ -942,6 +1012,22 @@ build_linux() {
fi
done

# Decompressor library guard. RD_ZSTD/RD_LZ4/RD_XZ disabled above
# must cascade to ZSTD_DECOMPRESS / LZ4_DECOMPRESS / XZ_DEC, the
# umbrella DECOMPRESS_* hidden bools, and XXHASH (selected by
# ZSTD_DECOMPRESS, also pulled by BCACHE / BTRFS but those need
# BLOCK=y which this target lacks). If anything else still
# selects them (a future fs/ or net/ enable, e.g. squashfs+zstd),
# we must catch that drift loudly so the size win does not
# silently regress.
for sym in ZSTD_DECOMPRESS ZSTD_COMMON LZ4_DECOMPRESS XZ_DEC \
XXHASH DECOMPRESS_ZSTD DECOMPRESS_LZ4 DECOMPRESS_XZ; do
if grep -q "^CONFIG_${sym}=y\$" .config; then
echo "ERROR: CONFIG_${sym}=y survived olddefconfig (decompressor guard tripped)"
exit 1
fi
done

if [ "${KERNEL_EXPERIMENT}" = "llvm-order-use" ]; then
run_logged "build" kernel_make -j${MAKE_JOBS} KALLSYMS_EXTRA_PASS=1
else
Expand Down
34 changes: 34 additions & 0 deletions configs/subsystem-budget.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Per-bucket vmlinux .text byte ceilings.
#
# Format: <bucket> <ceiling-bytes> [<noise-band-pct>]
#
# - Bucket names match scripts/subsystem-rollup.py output. Run a
# diagnostic build (KERNEL_DEBUG_INFO=reduced) and inspect
# profiles/kernel-pgo/none/subsystem-rollup.txt for the live names.
# - The noise band absorbs run-to-run variance from GCC LTO
# re-deciding what to inline when nothing semantic changed. Default
# is 2.0%. Start there, then tighten after observing a week of
# clean builds. <icf-merged> tends to be noisier than real
# subsystems, so a wider band there is reasonable.
# - A breach is "actual > limit * (1 + band/100)". The total-bytes
# gate is the coarse safeguard; this layer answers WHICH bucket
# regressed.
#
# How to populate:
# 1. KERNEL_DEBUG_INFO=reduced ./build.sh linux bootwrapper
# 2. Read profiles/kernel-pgo/none/subsystem-rollup.txt for the
# observed sizes.
# 3. Pick ceilings 5-10% above each observed value -- enough room
# for legitimate growth without masking regressions.
#
# Example values (uncomment and tune to your build):
# kernel 260000 2.0
# mm 80000 2.0
# fs 20000 2.0
# arch/arm 120000 2.0
# drivers/tty 25000 2.0
# drivers/clocksource 10000 2.0
# lib 70000 2.0
# crypto 5000 2.0
# <icf-merged> 30000 5.0
# <compiler-partition> 5000 5.0
157 changes: 157 additions & 0 deletions scripts/check-subsystem-budget.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
#!/usr/bin/env python3

# Diff vmlinux .text subsystem rollup against per-bucket byte budgets.
#
# The total-bytes regression gate is the coarse safeguard: it catches
# the image getting bigger overall. It cannot see a 3% growth in mm/
# that is cancelled out by a 3% drop in drivers/. This script reads
# scripts/subsystem-rollup.py's table and compares each bucket against
# configs/subsystem-budget.txt with a per-bucket noise band -- LTO
# re-decides what to inline between rebuilds, so identical sources
# still produce small per-bucket fluctuations. The band is one-sided:
# it only raises the ceiling, it never flags a bucket for shrinking.
# Default band is 2%; tighten after observing run-to-run variance over
# a week of clean builds.
#
# Exit codes:
# 0 -- all buckets within band, OR no budget rules active
# 1 -- one or more buckets exceed (limit * (1 + band/100))
# 2 -- missing/unreadable inputs (rollup or budget file)

import argparse
import pathlib
import sys

# Noise band, in percent, applied to any budget rule that does not
# supply its own third column. Also echoed in the header of the status
# table written by main().
DEFAULT_BAND_PCT = 2.0


def read_budget(path):
    """Parse the budget file into ``{bucket: (limit, band_pct)}``.

    Each line is ``<bucket> <ceiling-bytes> [<noise-band-pct>]``, split on
    whitespace. Everything from the first ``#`` to end of line is a
    comment, so bucket names cannot contain ``#``. Blank lines are skipped.

    Handling of bad rules (all diagnostics go to stderr):
      * fewer than two fields      -> rule ignored
      * non-integer ceiling        -> rule ignored
      * unparsable/non-finite band -> rule kept with DEFAULT_BAND_PCT

    A rule without a band column uses DEFAULT_BAND_PCT silently. If a
    bucket appears more than once, the last rule wins.

    Args:
        path: pathlib.Path (or any object with ``read_text()``) of the
            budget file.

    Returns:
        dict mapping bucket name to ``(limit: int, band: float)``.

    Raises:
        Whatever ``path.read_text()`` raises (e.g. OSError).
    """
    # Function-scope import: the module does not import math at file level.
    import math

    rules = {}
    for raw in path.read_text().splitlines():
        line = raw.split("#", 1)[0].strip()
        if not line:
            continue
        parts = line.split()
        if len(parts) < 2:
            print(
                f"check-subsystem-budget: ignoring malformed rule "
                f"in {path}: {raw!r}",
                file=sys.stderr,
            )
            continue
        bucket = parts[0]
        try:
            limit = int(parts[1])
        except ValueError:
            print(
                f"check-subsystem-budget: non-integer limit in {path}: "
                f"{raw!r}",
                file=sys.stderr,
            )
            continue

        band = DEFAULT_BAND_PCT
        if len(parts) >= 3:
            try:
                parsed = float(parts[2])
            except ValueError:
                parsed = None
            # float() happily accepts "nan" and "inf"; neither is a usable
            # percentage, so treat them like any other unparsable band.
            if parsed is None or not math.isfinite(parsed):
                print(
                    f"check-subsystem-budget: invalid noise band in {path}, "
                    f"using default {DEFAULT_BAND_PCT}%: {raw!r}",
                    file=sys.stderr,
                )
            else:
                band = parsed
        rules[bucket] = (limit, band)
    return rules


def read_rollup(path):
    """Parse the tab-delimited rollup table into ``{bucket: byte_count}``.

    Empty lines and lines starting with ``#`` are skipped. Of the
    remaining lines, the text before the first tab is the bucket name
    (kept verbatim) and the second tab-separated field is the size.
    Lines without a tab, or whose second field is not an integer, are
    dropped silently. Later rows for the same bucket overwrite earlier
    ones.
    """
    sizes = {}
    for record in path.read_text().splitlines():
        if record == "" or record[0] == "#":
            continue
        # Only tabs separate fields; bucket names may contain dashes and
        # angle brackets, so nothing else is treated as a delimiter.
        name, tab, remainder = record.partition("\t")
        if not tab:
            continue
        size_text = remainder.split("\t", 1)[0]
        try:
            size = int(size_text)
        except ValueError:
            continue
        sizes[name] = size
    return sizes


def _exceeds_ceiling(actual, limit, band):
    """Return True when ``actual > limit * (1 + band / 100)``.

    Both sides are scaled by 100 so the ceiling is never divided and then
    truncated: ``int(100 * (1 + 15 / 100.0))`` is 114, not 115, which
    would flag a bucket sitting exactly on its ceiling. The comparison is
    written as a negated ``<=`` so a NaN band fails closed (reports a
    breach) instead of silently passing.
    """
    return not (actual * 100 <= limit * (100.0 + band))


def main(argv):
    """Compare the subsystem rollup against per-bucket budgets.

    Args:
        argv: command-line arguments without the program name; expects
            ``--rollup``, ``--budget`` and ``--output`` paths.

    Returns:
        0 -- every budgeted bucket is within its band, or the budget file
             holds no active rules
        1 -- at least one bucket exceeds ``limit * (1 + band/100)``
        2 -- the rollup or budget file is missing or cannot be read

    Side effects:
        Writes a tab-separated status table to ``--output`` (creating the
        parent directory if needed) and prints diagnostics to stderr.
    """
    ap = argparse.ArgumentParser(
        description="Compare subsystem rollup against per-bucket budgets."
    )
    ap.add_argument("--rollup", required=True, type=pathlib.Path)
    ap.add_argument("--budget", required=True, type=pathlib.Path)
    ap.add_argument(
        "--output",
        required=True,
        type=pathlib.Path,
        help="Where to write the human-readable status table.",
    )
    args = ap.parse_args(argv)

    if not args.rollup.exists():
        print(
            f"check-subsystem-budget: rollup not found: {args.rollup}",
            file=sys.stderr,
        )
        return 2
    if not args.budget.exists():
        print(
            f"check-subsystem-budget: budget not found: {args.budget}",
            file=sys.stderr,
        )
        return 2

    # exists() is also true for directories and unreadable files. Without
    # this guard the resulting exception would terminate the interpreter
    # with status 1, which callers would read as "budget breached".
    try:
        budgets = read_budget(args.budget)
        rollup = read_rollup(args.rollup)
    except (OSError, UnicodeDecodeError) as exc:
        print(
            f"check-subsystem-budget: cannot read inputs: {exc}",
            file=sys.stderr,
        )
        return 2

    if not budgets:
        # Empty file is a deliberate state: the operator has staged the
        # gate but not pinned ceilings yet (typical after the first
        # diagnostic build). Emit a status note and succeed.
        args.output.parent.mkdir(parents=True, exist_ok=True)
        args.output.write_text(
            "# subsystem budget check\n"
            "# no active rules in budget file -- nothing to gate\n"
        )
        return 0

    breaches = []
    lines = [
        f"# subsystem budget check (default band = +/- {DEFAULT_BAND_PCT}%)",
        f"# rollup: {args.rollup}",
        f"# budget: {args.budget}",
        "# bucket\tactual\tlimit\tband_pct\tdelta_vs_limit\tstatus",
    ]
    for bucket, (limit, band) in sorted(budgets.items()):
        if bucket not in rollup:
            # Still counted as 0 bytes (the table format is unchanged),
            # but say so: a misspelled or renamed bucket would otherwise
            # leave its rule permanently "ok".
            print(
                f"check-subsystem-budget: bucket {bucket!r} has a budget "
                "but is absent from the rollup; treating it as 0 bytes",
                file=sys.stderr,
            )
        actual = rollup.get(bucket, 0)
        delta = actual - limit
        status = "BREACH" if _exceeds_ceiling(actual, limit, band) else "ok"
        if status == "BREACH":
            breaches.append((bucket, actual, limit, band, delta))
        lines.append(
            f"{bucket}\t{actual}\t{limit}\t{band:.1f}\t{delta:+d}\t{status}"
        )

    args.output.parent.mkdir(parents=True, exist_ok=True)
    args.output.write_text("\n".join(lines) + "\n")

    if breaches:
        print(
            f"check-subsystem-budget: {len(breaches)} bucket(s) breach "
            "the budget (after noise band):",
            file=sys.stderr,
        )
        for bucket, actual, limit, band, delta in breaches:
            print(
                f"  {bucket}: {actual} > {limit} "
                f"({delta:+d} bytes, band {band:.1f}%)",
                file=sys.stderr,
            )
        return 1
    return 0


# Script entry point: forward the CLI arguments (minus the program name)
# to main() and use its return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))
3 changes: 2 additions & 1 deletion scripts/collect-kernel-profile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ fi
rm -f "${TRACE_LOG}" "${CONSOLE_LOG}" "${MANIFEST_LOG}" \
"${PROFILE_PREFIX}_ld_profile.txt" "${PROFILE_PREFIX}_summary.txt" \
"${PROFILE_PREFIX}_hits.txt" "${PROFILE_PREFIX}_concentration.txt" \
"${PROFILE_PREFIX}_syscalls.txt"
"${PROFILE_PREFIX}_syscalls.txt" "${PROFILE_PREFIX}_bootcost.txt"

# exec,in_asm: TB execution counts + disassembly for SVC site detection.
# Add "cpu" (QEMU_LOG=exec,cpu,in_asm) to enable R7-based syscall number
Expand Down Expand Up @@ -66,3 +66,4 @@ echo " ${PROFILE_PREFIX}_hits.txt"
echo " ${PROFILE_PREFIX}_concentration.txt"
echo " ${PROFILE_PREFIX}_summary.txt"
echo " ${PROFILE_PREFIX}_syscalls.txt"
echo " ${PROFILE_PREFIX}_bootcost.txt"
Loading
Loading