From c4bbb041b59eba6eebee66b44cd26519fa936f83 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 9 Nov 2025 12:33:29 -0800 Subject: [PATCH 1/3] [CodeGen][KCFI] Emit signed comment for .set directive value When emitting the assembly .set directive, KCFI needs to use getZExtValue(). However, this means that FileCheck pattern matching can't match between the .set directive and the IR when the high bit of a 32-bit value is set. We had gotten lucky with the existing tests happening to just not have had the high bit set. The coming hash change will expose this, though. LLVM IR's default printing behavior uses APInt::operator<<, which calls APInt::print(OS, /*isSigned=*/true). This means KCFI operand bundles in call instructions print as signed (e.g. [ "kcfi"(i32 -1208803271) ]), and KCFI type metadata prints as signed (e.g. !3 = !{i32 -1208803271}). Changing the IR to print unsigned i32 values would impact hundreds of existing tests, so it is best to just leave it be. Update the KCFI .set direct to use getSExtValue() in a comment so that we can both build correctly and use FileCheck with pattern matching in tests. Signed-off-by: Kees Cook --- clang/lib/CodeGen/CodeGenModule.cpp | 3 ++- clang/test/CodeGen/cfi-salt.c | 4 ++-- clang/test/CodeGen/kcfi.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index f303550c64292..d91efd1ba76ed 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -3189,7 +3189,8 @@ void CodeGenModule::finalizeKCFITypes() { continue; std::string Asm = (".weak __kcfi_typeid_" + Name + "\n.set __kcfi_typeid_" + - Name + ", " + Twine(Type->getZExtValue()) + "\n") + Name + ", " + Twine(Type->getZExtValue()) + " # " + + Twine(Type->getSExtValue()) + "\n") .str(); M.appendModuleInlineAsm(Asm); } diff --git a/clang/test/CodeGen/cfi-salt.c b/clang/test/CodeGen/cfi-salt.c index 7ba1e2fc14daa..8363236869013 100644 --- a/clang/test/CodeGen/cfi-salt.c +++ b/clang/test/CodeGen/cfi-salt.c @@ -27,9 +27,9 @@ typedef unsigned int (* __cfi_salt ufn_salt_t)(void); /// Must emit __kcfi_typeid symbols for address-taken function declarations // CHECK: module asm ".weak __kcfi_typeid_[[F4:[a-zA-Z0-9_]+]]" -// CHECK: module asm ".set __kcfi_typeid_[[F4]], [[#%d,LOW_SODIUM_HASH:]]" +// CHECK: module asm ".set __kcfi_typeid_[[F4]], {{[0-9]+}} # [[#%d,LOW_SODIUM_HASH:]]" // CHECK: module asm ".weak __kcfi_typeid_[[F4_SALT:[a-zA-Z0-9_]+]]" -// CHECK: module asm ".set __kcfi_typeid_[[F4_SALT]], [[#%d,ASM_SALTY_HASH:]]" +// CHECK: module asm ".set __kcfi_typeid_[[F4_SALT]], {{[0-9]+}} # [[#%d,ASM_SALTY_HASH:]]" /// Must not __kcfi_typeid symbols for non-address-taken declarations // CHECK-NOT: module asm ".weak __kcfi_typeid_f6" diff --git a/clang/test/CodeGen/kcfi.c b/clang/test/CodeGen/kcfi.c index 622843cedba50..b2856b5149be9 100644 --- a/clang/test/CodeGen/kcfi.c +++ b/clang/test/CodeGen/kcfi.c @@ -7,7 +7,7 @@ /// Must emit __kcfi_typeid symbols for address-taken function declarations // CHECK: module asm ".weak __kcfi_typeid_[[F4:[a-zA-Z0-9_]+]]" -// CHECK: module asm ".set __kcfi_typeid_[[F4]], [[#%d,HASH:]]" +// CHECK: module asm ".set __kcfi_typeid_[[F4]], {{[0-9]+}} # [[#%d,HASH:]]" /// Must not __kcfi_typeid symbols for non-address-taken declarations // CHECK-NOT: module asm ".weak __kcfi_typeid_{{f6|_Z2f6v}}" From a6c1fda53ebea596e52a5e8dfd4a3d0165944519 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 9 Nov 2025 13:47:35 -0800 Subject: [PATCH 2/3] [CodeGen][KCFI] Introduce llvm::getKCFITypeID to wrap hash implementation KCFI generates hashes in two places. Instead of exposing the hash implementation in both places, introduce a helper that wraps the specific hash implementation in a single place. Signed-off-by: Kees Cook --- clang/lib/CodeGen/CodeGenModule.cpp | 5 ++-- llvm/include/llvm/Support/Hash.h | 26 ++++++++++++++++++++ llvm/lib/Support/CMakeLists.txt | 1 + llvm/lib/Support/Hash.cpp | 20 +++++++++++++++ llvm/lib/Transforms/Instrumentation/KCFI.cpp | 1 + llvm/lib/Transforms/Utils/ModuleUtils.cpp | 12 ++++----- 6 files changed, 56 insertions(+), 9 deletions(-) create mode 100644 llvm/include/llvm/Support/Hash.h create mode 100644 llvm/lib/Support/Hash.cpp diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index d91efd1ba76ed..30ff5c11afcf9 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -67,10 +67,10 @@ #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TimeProfiler.h" -#include "llvm/Support/xxhash.h" #include "llvm/TargetParser/RISCVISAInfo.h" #include "llvm/TargetParser/Triple.h" #include "llvm/TargetParser/X86TargetParser.h" +#include "llvm/Transforms/Instrumentation/KCFI.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include #include @@ -2434,8 +2434,7 @@ llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T, StringRef Salt) { if (getCodeGenOpts().SanitizeCfiICallGeneralizePointers) Out << ".generalized"; - return llvm::ConstantInt::get(Int32Ty, - static_cast(llvm::xxHash64(OutName))); + return llvm::ConstantInt::get(Int32Ty, llvm::getKCFITypeID(OutName)); } void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD, diff --git a/llvm/include/llvm/Support/Hash.h b/llvm/include/llvm/Support/Hash.h new file mode 100644 index 0000000000000..f98318fbb5340 --- /dev/null +++ b/llvm/include/llvm/Support/Hash.h @@ -0,0 +1,26 @@ +//===- llvm/Support/Hash.h - Hash functions --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides hash functions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_HASH_H +#define LLVM_SUPPORT_HASH_H + +#include "llvm/ADT/StringRef.h" +#include + +namespace llvm { + +/// Compute KCFI type ID from mangled type name using FNV-1a hash. +uint32_t getKCFITypeID(StringRef MangledTypeName); + +} // end namespace llvm + +#endif // LLVM_SUPPORT_HASH_H diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 671a5fe941cef..9f40c1ed1a10b 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -202,6 +202,7 @@ add_llvm_component_library(LLVMSupport FormatVariadic.cpp GlobPattern.cpp GraphWriter.cpp + Hash.cpp HexagonAttributeParser.cpp HexagonAttributes.cpp InitLLVM.cpp diff --git a/llvm/lib/Support/Hash.cpp b/llvm/lib/Support/Hash.cpp new file mode 100644 index 0000000000000..40223b55144b8 --- /dev/null +++ b/llvm/lib/Support/Hash.cpp @@ -0,0 +1,20 @@ +//===- Hash.cpp - Hash functions ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements hash functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Hash.h" +#include "llvm/Support/xxhash.h" + +using namespace llvm; + +uint32_t llvm::getKCFITypeID(StringRef MangledTypeName) { + return static_cast(xxHash64(MangledTypeName)); +} diff --git a/llvm/lib/Transforms/Instrumentation/KCFI.cpp b/llvm/lib/Transforms/Instrumentation/KCFI.cpp index f4cb4e2d1c9e1..f06b1d3157939 100644 --- a/llvm/lib/Transforms/Instrumentation/KCFI.cpp +++ b/llvm/lib/Transforms/Instrumentation/KCFI.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/Support/xxhash.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp index 596849ecab742..4b496dbae5a61 100644 --- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -11,8 +11,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ModuleUtils.h" -#include "llvm/Analysis/VectorUtils.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -21,7 +21,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/xxhash.h" +#include "llvm/Transforms/Instrumentation/KCFI.h" using namespace llvm; @@ -208,10 +208,10 @@ void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) { std::string Type = MangledType.str(); if (M.getModuleFlag("cfi-normalize-integers")) Type += ".normalized"; - F.setMetadata(LLVMContext::MD_kcfi_type, - MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( - Type::getInt32Ty(Ctx), - static_cast(xxHash64(Type)))))); + F.setMetadata( + LLVMContext::MD_kcfi_type, + MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( + Type::getInt32Ty(Ctx), getKCFITypeID(Type))))); // If the module was compiled with -fpatchable-function-entry, ensure // we use the same patchable-function-prefix. if (auto *MD = mdconst::extract_or_null( From 7ee5e430e955b6fd67e709a68fec170a9e159d46 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 13 Nov 2025 17:24:17 -0800 Subject: [PATCH 3/3] [CodeGen][KCFI] Provide alternative KCFI hash In order to transition between KCFI hash, we need to be able to specify them. Add the Clang option -fsanitize-kcfi-hash= and a IR module option "kcfi-hash" that can choose between xxHash64 and FNV-1a. Default to xxHash64 to stay backward compatible, as we'll need to also update rustc to take a new option to change the hash to FNV-1a for interop with the coming GCC KCFI. --- clang/include/clang/Basic/CodeGenOptions.h | 4 +++ clang/include/clang/Options/Options.td | 10 +++++++ clang/lib/CodeGen/CodeGenModule.cpp | 10 ++++++- clang/test/CodeGen/kcfi-hash.c | 9 ++++++ llvm/include/llvm/Support/Hash.h | 16 ++++++++-- llvm/lib/Support/Hash.cpp | 35 ++++++++++++++++++++-- llvm/lib/Transforms/Utils/ModuleUtils.cpp | 15 +++++++--- 7 files changed, 90 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/kcfi-hash.c diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index 6c445253d518b..c60ca507ff917 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -21,6 +21,7 @@ #include "llvm/Frontend/Debug/Options.h" #include "llvm/Frontend/Driver/CodeGenOptions.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/Hash.h" #include "llvm/Support/Regex.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Instrumentation/AddressSanitizerOptions.h" @@ -514,6 +515,9 @@ class CodeGenOptions : public CodeGenOptionsBase { /// binary metadata pass should not be instrumented. std::vector SanitizeMetadataIgnorelistFiles; + /// Hash algorithm to use for KCFI type IDs. + llvm::KCFIHashAlgorithm SanitizeKcfiHash; + /// Name of the stack usage file (i.e., .su file) if user passes /// -fstack-usage. If empty, it can be implied that -fstack-usage is not /// passed on the command line. diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 2f7434d8afe11..647ee5912d42f 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -2690,6 +2690,16 @@ def fsanitize_kcfi_arity : Flag<["-"], "fsanitize-kcfi-arity">, Group, HelpText<"Embed function arity information into the KCFI patchable function prefix">, MarshallingInfoFlag>; +def fsanitize_kcfi_hash_EQ + : Joined<["-"], "fsanitize-kcfi-hash=">, + HelpText<"Select hash algorithm for KCFI type IDs (xxHash64, FNV-1a)">, + Visibility<[ClangOption, CC1Option]>, + Values<"xxHash64,FNV-1a">, + NormalizedValuesScope<"llvm">, + NormalizedValues<["KCFIHashAlgorithm::xxHash64", + "KCFIHashAlgorithm::FNV1a"]>, + MarshallingInfoEnum, + "KCFIHashAlgorithm::xxHash64">; defm sanitize_stats : BoolOption<"f", "sanitize-stats", CodeGenOpts<"SanitizeStats">, DefaultFalse, PosFlag, diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 30ff5c11afcf9..7b48d8a22b79e 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -66,6 +66,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Hash.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/TargetParser/RISCVISAInfo.h" #include "llvm/TargetParser/Triple.h" @@ -1272,6 +1273,12 @@ void CodeGenModule::Release() { CodeGenOpts.PatchableFunctionEntryOffset); if (CodeGenOpts.SanitizeKcfiArity) getModule().addModuleFlag(llvm::Module::Override, "kcfi-arity", 1); + // Store the hash algorithm choice for use in LLVM passes + getModule().addModuleFlag( + llvm::Module::Override, "kcfi-hash", + llvm::MDString::get( + getLLVMContext(), + llvm::stringifyKCFIHashAlgorithm(CodeGenOpts.SanitizeKcfiHash))); } if (CodeGenOpts.CFProtectionReturn && @@ -2434,7 +2441,8 @@ llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T, StringRef Salt) { if (getCodeGenOpts().SanitizeCfiICallGeneralizePointers) Out << ".generalized"; - return llvm::ConstantInt::get(Int32Ty, llvm::getKCFITypeID(OutName)); + return llvm::ConstantInt::get( + Int32Ty, llvm::getKCFITypeID(OutName, getCodeGenOpts().SanitizeKcfiHash)); } void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD, diff --git a/clang/test/CodeGen/kcfi-hash.c b/clang/test/CodeGen/kcfi-hash.c new file mode 100644 index 0000000000000..636d265feb9b4 --- /dev/null +++ b/clang/test/CodeGen/kcfi-hash.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fsanitize=kcfi -o - %s | FileCheck --check-prefix=DEFAULT %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fsanitize=kcfi -fsanitize-kcfi-hash=xxHash64 -o - %s | FileCheck --check-prefix=XXHASH %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fsanitize=kcfi -fsanitize-kcfi-hash=FNV-1a -o - %s | FileCheck --check-prefix=FNV %s + +void foo(void) {} + +// DEFAULT: ![[#]] = !{i32 4, !"kcfi-hash", !"xxHash64"} +// XXHASH: ![[#]] = !{i32 4, !"kcfi-hash", !"xxHash64"} +// FNV: ![[#]] = !{i32 4, !"kcfi-hash", !"FNV-1a"} diff --git a/llvm/include/llvm/Support/Hash.h b/llvm/include/llvm/Support/Hash.h index f98318fbb5340..303acde0551ad 100644 --- a/llvm/include/llvm/Support/Hash.h +++ b/llvm/include/llvm/Support/Hash.h @@ -18,8 +18,20 @@ namespace llvm { -/// Compute KCFI type ID from mangled type name using FNV-1a hash. -uint32_t getKCFITypeID(StringRef MangledTypeName); +enum class KCFIHashAlgorithm { xxHash64, FNV1a }; + +/// Parse a KCFI hash algorithm name. +/// Returns xxHash64 if the name is not recognized. +KCFIHashAlgorithm parseKCFIHashAlgorithm(StringRef Name); + +/// Convert a KCFI hash algorithm enum to its string representation. +StringRef stringifyKCFIHashAlgorithm(KCFIHashAlgorithm Algorithm); + +/// Compute KCFI type ID from mangled type name. +/// The algorithm can be xxHash64 or FNV-1a. +uint32_t +getKCFITypeID(StringRef MangledTypeName, + KCFIHashAlgorithm Algorithm = KCFIHashAlgorithm::xxHash64); } // end namespace llvm diff --git a/llvm/lib/Support/Hash.cpp b/llvm/lib/Support/Hash.cpp index 40223b55144b8..38befcca86b15 100644 --- a/llvm/lib/Support/Hash.cpp +++ b/llvm/lib/Support/Hash.cpp @@ -15,6 +15,37 @@ using namespace llvm; -uint32_t llvm::getKCFITypeID(StringRef MangledTypeName) { - return static_cast(xxHash64(MangledTypeName)); +KCFIHashAlgorithm llvm::parseKCFIHashAlgorithm(StringRef Name) { + if (Name == "FNV-1a") + return KCFIHashAlgorithm::FNV1a; + // Default to xxHash64 for backward compatibility + return KCFIHashAlgorithm::xxHash64; +} + +StringRef llvm::stringifyKCFIHashAlgorithm(KCFIHashAlgorithm Algorithm) { + switch (Algorithm) { + case KCFIHashAlgorithm::xxHash64: + return "xxHash64"; + case KCFIHashAlgorithm::FNV1a: + return "FNV-1a"; + } + llvm_unreachable("Unknown KCFI hash algorithm"); +} + +uint32_t llvm::getKCFITypeID(StringRef MangledTypeName, + KCFIHashAlgorithm Algorithm) { + switch (Algorithm) { + case KCFIHashAlgorithm::xxHash64: + // Use lower 32 bits of xxHash64 + return static_cast(xxHash64(MangledTypeName)); + case KCFIHashAlgorithm::FNV1a: + // FNV-1a hash (32-bit) + uint32_t Hash = 2166136261u; // FNV offset basis + for (unsigned char C : MangledTypeName) { + Hash ^= C; + Hash *= 16777619u; // FNV prime + } + return Hash; + } + llvm_unreachable("Unknown KCFI hash algorithm"); } diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp index 4b496dbae5a61..30d7831f06a2b 100644 --- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Hash.h" #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation/KCFI.h" @@ -208,10 +209,16 @@ void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) { std::string Type = MangledType.str(); if (M.getModuleFlag("cfi-normalize-integers")) Type += ".normalized"; - F.setMetadata( - LLVMContext::MD_kcfi_type, - MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( - Type::getInt32Ty(Ctx), getKCFITypeID(Type))))); + + // Determine which hash algorithm to use + auto *MD = dyn_cast_or_null(M.getModuleFlag("kcfi-hash")); + KCFIHashAlgorithm Algorithm = + parseKCFIHashAlgorithm(MD ? MD->getString() : ""); + + F.setMetadata(LLVMContext::MD_kcfi_type, + MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( + Type::getInt32Ty(Ctx), + getKCFITypeID(Type, Algorithm))))); // If the module was compiled with -fpatchable-function-entry, ensure // we use the same patchable-function-prefix. if (auto *MD = mdconst::extract_or_null(