From fc5d883c7269c0182c99ee0962722cc22c749b88 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 23 Sep 2025 15:00:05 -0700 Subject: [PATCH 1/2] [AArch64] Improve host feature detection. SVE depends on a combination of host support and operating system support. Sometimes those don't line up with detected host CPU name; make sure SVE is disabled when it isn't available. Implement this for both Windows and Linux. (We don't have a codepath for other operating systems. If someone wants to implement this, it should be possible to adapt fmv code from compiler-rt.) While I'm here, also add support for detecting other Windows CPU features. For Windows, declare constants ourselves so the code builds on older SDKs; we also do this in compiler-rt. --- llvm/lib/TargetParser/Host.cpp | 64 ++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 2 deletions(-) diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index a5bdc9dd38848..8754ef88edd97 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -2260,20 +2260,80 @@ StringMap sys::getHostCPUFeatures() { uint32_t Sha2 = CAP_SHA1 | CAP_SHA2; Features["aes"] = (crypto & Aes) == Aes; Features["sha2"] = (crypto & Sha2) == Sha2; + + // SVE support is disabled in for cores which are identified as supporting + // SVE; disable SVE if we don't detect support at runtime. + if (!Features.contains("sve")) + Features["sve"] = false; #endif return Features; } #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64) || \ defined(__arm64ec__) || defined(_M_ARM64EC)) +#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE +#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43 +#endif +#ifndef PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE +#define PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE 44 +#endif +#ifndef PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE +#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45 +#endif +#ifndef PF_ARM_SVE_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE 46 +#endif +#ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47 +#endif +#ifndef PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE 50 +#endif +#ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE 55 +#endif +#ifndef PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE 56 +#endif +#ifndef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE 57 +#endif +#ifndef PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE 58 +#endif +#ifndef PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE 59 +#endif StringMap sys::getHostCPUFeatures() { StringMap Features; // If we're asking the OS at runtime, believe what the OS says - Features["neon"] = - IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE); Features["crc"] = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE); + Features["lse"] = + IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE); + Features["dotprod"] = + IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE); + Features["jscvt"] = + IsProcessorFeaturePresent(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE); + Features["rcpc"] = + IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE); + Features["sve"] = + IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE); + Features["sve2"] = + IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE); + Features["sve-aes"] = + IsProcessorFeaturePresent(PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE); + Features["sve-sha3"] = + IsProcessorFeaturePresent(PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE); + Features["sve-sm4"] = + IsProcessorFeaturePresent(PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE); + Features["f32mm"] = + IsProcessorFeaturePresent(PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE); + Features["f64mm"] = + IsProcessorFeaturePresent(PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE); + Features["i8mm"] = + IsProcessorFeaturePresent(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE); // Avoid inferring "crypto" means more than the traditional AES + SHA2 bool TradCrypto = From 2dc6d3eec0e86906f6152fbe15dd225836f8a17d Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Wed, 24 Sep 2025 15:35:42 -0700 Subject: [PATCH 2/2] Fix jscvt detection --- llvm/lib/TargetParser/Host.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 8754ef88edd97..b4b2817c6e3ee 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -2314,7 +2314,7 @@ StringMap sys::getHostCPUFeatures() { IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE); Features["dotprod"] = IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE); - Features["jscvt"] = + Features["jsconv"] = IsProcessorFeaturePresent(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE); Features["rcpc"] = IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE);