From 1d6b7cbb41b9dcb71e45da40e72baefc1a366c81 Mon Sep 17 00:00:00 2001 From: Victor Campos Date: Mon, 10 Nov 2025 16:33:19 +0000 Subject: [PATCH 1/3] [libc] Add support for MVE to Arm startup code In order to have MVE support, the same bits of the CPACR register that enable the floating-point extension must be set. --- libc/startup/baremetal/arm/start.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/libc/startup/baremetal/arm/start.cpp b/libc/startup/baremetal/arm/start.cpp index 4740067722022..b22529f214216 100644 --- a/libc/startup/baremetal/arm/start.cpp +++ b/libc/startup/baremetal/arm/start.cpp @@ -131,20 +131,23 @@ namespace LIBC_NAMESPACE_DECL { __arm_wsr("CPSR_c", 0x13); // SVC #endif -#ifdef __ARM_FP -// Enable FPU -#if __ARM_ARCH_PROFILE == 'M' +#if __ARM_ARCH_PROFILE == 'M' && \ + (defined(__ARM_FP) || defined(__ARM_FEATURE_MVE)) + // Enable FPU and MVE. They can't be enabled independently: the two are + // governed by the same bits in CPACR. // Based on // https://developer.arm.com/documentation/dui0646/c/Cortex-M7-Peripherals/Floating-Point-Unit/Enabling-the-FPU - // Set CPACR cp10 and cp11 + // Set CPACR cp10 and cp11. auto cpacr = (volatile uint32_t *const)0xE000ED88; *cpacr |= (0xF << 20); __dsb(0xF); __isb(0xF); -#elif __ARM_ARCH_PROFILE == 'A' || __ARM_ARCH_PROFILE == 'R' +#elif (__ARM_ARCH_PROFILE == 'A' || __ARM_ARCH_PROFILE == 'R') && \ + defined(__ARM_FP) + // Enable FPU. // Based on // https://developer.arm.com/documentation/dui0472/m/Compiler-Coding-Practices/Enabling-NEON-and-FPU-for-bare-metal - // Set CPACR cp10 and cp11 + // Set CPACR cp10 and cp11. 
uint32_t cpacr = __arm_rsr("p15:0:c1:c0:2"); cpacr |= (0xF << 20); __arm_wsr("p15:0:c1:c0:2", cpacr); @@ -154,7 +157,6 @@ namespace LIBC_NAMESPACE_DECL { __asm__ __volatile__("vmrs %0, FPEXC" : "=r"(fpexc) : :); fpexc |= (1 << 30); __asm__ __volatile__("vmsr FPEXC, %0" : : "r"(fpexc) :); -#endif #endif // Perform the equivalent of scatterloading From 77b94bda4ab9f27fa99e73a1af5790866045a59e Mon Sep 17 00:00:00 2001 From: Victor Campos Date: Tue, 11 Nov 2025 16:50:39 +0000 Subject: [PATCH 2/3] Addressing comments --- libc/startup/baremetal/arm/start.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/libc/startup/baremetal/arm/start.cpp b/libc/startup/baremetal/arm/start.cpp index b22529f214216..f03103ce7c7ee 100644 --- a/libc/startup/baremetal/arm/start.cpp +++ b/libc/startup/baremetal/arm/start.cpp @@ -138,10 +138,18 @@ namespace LIBC_NAMESPACE_DECL { // Based on // https://developer.arm.com/documentation/dui0646/c/Cortex-M7-Peripherals/Floating-Point-Unit/Enabling-the-FPU // Set CPACR cp10 and cp11. - auto cpacr = (volatile uint32_t *const)0xE000ED88; + auto cpacr = reinterpret_cast<volatile uint32_t *const>(0xE000ED88); *cpacr |= (0xF << 20); __dsb(0xF); __isb(0xF); +#if defined(__ARM_FEATURE_MVE) + // Set FPSCR's LTPSIZE field to 4 to disable low-overhead-loop tail + // predication. + uint32_t fpscr; + __asm__ __volatile__("vmrs %0, FPSCR" : "=r"(fpscr) : :); + fpscr |= (0x4 << 16); + __asm__ __volatile__("vmsr FPSCR, %0" : : "r"(fpscr) :); +#endif #elif (__ARM_ARCH_PROFILE == 'A' || __ARM_ARCH_PROFILE == 'R') && \ defined(__ARM_FP) // Enable FPU. 
@@ -155,7 +163,7 @@ namespace LIBC_NAMESPACE_DECL { // Set FPEXC.EN uint32_t fpexc; __asm__ __volatile__("vmrs %0, FPEXC" : "=r"(fpexc) : :); - fpexc |= (1 << 30); + fpexc |= (0x1 << 30); __asm__ __volatile__("vmsr FPEXC, %0" : : "r"(fpexc) :); #endif From 2671cf337ae1ce9e51a74ae030443c1c59f151d9 Mon Sep 17 00:00:00 2001 From: Victor Campos Date: Wed, 12 Nov 2025 09:38:52 +0000 Subject: [PATCH 3/3] Address comments (2) --- libc/startup/baremetal/arm/start.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libc/startup/baremetal/arm/start.cpp b/libc/startup/baremetal/arm/start.cpp index f03103ce7c7ee..db89828a0b45e 100644 --- a/libc/startup/baremetal/arm/start.cpp +++ b/libc/startup/baremetal/arm/start.cpp @@ -143,8 +143,7 @@ namespace LIBC_NAMESPACE_DECL { __dsb(0xF); __isb(0xF); #if defined(__ARM_FEATURE_MVE) - // Set FPSCR's LTPSIZE field to 4 to disable low-overhead-loop tail - // predication. + // Initialize low-overhead-loop tail predication to its neutral state uint32_t fpscr; __asm__ __volatile__("vmrs %0, FPSCR" : "=r"(fpscr) : :); fpscr |= (0x4 << 16);