llvm · statham-arm · Oct 1, 2025 · Oct 2, 2025 · Oct 2, 2025 · Oct 2, 2025
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -422,6 +422,24 @@ set(arm_or_thumb2_base_SOURCES
   ${GENERIC_SOURCES}
 )
 
+option(COMPILER_RT_ARM_OPTIMIZED_FP
+  "On 32-bit Arm, use optimized assembly implementations of FP arithmetic. Likely to increase code size, but be faster." ON)
+
+if(COMPILER_RT_ARM_OPTIMIZED_FP)  # option above defaults to ON
+  set(assembly_files  # assembly sources that need the extra assembler flag
+    arm/mulsf3.S
+    arm/divsf3.S)
+  set_source_files_properties(${assembly_files}
+    PROPERTIES COMPILE_OPTIONS "-Wa,-mimplicit-it=always")  # -Wa, hands the flag to the assembler
+  set(arm_or_thumb2_base_SOURCES  # prepend the new files to the existing list
+    ${assembly_files}
+    arm/fnan2.c   # NaN propagation helper
+    arm/fnorm2.c  # denormal input renormalization helper
+    arm/funder.c  # underflow re-rounding helper
+    ${arm_or_thumb2_base_SOURCES}
+  )
+endif()
+
 set(arm_sync_SOURCES
   arm/sync_fetch_and_add_4.S
   arm/sync_fetch_and_add_8.S
@@ -455,6 +473,16 @@ set(thumb1_base_SOURCES
   ${GENERIC_SOURCES}
 )
 
+if(COMPILER_RT_ARM_OPTIMIZED_FP)  # same option as for the Arm/Thumb-2 list
+  set(thumb1_base_SOURCES  # prepend the new files to the existing list
+    arm/thumb1/mulsf3.S
+    arm/fnan2.c   # NaN propagation helper
+    arm/fnorm2.c  # denormal input renormalization helper
+    arm/funder.c  # underflow re-rounding helper
+    ${thumb1_base_SOURCES}
+  )
+endif()
+
 set(arm_EABI_RT_SOURCES
   arm/aeabi_cdcmp.S
   arm/aeabi_cdcmpeq_check_nan.c

diff --git a/compiler-rt/lib/builtins/arm/divsf3.S b/compiler-rt/lib/builtins/arm/divsf3.S
diff --git a/compiler-rt/lib/builtins/arm/fnan2.c b/compiler-rt/lib/builtins/arm/fnan2.c
@@ -0,0 +1,42 @@
+//===-- fnan2.c - Handle single-precision NaN inputs to binary operation --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This helper function is available for use by single-precision float
+// arithmetic implementations to handle propagating NaNs from the input
+// operands to the output, in a way that matches Arm hardware FP.
+//
+// On input, a and b are floating-point numbers in IEEE 754 encoding, and at
+// least one of them must be a NaN. The return value is the correct output NaN.
+//
+// A signalling NaN in the input (with bit 22 clear) takes priority over any
+// quiet NaN, and is adjusted on return by setting bit 22 to make it quiet. If
+// both inputs are the same type of NaN then the first input takes priority:
+// the input a is used instead of b.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+uint32_t __compiler_rt_fnan2(uint32_t a, uint32_t b) {
+  // Make shifted-left copies of a and b to discard the sign bit. Then add 1 at
+  // the bit position where the quiet vs signalling bit ended up (bit 23). This
+  // pushes every signalling NaN above 0xff800000, which is where an infinity
+  // lands, while every quiet NaN carries out of the top bit and wraps round to
+  // below 0x00800000, which is where a zero lands. So we can detect a
+  // signalling NaN by asking if it's greater than 0xff800000, and a quiet
+  // one by asking if it's less than 0x00800000.
+  uint32_t aadj = (a << 1) + 0x00800000;
+  uint32_t badj = (b << 1) + 0x00800000;
+  if (aadj > 0xff800000)   // a is a signalling NaN?
+    return a | 0x00400000; //   if so, return it with the quiet bit set
+  if (badj > 0xff800000)   // b is a signalling NaN?
+    return b | 0x00400000; //   if so, return it with the quiet bit set
+  if (aadj < 0x00800000)   // a is a quiet NaN?
+    return a;              // if so, return it
+  return b;                // otherwise we expect b must be a quiet NaN
+}
diff --git a/compiler-rt/lib/builtins/arm/fnorm2.c b/compiler-rt/lib/builtins/arm/fnorm2.c
@@ -0,0 +1,62 @@
+//===-- fnorm2.c - Handle single-precision denormal inputs to binary op ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This helper function is available for use by single-precision float
+// arithmetic implementations, to handle denormal inputs on entry by
+// renormalizing the mantissa and modifying the exponent to match.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+// In/out parameter block for __compiler_rt_fnorm2.
+//
+// On entry: a and b are the two input floating-point numbers, still in IEEE
+// 754 encoding. expa and expb are their 8-bit exponent fields, extracted and
+// shifted down to the low 8 bits of the word, with no other change. Neither
+// input may be zero (its mantissa would have no leading 1 to find), nor have
+// the maximum exponent (indicating an infinity or NaN).
+//
+// On exit: a and b each hold the mantissa of the input value, with the leading
+// 1 bit made explicit and shifted up to bit 31. If expa was zero (a denormal)
+// then expa is replaced by the exponent of the renormalized number, which is
+// zero or negative; as the field is unsigned, a negative exponent is stored
+// modulo 2^32. The same applies to expb and b.
+struct fnorm2 {
+  uint32_t a, b, expa, expb;
+};
+
+void __compiler_rt_fnorm2(struct fnorm2 *values) {
+  // Shift a and b left by 8 so the 23-bit mantissa sits in bits 30..8, just
+  // below bit 31 where the leading 1 belongs; sign and upper exponent bits go.
+  values->a <<= 8;
+  values->b <<= 8;
+
+  // Test if a is denormal.
+  if (values->expa == 0) {
+    // Bit 31 is clear here (it held the low exponent bit), so shift >= 1.
+    // Shifting by the leading-zero count brings the top set mantissa bit up to
+    // bit 31; a must be nonzero, since __builtin_clz(0) is undefined.
+    uint32_t shift = __builtin_clz(values->a);
+    values->a <<= shift;
+    values->expa = 1 - shift; // 0 or negative, wrapped modulo 2^32
+  } else {
+    // Normal number: bits 30..8 are already in place; OR in the explicit
+    // leading 1 (overwriting the low exponent bit that landed in bit 31).
+    values->a |= 0x80000000;
+  }
+
+  // Do the same operation on b.
+  if (values->expb == 0) {
+    uint32_t shift = __builtin_clz(values->b);
+    values->b <<= shift;
+    values->expb = 1 - shift; // 0 or negative, wrapped modulo 2^32
+  } else {
+    values->b |= 0x80000000;
+  }
+}
diff --git a/compiler-rt/lib/builtins/arm/funder.c b/compiler-rt/lib/builtins/arm/funder.c
@@ -0,0 +1,78 @@
+//===-- funder.c - Handle single-precision floating-point underflow -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This helper function is available for use by single-precision float
+// arithmetic implementations to handle underflowed output values, if they were
+// computed in the form of a normalized mantissa and an out-of-range exponent.
+//
+// On input: x should be a complete IEEE 754 floating-point value representing
+// the desired output scaled up by 2^192 (the same value that would have been
+// passed to an underflow trap handler in IEEE 754:1985).
+//
+// This isn't enough information to re-round to the correct output denormal
+// without also knowing whether x itself has already been rounded, and which
+// way. 'errsign' gives this information, by indicating the sign of the value
+// (true result - x), read as a signed 32-bit integer. If errsign > 0 the true
+// value was larger (x was rounded down); if errsign < 0 then x was rounded up;
+// if errsign == 0 then x represents the _exact_ desired output value.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+#define SIGNBIT 0x80000000 // bit 31: the IEEE 754 sign bit
+#define MANTSIZE 23        // number of stored mantissa bits
+#define BIAS 0xc0          // 192: the input arrives scaled up by 2^BIAS
+
+uint32_t __compiler_rt_funder(uint32_t x, uint32_t errsign) {
+  uint32_t sign = x & SIGNBIT;
+  uint32_t exponent = (x << 1) >> 24; // 8-bit exponent field, sign discarded
+
+  // Rule out exponents so small (or large!) that no denormalisation
+  // is needed.
+  if (exponent > BIAS) {
+    // Exponent 0xc1 or above means a normalised number got here by
+    // mistake, so we just remove the 0xc0 exponent bias and go
+    // straight home.
+    return x - (BIAS << MANTSIZE);
+  }
+  uint32_t bits_lost = BIAS + 1 - exponent; // >= 1 here, as exponent <= BIAS
+  if (bits_lost > MANTSIZE + 1) {
+    // The implicit leading 1 of the intermediate value's mantissa is
+    // below the lowest mantissa bit of a denormal by at least 2 bits.
+    // Round down to 0 unconditionally.
+    return sign; // zero carrying the sign of x
+  }
+
+  // Make the full mantissa (with leading bit) at the top of the word.
+  uint32_t mantissa = 0x80000000 | (x << 8);
+  // Adjust by 1 depending on the sign of the error (bit 31 of errsign).
+  mantissa -= errsign >> 31;    // errsign < 0: x was rounded up, step down
+  mantissa += (-errsign) >> 31; // errsign > 0: x was rounded down, step up
+
+  // Shift down to the output position, keeping the bits shifted off.
+  uint32_t outmant, shifted_off;
+  if (bits_lost == MANTSIZE + 1) {
+    // Special case: the whole of 'mantissa' is shifted off the bottom of the
+    // word. Handled separately because a shift by 32 would be undefined.
+    outmant = 0;
+    shifted_off = mantissa;
+  } else {
+    outmant = mantissa >> (8 + bits_lost); // bits_lost is 1..23 here
+    shifted_off = mantissa << (32 - (8 + bits_lost));
+  }
+
+  // Re-round to nearest: bit 31 of shifted_off is the first discarded bit.
+  if (shifted_off >> 31) {
+    outmant++;
+    if (!(shifted_off << 1))
+      outmant &= ~1; // halfway case: round to even
+  }
+
+  return sign | outmant; // outmant may reach 0x00800000, the smallest normal
+}