Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions compiler-rt/lib/builtins/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,24 @@ set(arm_or_thumb2_base_SOURCES
${GENERIC_SOURCES}
)

# User-facing switch (default ON) that selects the optimized assembly
# implementations of single-precision FP arithmetic for 32-bit Arm. The
# description string notes the expected trade-off: more code, faster routines.
option(COMPILER_RT_ARM_OPTIMIZED_FP
  "On 32-bit Arm, use optimized assembly implementations of FP arithmetic. Likely to increase code size, but be faster." ON)

if(COMPILER_RT_ARM_OPTIMIZED_FP)
  # Assembly sources providing the optimized multiply and divide routines.
  set(assembly_files
    arm/mulsf3.S
    arm/divsf3.S)

  # Forward -mimplicit-it=always to the assembler for these files only.
  set_source_files_properties(
    ${assembly_files}
    PROPERTIES
      COMPILE_OPTIONS "-Wa,-mimplicit-it=always")

  # Rebuild the Arm/Thumb-2 source list with the optimized assembly files and
  # their C helper routines placed ahead of the entries already present.
  set(arm_or_thumb2_base_SOURCES
    ${assembly_files}
    arm/fnan2.c
    arm/fnorm2.c
    arm/funder.c
    ${arm_or_thumb2_base_SOURCES})
endif()

set(arm_sync_SOURCES
arm/sync_fetch_and_add_4.S
arm/sync_fetch_and_add_8.S
Expand Down Expand Up @@ -455,6 +473,16 @@ set(thumb1_base_SOURCES
${GENERIC_SOURCES}
)

# With the optimized FP routines enabled, the Thumb-1 source list is rebuilt
# with the Thumb-1 multiply routine and the shared C helper files placed ahead
# of the entries already present.
if(COMPILER_RT_ARM_OPTIMIZED_FP)
  set(thumb1_base_SOURCES
    arm/thumb1/mulsf3.S
    arm/fnan2.c
    arm/fnorm2.c
    arm/funder.c
    ${thumb1_base_SOURCES})
endif()

set(arm_EABI_RT_SOURCES
arm/aeabi_cdcmp.S
arm/aeabi_cdcmpeq_check_nan.c
Expand Down
608 changes: 608 additions & 0 deletions compiler-rt/lib/builtins/arm/divsf3.S

Large diffs are not rendered by default.

42 changes: 42 additions & 0 deletions compiler-rt/lib/builtins/arm/fnan2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
//===-- fnan2.c - Handle single-precision NaN inputs to binary operation --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This helper function is available for use by single-precision float
// arithmetic implementations to handle propagating NaNs from the input
// operands to the output, in a way that matches Arm hardware FP.
//
// On input, a and b are floating-point numbers in IEEE 754 encoding, and at
// least one of them must be a NaN. The return value is the correct output NaN.
//
// A signalling NaN in the input (with bit 22 clear) takes priority over any
// quiet NaN, and is adjusted on return by setting bit 22 to make it quiet. If
// both inputs are the same type of NaN then the first input takes priority:
// the input a is used instead of b.
//
//===----------------------------------------------------------------------===//

#include <stdint.h>

// Select the NaN to return from a binary operation whose inputs are a and b
// (raw IEEE 754 single-precision encodings, at least one of which is a NaN).
//
// Priority order: a if signalling, then b if signalling, then a if quiet,
// otherwise b. A signalling NaN is returned with its quiet bit (bit 22) set.
uint32_t __compiler_rt_fnan2(uint32_t a, uint32_t b) {
  const uint32_t quiet_bit = 0x00400000;      // bit 22: quiet vs signalling
  const uint32_t wrap_offset = 0x00800000;    // quiet-bit position after << 1
  const uint32_t snan_threshold = 0xff800000; // keys above this are sNaNs

  // Classification key for each operand: drop the sign bit by shifting left,
  // then add 1 at the position the quiet bit now occupies. Signalling NaNs
  // end up strictly above snan_threshold; quiet NaNs overflow and wrap round
  // to values strictly below wrap_offset; everything else lands in between.
  const uint32_t key_a = (a << 1) + wrap_offset;
  const uint32_t key_b = (b << 1) + wrap_offset;

  // Signalling NaNs take priority over quiet ones, and a over b.
  if (key_a > snan_threshold)
    return a | quiet_bit;
  if (key_b > snan_threshold)
    return b | quiet_bit;

  // No signalling NaN: prefer a if it is a quiet NaN; otherwise b is expected
  // to be the quiet NaN.
  return (key_a < wrap_offset) ? a : b;
}
62 changes: 62 additions & 0 deletions compiler-rt/lib/builtins/arm/fnorm2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
//===-- fnorm2.c - Handle single-precision denormal inputs to binary op ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This helper function is available for use by single-precision float
// arithmetic implementations, to handle denormal inputs on entry by
// renormalizing the mantissa and modifying the exponent to match.
//
//===----------------------------------------------------------------------===//

#include <stdint.h>

// Structure containing the function's inputs and outputs.
//
// On entry: a, b are two input floating-point numbers, still in IEEE 754
// encoding. expa and expb are the 8-bit exponents of those numbers, extracted
// and shifted down to the low 8 bits of the word, with no other change.
// Neither value should be zero, or have the maximum exponent (indicating an
// infinity or NaN).
//
// On exit: each of a and b contains the mantissa of the input value, with the
// leading 1 bit made explicit, and shifted up to the top of the word. If expa
// was zero (indicating that a was denormal) then it is now represented as a
// normalized number with an out-of-range exponent (zero or negative). The same
// applies to expb and b.
struct fnorm2 {
  uint32_t a, b, expa, expb;
};

// Renormalize a single operand in place.
//
// *mant holds the IEEE 754 encoding on entry and the left-aligned mantissa
// (explicit leading 1 in bit 31) on exit. *exp holds the extracted exponent;
// it is rewritten only when it was zero, i.e. the operand was denormal.
static void fnorm2_renormalize_operand(uint32_t *mant, uint32_t *exp) {
  // Move the stored mantissa bits up so they sit directly below bit 31.
  uint32_t m = *mant << 8;

  if (*exp != 0) {
    // Normal number: make the implicit leading 1 explicit.
    m |= 0x80000000;
  } else {
    // Denormal: shift the highest set mantissa bit up to bit 31 and lower the
    // exponent to compensate. The result may be zero or wrap to a negative
    // value in two's complement.
    uint32_t lead = __builtin_clz(m);
    m <<= lead;
    *exp = 1 - lead;
  }

  *mant = m;
}

// Convert both operands in *values from IEEE 754 encoding to left-aligned
// mantissas with explicit leading 1 bits, renormalizing any denormal input
// and adjusting its exponent to match.
void __compiler_rt_fnorm2(struct fnorm2 *values) {
  fnorm2_renormalize_operand(&values->a, &values->expa);
  fnorm2_renormalize_operand(&values->b, &values->expb);
}
78 changes: 78 additions & 0 deletions compiler-rt/lib/builtins/arm/funder.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
//===-- funder.c - Handle single-precision floating-point underflow -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This helper function is available for use by single-precision float
// arithmetic implementations to handle underflowed output values, if they were
// computed in the form of a normalized mantissa and an out-of-range exponent.
//
// On input: x should be a complete IEEE 754 floating-point value representing
// the desired output scaled up by 2^192 (the same value that would have been
// passed to an underflow trap handler in IEEE 754:1985).
//
// This isn't enough information to re-round to the correct output denormal
// without also knowing whether x itself has already been rounded, and which
// way. 'errsign' gives this information, by indicating the sign of the value
// (true result - x). That is, if errsign > 0 it means the true value was
// larger (x was rounded down); if errsign < 0 then x was rounded up; if
// errsign == 0 then x represents the _exact_ desired output value.
//
//===----------------------------------------------------------------------===//

#include <stdint.h>

#define SIGNBIT 0x80000000 // sign bit of a single-precision value
#define MANTSIZE 23        // number of stored mantissa bits
#define BIAS 0xc0          // 192: extra exponent scaling carried by x

// Re-round an underflowed intermediate result to the correct output.
//
// x:       IEEE 754 single-precision encoding of the result scaled up by
//          2^192.
// errsign: only its sign (bit 31) and whether it is zero are examined.
//          Zero means x is exact; sign bit clear and nonzero means the true
//          value is larger than x; sign bit set means it is smaller.
//
// Returns the encoding of the rounded (round-to-nearest, ties-to-even)
// result, carrying the sign of x.
uint32_t __compiler_rt_funder(uint32_t x, uint32_t errsign) {
  uint32_t sign = x & SIGNBIT;
  uint32_t exponent = (x << 1) >> 24;

  // Rule out exponents so small (or large!) that no denormalisation
  // is needed.
  if (exponent > BIAS) {
    // Exponent 0xc1 or above means a normalised number got here by
    // mistake, so we just remove the 0xc0 exponent bias and go
    // straight home.
    return x - (BIAS << MANTSIZE);
  }
  uint32_t bits_lost = BIAS + 1 - exponent;
  if (bits_lost > MANTSIZE + 1) {
    // The implicit leading 1 of the intermediate value's mantissa is
    // below the lowest mantissa bit of a denormal by at least 2 bits.
    // Round down to 0 unconditionally.
    return sign;
  }

  // Make the full mantissa (with leading bit) at the top of the word.
  uint32_t mantissa = 0x80000000 | (x << 8);

  // Adjust by 1 depending on the sign of the error. The low 8 bits of
  // 'mantissa' are zero at this point, so neither adjustment can carry out
  // of the word or disturb the leading bit.
  //
  // The sign bit is tested directly rather than via negation: negating
  // 0x80000000 yields 0x80000000 again, so a negate-and-shift test would
  // see that value as both negative and positive and cancel the adjustment.
  if (errsign >> 31)
    mantissa -= 1; // x was rounded up: true value is slightly smaller
  else if (errsign != 0)
    mantissa += 1; // x was rounded down: true value is slightly larger

  // Shift down to the output position, keeping the bits shifted off.
  uint32_t outmant, shifted_off;
  if (bits_lost == MANTSIZE + 1) {
    // Special case for the exponent where we have to shift the whole
    // of 'mantissa' off the bottom of the word.
    outmant = 0;
    shifted_off = mantissa;
  } else {
    outmant = mantissa >> (8 + bits_lost);
    shifted_off = mantissa << (32 - (8 + bits_lost));
  }

  // Re-round: the top bit of 'shifted_off' is the half-ULP bit, and the
  // remaining bits say whether the value is above exactly half.
  if (shifted_off >> 31) {
    outmant++;
    if (!(shifted_off << 1))
      outmant &= ~1; // halfway case: round to even
  }

  return sign | outmant;
}
Loading