From 4b2521473b9a3cd5b65a0f45f8bf8594f11de28f Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 13 Nov 2025 16:51:30 +0000 Subject: [PATCH] Revert "[compiler-rt][ARM] Optimized mulsf3 and divsf3 (#161546)" This reverts commit f7e652127772e9390ecd1fee9504c07435a9bb87. --- .../cmake/Modules/CheckAssemblerFlag.cmake | 38 -- compiler-rt/lib/builtins/CMakeLists.txt | 45 -- compiler-rt/lib/builtins/arm/divsf3.S | 608 ----------------- compiler-rt/lib/builtins/arm/fnan2.c | 42 -- compiler-rt/lib/builtins/arm/fnorm2.c | 62 -- compiler-rt/lib/builtins/arm/funder.c | 78 --- compiler-rt/lib/builtins/arm/mulsf3.S | 309 --------- compiler-rt/lib/builtins/arm/thumb1/mulsf3.S | 251 ------- compiler-rt/test/builtins/CMakeLists.txt | 4 - compiler-rt/test/builtins/Unit/divsf3_test.c | 503 +++----------- compiler-rt/test/builtins/Unit/mulsf3_test.c | 616 ------------------ 11 files changed, 95 insertions(+), 2461 deletions(-) delete mode 100644 compiler-rt/cmake/Modules/CheckAssemblerFlag.cmake delete mode 100644 compiler-rt/lib/builtins/arm/divsf3.S delete mode 100644 compiler-rt/lib/builtins/arm/fnan2.c delete mode 100644 compiler-rt/lib/builtins/arm/fnorm2.c delete mode 100644 compiler-rt/lib/builtins/arm/funder.c delete mode 100644 compiler-rt/lib/builtins/arm/mulsf3.S delete mode 100644 compiler-rt/lib/builtins/arm/thumb1/mulsf3.S delete mode 100644 compiler-rt/test/builtins/Unit/mulsf3_test.c diff --git a/compiler-rt/cmake/Modules/CheckAssemblerFlag.cmake b/compiler-rt/cmake/Modules/CheckAssemblerFlag.cmake deleted file mode 100644 index 49e8b8547c5cd..0000000000000 --- a/compiler-rt/cmake/Modules/CheckAssemblerFlag.cmake +++ /dev/null @@ -1,38 +0,0 @@ -# Helper function to find out whether the assembler supports a particular -# command-line flag. You'd like to use the standard check_compiler_flag(), but -# that only supports a fixed list of languages, and ASM isn't one of them. So -# we do it ourselves, by trying to assemble an empty source file. - -function(check_assembler_flag outvar flag) - if(NOT DEFINED "${outvar}") - if(NOT CMAKE_REQUIRED_QUIET) - message(CHECK_START "Checking for assembler flag ${flag}") - endif() - - # Stop try_compile from attempting to link the result of the assembly, so - # that we don't depend on having a working linker, and also don't have to - # figure out what special symbol like _start needs to be defined in the - # test input. - # - # This change is made within the dynamic scope of this function, so - # CMAKE_TRY_COMPILE_TARGET_TYPE will be restored to its previous value on - # return. - set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) - - # Try to assemble an empty file with a .S name, using the provided flag. - try_compile(success - SOURCE_FROM_CONTENT "CheckAssemblerFlag.s" "" - COMPILE_DEFINITIONS ${flag} - NO_CACHE) - - if(NOT CMAKE_REQUIRED_QUIET) - if(success) - message(CHECK_PASS "Accepted") - set(${outvar} 1 CACHE INTERNAL "Test assembler flag ${flag}") - else() - message(CHECK_FAIL "Not accepted") - set(${outvar} "" CACHE INTERNAL "Test assembler flag ${flag}") - endif() - endif() - endif() -endfunction() diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 6f5c2cd7d1971..02e6ecfbdb60e 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -60,7 +60,6 @@ endif() include(builtin-config-ix) include(CMakeDependentOption) include(CMakePushCheckState) -include(CheckAssemblerFlag) option(COMPILER_RT_BUILTINS_HIDE_SYMBOLS "Do not export any symbols from the static library." ON) @@ -424,40 +423,6 @@ set(arm_or_thumb2_base_SOURCES ${GENERIC_SOURCES} ) -option(COMPILER_RT_ARM_OPTIMIZED_FP - "On 32-bit Arm, use optimized assembly implementations of FP arithmetic. Likely to increase code size, but be faster." ON) - -if(COMPILER_RT_ARM_OPTIMIZED_FP AND BUILTIN_SUPPORTED_ARCH MATCHES "arm") - check_assembler_flag(COMPILER_RT_HAS_MIMPLICIT_IT -mimplicit-it=always) - if(COMPILER_RT_HAS_MIMPLICIT_IT) - set(implicit_it_flag -mimplicit-it=always) - else() - check_assembler_flag( - COMPILER_RT_HAS_WA_MIMPLICIT_IT -Wa,-mimplicit-it=always) - if(COMPILER_RT_HAS_WA_MIMPLICIT_IT) - set(implicit_it_flag -Wa,-mimplicit-it=always) - else() - message(WARNING "Don't know how to set the -mimplicit-it=always flag in this assembler; not including Arm optimized implementations") - set(implicit_it_flag "") - endif() - endif() - - if(implicit_it_flag) - set(assembly_files - arm/mulsf3.S - arm/divsf3.S) - set_source_files_properties(${assembly_files} - PROPERTIES COMPILE_OPTIONS ${implicit_it_flag}) - set(arm_or_thumb2_base_SOURCES - ${assembly_files} - arm/fnan2.c - arm/fnorm2.c - arm/funder.c - ${arm_or_thumb2_base_SOURCES} - ) - endif() -endif() - set(arm_sync_SOURCES arm/sync_fetch_and_add_4.S arm/sync_fetch_and_add_8.S @@ -491,16 +456,6 @@ set(thumb1_base_SOURCES ${GENERIC_SOURCES} ) -if(COMPILER_RT_ARM_OPTIMIZED_FP) - set(thumb1_base_SOURCES - arm/thumb1/mulsf3.S - arm/fnan2.c - arm/fnorm2.c - arm/funder.c - ${thumb1_base_SOURCES} - ) -endif() - set(arm_EABI_RT_SOURCES arm/aeabi_cdcmp.S arm/aeabi_cdcmpeq_check_nan.c diff --git a/compiler-rt/lib/builtins/arm/divsf3.S b/compiler-rt/lib/builtins/arm/divsf3.S deleted file mode 100644 index 2f37234457b7b..0000000000000 --- a/compiler-rt/lib/builtins/arm/divsf3.S +++ /dev/null @@ -1,608 +0,0 @@ -//===-- divsf3.S - single-precision floating point division ---------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements single-precision soft-float division with the IEEE-754 -// default rounding (to nearest, ties to even), in optimized AArch32 assembly -// language suitable to be built as either Arm or Thumb2. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - - - .syntax unified - .text - .p2align 2 - -DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fdiv, __divsf3) - -DEFINE_COMPILERRT_FUNCTION(__divsf3) - // Extract the exponents of the inputs into r2 and r3, occupying bits 16-23 - // of each register so that there will be space lower down to store extra - // data without exponent arithmetic carrying into it. In the process, check - // both exponents for 00 or FF and branch out of line to handle all the - // uncommon types of value (infinity, NaN, zero, denormals). - // - // Chaining conditional instructions like this means that the second - // instruction (setting up r3) might not be executed at all, so fdiv_uncommon - // will have to redo it just in case. That saves an instruction here, - // executed for _all_ inputs, and moves it to the uncommon path run for only - // some inputs. - mov r12, #0xFF0000 - ands r2, r12, r0, lsr #7 // r2 has exponent of numerator. (Is it 0?) - andsne r3, r12, r1, lsr #7 // r3 has exponent of denominator. (Is it 0?) - teqne r2, r12 // if neither was 0, is one FF? - teqne r3, r12 // or the other? - beq LOCAL_LABEL(uncommon) // branch out of line if any answer was yes - - // Calculate the output sign, which is always just the XOR of the input - // signs. Store it in bit 8 of r2, below the numerator exponent. - teq r0, r1 // is the output sign bit 1? - orrmi r2, r2, #0x100 // if so, set bit 8 of r2 - - // Isolate the mantissas of both values, by setting bit 23 of each one and - // clearing the 8 bits above that. - // - // In the process, swap the register allocations (which doesn't cost extra - // instructions if we do it as part of this manipulation). We want the - // numerator not to be in r0, because r0 is where we'll build up the quotient - // while subtracting things from the numerator. - orr r12, r0, #1 << 23 - orr r0, r1, #1 << 23 - bic r1, r12, #0xFF000000 - bic r0, r0, #0xFF000000 - -LOCAL_LABEL(div): - // Start of the main division. We get here knowing that: - // - // r0 = mantissa of denominator, with the leading 1 at bit 23 - // r1 = mantissa of numerator, similarly - // r2 = (exponent of numerator << 16) + (result sign << 8) - // r3 = (exponent of denominator << 16) - - push {r14} // we'll need an extra register - - // Calculate the initial result exponent by just subtracting the two input - // exponents. This doesn't affect the sign bit lower down in r2. - sub r2, r2, r3 - - // That initial exponent might need to be adjusted by 1, depending on whether - // dividing the mantissas gives a value >=1 or <1. We don't need to wait - // until the division is finished to work that out: we can tell immediately - // by just comparing the mantissas. - // - // The basic idea is to do the comparison in a way that sets the C flag if - // numerator >= denominator. Then we recombine the sign and exponent by doing - // "ADC r2, r2, r2, asr #16": the exponent in the top half of r2 is shifted - // down to the low 8 bits, just below the sign bit, and using ADC rather than - // ADD folds in the conditional increment from the mantissa comparison. - // - // If we're not incrementing the output exponent, we instead shift the - // numerator mantissa left by 1, so that it _is_ greater than the denominator - // mantissa. Otherwise we'd generate only a 22-bit quotient, instead of 23. - // - // The exponent also needs to be rebiased, so that dividing two numbers the - // same gives an output exponent of 0x7F. If the two inputs have the same - // exponent then we'll have computed an exponent of 0 via the SUB instruction - // above; if the mantissas are the same as well then the ADC will increment - // it; also, the leading bit of the quotient will increment the exponent - // again when we recombine it with the output mantissa later. So we need to - // add (0x7F - 2) to the mantissa now, to make an exponent of 0 from the SUB - // come to 0x7F after both of those increments. - // - // Putting all of that together, what we _want_ to do is this: - // - // [#1] CMP r1, r0 // set C if num >= den - // [#2] MOVLO r1, r1, lsl #1 // if num < den, shift num left - // [#3] ADD r2, r2, #0x7D0000 // rebias exponent - // [#4] ADC r2, r2, r2, asr #16 // combine sign + exp + adjustment - // - // However, we only do the first of those four instructions right here. The - // other three are distributed through the code below, after unrelated load - // or multiply instructions which will have a result delay slot on simple - // CPUs. Each is labelled "exponent setup [#n]" in a comment. - // - // (Since instruction #4 depends on the flags set up by #2, we must avoid - // clobbering the flags in _any_ of the instructions interleaved with this!) - cmp r1, r0 // exponent setup [#1] - - // Start the mantissa division by making an approximation to the reciprocal - // of the denominator. We first obtain an 8-bit approximation using a table - // lookup indexed by the top 7 denominator bits (counting the leading 1, so - // really there are only 6 bits in the table index). - // - // (r0 >> 17) is the table index, and its top bit is always set, so it ranges - // from 64 to 127 inclusive. So we point the base register 64 bytes before - // the actual table. - adr r12, LOCAL_LABEL(tab) - 64 -#if __thumb__ - // Thumb can't do this particular shift+add+load in one instruction - it only - // supports left shifts of 0 to 3 bits, not right shifts of 17. So we must - // calculate the load offset separately. - add r14, r12, r0, lsr #17 - ldrb r14, [r14] -#else - ldrb r14, [r12, r0, lsr #17] -#endif - - // Now do an iteration of Newton-Raphson to improve that 8-bit approximation - // to have 15-16 accurate bits. - // - // Basics of Newton-Raphson for finding a reciprocal: if you want to find 1/d - // and you have some approximation x, your next approximation is X = x(2-dx). - // Looked at one way, this is the result of applying the N-R formula - // X=x-f(x)/f'(x) to the function f(x) = 1/x - d. Another way to look at it - // is to suppose that dx = 1 - e, for some e which is small (because dx is - // already reasonably close to 1). Then you want to double the number of - // correct bits in the next approximation, i.e. square the error. So you want - // dX = 1-e^2 = (1-e)(1+e) = dx(2-dx). Cancelling d gives X = x(2-dx) again. - // - // In this situation, we're working in fixed-point integers rather than real - // numbers, and all the scales are different: - // * our input denominator d is in the range [2^23,2^24) - // * our input approximation x is in the range [2^7,2^8) - // * we want the output approximation to be in the range [2^15,2^16) - // Those factors combine to mean that we want - // x(2^32-dx) / 2^23 - // = (2^9 x) - (dx^2 / 2^23) - // - // But we also want to compute this using ordinary MUL, not a long multiply - // instruction (those are slower). So we need to worry about the product - // overflowing. dx fits in 32 bits, because it's the product of something - // <2^24 with something <2^8; but we must shift it right before multiplying - // by x again. - - mul r12, r0, r14 // r12 = dx - movlo r1, r1, lsl #1 // exponent setup [#2] in the MUL delay slot - mvn r12, r12, lsr #8 // r12 ~= -dx/2^8 - mul r3, r12, r14 // r3 ~= -dx^2/2^8 - mov r14, r14, lsl #9 // r14 = 2^9 x - add r14, r14, r3, asr #15 // r14 ~= 2^9 x - dx^2 / 2^23 - - // Now r14 is a 16-bit approximation to the reciprocal of the input mantissa, - // scaled by 2^39 (so that the min mantissa 2^23 would have reciprocal 2^16 - // in principle, and the max mantissa 2^24-1 would have reciprocal just over - // 2^15). The error is always negative (r14 is an underestimate of the true - // value), and the maximum error is 6 and a bit ULP (that is, the true - // reciprocal is strictly less than (r14+7)). Also, r14 is always strictly - // less than 0x10000 (even in the case of the min mantissa, where the true - // value would be _exactly_ 0x10000), which eliminates a case of integer - // overflow. - // - // All of these properties of the reciprocal approximation are checked by - // exhaustively iterating over all 2^23 possible input mantissas. (The nice - // thing about doing this in single rather than double precision!) - // - // Now we extract most of the quotient by two steps of long division, using - // the reciprocal estimate to identify a multiple of the denominator to - // subtract from the numerator. To avoid integer overflow, the numerator - // mantissa is shifted down 8 bits so that it's less than 0x10000. After we - // calculate an approximate quotient, we shift the numerator left and - // subtract that multiple of the denominator, moving the next portion of the - // numerator into range for the next iteration. - - // First iteration of long division. We shift the numerator left 11 bits, and - // since the quotient approximation is scaled by 2^31, we must shift that - // right by 20 to make the right product to subtract from the numerator. - mov r12, r1, lsr #8 // shift the numerator down - mul r12, r14, r12 // make the quotient approximation - mov r1, r1, lsl #11 // shift numerator left, ready for subtraction - mov r3, r12, lsr #20 // make first 12-bit block of quotient bits - mls r1, r0, r3, r1 // subtract that multiple of den from num - - add r2, r2, #0x7D0000 // exponent setup [#3] in the MLS delay slot - - // Second iteration of long division. Differences from the first step: this - // time we shift the numerator 12 bits instead of 11, so that the total of - // both steps is 23 bits, i.e. we've shifted up by exactly the full width of - // the output mantissa. Also, the block of output quotient bits is left in a - // different register: it was in r3 the first time, and this time it's in - // r12, so that we still have both available at the end of the process. - mov r12, r1, lsr #8 // shift the numerator down - mul r12, r14, r12 // make the quotient approximation - mov r1, r1, lsl #12 // shift numerator left, ready for subtraction - mov r12, r12, lsr #19 // make second 11-bit block of quotient - mls r1, r0, r12, r1 // subtract that multiple of den from num - - adc r2, r2, r2, asr #16 // exponent setup [#4] in the MLS delay slot - - // Now r1 contains the original numerator, shifted left 23, minus _some_ - // multiple of the original denominator (which is still in r0). The bounds on - // the error in the above steps should make the error at most 1: that is, we - // may have to subtract the denominator one more time to make r1 < r0, and - // increment the quotient by one more. - // - // Our quotient is still in two pieces, computed separately in the above long - // division steps. We fold the final increment into the same instruction that - // recombines them, by doing the comparison in such a way that it sets the - // carry flag if the increment is needed. - - cmp r1, r0 // Set carry flag if num >= den - subhs r1, r1, r0 // If so, subtract den from num - adc r3, r12, r3, lsl #12 // Recombine quotient halves, plus optional +1 - - // We've finished with r14 as a temporary register, so we can unstack it now. - pop {r14} - - // Now r3 contains the _rounded-down_ output quotient, and r1 contains the - // remainder. That is, (denominator * r3 + r1) = (numerator << 23), and - // 0 <= r1 < denominator. - // - // Next we must round to nearest, by checking if r1 is greater than half the - // denominator. In division, it's not possible to hit an exact round-to-even - // halfway case, so we don't need to spend any time checking for it. - // - // Proof of no round-to-even: define the 'width' of a dyadic rational to be - // the distance between the lowest and highest 1 bits in its binary - // representation, or equivalently, the index of its high bit if you scale it - // by a power of 2 to make it an odd integer. E.g. any actual power of 2 has - // width 0, and all of 0b11110, 0b1111, 0b11.11 and 0b0.01111 have width 3. - // Then for any dyadic rationals a,b, width(ab) >= width(a)+width(b). Let w - // be the maximum width that the input precision supports (so that for single - // precision, w=23). Then if some division n/d were a round-to-even case, the - // true quotient q=n/d would have width exactly w+1. But we have qd=n, so - // width(n) >= width(q)+width(d) > w, which can't happen, because n is in the - // input precision, hence had width <= w.) - // - // So we don't need to check for an exact _halfway_ case and clear the low - // bit of the quotient after rounding up, as addition and multiplication both - // need to do. But we do need to remember if the quotient itself was exact, - // that is, if there was no remainder at all. That's needed in underflow - // handling. - - // The rounding check wants to compare remainder with denominator/2. But of - // course in integers it's easier to compare 2*remainder with denominator. So - // we start by shifting the remainder left by 1, and in the process, set Z if - // it's exactly 0 (i.e. the result needs no rounding at all). - lsls r1, r1, #1 - // Now trial-subtract the denominator. We don't do this at all if the result - // was exact. If we do do it, r1 goes negative precisely if we need to round - // up, which sets the C flag. (The previous instruction will have left C - // clear, since r1 had its top 8 bits all clear. So now C is set _only_ if - // we're rounding up.) - subsne r1, r1, r0 - // Recombine the quotient with the sign + exponent, and use the C flag from - // the previous instruction to increment the quotient if we're rounding up. - adc r0, r3, r2, lsl #23 - - // If we haven't either overflowed or underflowed, we're done. We can - // identify most of the safe cases by doing an unsigned comparison of the - // initial output exponent (in the top half of r2) with 0xFC: if 0 <= r2 < - // 0xFC0000 then we have neither underflow nor overflow. - // - // Rationale: the value in the top half of r2 had three chances to be - // incremented before becoming the exponent field of the actual output float. - // It was incremented if we found the numerator mantissa was >= the - // denominator (producing the value in the _bottom_ half of r2, which we just - // ADCed into the output). Then it gets unconditionally incremented again - // when the ADC combines it with the leading mantissa bit. And finally, - // round-up might increment it a third time. So 0xFC is the smallest value - // that can possibly turn into the overflowed value 0xFF after all those - // increments. - // - // On the underflow side, (top half of r2) = 0 corresponds to a value of 1 in - // the final result's exponent field (and then rounding might increase it - // further); if the exponent was less than that then r2 wraps round and looks - // like a very large positive integer from the point of view of this unsigned - // comparison. - cmp r2, #0xFC0000 - bxlo lr - - // The same comparison will have set the N and V flags to reflect the result - // of comparing r2 with 0xFC0000 as a _signed_ integer. That reliably - // distinguishes potential underflow (r2 is negative) from potential overflow - // (r2 is positive and at least 0xFC0000) - bge LOCAL_LABEL(overflow) - - // Here we might or might not have underflow (but we know we don't have - // overflow). To check more carefully, we look at the _bottom_ half of r2, - // which contains the exponent after the first adjustment (for num >= denom), - // That is, it's still off by 1 (compensating for the leading quotient bit), - // and is also before rounding. - // - // We neglect the effect of rounding: division results that are tiny (less - // than the smallest normalised number) before rounding, but then round up to - // the smallest normal number, are an acceptable edge case to handle slowly. - // We pass those to funder without worrying about them. - // - // So we want to check whether the bottom half of r2 was negative. It would - // be nice to check bits 8-15 of it, but unfortunately, it's already been - // combined with the sign (at bit 8), so those bits don't tell us anything - // useful. Instead we look at the top 4 bits of the exponent field, i.e. the - // 0xF0 bits. The largest _non_-overflowing exponent that might reach here is - // less than 3, so it doesn't reach those bits; the smallest possible - // underflow, obtained by dividing the smallest denormal by the largest - // finite number, is -151 (before the leading bit increments it), which will - // set the low 8 bits of r2 to 0x69. That is, the 0xF0 nibble of r2 will be - // 0x60 or greater for a (pre-rounding) underflow, and zero for a - // non-underflow. - - tst r2, #0xF0 - bxeq lr // no underflow after all; return - - // Rebias the exponent for funder, which also corrects the sign bit. - add r0, r0, #192 << 23 - // Tell funder whether the true value is greater or less than the number in - // r0. This is obtained from the sign of the remainder (still in r1), with - // the only problem being that it's currently reversed. So negate r1 (leaving - // 0 at 0 to indicate exactness). - rsbs r1, r1, #0 - b SYMBOL_NAME(__compiler_rt_funder) - -LOCAL_LABEL(overflow): - // Here we might or might not have overflow (but we know we don't have - // underflow). We must check whether we really have overflowed. - // - // For this it's easiest to check the exponent field in the actual output - // value in r0, after _all_ the adjustments have been completed. The largest - // overflowed exponent is 0x193, and the smallest exponent that can reach - // this is 0xFD (we checked against 0xFC above, but then the leading quotient - // bit incremented it). So it's enough to shift the output left by one - // (moving the exponent field to the top), increment it once more (so that - // the smallest overflowed exponent 0xFF wraps round to 0), and then compare - // against 0xFE000000 as an unsigned integer. - mov r12, r0, lsl #1 - add r12, r12, #1 << 24 - cmp r12, #0xFE << 24 // Check for exp = 253 or 254 - bxhs lr - // We have actual overflow. Rebias r0 to bring the exponent back into range, - // which ensures its sign is correct. Then make an infinity of that sign to - // return. - subs r0, r0, #0xC0 << 23 - movs r12, #0xFF // exponent of infinity - orrs r12, r12, r0, lsr #23 // exponent and sign at bottom of r12 - movs r0, r12, lsl #23 // shift it up to the top of r0 to return - bx lr - -LOCAL_LABEL(uncommon): - // We come here from the start of the function if either input is an uncommon - // value: zero, denormal, infinity or NaN. - // - // We arrive here with r12 = 0xFF000000, and r2 containing the exponent of x - // in bits 16..23. But r3 doesn't necessarily contain the exponent of y, - // because the instruction that set it up was conditional. So first we - // unconditionally repeat it. - and r3, r12, r1, lsr #7 - - // In all cases not involving a NaN as output, the sign of the output is made - // in the same way as for finite numbers, as the XOR of the input signs. So - // repeat the sign setup from the main branch. - teq r0, r1 // is the output sign bit 1? - orrmi r2, r2, #0x100 // if so, set bit 8 of r2 - - // Detect infinities and NaNs, by checking if either of r2 or r3 is at least - // 0xFF0000. - cmp r2, #0xFF0000 - cmplo r3, #0xFF0000 - bhs LOCAL_LABEL(inf_NaN) - - // Now we know there are no infinities or NaNs, but there's at least one zero - // or denormal. - movs r12, r1, lsl #1 // is y zero? - beq LOCAL_LABEL(divbyzero) // if so, go and handle division by zero - movs r12, r0, lsl #1 // is x zero? (now we know that y is not) - moveq r0, r2, lsl #23 // if so, 0/nonzero is just 0 (of right sign) - bxeq lr - - // Now we've eliminated zeroes as well, leaving only denormals: either x or - // y, or both, is a denormal. Call fnorm2 to convert both into a normalised - // mantissa and a (potentially small) exponent. - and r12, r2, #0x100 // save the result sign from r2 - lsr r2, #16 // shift extracted exponents down to bit 0 - lsr r3, #16 // where fnorm2 will expect them - push {r0, r1, r2, r3, r12, lr} - mov r0, sp // tell fnorm2 where to find its data - bl SYMBOL_NAME(__compiler_rt_fnorm2) - pop {r0, r1, r2, r3, r12, lr} - lsl r3, #16 // shift exponents back up to bit 16 - orr r2, r12, r2, lsl #16 // and put the result sign back in r2 - - // Now rejoin the main code path, having finished the setup it will expect: - // swap x and y, and shift the fractions back down to the low 24 bits. - mov r12, r0, lsr #8 - mov r0, r1, lsr #8 - mov r1, r12 - b LOCAL_LABEL(div) - -LOCAL_LABEL(inf_NaN): - // We come here if at least one input is a NaN or infinity. If either or both - // inputs are NaN then we hand off to fnan2 to propagate a NaN from the - // input. - mov r12, #0xFF000000 - cmp r12, r0, lsl #1 // if (r0 << 1) > 0xFF000000, r0 is a NaN - blo SYMBOL_NAME(__compiler_rt_fnan2) - cmp r12, r1, lsl #1 - blo SYMBOL_NAME(__compiler_rt_fnan2) - - // No NaNs, so we have three options: inf/inf = NaN, inf/finite = inf, and - // finite/inf = 0. - - // If both operands are infinity, we return a NaN. Since we know at - // least _one_ is infinity, we can test this by checking if they're - // equal apart from the sign bits. - eor r3, r0, r1 - lsls r3, #1 // were all bits of XOR zero other than top? - beq LOCAL_LABEL(invalid) // if so, both operands are infinity - - // See if x is infinite - cmp r12, r0, lsl #1 // (r0 << 1) == 0xFF000000? - beq LOCAL_LABEL(infret) // if so, infinity/finite = infinity - - // y is infinite and x is not, so we return a zero of the - // combined sign. - eor r0, r0, r1 // calculate the right sign - and r0, r0, #0x80000000 // throw away everything else - bx lr - -LOCAL_LABEL(divbyzero): - // Here, we know y is zero. But we don't know if x is zero or nonzero. So we - // might be calculating 0/0 (invalid operation, generating a NaN), or - // nonzero/0 (the IEEE "division by zero" exception, generating infinity). - movs r12, r0, lsl #1 // is x zero too? - beq LOCAL_LABEL(invalid) // if so, go and return a NaN - -LOCAL_LABEL(infret): - // Here, we're either dividing infinity by a finite number, or dividing a - // nonzero number by 0. (Or both, if we're dividing infinity by 0.) In all - // these cases we return infinity with the sign from r2. - // - // If we were implementing IEEE exceptions, we'd have to separate these - // cases: infinity / finite is not an _exception_, it just returns infinity, - // whereas (finite and nonzero) / 0 is a division-by-zero exception. But here - // we're not implementing exceptions, so we can treat all three cases the - // same. - // - // r2 contains the output sign in bit 8, which is a convenient place to find - // it when making an infinity, because we can fill in the 8 exponent bits - // below that and then shift it left. - orr r2, r2, #0xff // sign + maximum exponent - lsl r0, r2, #23 // shift up to the top - bx lr - -LOCAL_LABEL(invalid): - // Return the default NaN, from an invalid operation (either dividing - // infinity by infinity, or 0 by 0). - ldr r0, =0x7FC00000 - bx lr - -// Finally, the lookup table for the initial reciprocal approximation. -// -// The table index is made from the top 7 bits of the denominator mantissa. But -// the topmost bit is always 1, so only the other 6 bits vary. So it only has -// 64 entries, not 128. -// -// Each table entry is a single byte, with its top bit set. So the table -// entries correspond to the reciprocal of a 7-bit mantissa prefix scaled up by -// 2^14, or the reciprocal of a whole 24-bit mantissa scaled up by 2^31. -// -// Each of these 64 entries corresponds to a large interval of possible -// mantissas. For example, if the top 7 bits are 1000001 then the overall -// mantissa could be anything from 0x820000 to 0x83FFFF. And because the output -// of this table provides more bits than the input, there are several choices -// of 8-bit reciprocal approximation for a number in that interval. The -// reciprocal of 0x820000 starts with 0xFC plus a fraction, and the reciprocal -// of 0x83FFFF starts with 0xF9 minus a fraction, so there are four reasonable -// choices for that table entry: F9, FA, FB or FC. Which do we pick? -// -// The table below is generated by choosing whichever value minimises the -// maximum possible error _after_ the approximation is improved by the -// Newton-Raphson step. In the example above, we end up with FA. -// -// The Python code below will regenerate the table, complete with the per-entry -// comments. - -/* - -for prefix in range(64, 128): - best = None - - # Max and min 23-bit mantissas with this 7-bit prefix - mmin, mmax = prefix * 2**17, (prefix + 1) * 2**17 - 1 - - # Max and min table entry corresponding to the reciprocal of something in - # that range of mantissas: round up the reciprocal of mmax, and round down - # the reciprocal of mmin. Also clamp to the range [0x80,0xff], because - # 0x100 can't be used as a table entry due to not fitting in a byte, even - # though it's the exact reciprocal of the overall-smallest mantissa - # 0x800000. - gmin = max(128, (2**31 + mmin - 1) // mmax) - gmax = min(255, 2**31 // mmin) - - # For each of those table entries, compute the result of starting from that - # value and doing a Newton-Raphson iteration, with the mantissa at each end - # of the mantissa interval. One of these will be the worst possible error. - # Choose the table entry whose worst error is as small as possible. - # - # (To find the extreme values of a more general function on an interval, - # you must consider its values not only at the interval endpoints but also - # any turning points within the interval. Here, the function has only one - # turning point, and by construction it takes value 0 there, so we needn't - # worry.) - g = max( - range(gmin, gmax + 1), - key=lambda g: min( - (g * (2**32 - d * g) / 2**23 - 2**39 / d) for d in [mmin, mmax] - ), - ) - - print(f" .byte 0x{g:02x} // input [0x{mmin:06x},0x{mmax:06x}]" - f", candidate outputs [0x{gmin:02x},0x{gmax:02x}]" - ) - -*/ - - .p2align 2 // make sure we start on a 4-byte boundary, even in Thumb -LOCAL_LABEL(tab): - .byte 0xfe // input [0x800000,0x81ffff], candidate outputs [0xfd,0xff] - .byte 0xfa // input [0x820000,0x83ffff], candidate outputs [0xf9,0xfc] - .byte 0xf6 // input [0x840000,0x85ffff], candidate outputs [0xf5,0xf8] - .byte 0xf3 // input [0x860000,0x87ffff], candidate outputs [0xf1,0xf4] - .byte 0xef // input [0x880000,0x89ffff], candidate outputs [0xee,0xf0] - .byte 0xec // input [0x8a0000,0x8bffff], candidate outputs [0xeb,0xed] - .byte 0xe8 // input [0x8c0000,0x8dffff], candidate outputs [0xe7,0xea] - .byte 0xe5 // input [0x8e0000,0x8fffff], candidate outputs [0xe4,0xe6] - .byte 0xe2 // input [0x900000,0x91ffff], candidate outputs [0xe1,0xe3] - .byte 0xdf // input [0x920000,0x93ffff], candidate outputs [0xde,0xe0] - .byte 0xdc // input [0x940000,0x95ffff], candidate outputs [0xdb,0xdd] - .byte 0xd9 // input [0x960000,0x97ffff], candidate outputs [0xd8,0xda] - .byte 0xd6 // input [0x980000,0x99ffff], candidate outputs [0xd5,0xd7] - .byte 0xd3 // input [0x9a0000,0x9bffff], candidate outputs [0xd3,0xd4] - .byte 0xd1 // input [0x9c0000,0x9dffff], candidate outputs [0xd0,0xd2] - .byte 0xce // input [0x9e0000,0x9fffff], candidate outputs [0xcd,0xcf] - .byte 0xcc // input [0xa00000,0xa1ffff], candidate outputs [0xcb,0xcc] - .byte 0xc9 // input [0xa20000,0xa3ffff], candidate outputs [0xc8,0xca] - .byte 0xc7 // input [0xa40000,0xa5ffff], candidate outputs [0xc6,0xc7] - .byte 0xc4 // input [0xa60000,0xa7ffff], candidate outputs [0xc4,0xc5] - .byte 0xc2 // input [0xa80000,0xa9ffff], candidate outputs [0xc1,0xc3] - .byte 0xc0 // input [0xaa0000,0xabffff], candidate outputs [0xbf,0xc0] - .byte 0xbd // input [0xac0000,0xadffff], candidate outputs [0xbd,0xbe] - .byte 0xbb // input [0xae0000,0xafffff], candidate outputs [0xbb,0xbc] - .byte 0xb9 // input [0xb00000,0xb1ffff], candidate outputs [0xb9,0xba] - .byte 0xb7 // input [0xb20000,0xb3ffff], candidate outputs [0xb7,0xb8] - .byte 0xb5 // input [0xb40000,0xb5ffff], candidate outputs [0xb5,0xb6] - .byte 0xb3 // input [0xb60000,0xb7ffff], candidate outputs [0xb3,0xb4] - .byte 0xb1 // input [0xb80000,0xb9ffff], candidate outputs [0xb1,0xb2] - .byte 0xaf // input [0xba0000,0xbbffff], candidate outputs [0xaf,0xb0] - .byte 0xad // input [0xbc0000,0xbdffff], candidate outputs [0xad,0xae] - .byte 0xac // input [0xbe0000,0xbfffff], candidate outputs [0xab,0xac] - .byte 0xaa // input [0xc00000,0xc1ffff], candidate outputs [0xa9,0xaa] - .byte 0xa8 // input [0xc20000,0xc3ffff], candidate outputs [0xa8,0xa8] - .byte 0xa6 // input [0xc40000,0xc5ffff], candidate outputs [0xa6,0xa7] - .byte 0xa5 // input [0xc60000,0xc7ffff], candidate outputs [0xa4,0xa5] - .byte 0xa3 // input [0xc80000,0xc9ffff], candidate outputs [0xa3,0xa3] - .byte 0xa1 // input [0xca0000,0xcbffff], candidate outputs [0xa1,0xa2] - .byte 0xa0 // input [0xcc0000,0xcdffff], candidate outputs [0xa0,0xa0] - .byte 0x9e // input [0xce0000,0xcfffff], candidate outputs [0x9e,0x9f] - .byte 0x9d // input [0xd00000,0xd1ffff], candidate outputs [0x9d,0x9d] - .byte 0x9b // input [0xd20000,0xd3ffff], candidate outputs [0x9b,0x9c] - .byte 0x9a // input [0xd40000,0xd5ffff], candidate outputs [0x9a,0x9a] - .byte 0x98 // input [0xd60000,0xd7ffff], candidate outputs [0x98,0x99] - .byte 0x97 // input [0xd80000,0xd9ffff], candidate outputs [0x97,0x97] - .byte 0x96 // input [0xda0000,0xdbffff], candidate outputs [0x95,0x96] - .byte 0x94 // input [0xdc0000,0xddffff], candidate outputs [0x94,0x94] - .byte 0x93 // input [0xde0000,0xdfffff], candidate outputs [0x93,0x93] - .byte 0x92 // input [0xe00000,0xe1ffff], candidate outputs [0x91,0x92] - .byte 0x90 // input [0xe20000,0xe3ffff], candidate outputs [0x90,0x90] - .byte 0x8f // input [0xe40000,0xe5ffff], candidate outputs [0x8f,0x8f] - .byte 0x8e // input [0xe60000,0xe7ffff], candidate outputs [0x8e,0x8e] - .byte 0x8d // input [0xe80000,0xe9ffff], candidate outputs [0x8d,0x8d] - .byte 0x8b // input [0xea0000,0xebffff], candidate outputs [0x8b,0x8c] - .byte 0x8a // input [0xec0000,0xedffff], candidate outputs [0x8a,0x8a] - .byte 0x89 // input [0xee0000,0xefffff], candidate outputs [0x89,0x89] - .byte 0x88 // input [0xf00000,0xf1ffff], candidate outputs [0x88,0x88] - .byte 0x87 // input [0xf20000,0xf3ffff], candidate outputs [0x87,0x87] - .byte 0x86 // input [0xf40000,0xf5ffff], candidate outputs [0x86,0x86] - .byte 0x85 // input [0xf60000,0xf7ffff], candidate outputs [0x85,0x85] - .byte 0x84 // input [0xf80000,0xf9ffff], candidate outputs [0x84,0x84] - .byte 0x83 // input [0xfa0000,0xfbffff], candidate outputs [0x83,0x83] - .byte 0x82 // input [0xfc0000,0xfdffff], candidate outputs [0x82,0x82] - .byte 0x81 // input [0xfe0000,0xffffff], candidate outputs [0x80,0x81] - -END_COMPILERRT_FUNCTION(__divsf3) - -NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/fnan2.c b/compiler-rt/lib/builtins/arm/fnan2.c deleted file mode 100644 index 06bbd4339f171..0000000000000 --- a/compiler-rt/lib/builtins/arm/fnan2.c +++ /dev/null @@ -1,42 +0,0 @@ -//===-- fnan2.c - Handle single-precision NaN inputs to binary operation --===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This helper function is available for use by single-precision float -// arithmetic implementations to handle propagating NaNs from the input -// operands to the output, in a way that matches Arm hardware FP. -// -// On input, a and b are floating-point numbers in IEEE 754 encoding, and at -// least one of them must be a NaN. The return value is the correct output NaN. -// -// A signalling NaN in the input (with bit 22 clear) takes priority over any -// quiet NaN, and is adjusted on return by setting bit 22 to make it quiet. If -// both inputs are the same type of NaN then the first input takes priority: -// the input a is used instead of b. -// -//===----------------------------------------------------------------------===// - -#include - -uint32_t __compiler_rt_fnan2(uint32_t a, uint32_t b) { - // Make shifted-left copies of a and b to discard the sign bit. Then add 1 at - // the bit position where the quiet vs signalling bit ended up. This squashes - // all the signalling NaNs to the top of the range of 32-bit values, from - // 0xff800001 to 0xffffffff inclusive; meanwhile, all the quiet NaN values - // wrap round to the bottom, from 0 to 0x007fffff inclusive. So we can detect - // a signalling NaN by asking if it's greater than 0xff800000, and a quiet - // one by asking if it's less than 0x00800000. - uint32_t aadj = (a << 1) + 0x00800000; - uint32_t badj = (b << 1) + 0x00800000; - if (aadj > 0xff800000) // a is a signalling NaN? - return a | 0x00400000; // if so, return it with the quiet bit set - if (badj > 0xff800000) // b is a signalling NaN? - return b | 0x00400000; // if so, return it with the quiet bit set - if (aadj < 0x00800000) // a is a quiet NaN? - return a; // if so, return it - return b; // otherwise we expect b must be a quiet NaN -} diff --git a/compiler-rt/lib/builtins/arm/fnorm2.c b/compiler-rt/lib/builtins/arm/fnorm2.c deleted file mode 100644 index 29eba1cbde59d..0000000000000 --- a/compiler-rt/lib/builtins/arm/fnorm2.c +++ /dev/null @@ -1,62 +0,0 @@ -//===-- fnorm2.c - Handle single-precision denormal inputs to binary op ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This helper function is available for use by single-precision float -// arithmetic implementations, to handle denormal inputs on entry by -// renormalizing the mantissa and modifying the exponent to match. -// -//===----------------------------------------------------------------------===// - -#include - -// Structure containing the function's inputs and outputs. -// -// On entry: a, b are two input floating-point numbers, still in IEEE 754 -// encoding. expa and expb are the 8-bit exponents of those numbers, extracted -// and shifted down to the low 8 bits of the word, with no other change. -// Neither value should be zero, or have the maximum exponent (indicating an -// infinity or NaN). -// -// On exit: each of a and b contains the mantissa of the input value, with the -// leading 1 bit made explicit, and shifted up to the top of the word. If expa -// was zero (indicating that a was denormal) then it is now represented as a -// normalized number with an out-of-range exponent (zero or negative). The same -// applies to expb and b. -struct fnorm2 { - uint32_t a, b, expa, expb; -}; - -void __compiler_rt_fnorm2(struct fnorm2 *values) { - // Shift the mantissas of a and b to the right place to follow a leading 1 in - // the top bit, if there is one. - values->a <<= 8; - values->b <<= 8; - - // Test if a is denormal. - if (values->expa == 0) { - // If so, decide how much further up to shift its mantissa, and adjust its - // exponent to match. This brings the leading 1 of the denormal mantissa to - // the top of values->a. - uint32_t shift = __builtin_clz(values->a); - values->a <<= shift; - values->expa = 1 - shift; - } else { - // Otherwise, leave the mantissa of a in its current position, and OR in - // the explicit leading 1. - values->a |= 0x80000000; - } - - // Do the same operation on b. - if (values->expb == 0) { - uint32_t shift = __builtin_clz(values->b); - values->b <<= shift; - values->expb = 1 - shift; - } else { - values->b |= 0x80000000; - } -} diff --git a/compiler-rt/lib/builtins/arm/funder.c b/compiler-rt/lib/builtins/arm/funder.c deleted file mode 100644 index fd29e157328a3..0000000000000 --- a/compiler-rt/lib/builtins/arm/funder.c +++ /dev/null @@ -1,78 +0,0 @@ -//===-- funder.c - Handle single-precision floating-point underflow -------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This helper function is available for use by single-precision float -// arithmetic implementations to handle underflowed output values, if they were -// computed in the form of a normalized mantissa and an out-of-range exponent. -// -// On input: x should be a complete IEEE 754 floating-point value representing -// the desired output scaled up by 2^192 (the same value that would have been -// passed to an underflow trap handler in IEEE 754:1985). -// -// This isn't enough information to re-round to the correct output denormal -// without also knowing whether x itself has already been rounded, and which -// way. 'errsign' gives this information, by indicating the sign of the value -// (true result - x). That is, if errsign > 0 it means the true value was -// larger (x was rounded down); if errsign < 0 then x was rounded up; if -// errsign == 0 then x represents the _exact_ desired output value. -// -//===----------------------------------------------------------------------===// - -#include - -#define SIGNBIT 0x80000000 -#define MANTSIZE 23 -#define BIAS 0xc0 - -uint32_t __compiler_rt_funder(uint32_t x, uint32_t errsign) { - uint32_t sign = x & SIGNBIT; - uint32_t exponent = (x << 1) >> 24; - - // Rule out exponents so small (or large!) that no denormalisation - // is needed. - if (exponent > BIAS) { - // Exponent 0xc1 or above means a normalised number got here by - // mistake, so we just remove the 0xc0 exponent bias and go - // straight home. - return x - (BIAS << MANTSIZE); - } - uint32_t bits_lost = BIAS + 1 - exponent; - if (bits_lost > MANTSIZE + 1) { - // The implicit leading 1 of the intermediate value's mantissa is - // below the lowest mantissa bit of a denormal by at least 2 bits. - // Round down to 0 unconditionally. - return sign; - } - - // Make the full mantissa (with leading bit) at the top of the word. - uint32_t mantissa = 0x80000000 | (x << 8); - // Adjust by 1 depending on the sign of the error. - mantissa -= errsign >> 31; - mantissa += (-errsign) >> 31; - - // Shift down to the output position, keeping the bits shifted off. - uint32_t outmant, shifted_off; - if (bits_lost == MANTSIZE + 1) { - // Special case for the exponent where we have to shift the whole - // of 'mantissa' off the bottom of the word. - outmant = 0; - shifted_off = mantissa; - } else { - outmant = mantissa >> (8 + bits_lost); - shifted_off = mantissa << (32 - (8 + bits_lost)); - } - - // Re-round. - if (shifted_off >> 31) { - outmant++; - if (!(shifted_off << 1)) - outmant &= ~1; // halfway case: round to even - } - - return sign | outmant; -} diff --git a/compiler-rt/lib/builtins/arm/mulsf3.S b/compiler-rt/lib/builtins/arm/mulsf3.S deleted file mode 100644 index b4f4c5e958c52..0000000000000 --- a/compiler-rt/lib/builtins/arm/mulsf3.S +++ /dev/null @@ -1,309 +0,0 @@ -//===-- mulsf3.S - single-precision floating point multiplication ---------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements single-precision soft-float multiplication with the -// IEEE-754 default rounding (to nearest, ties to even), in optimized AArch32 -// assembly language suitable to be built as either Arm or Thumb2. -// -//===----------------------------------------------------------------------===// - -#include "../assembly.h" - - - .syntax unified - .text - .p2align 2 - -DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fmul, __mulsf3) - -DEFINE_COMPILERRT_FUNCTION(__mulsf3) - - // Check if either input exponent is 00 or FF (i.e. not a normalized number), - // and if so, branch out of line. If we don't branch out of line, then we've - // also extracted the exponents of the input values r0/r1 into bits 16..23 of - // r2/r3. But if we do, then that hasn't necessarily been done (because the - // second AND might have been skipped). - mov r12, #0xFF0000 - ands r2, r12, r0, lsr #7 // sets Z if exponent of x is 0 - andsne r3, r12, r1, lsr #7 // otherwise, sets Z if exponent of y is 0 - teqne r2, r12 // otherwise, sets Z if exponent of x is FF - teqne r3, r12 // otherwise, sets Z if exponent of y is FF - beq LOCAL_LABEL(uncommon) // branch out of line to handle inf/NaN/0/denorm - - // Calculate the sign of the result, and put it in an unused bit of r2. - teq r0, r1 // sets N to the XOR of x and y's sign bits - orrmi r2, r2, #0x100 // if N set, set bit 8 of r2 - - // Move the input mantissas to the high end of r0/r1, each with its leading - // bit set explicitly, so that they're in the right form to be multiplied. - mov r12, #0x80000000 - orr r0, r12, r0, lsl #8 - orr r1, r12, r1, lsl #8 - - // Now we're ready to multiply mantissas. This is also the place we'll come - // back to after decoding denormal inputs. The denormal decoding will also - // have to set up the same register contents: - // - decoded fractions at the top of r0 and r1 - // - exponents in r2 and r3, starting at bit 16 - // - output sign in r2 bit 8 -LOCAL_LABEL(mul): - - // Here we multiply the mantissas, and compute the output exponent by adding - // the input exponents and rebiasing. These operations are interleaved to - // use a delay slot. - // - // The exponent is rebiased by subtracting 0x80, rather than the 0x7F you'd - // expect. That compensates for the leading bit of the mantissa overlapping - // it, when we recombine the exponent and mantissa by addition. - add r2, r2, r3 // r2 has sum of exponents, freeing up r3 - umull r1, r3, r0, r1 // r3:r1 has the double-width product - sub r2, r2, #(0x80 << 16) // rebias the summed exponent - - // Compress the double-word product into just the high-order word r3, by - // setting its bit 0 if any bit of the low-order word is nonzero. This - // changes the represented value, but not by nearly enough to affect - // rounding, because rounding only depends on the bit below the last output - // bit, and the general question of whether _any_ nonzero bit exists below - // that. - cmp r1, #0 // if low word of full product is nonzero - orrne r3, r3, #1 // then set LSB of high word - - // The two inputs to UMULL had their high bits set, that is, were at least - // 0x80000000. So the 64-bit product was at least 0x4000000000000000, i.e. - // the high bit of the product could be at the top of the word or one bit - // below. Check which, by experimentally shifting left, and then undoing it - // via RRX if we turned out to have shifted off a 1 bit. - lsls r3, r3, #1 // shift left, setting C to the bit shifted off - rrxcs r3, r3 // if that bit was 1, put it back again - - // That ensured the leading 1 bit of the product is now the top of r3, but - // also, set C if the leading 1 was _already_ in the top bit. So now we know - // whether to increment the exponent. The following instruction does the - // conditional increment (because it's ADC), but also, copies the exponent - // field from bit 16 of r2 into bit 0, so as to place it just below the - // output sign bit. - // - // So, if the number hasn't overflowed or underflowed, the low 9 bits of r2 - // are exactly what we need to combine with the rounded mantissa. But the - // full output exponent (with extra bits) is still available in the high half - // of r2, so that we can check _whether_ we overflowed or underflowed. - adc r2, r2, r2, asr #16 - - // Recombine the exponent and mantissa, doing most of the rounding as a side - // effect: we shift the mantissa right so as to put the round bit into C, and - // then we recombine with the exponent using ADC, to increment the mantissa - // if C was set. - movs r12, r3, lsr #8 - adc r0, r12, r2, lsl #23 - - // To complete the rounding, we must check for the round-to-even tiebreaking - // case, by checking if we're in the exact halfway case, which occurs if and - // only if we _did_ round up (we can tell this because C is still set from - // the MOVS), and also, no bit of r3 is set _below_ the round bit. - // - // We combine this with an overflow check, so that C ends up set if anything - // weird happened, and clear if we're completely finished and can return. - // - // The best instruction sequence for this part varies between Arm and Thumb. -#if !__thumb__ - // Arm state: if C was set then we check the low bits of r3, so that Z ends - // up set if we need to round to even. - // - // (We rely here on Z reliably being clear to begin with, because shifting - // down the output mantissa definitely gave a nonzero output. Also, the TST - // doesn't change C, so if Z does end up set, then C was also set.) - // - // Then, if we're not rounding to even, we do a CMP which sets C if there's - // been an overflow or an underflow. An overflow could occur for an output - // exponent as low as 0xFC, because we might increment the exponent by 1 when - // renormalizing, by another when recombining with the mantissa, and by one - // more if rounding up causes a carry off the top of the mantissa. An - // underflow occurs only if the output exponent is negative (because it's - // offset by 1, so an exponent of 0 will be incremented to 1), in which case - // the top 8 bits of r2 will all be set. Therefore, an unsigned comparison to - // see if r2 > 0xFC0000 will catch all overflow and underflow cases. It also - // catches a few very large cases that _don't_ quite overflow (exponents of - // 0xFC and above that don't get maximally unlucky); those will also be - // handled by the slow path. - tstcs r3, #0x7F - cmpne r2, #0xFC0000 -#else - // In Thumb, switching between different conditions has a higher cost due to - // the (implicit in this code) IT instructions, so we prefer a strategy that - // uses CC and CS conditions throughout, at the cost of requiring some extra - // cleanup instructions on the slow path. - // - // If C is set (and hence round-to-even is a possibility), the basic idea is - // to shift the full result word (r3) left by 25, leaving only its bottom 7 - // bits, which are now the top 7 bits; then we want to set C iff these are 0. - // - // The "CMP x,y" instruction sets C if y > x (as unsigned integers). So this - // could be done in one instruction if only we had a register to use as x, - // which has 0 in the top 7 bits and at least one nonzero. Then we could - // compare that against the shifted-up value of r3, setting C precisely if - // the top 7 bits of y are greater than 0. And happily, we _do_ have such a - // register! r12 contains the shifted-down mantissa, which is guaranteed to - // have a 1 in bit 23, and 0 above that. - // - // The shift of r3 happens only in the second operand of the compare, so we - // don't lose the original value of r3 in this process. - // - // The check for over/underflow is exactly as in the Arm branch above, except - // based on a different condition. - cmpcs r12, r3, lsl #25 // now C is set iff we're rounding to even - cmpcc r2, #0xFC0000 // and now it's also set if we've over/underflowed -#endif - - // That's all the checks for difficult cases done. If C is clear, we can - // return. - bxcc lr - - // Now the slower path begins. We have to recover enough information to - // handle all of round-to-even, overflow and underflow. - // - // Round to even is the most likely of these, so we detect it first and - // handle it as fast as possible. - -#if __thumb__ - // First, Thumb-specific compensation code. The Arm branch of the #if above - // will have set Z=0 to indicate round to even, but the Thumb branch didn't - // leave any unambiguous indicator of RTE, so we must retest by checking all - // the bits shifted off the bottom of the mantissa to see if they're exactly - // the half-way value. - lsl r12, r3, #24 // r12 = round bit and everything below - cmp r12, #0x80000000 // set Z if that is exactly 0x80000000 -#endif - - // Now Z is clear iff we have already rounded up and now must replace that - // with rounding to even, which is done by just clearing the low bit of the - // mantissa. - biceq r0, r0, #1 - - // Redo the over/underflow check (the same way as in both branches above), - // and if it doesn't report a danger, we can return the rounded-to-even - // answer. - cmp r2, #0xFC0000 // check for over/underflow - bxcc lr // and return if none. - - // Now we only have overflow and underflow left to handle. First, find out - // which we're looking at. This is easy by testing the top bit of r2, but - // even easier by using the fact that the possible positive and negative - // values of r2 are widely enough separated that the 0xFC0000 subtracted by - // the CMP above won't have made any difference. So the N flag output from - // that comparison _already_ tells us which condition we have: if N is set we - // have underflow, and if N is clear, overflow. - bpl LOCAL_LABEL(overflow) - - // Here we're handling underflow. - - // Add the IEEE 754:1985 exponent bias which funder will expect. This also - // brings the exponent back into a range where it can't possibly have carried - // into the sign bit, so the output sign will now be right. - add r0, r0, #(0xC0 << 23) - - // Determine whether we rounded up, down or not at all. - lsls r2, r3, #1 // input mantissa, without its leading 1 - subs r1, r2, r0, lsl #9 // subtract the output mantissa (likewise) - - // And let funder handle the rest. - b SYMBOL_NAME(__compiler_rt_funder) - -LOCAL_LABEL(overflow): - // We come here to handle overflow, but it's not guaranteed that an overflow - // has actually happened: our check on the fast path erred on the side of - // caution, by catching any output exponent that _could_ cause an overflow. - // So first check whether this really is an overflow, by extracting the - // output exponent. Exponent 0xFF, or anything that wrapped round to having - // the high bit clear, are overflows; 0xFE down to 0xFC are not overflows. - // - // The value in r0 is correct to return, if there's no overflow. - add r12, r0, #(1 << 23) // add 1 to the exponent so 0xFF wraps to 0 - movs r12, r12, lsl #1 // test the top bit of the modified value - bxmi lr // if top bit is still 1, not an overflow - - // This is an overflow, so we need to replace it with an appropriately signed - // infinity. First we correct the sign by applying a downward bias to the - // exponent (the one suggested in IEEE 754:1985, which was chosen to bring - // all possible overflowed results back into range). - subs r0, r0, #(0xC0 << 23) - - // Now the sign bit of r0 is correct. Replace everything else with the - // encoding of an infinity. - mov r1, #0xFF - and r0, r0, #0x80000000 - orr r0, r0, r1, lsl #23 - bx lr - -LOCAL_LABEL(uncommon): - // Handle zeros, denorms, infinities and NaNs. We arrive here knowing that - // we've at least done the first _two_ instructions from the entry point, - // even if all the rest were skipped. So r2 contains the sign and exponent of - // x in bits 16..23, and r12 = 0xFF << 16. - // - // So, first repeat some instructions from the prologue, which were either - // conditionally skipped in the sequence leading to the branch, or skipped - // because they happened after the branch. - and r3, r12, r1, lsr #7 // get exponent of y in r3 bits 16..23 - teq r0, r1 // calculate the sign of the result - orrmi r2, r2, #0x100 // and put it in bit 8 of r2 as before - - // Check for infinities and NaNs, by testing each of r2,r3 to see if it's at - // least 0xFF0000 (hence the exponent field is equal to 0xFF). - cmp r2, r12 - cmplo r3, r12 - bhs LOCAL_LABEL(inf_NaN) - - // If we didn't take that branch, then we have only finite numbers, but at - // least one is denormal or zero. A zero makes the result easy (and also is a - // more likely input than a denormal), so check those first, as fast as - // possible. - movs r12, r0, lsl #1 // Z set if x == 0 - movsne r12, r1, lsl #1 // now Z set if either input is 0 - moveq r0, r2, lsl #23 // in either case, make 0 of the output sign - bxeq lr // and return it - - // Now we know we only have denormals to deal with. Call fnorm2 to sort - // them out, and rejoin the main code path above. - and r12, r2, #0x100 // save the result sign from r2 - lsr r2, #16 // shift extracted exponents down to bit 0 - lsr r3, #16 // where fnorm2 will expect them - push {r0, r1, r2, r3, r12, lr} - mov r0, sp // tell fnorm2 where to find its data - bl SYMBOL_NAME(__compiler_rt_fnorm2) - pop {r0, r1, r2, r3, r12, lr} - lsl r3, #16 // shift exponents back up to bit 16 - orr r2, r12, r2, lsl #16 // and put the result sign back in r2 - b LOCAL_LABEL(mul) - -LOCAL_LABEL(inf_NaN): - // We come here if at least one input is a NaN or infinity. If either or both - // inputs are NaN then we hand off to fnan2 which will propagate a NaN from - // the input; otherwise any multiplication involving infinity returns - // infinity, unless it's infinity * 0 which is an invalid operation and - // returns NaN again. - mov r12, #0xFF000000 - cmp r12, r0, lsl #1 // if (r0 << 1) > 0xFF000000, r0 is a NaN - blo SYMBOL_NAME(__compiler_rt_fnan2) - cmp r12, r1, lsl #1 - blo SYMBOL_NAME(__compiler_rt_fnan2) - - // NaNs are dealt with, so now we have at least one infinity. Check if the - // other operand is 0. This is conveniently done by XORing the two: because - // we know that the low 31 bits of one operand are exactly 0x7F800000, we can - // test if the low 31 bits of the other one are all 0 by checking whether the - // low 31 bits of (x XOR y) equal 0x7F800000. - eor r3, r0, r1 - cmp r12, r3, lsl #1 // if inf * 0, this sets Z - lsr r0, r12, #1 // set up return value of +infinity - orrne r0, r0, r2, lsl #23 // if not inf * 0, put on the output sign - orreq r0, r0, #0x400000 // otherwise, set the 'quiet NaN' bit - bx lr // and return - -END_COMPILERRT_FUNCTION(__mulsf3) - -NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/thumb1/mulsf3.S b/compiler-rt/lib/builtins/arm/thumb1/mulsf3.S deleted file mode 100644 index f2ede1013a9e6..0000000000000 --- a/compiler-rt/lib/builtins/arm/thumb1/mulsf3.S +++ /dev/null @@ -1,251 +0,0 @@ -//===-- mulsf3.S - single-precision floating point multiplication ---------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements single-precision soft-float multiplication with the -// IEEE-754 default rounding (to nearest, ties to even), in optimized Thumb1 -// assembly language. -// -//===----------------------------------------------------------------------===// - -#include "../../assembly.h" - - .syntax unified - .text - .thumb - .p2align 2 - -DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fmul, __mulsf3) - -DEFINE_COMPILERRT_THUMB_FUNCTION(__mulsf3) - push {r4,r5,r6,lr} - - // Get exponents of the inputs, and check for uncommon values. In the process - // of this we also compute the sign, because it's marginally quicker that - // way. - lsls r2, r0, #1 - adcs r4, r4, r4 // set r4[0] to sign bit of x - lsls r3, r1, #1 - adcs r4, r4, r3 // set r4[0] to the output sign - lsrs r2, r2, #24 - beq LOCAL_LABEL(zerodenorm0) // still do the next LSRS - lsrs r3, r3, #24 - beq LOCAL_LABEL(zerodenorm) - cmp r2, #255 - beq LOCAL_LABEL(naninf) - cmp r3, #255 - beq LOCAL_LABEL(naninf) - // Compute the output exponent. We'll be generating our product _without_ the - // leading bit, so we subtract 0x7f rather than 0x80. - adds r2, r2, r3 - subs r2, r2, #0x7f - // Blank off everything above the mantissas. - lsls r0, r0, #9 - lsls r1, r1, #9 -LOCAL_LABEL(normalised): // we may come back here from zerodenorm - lsrs r0, r0, #9 - lsrs r1, r1, #9 - // Multiply. r0 and r1 are the mantissas of the inputs but without their - // leading bits, so the product we want in principle is P=(r0+2^23)(r1+2^23). - // P is at most (2^24-1)^2 < 2^48, so it fits in a word and a half. - // - // The technique below will actually compute P - 2^46, by not adding on the - // term where the two 2^23 are multiplied. The 48-bit result will be - // delivered in two output registers, one containing its bottom 32 bits and - // the other containing the top 32, so they overlap in the middle 16 bits. - // This is done using only two multiply instructions and some bookkeeping. - // - // In the comments I'll write X and Y for the original input mantissas (again - // without their leading bits). I'll also decompose them as X = xh + xl and - // Y = yh + yl, where xl and yl are in the range 0..2^8-1 and xh,yh are - // multiples of 2^8. - adds r5, r0, r1 - lsls r5, r5, #7 // r5 = (X+Y) << 7 - movs r6, r0 - muls r6, r1, r6 // r6 is congruent mod 2^32 to X*Y - lsrs r0, r0, #8 - lsrs r1, r1, #8 - muls r0, r1, r0 - lsls r1, r0, #16 // r1 is congruent mod 2^32 to xh*yh - subs r3, r6, r1 // now r3 is congruent mod 2^32 to - // (X*Y) - (xh*yh) = xh*yl + xl*yh + xl*yl - // and hence, since that is at most 0xfeff0001, - // is _exactly_ equal to that - adds r0, r0, r5 // r0 is now (xh*yh + (X+Y)<<23) >> 16 - lsrs r1, r3, #16 // r1 is the top 16 bits of r3, i.e. - // (xh*yl + xl*yh + xl*yl) >> 16 - adds r3, r0, r1 // now r3 equals - // (xh*yh + xh*yl + xl*yh + xl*yl + (X+Y)<<23) >> 16 - // i.e. (X*Y + (X+Y)<<23) >> 16, - // i.e. (the right answer) >> 16. - // Meanwhile, r6 is exactly the bottom 32 bits of the - // right answer. - // Renormalise if necessary. - lsrs r1, r3, #30 - beq LOCAL_LABEL(norenorm) - // Here we have to do something fiddly. Renormalisation would be a trivial - // job if we had the leading mantissa bit - just note that it's one bit - // position above where it should be, and shift right by one. But without - // that bit, we currently have (2x - 2^30), and we want (x - 2^30); just - // shifting right would of course give us (x - 2^29), so we must subtract an - // extra 2^29 to fix this up. - lsrs r3, r3, #1 - movs r1, #1 - lsls r1, r1, #29 - subs r3, r3, r1 - adds r2, r2, #1 -LOCAL_LABEL(norenorm): - // Round and shift down to the right bit position. - lsrs r0, r3, #7 // round bit goes into the carry flag - bcc LOCAL_LABEL(rounded) - adds r0, r0, #1 - // In the round-up branch, we must also check if we have to round to even, by - // testing all the bits below the round bit. We will normally not expect to, - // so we do RTE by branching out of line and back again to avoid spending a - // branch in the common case. - lsls r5, r3, #32-7+1 // check the bits shifted out of r3 above - bne LOCAL_LABEL(rounded) // if any is nonzero, we're not rounding to even - lsls r5, r6, #15 // check the bottom 17 bits of the low-order 32 - // (enough to overlap r3 even if we renormalised) - beq LOCAL_LABEL(rte) // if any is nonzero, fall through, else RTE -LOCAL_LABEL(rounded): - // Put on the sign and exponent, check for underflow and overflow, and - // return. - // - // Underflow occurs iff r2 (the output exponent) <= 0. Overflow occurs if - // it's >= 0xFF. (Also if it's 0xFE and we rounded up to overflow, but since - // this code doesn't report exceptions, we can ignore this case because it'll - // happen to return the right answer regardless). So we handle most of this - // via an unsigned comparison against 0xFF, which leaves the one case of a - // zero exponent that we have to filter separately by testing the Z flag - // after we shift the exponent back up into place. - cmp r2, #0xFF // check for most over/underflows - bhs LOCAL_LABEL(outflow) // ... and branch out of line for them - lsls r5, r2, #23 // shift the exponent into its output location - beq LOCAL_LABEL(outflow) // ... and branch again if it was 0 - lsls r4, r4, #31 // shift the output sign into place - orrs r0, r0, r4 // and OR it in to the output - adds r0, r0, r5 // OR in the mantissa - pop {r4,r5,r6,pc} // and return - -LOCAL_LABEL(rte): - // Out-of-line handler for the round-to-even case. Clear the low mantissa bit - // and go back to the post-rounding code. - movs r5, #1 - bics r0, r0, r5 - b LOCAL_LABEL(rounded) - -LOCAL_LABEL(outflow): - cmp r2, #0 - bgt LOCAL_LABEL(overflow) - // To handle underflow, we construct an intermediate value in the IEEE 754 - // style (using our existing full-length mantissa, and bias the exponent by - // +0xC0), and indicate whether that intermediate was rounded up, down or not - // at all. Then call the helper function funder, which will denormalise and - // re-round correctly. - lsls r1, r0, #7 // shift up the post-rounding mantissa - subs r1, r3, r1 // and subtract it from the pre-rounding version - lsls r6, r6, #15 - cmp r6, #1 // if the rest of the low bits are nonzero - adcs r1, r1, r1 // then set an extra bit at the bottom - - lsls r4, r4, #31 - orrs r0, r0, r4 // put on the sign - adds r2, r2, #192 // bias the exponent - lsls r3, r2, #23 - adds r0, r0, r3 // put on the biased exponent - - bl SYMBOL_NAME(__compiler_rt_funder) - pop {r4,r5,r6,pc} - -LOCAL_LABEL(overflow): - // Handle overflow by returning an infinity of the correct sign. - lsls r4, r4, #8 // move the sign up to bit 8 - movs r0, #0xff - orrs r0, r0, r4 // fill in an exponent just below it - lsls r0, r0, #23 // and shift those 9 bits up to the top of the word - pop {r4,r5,r6,pc} - - // We come here if there's at least one zero or denormal. On the fast path - // above, it was convenient to check these before checking NaNs and - // infinities, but NaNs take precedence, so now we're off the fast path, we - // must still check for those. - // - // At the main entry point 'zerodenorm' we want r2 and r3 to be the two input - // exponents. So if we branched after shifting-and-checking r2, we come to - // this earlier entry point 'zerodenorm0' so that we still shift r3. -LOCAL_LABEL(zerodenorm0): - lsrs r3, r3, #24 -LOCAL_LABEL(zerodenorm): - cmp r2, #255 - beq LOCAL_LABEL(naninf) - cmp r3, #255 - beq LOCAL_LABEL(naninf) - // Now we know we have at least one zero or denormal, and no NaN or infinity. - // Check if either input is actually zero. We've ruled out 0 * infinity by - // this point, so any zero input means we return zero of the correct sign. - lsls r6, r0, #1 // is one input zero? - beq LOCAL_LABEL(zero) // yes, go and return zero - lsls r6, r1, #1 // is the other one zero? - bne LOCAL_LABEL(denorm) // if not, one must have been a denormal -LOCAL_LABEL(zero): - lsls r0, r4, #31 // shift up the output sign to make the return value - pop {r4,r5,r6,pc} - - // Handle denormals via the helper function fnorm2, which will break both - // inputs up into mantissa and exponent, renormalising and generating a - // negative exponent if necessary. -LOCAL_LABEL(denorm): - push {r0,r1,r2,r3} - mov r0, sp - bl SYMBOL_NAME(__compiler_rt_fnorm2) - pop {r0,r1,r2,r3} - // Convert fnorm2's return values into the right form to rejoin the main - // code path. - lsls r0, r0, #1 - lsls r1, r1, #1 - adds r2, r2, r3 - subs r2, r2, #0x7f - b LOCAL_LABEL(normalised) - - // We come here if at least one input is a NaN or infinity. There may still - // be zeroes (or denormals, though they make no difference at this stage). -LOCAL_LABEL(naninf): - movs r6, #0xff - lsls r6, r6, #24 - lsls r5, r0, #1 - cmp r5, r6 - bhi LOCAL_LABEL(nan) // first operand is a NaN - lsls r5, r1, #1 - cmp r5, r6 - bhi LOCAL_LABEL(nan) // second operand is a NaN - - // We know we have at least one infinity, and no NaNs. We might also have a - // zero, in which case we return the default quiet NaN. - lsls r6, r0, #1 - beq LOCAL_LABEL(infzero) // if r0 is a zero, r1 must be inf - lsls r6, r1, #1 - beq LOCAL_LABEL(infzero) // if r1 is a zero, r0 must be inf - // Otherwise we have infinity * infinity, or infinity * finite. Just return - // an appropriately signed infinity. - b LOCAL_LABEL(overflow) // reuse the code there - - // We come here if at least one input is a NaN. Hand off to fnan2, which - // propagates an appropriate NaN to the output, dealing with the special - // cases of signalling/quiet NaNs. -LOCAL_LABEL(nan): - bl SYMBOL_NAME(__compiler_rt_fnan2) - pop {r4,r5,r6,pc} - - // Return a quiet NaN as the result of infinity * zero. -LOCAL_LABEL(infzero): - ldr r0, =0x7fc00000 - pop {r4,r5,r6,pc} - -END_COMPILERRT_FUNCTION(__mulsf3) - -NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/test/builtins/CMakeLists.txt b/compiler-rt/test/builtins/CMakeLists.txt index 8e3cb35183ba7..63f4c94605c90 100644 --- a/compiler-rt/test/builtins/CMakeLists.txt +++ b/compiler-rt/test/builtins/CMakeLists.txt @@ -35,10 +35,6 @@ if(APPLE) darwin_filter_host_archs(BUILTIN_SUPPORTED_ARCH BUILTIN_TEST_ARCH) endif() -if(COMPILER_RT_ARM_OPTIMIZED_FP) - list(APPEND BUILTINS_TEST_TARGET_CFLAGS -DCOMPILER_RT_ARM_OPTIMIZED_FP) -endif() - foreach(arch ${BUILTIN_TEST_ARCH}) set(BUILTINS_TEST_TARGET_ARCH ${arch}) string(TOLOWER "-${arch}-${OS_NAME}" BUILTINS_TEST_CONFIG_SUFFIX) diff --git a/compiler-rt/test/builtins/Unit/divsf3_test.c b/compiler-rt/test/builtins/Unit/divsf3_test.c index 12c5df5fdaae1..f8cb6169ac283 100644 --- a/compiler-rt/test/builtins/Unit/divsf3_test.c +++ b/compiler-rt/test/builtins/Unit/divsf3_test.c @@ -1,428 +1,115 @@ -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - // RUN: %clang_builtins %s %librt -o %t && %run %t // REQUIRES: librt_has_divsf3 #include "int_lib.h" -#include #include #include "fp_test.h" -// By default this test uses compareResultF to check the returned floats, which -// accepts any returned NaN if the expected result is the canonical NaN value -// 0x7fc00000. For the Arm optimized FP implementation, which commits to a more -// detailed handling of NaNs, we tighten up the check and include some extra -// test cases specific to that NaN policy. -#if (__arm__ && !(__thumb__ && !__thumb2__)) && COMPILER_RT_ARM_OPTIMIZED_FP -# define EXPECT_EXACT_RESULTS -# define ARM_NAN_HANDLING -#endif - // Returns: a / b COMPILER_RT_ABI float __divsf3(float a, float b); -int test__divsf3(uint32_t a_rep, uint32_t b_rep, uint32_t expected_rep) { - float a = fromRep32(a_rep), b = fromRep32(b_rep); - float x = __divsf3(a, b); -#ifdef EXPECT_EXACT_RESULTS - int ret = toRep32(x) == expected_rep; -#else - int ret = compareResultF(x, expected_rep); -#endif +int test__divsf3(float a, float b, uint32_t expected) +{ + float x = __divsf3(a, b); + int ret = compareResultF(x, expected); - if (ret) { - printf("error in test__divsf3(%08" PRIx32 ", %08" PRIx32 ") = %08" PRIx32 - ", expected %08" PRIx32 "\n", - a_rep, b_rep, toRep32(x), expected_rep); - } - return ret; + if (ret){ + printf("error in test__divsf3(%.20e, %.20e) = %.20e, " + "expected %.20e\n", a, b, x, + fromRep32(expected)); + } + return ret; } -int main(void) { - int status = 0; +int main() +{ + // Returned NaNs are assumed to be qNaN by default + + // qNaN / any = qNaN + if (test__divsf3(makeQNaN32(), 3.F, UINT32_C(0x7fc00000))) + return 1; + // NaN / any = NaN + if (test__divsf3(makeNaN32(UINT32_C(0x123)), 3.F, UINT32_C(0x7fc00000))) + return 1; + // any / qNaN = qNaN + if (test__divsf3(3.F, makeQNaN32(), UINT32_C(0x7fc00000))) + return 1; + // any / NaN = NaN + if (test__divsf3(3.F, makeNaN32(UINT32_C(0x123)), UINT32_C(0x7fc00000))) + return 1; + + // +Inf / positive = +Inf + if (test__divsf3(makeInf32(), 3.F, UINT32_C(0x7f800000))) + return 1; + // +Inf / negative = -Inf + if (test__divsf3(makeInf32(), -3.F, UINT32_C(0xff800000))) + return 1; + // -Inf / positive = -Inf + if (test__divsf3(makeNegativeInf32(), 3.F, UINT32_C(0xff800000))) + return 1; + // -Inf / negative = +Inf + if (test__divsf3(makeNegativeInf32(), -3.F, UINT32_C(0x7f800000))) + return 1; + + // Inf / Inf = NaN + if (test__divsf3(makeInf32(), makeInf32(), UINT32_C(0x7fc00000))) + return 1; + // 0.0 / 0.0 = NaN + if (test__divsf3(+0x0.0p+0F, +0x0.0p+0F, UINT32_C(0x7fc00000))) + return 1; + // +0.0 / +Inf = +0.0 + if (test__divsf3(+0x0.0p+0F, makeInf32(), UINT32_C(0x0))) + return 1; + // +Inf / +0.0 = +Inf + if (test__divsf3(makeInf32(), +0x0.0p+0F, UINT32_C(0x7f800000))) + return 1; + + // positive / +0.0 = +Inf + if (test__divsf3(+1.F, +0x0.0p+0F, UINT32_C(0x7f800000))) + return 1; + // positive / -0.0 = -Inf + if (test__divsf3(+1.F, -0x0.0p+0F, UINT32_C(0xff800000))) + return 1; + // negative / +0.0 = -Inf + if (test__divsf3(-1.F, +0x0.0p+0F, UINT32_C(0xff800000))) + return 1; + // negative / -0.0 = +Inf + if (test__divsf3(-1.F, -0x0.0p+0F, UINT32_C(0x7f800000))) + return 1; + + // 1/3 + if (test__divsf3(1.F, 3.F, UINT32_C(0x3eaaaaab))) + return 1; + // smallest normal result + if (test__divsf3(0x1.0p-125F, 2.F, UINT32_C(0x00800000))) + return 1; - status |= test__divsf3(0x00000000, 0x00000001, 0x00000000); - status |= test__divsf3(0x00000000, 0x007fffff, 0x00000000); - status |= test__divsf3(0x00000000, 0x00800000, 0x00000000); - status |= test__divsf3(0x00000000, 0x00ffffff, 0x00000000); - status |= test__divsf3(0x00000000, 0x3f800000, 0x00000000); - status |= test__divsf3(0x00000000, 0x40a00000, 0x00000000); - status |= test__divsf3(0x00000000, 0x7effffff, 0x00000000); - status |= test__divsf3(0x00000000, 0x7f000000, 0x00000000); - status |= test__divsf3(0x00000000, 0x7f800000, 0x00000000); - status |= test__divsf3(0x00000000, 0x80000002, 0x80000000); - status |= test__divsf3(0x00000000, 0x807fffff, 0x80000000); - status |= test__divsf3(0x00000000, 0x80800001, 0x80000000); - status |= test__divsf3(0x00000000, 0x81000000, 0x80000000); - status |= test__divsf3(0x00000000, 0xc0400000, 0x80000000); - status |= test__divsf3(0x00000000, 0xc0e00000, 0x80000000); - status |= test__divsf3(0x00000000, 0xfe7fffff, 0x80000000); - status |= test__divsf3(0x00000000, 0xff000000, 0x80000000); - status |= test__divsf3(0x00000000, 0xff800000, 0x80000000); - status |= test__divsf3(0x00000001, 0x00000000, 0x7f800000); - status |= test__divsf3(0x00000001, 0x3e000000, 0x00000008); - status |= test__divsf3(0x00000001, 0x3f000000, 0x00000002); - status |= test__divsf3(0x00000001, 0x40000000, 0x00000000); - status |= test__divsf3(0x00000001, 0x7f7fffff, 0x00000000); - status |= test__divsf3(0x00000001, 0x7f800000, 0x00000000); - status |= test__divsf3(0x00000001, 0xc0000000, 0x80000000); - status |= test__divsf3(0x00000001, 0xff7fffff, 0x80000000); - status |= test__divsf3(0x00000002, 0x80000000, 0xff800000); - status |= test__divsf3(0x00000002, 0xff800000, 0x80000000); - status |= test__divsf3(0x00000009, 0x41100000, 0x00000001); - status |= test__divsf3(0x00000009, 0xc1100000, 0x80000001); - status |= test__divsf3(0x007ffff7, 0x3f7ffffe, 0x007ffff8); - status |= test__divsf3(0x007ffffe, 0x3f7ffffe, 0x007fffff); - status |= test__divsf3(0x007fffff, 0x00000000, 0x7f800000); - status |= test__divsf3(0x007fffff, 0x3b000000, 0x04fffffe); - status |= test__divsf3(0x007fffff, 0x3f000000, 0x00fffffe); - status |= test__divsf3(0x007fffff, 0x3f800000, 0x007fffff); - status |= test__divsf3(0x007fffff, 0x3f800002, 0x007ffffd); - status |= test__divsf3(0x007fffff, 0x7f800000, 0x00000000); - status |= test__divsf3(0x007fffff, 0x80000000, 0xff800000); - status |= test__divsf3(0x007fffff, 0xbf800000, 0x807fffff); - status |= test__divsf3(0x007fffff, 0xff800000, 0x80000000); - status |= test__divsf3(0x00800000, 0x00000000, 0x7f800000); - status |= test__divsf3(0x00800000, 0x3f800001, 0x007fffff); - status |= test__divsf3(0x00800000, 0x7f800000, 0x00000000); - status |= test__divsf3(0x00800001, 0x3f800002, 0x007fffff); - status |= test__divsf3(0x00800001, 0x80000000, 0xff800000); - status |= test__divsf3(0x00800001, 0xff800000, 0x80000000); - status |= test__divsf3(0x00800002, 0x3f800006, 0x007ffffc); - status |= test__divsf3(0x00fffffe, 0x40000000, 0x007fffff); - status |= test__divsf3(0x00ffffff, 0x00000000, 0x7f800000); - status |= test__divsf3(0x00ffffff, 0x40000000, 0x00800000); - status |= test__divsf3(0x00ffffff, 0x7f800000, 0x00000000); - status |= test__divsf3(0x01000000, 0x00800000, 0x40000000); - status |= test__divsf3(0x01000000, 0x80000000, 0xff800000); - status |= test__divsf3(0x01000000, 0xc0000000, 0x80800000); - status |= test__divsf3(0x01000000, 0xff800000, 0x80000000); - status |= test__divsf3(0x01000001, 0x00800001, 0x40000000); - status |= test__divsf3(0x01000001, 0xc0000000, 0x80800001); - status |= test__divsf3(0x01000003, 0x80800003, 0xc0000000); - status |= test__divsf3(0x01000003, 0xc0000000, 0x80800003); - status |= test__divsf3(0x3f7ffff7, 0x3f7ffffb, 0x3f7ffffc); - status |= test__divsf3(0x3f7ffff7, 0x3f7ffffe, 0x3f7ffff9); - status |= test__divsf3(0x3f7ffff8, 0x3f7ffffc, 0x3f7ffffc); - status |= test__divsf3(0x3f7ffff8, 0x3f7ffffd, 0x3f7ffffb); - status |= test__divsf3(0x3f7ffffa, 0x3f7ffff9, 0x3f800001); - status |= test__divsf3(0x3f7ffffb, 0x3f7ffff9, 0x3f800001); - status |= test__divsf3(0x3f7ffffc, 0x3f7ffff9, 0x3f800002); - status |= test__divsf3(0x3f7ffffc, 0x3f7ffffd, 0x3f7fffff); - status |= test__divsf3(0x3f7ffffc, 0x3f7ffffe, 0x3f7ffffe); - status |= test__divsf3(0x3f7ffffc, 0x3f7fffff, 0x3f7ffffd); - status |= test__divsf3(0x3f7ffffc, 0x3f800001, 0x3f7ffffa); - status |= test__divsf3(0x3f7ffffd, 0x3f7ffff9, 0x3f800002); - status |= test__divsf3(0x3f7ffffd, 0x3f7ffffc, 0x3f800001); - status |= test__divsf3(0x3f7ffffd, 0x3f7ffffe, 0x3f7fffff); - status |= test__divsf3(0x3f7ffffd, 0x3f7fffff, 0x3f7ffffe); - status |= test__divsf3(0x3f7ffffd, 0x3f800001, 0x3f7ffffb); - status |= test__divsf3(0x3f7ffffd, 0x3f800002, 0x3f7ffff9); - status |= test__divsf3(0x3f7ffffe, 0x3f7ffff9, 0x3f800003); - status |= test__divsf3(0x3f7ffffe, 0x3f7ffffc, 0x3f800001); - status |= test__divsf3(0x3f7ffffe, 0x3f7ffffd, 0x3f800001); - status |= test__divsf3(0x3f7ffffe, 0x3f7fffff, 0x3f7fffff); - status |= test__divsf3(0x3f7ffffe, 0x3f800001, 0x3f7ffffc); - status |= test__divsf3(0x3f7ffffe, 0x3f800002, 0x3f7ffffa); - status |= test__divsf3(0x3f7ffffe, 0x3f800003, 0x3f7ffff8); - status |= test__divsf3(0x3f7fffff, 0x3f7ffff9, 0x3f800003); - status |= test__divsf3(0x3f7fffff, 0x3f7ffffc, 0x3f800002); - status |= test__divsf3(0x3f7fffff, 0x3f7ffffd, 0x3f800001); - status |= test__divsf3(0x3f7fffff, 0x3f7ffffe, 0x3f800001); - status |= test__divsf3(0x3f7fffff, 0x3f800001, 0x3f7ffffd); - status |= test__divsf3(0x3f7fffff, 0x3f800002, 0x3f7ffffb); - status |= test__divsf3(0x3f7fffff, 0x3f800003, 0x3f7ffff9); - status |= test__divsf3(0x3f7fffff, 0x3f800004, 0x3f7ffff7); - status |= test__divsf3(0x3f800000, 0x00000000, 0x7f800000); - status |= test__divsf3(0x3f800000, 0x3f7ffff7, 0x3f800005); - status |= test__divsf3(0x3f800000, 0x3f7ffff8, 0x3f800004); - status |= test__divsf3(0x3f800000, 0x3f7ffffb, 0x3f800003); - status |= test__divsf3(0x3f800000, 0x3f7ffffc, 0x3f800002); - status |= test__divsf3(0x3f800000, 0x3f7ffffd, 0x3f800002); - status |= test__divsf3(0x3f800000, 0x3f7ffffe, 0x3f800001); - status |= test__divsf3(0x3f800000, 0x3f7fffff, 0x3f800001); - status |= test__divsf3(0x3f800000, 0x3f800000, 0x3f800000); - status |= test__divsf3(0x3f800000, 0x3f800001, 0x3f7ffffe); - status |= test__divsf3(0x3f800000, 0x3f800002, 0x3f7ffffc); - status |= test__divsf3(0x3f800000, 0x3f800003, 0x3f7ffffa); - status |= test__divsf3(0x3f800000, 0x3f800004, 0x3f7ffff8); - status |= test__divsf3(0x3f800000, 0x7f800000, 0x00000000); - status |= test__divsf3(0x3f800001, 0x3f7ffffb, 0x3f800004); - status |= test__divsf3(0x3f800001, 0x3f7ffffd, 0x3f800003); - status |= test__divsf3(0x3f800001, 0x3f7ffffe, 0x3f800002); - status |= test__divsf3(0x3f800001, 0x3f7fffff, 0x3f800002); - status |= test__divsf3(0x3f800001, 0x3f800002, 0x3f7ffffe); - status |= test__divsf3(0x3f800001, 0x3f800003, 0x3f7ffffc); - status |= test__divsf3(0x3f800002, 0x3f7ffffc, 0x3f800004); - status |= test__divsf3(0x3f800002, 0x3f7ffffd, 0x3f800004); - status |= test__divsf3(0x3f800002, 0x3f7ffffe, 0x3f800003); - status |= test__divsf3(0x3f800002, 0x3f7fffff, 0x3f800003); - status |= test__divsf3(0x3f800002, 0x3f800001, 0x3f800001); - status |= test__divsf3(0x3f800002, 0x3f800003, 0x3f7ffffe); - status |= test__divsf3(0x3f800003, 0x3f7ffffd, 0x3f800005); - status |= test__divsf3(0x3f800003, 0x3f7ffffe, 0x3f800004); - status |= test__divsf3(0x3f800003, 0x3f7fffff, 0x3f800004); - status |= test__divsf3(0x3f800003, 0x3f800001, 0x3f800002); - status |= test__divsf3(0x3f800004, 0x3f7ffffe, 0x3f800005); - status |= test__divsf3(0x3f800004, 0x3f800001, 0x3f800003); - status |= test__divsf3(0x3f800004, 0x3f800007, 0x3f7ffffa); - status |= test__divsf3(0x3f800005, 0x3f7fffff, 0x3f800006); - status |= test__divsf3(0x3f800006, 0x3f800008, 0x3f7ffffc); - status |= test__divsf3(0x3f800007, 0x3f800002, 0x3f800005); - status |= test__divsf3(0x3f800009, 0x3f800008, 0x3f800001); - status |= test__divsf3(0x40000000, 0x3f800000, 0x40000000); - status |= test__divsf3(0x40000000, 0xbf800000, 0xc0000000); - status |= test__divsf3(0x40400000, 0x80000000, 0xff800000); - status |= test__divsf3(0x40400000, 0xc0400000, 0xbf800000); - status |= test__divsf3(0x40400000, 0xff800000, 0x80000000); - status |= test__divsf3(0x40a00000, 0x00000000, 0x7f800000); - status |= test__divsf3(0x40a00000, 0x40a00000, 0x3f800000); - status |= test__divsf3(0x40a00000, 0x7f800000, 0x00000000); - status |= test__divsf3(0x40e00000, 0x80000000, 0xff800000); - status |= test__divsf3(0x40e00000, 0xff800000, 0x80000000); - status |= test__divsf3(0x41000000, 0x40000000, 0x40800000); - status |= test__divsf3(0x41100000, 0x40400000, 0x40400000); - status |= test__divsf3(0x7b000000, 0x05000000, 0x7f800000); - status |= test__divsf3(0x7e7fffff, 0x80000000, 0xff800000); - status |= test__divsf3(0x7efffffd, 0xc0000000, 0xfe7ffffd); - status |= test__divsf3(0x7effffff, 0x00000000, 0x7f800000); - status |= test__divsf3(0x7effffff, 0x7f800000, 0x00000000); - status |= test__divsf3(0x7f000000, 0x00000000, 0x7f800000); - status |= test__divsf3(0x7f000000, 0x007fffff, 0x7f800000); - status |= test__divsf3(0x7f000000, 0x3f000000, 0x7f800000); - status |= test__divsf3(0x7f000000, 0x40000000, 0x7e800000); - status |= test__divsf3(0x7f000000, 0x7f800000, 0x00000000); - status |= test__divsf3(0x7f000000, 0x80000000, 0xff800000); - status |= test__divsf3(0x7f000000, 0xbf000000, 0xff800000); - status |= test__divsf3(0x7f000000, 0xc0000000, 0xfe800000); - status |= test__divsf3(0x7f000000, 0xff800000, 0x80000000); - status |= test__divsf3(0x7f000003, 0xfe800003, 0xc0000000); - status |= test__divsf3(0x7f7ffffd, 0x40800000, 0x7e7ffffd); - status |= test__divsf3(0x7f7ffffd, 0xc0800000, 0xfe7ffffd); - status |= test__divsf3(0x7f7fffff, 0x00000001, 0x7f800000); - status |= test__divsf3(0x7f7fffff, 0x3f7fffff, 0x7f800000); - status |= test__divsf3(0x7f7fffff, 0x7e7fffff, 0x40800000); - status |= test__divsf3(0x7f7fffff, 0x7effffff, 0x40000000); - status |= test__divsf3(0x7f7fffff, 0xc0000000, 0xfeffffff); - status |= test__divsf3(0x7f7fffff, 0xfe7fffff, 0xc0800000); - status |= test__divsf3(0x7f7fffff, 0xff800000, 0x80000000); - status |= test__divsf3(0x7f800000, 0x00000000, 0x7f800000); - status |= test__divsf3(0x7f800000, 0x00000001, 0x7f800000); - status |= test__divsf3(0x7f800000, 0x007fffff, 0x7f800000); - status |= test__divsf3(0x7f800000, 0x00800000, 0x7f800000); - status |= test__divsf3(0x7f800000, 0x00ffffff, 0x7f800000); - status |= test__divsf3(0x7f800000, 0x3f800000, 0x7f800000); - status |= test__divsf3(0x7f800000, 0x40a00000, 0x7f800000); - status |= test__divsf3(0x7f800000, 0x7effffff, 0x7f800000); - status |= test__divsf3(0x7f800000, 0x7f000000, 0x7f800000); - status |= test__divsf3(0x7f800000, 0x80000000, 0xff800000); - status |= test__divsf3(0x7f800000, 0x80000002, 0xff800000); - status |= test__divsf3(0x7f800000, 0x807fffff, 0xff800000); - status |= test__divsf3(0x7f800000, 0x80800001, 0xff800000); - status |= test__divsf3(0x7f800000, 0x81000000, 0xff800000); - status |= test__divsf3(0x7f800000, 0xc0400000, 0xff800000); - status |= test__divsf3(0x7f800000, 0xc0e00000, 0xff800000); - status |= test__divsf3(0x7f800000, 0xfe7fffff, 0xff800000); - status |= test__divsf3(0x7f800000, 0xff000000, 0xff800000); - status |= test__divsf3(0x7f800000, 0xff7fffff, 0xff800000); - status |= test__divsf3(0x80000000, 0x00000003, 0x80000000); - status |= test__divsf3(0x80000000, 0x007fffff, 0x80000000); - status |= test__divsf3(0x80000000, 0x00800001, 0x80000000); - status |= test__divsf3(0x80000000, 0x01000000, 0x80000000); - status |= test__divsf3(0x80000000, 0x40000000, 0x80000000); - status |= test__divsf3(0x80000000, 0x40c00000, 0x80000000); - status |= test__divsf3(0x80000000, 0x7e7fffff, 0x80000000); - status |= test__divsf3(0x80000000, 0x7e800000, 0x80000000); - status |= test__divsf3(0x80000000, 0x7f800000, 0x80000000); - status |= test__divsf3(0x80000000, 0x80000004, 0x00000000); - status |= test__divsf3(0x80000000, 0x807fffff, 0x00000000); - status |= test__divsf3(0x80000000, 0x80800000, 0x00000000); - status |= test__divsf3(0x80000000, 0x80ffffff, 0x00000000); - status |= test__divsf3(0x80000000, 0xc0800000, 0x00000000); - status |= test__divsf3(0x80000000, 0xc1000000, 0x00000000); - status |= test__divsf3(0x80000000, 0xfe800000, 0x00000000); - status |= test__divsf3(0x80000000, 0xfeffffff, 0x00000000); - status |= test__divsf3(0x80000000, 0xff800000, 0x00000000); - status |= test__divsf3(0x80000001, 0x3f000000, 0x80000002); - status |= test__divsf3(0x80000001, 0x40000000, 0x80000000); - status |= test__divsf3(0x80000001, 0x7f7fffff, 0x80000000); - status |= test__divsf3(0x80000001, 0xc0000000, 0x00000000); - status |= test__divsf3(0x80000001, 0xff7fffff, 0x00000000); - status |= test__divsf3(0x80000003, 0x00000000, 0xff800000); - status |= test__divsf3(0x80000003, 0x7f800000, 0x80000000); - status |= test__divsf3(0x80000004, 0x80000000, 0x7f800000); - status |= test__divsf3(0x80000004, 0xff800000, 0x00000000); - status |= test__divsf3(0x807ffff8, 0x3f7ffffe, 0x807ffff9); - status |= test__divsf3(0x807fffff, 0x00000000, 0xff800000); - status |= test__divsf3(0x807fffff, 0x7f800000, 0x80000000); - status |= test__divsf3(0x807fffff, 0x80000000, 0x7f800000); - status |= test__divsf3(0x807fffff, 0xff800000, 0x00000000); - status |= test__divsf3(0x80800000, 0x3f800001, 0x807fffff); - status |= test__divsf3(0x80800000, 0x80000000, 0x7f800000); - status |= test__divsf3(0x80800000, 0xff800000, 0x00000000); - status |= test__divsf3(0x80800001, 0x00000000, 0xff800000); - status |= test__divsf3(0x80800001, 0x7f800000, 0x80000000); - status |= test__divsf3(0x80ffffff, 0x80000000, 0x7f800000); - status |= test__divsf3(0x80ffffff, 0xff800000, 0x00000000); - status |= test__divsf3(0x81000000, 0x00000000, 0xff800000); - status |= test__divsf3(0x81000000, 0x7f800000, 0x80000000); - status |= test__divsf3(0x81000001, 0x00800001, 0xc0000000); - status |= test__divsf3(0x81000005, 0x00800005, 0xc0000000); - status |= test__divsf3(0xbf800000, 0x3f800000, 0xbf800000); - status |= test__divsf3(0xbf800000, 0xbf800000, 0x3f800000); - status |= test__divsf3(0xc0000000, 0x00000000, 0xff800000); - status |= test__divsf3(0xc0000000, 0x3f800000, 0xc0000000); - status |= test__divsf3(0xc0000000, 0x7f800000, 0x80000000); - status |= test__divsf3(0xc0000000, 0xbf800000, 0x40000000); - status |= test__divsf3(0xc0800000, 0x80000000, 0x7f800000); - status |= test__divsf3(0xc0800000, 0xff800000, 0x00000000); - status |= test__divsf3(0xc0c00000, 0x00000000, 0xff800000); - status |= test__divsf3(0xc0c00000, 0x7f800000, 0x80000000); - status |= test__divsf3(0xc0c00000, 0xc0400000, 0x40000000); - status |= test__divsf3(0xc0e00000, 0x40e00000, 0xbf800000); - status |= test__divsf3(0xc1000000, 0x40000000, 0xc0800000); - status |= test__divsf3(0xc1000000, 0x80000000, 0x7f800000); - status |= test__divsf3(0xc1000000, 0xff800000, 0x00000000); - status |= test__divsf3(0xc1100000, 0xc0400000, 0x40400000); - status |= test__divsf3(0xfe7fffff, 0x00000000, 0xff800000); - status |= test__divsf3(0xfe7fffff, 0x7f800000, 0x80000000); - status |= test__divsf3(0xfe800000, 0x00000000, 0xff800000); - status |= test__divsf3(0xfe800000, 0x7f800000, 0x80000000); - status |= test__divsf3(0xfe800000, 0x80000000, 0x7f800000); - status |= test__divsf3(0xfe800000, 0xff800000, 0x00000000); - status |= test__divsf3(0xfeffffff, 0x40000000, 0xfe7fffff); - status |= test__divsf3(0xfeffffff, 0x80000000, 0x7f800000); - status |= test__divsf3(0xff000000, 0x3f000000, 0xff800000); - status |= test__divsf3(0xff000000, 0xbf000000, 0x7f800000); - status |= test__divsf3(0xff000001, 0x7e800001, 0xc0000000); - status |= test__divsf3(0xff7ffffd, 0x40800000, 0xfe7ffffd); - status |= test__divsf3(0xff7ffffd, 0xc0800000, 0x7e7ffffd); - status |= test__divsf3(0xff7fffff, 0x7e7fffff, 0xc0800000); - status |= test__divsf3(0xff7fffff, 0xfe7fffff, 0x40800000); - status |= test__divsf3(0xff7fffff, 0xff800000, 0x00000000); - status |= test__divsf3(0xff800000, 0x00000000, 0xff800000); - status |= test__divsf3(0xff800000, 0x00000003, 0xff800000); - status |= test__divsf3(0xff800000, 0x007fffff, 0xff800000); - status |= test__divsf3(0xff800000, 0x00800001, 0xff800000); - status |= test__divsf3(0xff800000, 0x01000000, 0xff800000); - status |= test__divsf3(0xff800000, 0x40000000, 0xff800000); - status |= test__divsf3(0xff800000, 0x40c00000, 0xff800000); - status |= test__divsf3(0xff800000, 0x7e800000, 0xff800000); - status |= test__divsf3(0xff800000, 0x80000000, 0x7f800000); - status |= test__divsf3(0xff800000, 0x80000004, 0x7f800000); - status |= test__divsf3(0xff800000, 0x807fffff, 0x7f800000); - status |= test__divsf3(0xff800000, 0x80800000, 0x7f800000); - status |= test__divsf3(0xff800000, 0x80ffffff, 0x7f800000); - status |= test__divsf3(0xff800000, 0xc0800000, 0x7f800000); - status |= test__divsf3(0xff800000, 0xc1000000, 0x7f800000); - status |= test__divsf3(0xff800000, 0xfe800000, 0x7f800000); - status |= test__divsf3(0xff800000, 0xff7fffff, 0x7f800000); - status |= test__divsf3(0x2cbed883, 0x333f6113, 0x38ff4953); - status |= test__divsf3(0x3f87ffff, 0x7f001000, 0x0043f781); + // divisor is exactly 1.0 + if (test__divsf3(0x1.0p+0F, 0x1.0p+0F, UINT32_C(0x3f800000))) + return 1; + // divisor is truncated to exactly 1.0 in UQ1.15 + if (test__divsf3(0x1.0p+0F, 0x1.0001p+0F, UINT32_C(0x3f7fff00))) + return 1; - // Test that the result of an operation is a NaN at all when it should be. - // - // In most configurations these tests' results are checked compared using - // compareResultF, so we set all the answers to the canonical NaN 0x7fc00000, - // which causes compareResultF to accept any NaN encoding. We also use the - // same value as the input NaN in tests that have one, so that even in - // EXPECT_EXACT_RESULTS mode these tests should pass, because 0x7fc00000 is - // still the exact expected NaN. - status |= test__divsf3(0x00000000, 0x00000000, 0x7fc00000); - status |= test__divsf3(0x00000000, 0x80000000, 0x7fc00000); - status |= test__divsf3(0x7f800000, 0x7f800000, 0x7fc00000); - status |= test__divsf3(0x7f800000, 0xff800000, 0x7fc00000); - status |= test__divsf3(0x80000000, 0x00000000, 0x7fc00000); - status |= test__divsf3(0x80000000, 0x80000000, 0x7fc00000); - status |= test__divsf3(0xff800000, 0x7f800000, 0x7fc00000); - status |= test__divsf3(0xff800000, 0xff800000, 0x7fc00000); - status |= test__divsf3(0x3f800000, 0x7fc00000, 0x7fc00000); - status |= test__divsf3(0x7fc00000, 0x3f800000, 0x7fc00000); - status |= test__divsf3(0x7fc00000, 0x7fc00000, 0x7fc00000); + // smallest normal value divided by 2.0 + if (test__divsf3(0x1.0p-126F, 2.0F, UINT32_C(0x00400000))) + return 1; + // smallest subnormal result + if (test__divsf3(0x1.0p-126F, 0x1p+23F, UINT32_C(0x00000001))) + return 1; -#ifdef ARM_NAN_HANDLING - // Tests specific to the NaN handling of Arm hardware, mimicked by - // arm/divsf3.S: - // - // - a quiet NaN is distinguished by the top mantissa bit being 1 - // - // - if a signalling NaN appears in the input, the output quiet NaN is - // obtained by setting its top mantissa bit and leaving everything else - // unchanged - // - // - if both operands are signalling NaNs then the output NaN is derived - // from the first operand - // - // - if both operands are quiet NaNs then the output NaN is the first - // operand - // - // - invalid operations not involving an input NaN return the quiet - // NaN with fewest bits set, 0x7fc00000. + // some misc test cases obtained by fuzzing against h/w implementation + if (test__divsf3(-0x1.3e75e6p-108F, -0x1.cf372p+38F, UINT32_C(0x00000006))) + return 1; + if (test__divsf3(0x1.e77c54p+81F, -0x1.e77c52p-47F, UINT32_C(0xff800000))) + return 1; + if (test__divsf3(0x1.fffffep-126F, 2.F, UINT32_C(0x00800000))) + return 1; - status |= test__divsf3(0x00000000, 0x00000000, 0x7fc00000); - status |= test__divsf3(0x00000000, 0x7fad4be3, 0x7fed4be3); - status |= test__divsf3(0x00000000, 0x7fdf48c7, 0x7fdf48c7); - status |= test__divsf3(0x00000000, 0x80000000, 0x7fc00000); - status |= test__divsf3(0x00000001, 0x7f970eba, 0x7fd70eba); - status |= test__divsf3(0x00000001, 0x7fc35716, 0x7fc35716); - status |= test__divsf3(0x007fffff, 0x7fbf52d6, 0x7fff52d6); - status |= test__divsf3(0x007fffff, 0x7fc7a2df, 0x7fc7a2df); - status |= test__divsf3(0x3f800000, 0x7f987a85, 0x7fd87a85); - status |= test__divsf3(0x3f800000, 0x7fc50124, 0x7fc50124); - status |= test__divsf3(0x7f7fffff, 0x7f95fd6f, 0x7fd5fd6f); - status |= test__divsf3(0x7f7fffff, 0x7ffc28dc, 0x7ffc28dc); - status |= test__divsf3(0x7f800000, 0x7f800000, 0x7fc00000); - status |= test__divsf3(0x7f800000, 0x7f8dd790, 0x7fcdd790); - status |= test__divsf3(0x7f800000, 0x7fd2ef2b, 0x7fd2ef2b); - status |= test__divsf3(0x7f800000, 0xff800000, 0x7fc00000); - status |= test__divsf3(0x7f99b09d, 0x00000000, 0x7fd9b09d); - status |= test__divsf3(0x7f93541e, 0x00000001, 0x7fd3541e); - status |= test__divsf3(0x7f9fc002, 0x007fffff, 0x7fdfc002); - status |= test__divsf3(0x7fb5db77, 0x3f800000, 0x7ff5db77); - status |= test__divsf3(0x7f9f5d92, 0x7f7fffff, 0x7fdf5d92); - status |= test__divsf3(0x7fac7a36, 0x7f800000, 0x7fec7a36); - status |= test__divsf3(0x7fb42008, 0x7fb0ee07, 0x7ff42008); - status |= test__divsf3(0x7f8bd740, 0x7fc7aaf1, 0x7fcbd740); - status |= test__divsf3(0x7f9bb57b, 0x80000000, 0x7fdbb57b); - status |= test__divsf3(0x7f951a78, 0x80000001, 0x7fd51a78); - status |= test__divsf3(0x7f9ba63b, 0x807fffff, 0x7fdba63b); - status |= test__divsf3(0x7f89463c, 0xbf800000, 0x7fc9463c); - status |= test__divsf3(0x7fb63563, 0xff7fffff, 0x7ff63563); - status |= test__divsf3(0x7f90886e, 0xff800000, 0x7fd0886e); - status |= test__divsf3(0x7fe8c15e, 0x00000000, 0x7fe8c15e); - status |= test__divsf3(0x7fe915ae, 0x00000001, 0x7fe915ae); - status |= test__divsf3(0x7ffa9b42, 0x007fffff, 0x7ffa9b42); - status |= test__divsf3(0x7fdad0f5, 0x3f800000, 0x7fdad0f5); - status |= test__divsf3(0x7fd10dcb, 0x7f7fffff, 0x7fd10dcb); - status |= test__divsf3(0x7fd08e8a, 0x7f800000, 0x7fd08e8a); - status |= test__divsf3(0x7fc3a9e6, 0x7f91a816, 0x7fd1a816); - status |= test__divsf3(0x7fdb229c, 0x7fc26c68, 0x7fdb229c); - status |= test__divsf3(0x7fc9f6bb, 0x80000000, 0x7fc9f6bb); - status |= test__divsf3(0x7ffa178b, 0x80000001, 0x7ffa178b); - status |= test__divsf3(0x7fef2a0b, 0x807fffff, 0x7fef2a0b); - status |= test__divsf3(0x7ffc885b, 0xbf800000, 0x7ffc885b); - status |= test__divsf3(0x7fd26e8c, 0xff7fffff, 0x7fd26e8c); - status |= test__divsf3(0x7fc55329, 0xff800000, 0x7fc55329); - status |= test__divsf3(0x80000000, 0x00000000, 0x7fc00000); - status |= test__divsf3(0x80000000, 0x7fa833ae, 0x7fe833ae); - status |= test__divsf3(0x80000000, 0x7fc4df63, 0x7fc4df63); - status |= test__divsf3(0x80000000, 0x80000000, 0x7fc00000); - status |= test__divsf3(0x80000001, 0x7f98827d, 0x7fd8827d); - status |= test__divsf3(0x80000001, 0x7fd7acc5, 0x7fd7acc5); - status |= test__divsf3(0x807fffff, 0x7fad19c0, 0x7fed19c0); - status |= test__divsf3(0x807fffff, 0x7ffe1907, 0x7ffe1907); - status |= test__divsf3(0xbf800000, 0x7fa95487, 0x7fe95487); - status |= test__divsf3(0xbf800000, 0x7fd2bbee, 0x7fd2bbee); - status |= test__divsf3(0xff7fffff, 0x7f86ba21, 0x7fc6ba21); - status |= test__divsf3(0xff7fffff, 0x7feb00d7, 0x7feb00d7); - status |= test__divsf3(0xff800000, 0x7f800000, 0x7fc00000); - status |= test__divsf3(0xff800000, 0x7f857fdc, 0x7fc57fdc); - status |= test__divsf3(0xff800000, 0x7fde0397, 0x7fde0397); - status |= test__divsf3(0xff800000, 0xff800000, 0x7fc00000); -#endif // ARM_NAN_HANDLING + // test 1 / (1 - eps(0.5)) = 1 + eps(1) + if (test__divsf3(1.0F, 0x1.fffffep-1F, UINT32_C(0x3f800001))) + return 1; - return status; + return 0; } diff --git a/compiler-rt/test/builtins/Unit/mulsf3_test.c b/compiler-rt/test/builtins/Unit/mulsf3_test.c deleted file mode 100644 index 7dc7c8ad39c32..0000000000000 --- a/compiler-rt/test/builtins/Unit/mulsf3_test.c +++ /dev/null @@ -1,616 +0,0 @@ -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -// RUN: %clang_builtins %s %librt -o %t && %run %t -// REQUIRES: librt_has_mulsf3 - -#include "int_lib.h" -#include -#include - -#include "fp_test.h" - -// By default this test uses compareResultF to check the returned floats, which -// accepts any returned NaN if the expected result is the canonical NaN value -// 0x7fc00000. For the Arm optimized FP implementation, which commits to a more -// detailed handling of NaNs, we tighten up the check and include some extra -// test cases specific to that NaN policy. -#if (__arm__ && !(__thumb__ && !__thumb2__)) && COMPILER_RT_ARM_OPTIMIZED_FP -# define EXPECT_EXACT_RESULTS -# define ARM_NAN_HANDLING -#endif - -// Returns: a * b -COMPILER_RT_ABI float __mulsf3(float a, float b); - -int test__mulsf3(uint32_t a_rep, uint32_t b_rep, uint32_t expected_rep) { - float a = fromRep32(a_rep), b = fromRep32(b_rep); - float x = __mulsf3(a, b); -#ifdef EXPECT_EXACT_RESULTS - int ret = toRep32(x) == expected_rep; -#else - int ret = compareResultF(x, expected_rep); -#endif - - if (ret) { - printf("error in test__mulsf3(%08" PRIx32 ", %08" PRIx32 ") = %08" PRIx32 - ", expected %08" PRIx32 "\n", - a_rep, b_rep, toRep32(x), expected_rep); - } - return ret; -} - -int main(void) { - int status = 0; - - status |= test__mulsf3(0x00000000, 0x00000000, 0x00000000); - status |= test__mulsf3(0x00000000, 0x007fffff, 0x00000000); - status |= test__mulsf3(0x00000000, 0x00ffffff, 0x00000000); - status |= test__mulsf3(0x00000000, 0x3f800000, 0x00000000); - status |= test__mulsf3(0x00000000, 0x7effffff, 0x00000000); - status |= test__mulsf3(0x00000000, 0x80000000, 0x80000000); - status |= test__mulsf3(0x00000000, 0x80000002, 0x80000000); - status |= test__mulsf3(0x00000000, 0x807fffff, 0x80000000); - status |= test__mulsf3(0x00000000, 0x80800001, 0x80000000); - status |= test__mulsf3(0x00000000, 0x81000000, 0x80000000); - status |= test__mulsf3(0x00000000, 0xc0400000, 0x80000000); - status |= test__mulsf3(0x00000000, 0xfe7fffff, 0x80000000); - status |= test__mulsf3(0x00000000, 0xff000000, 0x80000000); - status |= test__mulsf3(0x00000000, 0xff7fffff, 0x80000000); - status |= test__mulsf3(0x00000001, 0x00000000, 0x00000000); - status |= test__mulsf3(0x00000001, 0x00000001, 0x00000000); - status |= test__mulsf3(0x00000001, 0x3f000000, 0x00000000); - status |= test__mulsf3(0x00000001, 0x3f7fffff, 0x00000001); - status |= test__mulsf3(0x00000001, 0x3f800000, 0x00000001); - status |= test__mulsf3(0x00000001, 0x40000000, 0x00000002); - status |= test__mulsf3(0x00000001, 0x7f800000, 0x7f800000); - status |= test__mulsf3(0x00000001, 0xbf7fffff, 0x80000001); - status |= test__mulsf3(0x00000006, 0x3f000000, 0x00000003); - status |= test__mulsf3(0x00000006, 0xbf000000, 0x80000003); - status |= test__mulsf3(0x00000008, 0x3e000000, 0x00000001); - status |= test__mulsf3(0x007ffff7, 0x81000003, 0x80000000); - status |= test__mulsf3(0x007ffff8, 0x3f800001, 0x007ffff9); - status |= test__mulsf3(0x007ffff8, 0x3f800008, 0x00800000); - status |= test__mulsf3(0x007ffff8, 0xbf800001, 0x807ffff9); - status |= test__mulsf3(0x007ffff8, 0xbf800008, 0x80800000); - status |= test__mulsf3(0x007ffffc, 0x40000000, 0x00fffff8); - status |= test__mulsf3(0x007ffffe, 0x3f7ffffc, 0x007ffffc); - status |= test__mulsf3(0x007ffffe, 0x3f800001, 0x007fffff); - status |= test__mulsf3(0x007ffffe, 0xbf800001, 0x807fffff); - status |= test__mulsf3(0x007fffff, 0x007ffffe, 0x00000000); - status |= test__mulsf3(0x007fffff, 0x3f800001, 0x00800000); - status |= test__mulsf3(0x007fffff, 0x40000000, 0x00fffffe); - status |= test__mulsf3(0x00800000, 0x00000000, 0x00000000); - status |= test__mulsf3(0x00800000, 0x00800000, 0x00000000); - status |= test__mulsf3(0x00800000, 0x3f7ffffe, 0x007fffff); - status |= test__mulsf3(0x00800000, 0x7f800000, 0x7f800000); - status |= test__mulsf3(0x00800000, 0x80800000, 0x80000000); - status |= test__mulsf3(0x00800000, 0xc0000000, 0x81000000); - status |= test__mulsf3(0x00800001, 0x3f7ffffa, 0x007ffffe); - status |= test__mulsf3(0x00800001, 0x3f7ffffe, 0x00800000); - status |= test__mulsf3(0x00800001, 0xc0000000, 0x81000001); - status |= test__mulsf3(0x00800002, 0x3f7ffffc, 0x00800000); - status |= test__mulsf3(0x00fffff8, 0x3f000000, 0x007ffffc); - status |= test__mulsf3(0x00fffffe, 0x3f000000, 0x007fffff); - status |= test__mulsf3(0x00fffffe, 0xbf000000, 0x807fffff); - status |= test__mulsf3(0x00ffffff, 0x3f000000, 0x00800000); - status |= test__mulsf3(0x00ffffff, 0xbf000000, 0x80800000); - status |= test__mulsf3(0x3f000000, 0x80000001, 0x80000000); - status |= test__mulsf3(0x3f800000, 0x007ffffd, 0x007ffffd); - status |= test__mulsf3(0x3f800000, 0x01000003, 0x01000003); - status |= test__mulsf3(0x3f800000, 0x3f800000, 0x3f800000); - status |= test__mulsf3(0x3f800000, 0x40000000, 0x40000000); - status |= test__mulsf3(0x3f800000, 0x80000001, 0x80000001); - status |= test__mulsf3(0x3f800000, 0x80000009, 0x80000009); - status |= test__mulsf3(0x3f800001, 0x3f800001, 0x3f800002); - status |= test__mulsf3(0x3f800001, 0xbf800001, 0xbf800002); - status |= test__mulsf3(0x3f800001, 0xbf800002, 0xbf800003); - status |= test__mulsf3(0x3f800002, 0x3f800001, 0x3f800003); - status |= test__mulsf3(0x3f800002, 0x7f7ffffe, 0x7f800000); - status |= test__mulsf3(0x3f800001, 0x7f7ffffe, 0x7f800000); - status |= test__mulsf3(0x40000000, 0x00800000, 0x01000000); - status |= test__mulsf3(0x40000000, 0x00800001, 0x01000001); - status |= test__mulsf3(0x40000000, 0x3f800000, 0x40000000); - status |= test__mulsf3(0x40000000, 0x40400000, 0x40c00000); - status |= test__mulsf3(0x40000000, 0x7e800000, 0x7f000000); - status |= test__mulsf3(0x40000000, 0x7effffff, 0x7f7fffff); - status |= test__mulsf3(0x40000000, 0x807ffffd, 0x80fffffa); - status |= test__mulsf3(0x40000000, 0x80800003, 0x81000003); - status |= test__mulsf3(0x40000000, 0x80800005, 0x81000005); - status |= test__mulsf3(0x40000000, 0xbf800000, 0xc0000000); - status |= test__mulsf3(0x40000000, 0xfe7ffffd, 0xfefffffd); - status |= test__mulsf3(0x40000000, 0xfe800003, 0xff000003); - status |= test__mulsf3(0x403fffff, 0x3f7ffffd, 0x403ffffd); - status |= test__mulsf3(0x403fffff, 0x3f7ffffe, 0x403ffffe); - status |= test__mulsf3(0x403fffff, 0x3f7fffff, 0x403ffffe); - status |= test__mulsf3(0x403fffff, 0xbf7ffffd, 0xc03ffffd); - status |= test__mulsf3(0x40400000, 0x00000002, 0x00000006); - status |= test__mulsf3(0x40400000, 0x40000000, 0x40c00000); - status |= test__mulsf3(0x40400000, 0x40400000, 0x41100000); - status |= test__mulsf3(0x40400000, 0xc0000000, 0xc0c00000); - status |= test__mulsf3(0x40400001, 0x3f800001, 0x40400003); - status |= test__mulsf3(0x40400001, 0x3f800003, 0x40400006); - status |= test__mulsf3(0x40400001, 0xbf800003, 0xc0400006); - status |= test__mulsf3(0x40800000, 0x00000002, 0x00000008); - status |= test__mulsf3(0x40800000, 0x7e7fffff, 0x7f7fffff); - status |= test__mulsf3(0x40800000, 0xfe7fffff, 0xff7fffff); - status |= test__mulsf3(0x409fffff, 0x3f7fffff, 0x409ffffe); - status |= test__mulsf3(0x40a00000, 0x00000000, 0x00000000); - status |= test__mulsf3(0x40a00000, 0x7f800000, 0x7f800000); - status |= test__mulsf3(0x40a00001, 0x3f800001, 0x40a00002); - status |= test__mulsf3(0x40dfffff, 0x3f7ffffc, 0x40dffffc); - status |= test__mulsf3(0x40dfffff, 0x3f7fffff, 0x40dffffe); - status |= test__mulsf3(0x40e00000, 0x80000000, 0x80000000); - status |= test__mulsf3(0x40e00000, 0xff800000, 0xff800000); - status |= test__mulsf3(0x40e00001, 0x3f800001, 0x40e00003); - status |= test__mulsf3(0x7e7ffffd, 0x40800000, 0x7f7ffffd); - status |= test__mulsf3(0x7e7ffffd, 0xc0800000, 0xff7ffffd); - status |= test__mulsf3(0x7e800000, 0xc0000000, 0xff000000); - status |= test__mulsf3(0x7efffffd, 0xc0000008, 0xff800000); - status |= test__mulsf3(0x7effffff, 0xc0000000, 0xff7fffff); - status |= test__mulsf3(0x7f000000, 0x00000000, 0x00000000); - status |= test__mulsf3(0x7f000000, 0x40000000, 0x7f800000); - status |= test__mulsf3(0x7f000000, 0x7f000000, 0x7f800000); - status |= test__mulsf3(0x7f000000, 0x7f7ffffe, 0x7f800000); - status |= test__mulsf3(0x7f000000, 0x7f800000, 0x7f800000); - status |= test__mulsf3(0x7f000000, 0xfe800000, 0xff800000); - status |= test__mulsf3(0x7f000000, 0xfe800004, 0xff800000); - status |= test__mulsf3(0x7f000000, 0xff000000, 0xff800000); - status |= test__mulsf3(0x7f000009, 0x7f7ffffa, 0x7f800000); - status |= test__mulsf3(0x7f000009, 0xc0c00002, 0xff800000); - status |= test__mulsf3(0x7f7fffff, 0x00000000, 0x00000000); - status |= test__mulsf3(0x7f800000, 0x007fffff, 0x7f800000); - status |= test__mulsf3(0x7f800000, 0x00ffffff, 0x7f800000); - status |= test__mulsf3(0x7f800000, 0x3f800000, 0x7f800000); - status |= test__mulsf3(0x7f800000, 0x7effffff, 0x7f800000); - status |= test__mulsf3(0x7f800000, 0x7f800000, 0x7f800000); - status |= test__mulsf3(0x7f800000, 0x80000002, 0xff800000); - status |= test__mulsf3(0x7f800000, 0x807fffff, 0xff800000); - status |= test__mulsf3(0x7f800000, 0x80800001, 0xff800000); - status |= test__mulsf3(0x7f800000, 0x81000000, 0xff800000); - status |= test__mulsf3(0x7f800000, 0xc0400000, 0xff800000); - status |= test__mulsf3(0x7f800000, 0xff000000, 0xff800000); - status |= test__mulsf3(0x7f800000, 0xff7fffff, 0xff800000); - status |= test__mulsf3(0x7f800000, 0xff800000, 0xff800000); - status |= test__mulsf3(0x80000000, 0x00000000, 0x80000000); - status |= test__mulsf3(0x80000000, 0x40c00000, 0x80000000); - status |= test__mulsf3(0x80000000, 0x7f7fffff, 0x80000000); - status |= test__mulsf3(0x80000000, 0x80000000, 0x00000000); - status |= test__mulsf3(0x80000000, 0x80000004, 0x00000000); - status |= test__mulsf3(0x80000000, 0x80800000, 0x00000000); - status |= test__mulsf3(0x80000000, 0xc1000000, 0x00000000); - status |= test__mulsf3(0x80000000, 0xfe800000, 0x00000000); - status |= test__mulsf3(0x80000001, 0x00000001, 0x80000000); - status |= test__mulsf3(0x80000001, 0x40a00000, 0x80000005); - status |= test__mulsf3(0x80000002, 0x3f800000, 0x80000002); - status |= test__mulsf3(0x80000003, 0x00000000, 0x80000000); - status |= test__mulsf3(0x80000003, 0x7f800000, 0xff800000); - status |= test__mulsf3(0x80000004, 0xbf800000, 0x00000004); - status |= test__mulsf3(0x80000008, 0x3e000000, 0x80000001); - status |= test__mulsf3(0x807ffff7, 0x01000003, 0x80000000); - status |= test__mulsf3(0x807ffff7, 0x3f800001, 0x807ffff8); - status |= test__mulsf3(0x807ffffd, 0xc0000000, 0x00fffffa); - status |= test__mulsf3(0x807fffff, 0x00000000, 0x80000000); - status |= test__mulsf3(0x807fffff, 0x3f800001, 0x80800000); - status |= test__mulsf3(0x807fffff, 0x7f800000, 0xff800000); - status |= test__mulsf3(0x807fffff, 0x80000000, 0x00000000); - status |= test__mulsf3(0x807fffff, 0x807ffffe, 0x00000000); - status |= test__mulsf3(0x807fffff, 0xbf800000, 0x007fffff); - status |= test__mulsf3(0x807fffff, 0xff800000, 0x7f800000); - status |= test__mulsf3(0x80800000, 0x00800000, 0x80000000); - status |= test__mulsf3(0x80800000, 0x80800000, 0x00000000); - status |= test__mulsf3(0x80800001, 0x00000000, 0x80000000); - status |= test__mulsf3(0x80800001, 0x7f800000, 0xff800000); - status |= test__mulsf3(0x80800001, 0xbf800000, 0x00800001); - status |= test__mulsf3(0x80fffffc, 0x3f000000, 0x807ffffe); - status |= test__mulsf3(0x80fffffc, 0xbf000000, 0x007ffffe); - status |= test__mulsf3(0x80fffffe, 0x3f800000, 0x80fffffe); - status |= test__mulsf3(0x80ffffff, 0x80000000, 0x00000000); - status |= test__mulsf3(0x80ffffff, 0xff800000, 0x7f800000); - status |= test__mulsf3(0x81000000, 0x00000000, 0x80000000); - status |= test__mulsf3(0x81000000, 0x7f800000, 0xff800000); - status |= test__mulsf3(0xbf7fffff, 0xff7fffff, 0x7f7ffffe); - status |= test__mulsf3(0xbf800000, 0x00000009, 0x80000009); - status |= test__mulsf3(0xbf800000, 0x00800009, 0x80800009); - status |= test__mulsf3(0xbf800000, 0x3f800000, 0xbf800000); - status |= test__mulsf3(0xbf800000, 0x40000000, 0xc0000000); - status |= test__mulsf3(0xbf800000, 0xbf800000, 0x3f800000); - status |= test__mulsf3(0xbf800000, 0xc0000000, 0x40000000); - status |= test__mulsf3(0xbf800001, 0x3f800001, 0xbf800002); - status |= test__mulsf3(0xbf800001, 0xbf800001, 0x3f800002); - status |= test__mulsf3(0xbf800001, 0xbf800002, 0x3f800003); - status |= test__mulsf3(0xbf800002, 0x3f800001, 0xbf800003); - status |= test__mulsf3(0xbf800002, 0xbf800001, 0x3f800003); - status |= test__mulsf3(0xc0000000, 0x00000000, 0x80000000); - status |= test__mulsf3(0xc0000000, 0x007ffffd, 0x80fffffa); - status |= test__mulsf3(0xc0000000, 0x00800001, 0x81000001); - status |= test__mulsf3(0xc0000000, 0x00800005, 0x81000005); - status |= test__mulsf3(0xc0000000, 0x00800009, 0x81000009); - status |= test__mulsf3(0xc0000000, 0x40400000, 0xc0c00000); - status |= test__mulsf3(0xc0000000, 0x7e7fffff, 0xfeffffff); - status |= test__mulsf3(0xc0000000, 0x7e800001, 0xff000001); - status |= test__mulsf3(0xc0000000, 0x7f800000, 0xff800000); - status |= test__mulsf3(0xc0000000, 0xbf800000, 0x40000000); - status |= test__mulsf3(0xc0000000, 0xc0400000, 0x40c00000); - status |= test__mulsf3(0xc03ffffe, 0x7f000000, 0xff800000); - status |= test__mulsf3(0xc03fffff, 0x3f7fffff, 0xc03ffffe); - status |= test__mulsf3(0xc0400000, 0x40400000, 0xc1100000); - status |= test__mulsf3(0xc0400000, 0xc0000000, 0x40c00000); - status |= test__mulsf3(0xc0400000, 0xc0400000, 0x41100000); - status |= test__mulsf3(0xc0400000, 0xff000000, 0x7f800000); - status |= test__mulsf3(0xc0400001, 0x3f800001, 0xc0400003); - status |= test__mulsf3(0xc0800000, 0x7e7fffff, 0xff7fffff); - status |= test__mulsf3(0xc0800000, 0x80000000, 0x00000000); - status |= test__mulsf3(0xc0800000, 0xfe7fffff, 0x7f7fffff); - status |= test__mulsf3(0xc0800000, 0xff800000, 0x7f800000); - status |= test__mulsf3(0xc09ffffe, 0xff000000, 0x7f800000); - status |= test__mulsf3(0xc09fffff, 0xbf7fffff, 0x409ffffe); - status |= test__mulsf3(0xc0a00001, 0xbf800001, 0x40a00002); - status |= test__mulsf3(0xc0dffff9, 0x7f000000, 0xff800000); - status |= test__mulsf3(0xc1100000, 0x7f000000, 0xff800000); - status |= test__mulsf3(0xc1100001, 0xff000000, 0x7f800000); - status |= test__mulsf3(0xfe7ffff9, 0x7f000000, 0xff800000); - status |= test__mulsf3(0xfe7ffff9, 0xc07fffff, 0x7f7ffff8); - status |= test__mulsf3(0xfe7ffffd, 0x40800000, 0xff7ffffd); - status |= test__mulsf3(0xfe7ffffd, 0xc0800000, 0x7f7ffffd); - status |= test__mulsf3(0xfe7fffff, 0x00000000, 0x80000000); - status |= test__mulsf3(0xfe7fffff, 0x40000001, 0xff000000); - status |= test__mulsf3(0xfe7fffff, 0x7f800000, 0xff800000); - status |= test__mulsf3(0xfe800000, 0x00000000, 0x80000000); - status |= test__mulsf3(0xfe800000, 0x7f800000, 0xff800000); - status |= test__mulsf3(0xfefffff7, 0x7e800001, 0xff800000); - status |= test__mulsf3(0xfeffffff, 0x3f800001, 0xff000000); - status |= test__mulsf3(0xfeffffff, 0x80000000, 0x00000000); - status |= test__mulsf3(0xff000005, 0xff000001, 0x7f800000); - status |= test__mulsf3(0xff7ffffd, 0x7f000000, 0xff800000); - status |= test__mulsf3(0xff7ffffd, 0xc0400001, 0x7f800000); - status |= test__mulsf3(0xff7ffffd, 0xff000001, 0x7f800000); - status |= test__mulsf3(0xff7fffff, 0x80000000, 0x00000000); - status |= test__mulsf3(0xff7fffff, 0xff7fffff, 0x7f800000); - status |= test__mulsf3(0xff7fffff, 0xff800000, 0x7f800000); - status |= test__mulsf3(0xff800000, 0x40c00000, 0xff800000); - status |= test__mulsf3(0xff800000, 0x7f800000, 0xff800000); - status |= test__mulsf3(0xff800000, 0x80000004, 0x7f800000); - status |= test__mulsf3(0xff800000, 0x80800000, 0x7f800000); - status |= test__mulsf3(0xff800000, 0xc1000000, 0x7f800000); - status |= test__mulsf3(0xff800000, 0xfe800000, 0x7f800000); - status |= test__mulsf3(0xff800000, 0xff800000, 0x7f800000); - status |= test__mulsf3(0x3089705f, 0x0ef36390, 0x0041558f); - status |= test__mulsf3(0x3089705f, 0x0e936390, 0x0027907d); - status |= test__mulsf3(0x3109705f, 0x0ef36390, 0x0082ab1e); - status |= test__mulsf3(0x3109705f, 0x0e936390, 0x004f20fa); - status |= test__mulsf3(0x3189705f, 0x0ef36390, 0x0102ab1e); - status |= test__mulsf3(0x3189705f, 0x0e936390, 0x009e41f5); - status |= test__mulsf3(0xb089705f, 0x0ef36390, 0x8041558f); - status |= test__mulsf3(0xb089705f, 0x0e936390, 0x8027907d); - status |= test__mulsf3(0xb109705f, 0x0ef36390, 0x8082ab1e); - status |= test__mulsf3(0xb109705f, 0x0e936390, 0x804f20fa); - status |= test__mulsf3(0xb189705f, 0x0ef36390, 0x8102ab1e); - status |= test__mulsf3(0xb189705f, 0x0e936390, 0x809e41f5); - status |= test__mulsf3(0x3089705f, 0x8ef36390, 0x8041558f); - status |= test__mulsf3(0x3089705f, 0x8e936390, 0x8027907d); - status |= test__mulsf3(0x3109705f, 0x8ef36390, 0x8082ab1e); - status |= test__mulsf3(0x3109705f, 0x8e936390, 0x804f20fa); - status |= test__mulsf3(0x3189705f, 0x8ef36390, 0x8102ab1e); - status |= test__mulsf3(0x3189705f, 0x8e936390, 0x809e41f5); - status |= test__mulsf3(0xb089705f, 0x8ef36390, 0x0041558f); - status |= test__mulsf3(0xb089705f, 0x8e936390, 0x0027907d); - status |= test__mulsf3(0xb109705f, 0x8ef36390, 0x0082ab1e); - status |= test__mulsf3(0xb109705f, 0x8e936390, 0x004f20fa); - status |= test__mulsf3(0xb189705f, 0x8ef36390, 0x0102ab1e); - status |= test__mulsf3(0xb189705f, 0x8e936390, 0x009e41f5); - status |= test__mulsf3(0x1f800001, 0x1fc00000, 0x00300000); - status |= test__mulsf3(0x1f800003, 0x1fc00000, 0x00300001); - status |= test__mulsf3(0x1f800001, 0x1fc00800, 0x00300200); - status |= test__mulsf3(0x1f800003, 0x1fc00800, 0x00300201); - status |= test__mulsf3(0x36e4588a, 0x29b47cbd, 0x2120fd85); - status |= test__mulsf3(0x3fea3b26, 0x3f400000, 0x3fafac5c); - status |= test__mulsf3(0x6fea3b26, 0x4f400000, 0x7f800000); - status |= test__mulsf3(0x20ea3b26, 0x1ec00000, 0x0057d62e); - status |= test__mulsf3(0x3f8f11bb, 0x3fc00000, 0x3fd69a98); - status |= test__mulsf3(0x6f8f11bb, 0x4fc00000, 0x7f800000); - status |= test__mulsf3(0x208f11bb, 0x1f400000, 0x006b4d4c); - status |= test__mulsf3(0x3f8f11bb, 0x3f800000, 0x3f8f11bb); - status |= test__mulsf3(0x6f8f11bb, 0x4f800000, 0x7f800000); - status |= test__mulsf3(0x208f11bb, 0x1f000000, 0x004788de); - status |= test__mulsf3(0x3f8f11bb, 0x3fd7f48d, 0x3ff1611f); - status |= test__mulsf3(0x6f8f11bb, 0x4fd7f48d, 0x7f800000); - status |= test__mulsf3(0x208f11bb, 0x1f57f48d, 0x0078b090); - status |= test__mulsf3(0x3f8f11bb, 0x3fa80b73, 0x3fbbd412); - status |= test__mulsf3(0x6f8f11bb, 0x4fa80b73, 0x7f800000); - status |= test__mulsf3(0x208f11bb, 0x1f280b73, 0x005dea09); - status |= test__mulsf3(0x3f8f11bb, 0x3f97f48d, 0x3fa9d842); - status |= test__mulsf3(0x6f8f11bb, 0x4f97f48d, 0x7f800000); - status |= test__mulsf3(0x208f11bb, 0x1f17f48d, 0x0054ec21); - status |= test__mulsf3(0x3f8f11bb, 0x3f680b73, 0x3f81ae78); - status |= test__mulsf3(0x6f8f11bb, 0x4f680b73, 0x7f800000); - status |= test__mulsf3(0x208f11bb, 0x1ee80b73, 0x0040d73c); - status |= test__mulsf3(0x3fff5dd8, 0x3f600000, 0x3fdf721d); - status |= test__mulsf3(0x6fff5dd8, 0x4f600000, 0x7f800000); - status |= test__mulsf3(0x20ff5dd8, 0x1ee00000, 0x006fb90e); - status |= test__mulsf3(0x3fff5dd8, 0x3f100000, 0x3f8fa4ca); - status |= test__mulsf3(0x6fff5dd8, 0x4f100000, 0x7f800000); - status |= test__mulsf3(0x20ff5dd8, 0x1e900000, 0x0047d265); - status |= test__mulsf3(0x3fffe96b, 0x3f7efb43, 0x3ffee4c5); - status |= test__mulsf3(0x6fffe96b, 0x4f7efb43, 0x7f800000); - status |= test__mulsf3(0x20ffe96b, 0x1efefb43, 0x007f7263); - status |= test__mulsf3(0x3fffe96b, 0x3f0104bd, 0x3f80f95b); - status |= test__mulsf3(0x6fffe96b, 0x4f0104bd, 0x7f800000); - status |= test__mulsf3(0x20ffe96b, 0x1e8104bd, 0x00407cae); - status |= test__mulsf3(0x3f8fbbb7, 0x3fa6edf9, 0x3fbb72aa); - status |= test__mulsf3(0x6f8fbbb7, 0x4fa6edf9, 0x7f800000); - status |= test__mulsf3(0x208fbbb7, 0x1f26edf9, 0x005db955); - status |= test__mulsf3(0x3f8fbbb7, 0x3fd91207, 0x3ff3c07b); - status |= test__mulsf3(0x6f8fbbb7, 0x4fd91207, 0x7f800000); - status |= test__mulsf3(0x208fbbb7, 0x1f591207, 0x0079e03d); - status |= test__mulsf3(0x3f8fbbb7, 0x3f991207, 0x3fabe29f); - status |= test__mulsf3(0x6f8fbbb7, 0x4f991207, 0x7f800000); - status |= test__mulsf3(0x208fbbb7, 0x1f191207, 0x0055f150); - status |= test__mulsf3(0x3f8fbbb7, 0x3f66edf9, 0x3f81a843); - status |= test__mulsf3(0x6f8fbbb7, 0x4f66edf9, 0x7f800000); - status |= test__mulsf3(0x208fbbb7, 0x1ee6edf9, 0x0040d421); - status |= test__mulsf3(0x3fdb62f3, 0x3f7879c5, 0x3fd4f036); - status |= test__mulsf3(0x6fdb62f3, 0x4f7879c5, 0x7f800000); - status |= test__mulsf3(0x20db62f3, 0x1ef879c5, 0x006a781b); - status |= test__mulsf3(0x3faaea45, 0x3f8b6773, 0x3fba2489); - status |= test__mulsf3(0x6faaea45, 0x4f8b6773, 0x7f800000); - status |= test__mulsf3(0x20aaea45, 0x1f0b6773, 0x005d1244); - status |= test__mulsf3(0x3fafa7ec, 0x3f900000, 0x3fc59cea); - status |= test__mulsf3(0x6fafa7ec, 0x4f900000, 0x7f800000); - status |= test__mulsf3(0x20afa7ec, 0x1f100000, 0x0062ce75); - status |= test__mulsf3(0x3fcf8c8d, 0x3f271645, 0x3f8776be); - status |= test__mulsf3(0x6fcf8c8d, 0x4f271645, 0x7f800000); - status |= test__mulsf3(0x20cf8c8d, 0x1ea71645, 0x0043bb5f); - status |= test__mulsf3(0x3fc173ef, 0x3f901b0f, 0x3fd9cb52); - status |= test__mulsf3(0x6fc173ef, 0x4f901b0f, 0x7f800000); - status |= test__mulsf3(0x20c173ef, 0x1f101b0f, 0x006ce5a9); - status |= test__mulsf3(0x3fb48d33, 0x3f4a35fb, 0x3f8e9d7d); - status |= test__mulsf3(0x6fb48d33, 0x4f4a35fb, 0x7f800000); - status |= test__mulsf3(0x20b48d33, 0x1eca35fb, 0x00474ebe); - status |= test__mulsf3(0x3fc6f87b, 0x3f65d94d, 0x3fb2a52a); - status |= test__mulsf3(0x6fc6f87b, 0x4f65d94d, 0x7f800000); - status |= test__mulsf3(0x20c6f87b, 0x1ee5d94d, 0x00595295); - status |= test__mulsf3(0x3f860ae7, 0x3f969729, 0x3f9db312); - status |= test__mulsf3(0x6f860ae7, 0x4f969729, 0x7f800000); - status |= test__mulsf3(0x20860ae7, 0x1f169729, 0x004ed989); - status |= test__mulsf3(0x3f860ae7, 0x3fc00000, 0x3fc9105a); - status |= test__mulsf3(0x6f860ae7, 0x4fc00000, 0x7f800000); - status |= test__mulsf3(0x20860ae7, 0x1f400000, 0x0064882d); - status |= test__mulsf3(0x3f860ae7, 0x3fe968d7, 0x3ff46da3); - status |= test__mulsf3(0x6f860ae7, 0x4fe968d7, 0x7f800000); - status |= test__mulsf3(0x20860ae7, 0x1f6968d7, 0x007a36d1); - status |= test__mulsf3(0x3f860ae7, 0x3f800000, 0x3f860ae7); - status |= test__mulsf3(0x6f860ae7, 0x4f800000, 0x7f800000); - status |= test__mulsf3(0x20860ae7, 0x1f000000, 0x00430574); - status |= test__mulsf3(0x3f860ae7, 0x3fa968d7, 0x3fb1682f); - status |= test__mulsf3(0x6f860ae7, 0x4fa968d7, 0x7f800000); - status |= test__mulsf3(0x20860ae7, 0x1f2968d7, 0x0058b418); - status |= test__mulsf3(0x3f860ae7, 0x3fd69729, 0x3fe0b886); - status |= test__mulsf3(0x6f860ae7, 0x4fd69729, 0x7f800000); - status |= test__mulsf3(0x20860ae7, 0x1f569729, 0x00705c43); - status |= test__mulsf3(0x3f9aecdd, 0x3fb14b75, 0x3fd696de); - status |= test__mulsf3(0x6f9aecdd, 0x4fb14b75, 0x7f800000); - status |= test__mulsf3(0x209aecdd, 0x1f314b75, 0x006b4b6f); - status |= test__mulsf3(0x3f9aecdd, 0x3fceb48b, 0x3ffa2fb9); - status |= test__mulsf3(0x6f9aecdd, 0x4fceb48b, 0x7f800000); - status |= test__mulsf3(0x209aecdd, 0x1f4eb48b, 0x007d17dc); - status |= test__mulsf3(0x3f9aecdd, 0x3fc00000, 0x3fe8634c); - status |= test__mulsf3(0x6f9aecdd, 0x4fc00000, 0x7f800000); - status |= test__mulsf3(0x209aecdd, 0x1f400000, 0x007431a6); - status |= test__mulsf3(0x3fd65dc6, 0x3f400000, 0x3fa0c654); - status |= test__mulsf3(0x6fd65dc6, 0x4f400000, 0x7f800000); - status |= test__mulsf3(0x20d65dc6, 0x1ec00000, 0x0050632a); - status |= test__mulsf3(0x3feecf03, 0x3f5f93ab, 0x3fd09014); - status |= test__mulsf3(0x6feecf03, 0x4f5f93ab, 0x7f800000); - status |= test__mulsf3(0x20eecf03, 0x1edf93ab, 0x0068480a); - status |= test__mulsf3(0x3feecf03, 0x3f206c55, 0x3f95a670); - status |= test__mulsf3(0x6feecf03, 0x4f206c55, 0x7f800000); - status |= test__mulsf3(0x20eecf03, 0x1ea06c55, 0x004ad338); - status |= test__mulsf3(0x3f98feed, 0x3f60f11b, 0x3f866f27); - status |= test__mulsf3(0x6f98feed, 0x4f60f11b, 0x7f800000); - status |= test__mulsf3(0x2098feed, 0x1ee0f11b, 0x00433794); - status |= test__mulsf3(0x3f9a1b9d, 0x3f9c42b5, 0x3fbc21f8); - status |= test__mulsf3(0x6f9a1b9d, 0x4f9c42b5, 0x7f800000); - status |= test__mulsf3(0x209a1b9d, 0x1f1c42b5, 0x005e10fc); - status |= test__mulsf3(0x3f9a1b9d, 0x3f5c42b5, 0x3f8497e3); - status |= test__mulsf3(0x6f9a1b9d, 0x4f5c42b5, 0x7f800000); - status |= test__mulsf3(0x209a1b9d, 0x1edc42b5, 0x00424bf2); - status |= test__mulsf3(0x3f947044, 0x3f600000, 0x3f81e23c); - status |= test__mulsf3(0x6f947044, 0x4f600000, 0x7f800000); - status |= test__mulsf3(0x20947044, 0x1ee00000, 0x0040f11e); - status |= test__mulsf3(0x3fa3fb77, 0x3f6eb1b9, 0x3f98e5a0); - status |= test__mulsf3(0x6fa3fb77, 0x4f6eb1b9, 0x7f800000); - status |= test__mulsf3(0x20a3fb77, 0x1eeeb1b9, 0x004c72d0); - status |= test__mulsf3(0x3fb291df, 0x3f466a1f, 0x3f8a66d9); - status |= test__mulsf3(0x6fb291df, 0x4f466a1f, 0x7f800000); - status |= test__mulsf3(0x20b291df, 0x1ec66a1f, 0x0045336c); - status |= test__mulsf3(0x3fde13d5, 0x3f6b7283, 0x3fcc3f8b); - status |= test__mulsf3(0x6fde13d5, 0x4f6b7283, 0x7f800000); - status |= test__mulsf3(0x20de13d5, 0x1eeb7283, 0x00661fc5); - status |= test__mulsf3(0x3fd5b211, 0x3f80810f, 0x3fd68987); - status |= test__mulsf3(0x6fd5b211, 0x4f80810f, 0x7f800000); - status |= test__mulsf3(0x20d5b211, 0x1f00810f, 0x006b44c4); - status |= test__mulsf3(0x3fd5b211, 0x3f3f7ef1, 0x3f9fd9d2); - status |= test__mulsf3(0x6fd5b211, 0x4f3f7ef1, 0x7f800000); - status |= test__mulsf3(0x20d5b211, 0x1ebf7ef1, 0x004fece9); - status |= test__mulsf3(0x3fadfbc4, 0x3f400000, 0x3f827cd3); - status |= test__mulsf3(0x6fadfbc4, 0x4f400000, 0x7f800000); - status |= test__mulsf3(0x20adfbc4, 0x1ec00000, 0x00413e6a); - status |= test__mulsf3(0x3fd0ef03, 0x3f800000, 0x3fd0ef03); - status |= test__mulsf3(0x6fd0ef03, 0x4f800000, 0x7f800000); - status |= test__mulsf3(0x20d0ef03, 0x1f000000, 0x00687782); - status |= test__mulsf3(0x3fd0ef03, 0x3f8673ab, 0x3fdb7705); - status |= test__mulsf3(0x6fd0ef03, 0x4f8673ab, 0x7f800000); - status |= test__mulsf3(0x20d0ef03, 0x1f0673ab, 0x006dbb83); - status |= test__mulsf3(0x3fd0ef03, 0x3f798c55, 0x3fcbab02); - status |= test__mulsf3(0x6fd0ef03, 0x4f798c55, 0x7f800000); - status |= test__mulsf3(0x20d0ef03, 0x1ef98c55, 0x0065d581); - status |= test__mulsf3(0x3fdd1181, 0x3f8ad17f, 0x3fefc0b1); - status |= test__mulsf3(0x6fdd1181, 0x4f8ad17f, 0x7f800000); - status |= test__mulsf3(0x20dd1181, 0x1f0ad17f, 0x0077e058); - status |= test__mulsf3(0x3fdd1181, 0x3f752e81, 0x3fd3b9e9); - status |= test__mulsf3(0x6fdd1181, 0x4f752e81, 0x7f800000); - status |= test__mulsf3(0x20dd1181, 0x1ef52e81, 0x0069dcf5); - status |= test__mulsf3(0x3f92efc6, 0x3fa00000, 0x3fb7abb8); - status |= test__mulsf3(0x6f92efc6, 0x4fa00000, 0x7f800000); - status |= test__mulsf3(0x2092efc6, 0x1f200000, 0x005bd5dc); - status |= test__mulsf3(0x3fdcefe6, 0x3f400000, 0x3fa5b3ec); - status |= test__mulsf3(0x6fdcefe6, 0x4f400000, 0x7f800000); - status |= test__mulsf3(0x20dcefe6, 0x1ec00000, 0x0052d9f6); - status |= test__mulsf3(0x3fad6507, 0x3fa2f8b7, 0x3fdcc4c9); - status |= test__mulsf3(0x6fad6507, 0x4fa2f8b7, 0x7f800000); - status |= test__mulsf3(0x20ad6507, 0x1f22f8b7, 0x006e6264); - status |= test__mulsf3(0x3fad6507, 0x3f62f8b7, 0x3f99bba6); - status |= test__mulsf3(0x6fad6507, 0x4f62f8b7, 0x7f800000); - status |= test__mulsf3(0x20ad6507, 0x1ee2f8b7, 0x004cddd3); - status |= test__mulsf3(0x3fbfde6b, 0x3f8721bd, 0x3fca8f27); - status |= test__mulsf3(0x6fbfde6b, 0x4f8721bd, 0x7f800000); - status |= test__mulsf3(0x20bfde6b, 0x1f0721bd, 0x00654794); - status |= test__mulsf3(0x3fbfde6b, 0x3f4721bd, 0x3f953f2e); - status |= test__mulsf3(0x6fbfde6b, 0x4f4721bd, 0x7f800000); - status |= test__mulsf3(0x20bfde6b, 0x1ec721bd, 0x004a9f97); - status |= test__mulsf3(0x3ff40db4, 0x3f400000, 0x3fb70a47); - status |= test__mulsf3(0x6ff40db4, 0x4f400000, 0x7f800000); - status |= test__mulsf3(0x20f40db4, 0x1ec00000, 0x005b8524); - status |= test__mulsf3(0x3ff40db4, 0x3f600000, 0x3fd58bfe); - status |= test__mulsf3(0x6ff40db4, 0x4f600000, 0x7f800000); - status |= test__mulsf3(0x20f40db4, 0x1ee00000, 0x006ac5ff); - status |= test__mulsf3(0x3f9e20d3, 0x3f90c8a5, 0x3fb2dccc); - status |= test__mulsf3(0x6f9e20d3, 0x4f90c8a5, 0x7f800000); - status |= test__mulsf3(0x209e20d3, 0x1f10c8a5, 0x00596e66); - status |= test__mulsf3(0x3f9e20d3, 0x3fc00000, 0x3fed313c); - status |= test__mulsf3(0x6f9e20d3, 0x4fc00000, 0x7f800000); - status |= test__mulsf3(0x209e20d3, 0x1f400000, 0x0076989e); - status |= test__mulsf3(0x3f9e20d3, 0x3f50c8a5, 0x3f80f69b); - status |= test__mulsf3(0x6f9e20d3, 0x4f50c8a5, 0x7f800000); - status |= test__mulsf3(0x209e20d3, 0x1ed0c8a5, 0x00407b4d); - status |= test__mulsf3(0x3f82e641, 0x3f8fd63f, 0x3f931856); - status |= test__mulsf3(0x6f82e641, 0x4f8fd63f, 0x7f800000); - status |= test__mulsf3(0x2082e641, 0x1f0fd63f, 0x00498c2b); - status |= test__mulsf3(0x3f9a1901, 0x3f96e701, 0x3fb5ab68); - status |= test__mulsf3(0x6f9a1901, 0x4f96e701, 0x7f800000); - status |= test__mulsf3(0x209a1901, 0x1f16e701, 0x005ad5b4); - status |= test__mulsf3(0x3fa21aa1, 0x3f7c4961, 0x3f9fc0ae); - status |= test__mulsf3(0x6fa21aa1, 0x4f7c4961, 0x7f800000); - status |= test__mulsf3(0x20a21aa1, 0x1efc4961, 0x004fe057); - status |= test__mulsf3(0x3fcd0767, 0x3f782457, 0x3fc6bc47); - status |= test__mulsf3(0x6fcd0767, 0x4f782457, 0x7f800000); - status |= test__mulsf3(0x20cd0767, 0x1ef82457, 0x00635e23); - status |= test__mulsf3(0x3fb875e1, 0x3f968e21, 0x3fd8f6f6); - status |= test__mulsf3(0x6fb875e1, 0x4f968e21, 0x7f800000); - status |= test__mulsf3(0x20b875e1, 0x1f168e21, 0x006c7b7b); - status |= test__mulsf3(0x3fc2f0d7, 0x3f5efd19, 0x3fa9cd95); - status |= test__mulsf3(0x6fc2f0d7, 0x4f5efd19, 0x7f800000); - status |= test__mulsf3(0x20c2f0d7, 0x1edefd19, 0x0054e6cb); - status |= test__mulsf3(0x7f7ffffe, 0x3f800001, 0x7f800000); - status |= test__mulsf3(0x00000003, 0xc00fffff, 0x80000007); - status |= test__mulsf3(0x00000003, 0x400fffff, 0x00000007); - status |= test__mulsf3(0x80000003, 0xc00fffff, 0x00000007); - status |= test__mulsf3(0x80000003, 0x400fffff, 0x80000007); - status |= test__mulsf3(0x00000003, 0xc00ffffd, 0x80000007); - status |= test__mulsf3(0x00000003, 0x400ffffd, 0x00000007); - status |= test__mulsf3(0x80000003, 0xc00ffffd, 0x00000007); - status |= test__mulsf3(0x80000003, 0x400ffffd, 0x80000007); - status |= test__mulsf3(0x3e00007f, 0x017c0000, 0x003f003f); - status |= test__mulsf3(0xcf7fff00, 0xc0ffff00, 0x50fffe00); - status |= test__mulsf3(0x3fdf7f00, 0x3fffff00, 0x405f7e21); - status |= test__mulsf3(0x19b92144, 0x1a310000, 0x00000001); - status |= test__mulsf3(0x19ffc008, 0x1a002004, 0x00000001); - status |= test__mulsf3(0x7f7ffff0, 0xc0000008, 0xff800000); - - // Test that the result of an operation is a NaN at all when it should be. - // - // In most configurations these tests' results are checked compared using - // compareResultF, so we set all the answers to the canonical NaN 0x7fc00000, - // which causes compareResultF to accept any NaN encoding. We also use the - // same value as the input NaN in tests that have one, so that even in - // EXPECT_EXACT_RESULTS mode these tests should pass, because 0x7fc00000 is - // still the exact expected NaN. - status |= test__mulsf3(0x7f800000, 0x00000000, 0x7fc00000); - status |= test__mulsf3(0x7f800000, 0x80000000, 0x7fc00000); - status |= test__mulsf3(0x80000000, 0x7f800000, 0x7fc00000); - status |= test__mulsf3(0x80000000, 0xff800000, 0x7fc00000); - status |= test__mulsf3(0x3f800000, 0x7fc00000, 0x7fc00000); - status |= test__mulsf3(0x7fc00000, 0x3f800000, 0x7fc00000); - status |= test__mulsf3(0x7fc00000, 0x7fc00000, 0x7fc00000); - -#ifdef ARM_NAN_HANDLING - // Tests specific to the NaN handling of Arm hardware, mimicked by - // arm/mulsf3.S: - // - // - a quiet NaN is distinguished by the top mantissa bit being 1 - // - // - if a signalling NaN appears in the input, the output quiet NaN is - // obtained by setting its top mantissa bit and leaving everything else - // unchanged - // - // - if both operands are signalling NaNs then the output NaN is derived - // from the first operand - // - // - if both operands are quiet NaNs then the output NaN is the first - // operand - // - // - invalid operations not involving an input NaN return the quiet - // NaN with fewest bits set, 0x7fc00000. - - status |= test__mulsf3(0x00000000, 0x7fad4be3, 0x7fed4be3); - status |= test__mulsf3(0x00000000, 0x7fdf48c7, 0x7fdf48c7); - status |= test__mulsf3(0x00000001, 0x7f970eba, 0x7fd70eba); - status |= test__mulsf3(0x00000001, 0x7fc35716, 0x7fc35716); - status |= test__mulsf3(0x007fffff, 0x7fbf52d6, 0x7fff52d6); - status |= test__mulsf3(0x007fffff, 0x7fc7a2df, 0x7fc7a2df); - status |= test__mulsf3(0x3f800000, 0x7f987a85, 0x7fd87a85); - status |= test__mulsf3(0x3f800000, 0x7fc50124, 0x7fc50124); - status |= test__mulsf3(0x7f7fffff, 0x7f95fd6f, 0x7fd5fd6f); - status |= test__mulsf3(0x7f7fffff, 0x7ffc28dc, 0x7ffc28dc); - status |= test__mulsf3(0x7f800000, 0x00000000, 0x7fc00000); - status |= test__mulsf3(0x7f800000, 0x7f8dd790, 0x7fcdd790); - status |= test__mulsf3(0x7f800000, 0x7fd2ef2b, 0x7fd2ef2b); - status |= test__mulsf3(0x7f800000, 0x80000000, 0x7fc00000); - status |= test__mulsf3(0x7f99b09d, 0x00000000, 0x7fd9b09d); - status |= test__mulsf3(0x7f93541e, 0x00000001, 0x7fd3541e); - status |= test__mulsf3(0x7f9fc002, 0x007fffff, 0x7fdfc002); - status |= test__mulsf3(0x7fb5db77, 0x3f800000, 0x7ff5db77); - status |= test__mulsf3(0x7f9f5d92, 0x7f7fffff, 0x7fdf5d92); - status |= test__mulsf3(0x7fac7a36, 0x7f800000, 0x7fec7a36); - status |= test__mulsf3(0x7fb42008, 0x7fb0ee07, 0x7ff42008); - status |= test__mulsf3(0x7f8bd740, 0x7fc7aaf1, 0x7fcbd740); - status |= test__mulsf3(0x7f9bb57b, 0x80000000, 0x7fdbb57b); - status |= test__mulsf3(0x7f951a78, 0x80000001, 0x7fd51a78); - status |= test__mulsf3(0x7f9ba63b, 0x807fffff, 0x7fdba63b); - status |= test__mulsf3(0x7f89463c, 0xbf800000, 0x7fc9463c); - status |= test__mulsf3(0x7fb63563, 0xff7fffff, 0x7ff63563); - status |= test__mulsf3(0x7f90886e, 0xff800000, 0x7fd0886e); - status |= test__mulsf3(0x7fe8c15e, 0x00000000, 0x7fe8c15e); - status |= test__mulsf3(0x7fe915ae, 0x00000001, 0x7fe915ae); - status |= test__mulsf3(0x7ffa9b42, 0x007fffff, 0x7ffa9b42); - status |= test__mulsf3(0x7fdad0f5, 0x3f800000, 0x7fdad0f5); - status |= test__mulsf3(0x7fd10dcb, 0x7f7fffff, 0x7fd10dcb); - status |= test__mulsf3(0x7fd08e8a, 0x7f800000, 0x7fd08e8a); - status |= test__mulsf3(0x7fc3a9e6, 0x7f91a816, 0x7fd1a816); - status |= test__mulsf3(0x7fdb229c, 0x7fc26c68, 0x7fdb229c); - status |= test__mulsf3(0x7fc9f6bb, 0x80000000, 0x7fc9f6bb); - status |= test__mulsf3(0x7ffa178b, 0x80000001, 0x7ffa178b); - status |= test__mulsf3(0x7fef2a0b, 0x807fffff, 0x7fef2a0b); - status |= test__mulsf3(0x7ffc885b, 0xbf800000, 0x7ffc885b); - status |= test__mulsf3(0x7fd26e8c, 0xff7fffff, 0x7fd26e8c); - status |= test__mulsf3(0x7fc55329, 0xff800000, 0x7fc55329); - status |= test__mulsf3(0x80000000, 0x7f800000, 0x7fc00000); - status |= test__mulsf3(0x80000000, 0x7fa833ae, 0x7fe833ae); - status |= test__mulsf3(0x80000000, 0x7fc4df63, 0x7fc4df63); - status |= test__mulsf3(0x80000000, 0xff800000, 0x7fc00000); - status |= test__mulsf3(0x80000001, 0x7f98827d, 0x7fd8827d); - status |= test__mulsf3(0x80000001, 0x7fd7acc5, 0x7fd7acc5); - status |= test__mulsf3(0x807fffff, 0x7fad19c0, 0x7fed19c0); - status |= test__mulsf3(0x807fffff, 0x7ffe1907, 0x7ffe1907); - status |= test__mulsf3(0xbf800000, 0x7fa95487, 0x7fe95487); - status |= test__mulsf3(0xbf800000, 0x7fd2bbee, 0x7fd2bbee); - status |= test__mulsf3(0xff7fffff, 0x7f86ba21, 0x7fc6ba21); - status |= test__mulsf3(0xff7fffff, 0x7feb00d7, 0x7feb00d7); - status |= test__mulsf3(0xff800000, 0x7f857fdc, 0x7fc57fdc); - status |= test__mulsf3(0xff800000, 0x7fde0397, 0x7fde0397); -#endif // ARM_NAN_HANDLING - - return status; -}