Skip to content

Commit

Permalink
Reland "[builtins] Define fmax and scalbn inline"
Browse files Browse the repository at this point in the history
This reverts commit 680f836.

Disable the non-default-rounding-mode scalbn[f] tests when we're using
the MSVC libraries.

Differential Revision: https://reviews.llvm.org/D91841
  • Loading branch information
rprichard committed Feb 27, 2021
1 parent 155c49e commit d202201
Show file tree
Hide file tree
Showing 13 changed files with 505 additions and 34 deletions.
12 changes: 7 additions & 5 deletions compiler-rt/lib/builtins/divdc3.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,19 @@
COMPILER_RT_ABI Dcomplex __divdc3(double __a, double __b, double __c,
double __d) {
int __ilogbw = 0;
double __logbw = __compiler_rt_logb(crt_fmax(crt_fabs(__c), crt_fabs(__d)));
double __logbw = __compiler_rt_logb(__compiler_rt_fmax(crt_fabs(__c),
crt_fabs(__d)));
if (crt_isfinite(__logbw)) {
__ilogbw = (int)__logbw;
__c = crt_scalbn(__c, -__ilogbw);
__d = crt_scalbn(__d, -__ilogbw);
__c = __compiler_rt_scalbn(__c, -__ilogbw);
__d = __compiler_rt_scalbn(__d, -__ilogbw);
}
double __denom = __c * __c + __d * __d;
Dcomplex z;
COMPLEX_REAL(z) = crt_scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
COMPLEX_REAL(z) =
__compiler_rt_scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
COMPLEX_IMAGINARY(z) =
crt_scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
__compiler_rt_scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) {
if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) {
COMPLEX_REAL(z) = crt_copysign(CRT_INFINITY, __c) * __a;
Expand Down
11 changes: 6 additions & 5 deletions compiler-rt/lib/builtins/divsc3.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,18 @@
COMPILER_RT_ABI Fcomplex __divsc3(float __a, float __b, float __c, float __d) {
int __ilogbw = 0;
float __logbw =
__compiler_rt_logbf(crt_fmaxf(crt_fabsf(__c), crt_fabsf(__d)));
__compiler_rt_logbf(__compiler_rt_fmaxf(crt_fabsf(__c), crt_fabsf(__d)));
if (crt_isfinite(__logbw)) {
__ilogbw = (int)__logbw;
__c = crt_scalbnf(__c, -__ilogbw);
__d = crt_scalbnf(__d, -__ilogbw);
__c = __compiler_rt_scalbnf(__c, -__ilogbw);
__d = __compiler_rt_scalbnf(__d, -__ilogbw);
}
float __denom = __c * __c + __d * __d;
Fcomplex z;
COMPLEX_REAL(z) = crt_scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw);
COMPLEX_REAL(z) =
__compiler_rt_scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw);
COMPLEX_IMAGINARY(z) =
crt_scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw);
__compiler_rt_scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw);
if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) {
if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b))) {
COMPLEX_REAL(z) = crt_copysignf(CRT_INFINITY, __c) * __a;
Expand Down
11 changes: 6 additions & 5 deletions compiler-rt/lib/builtins/divtc3.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,18 @@ COMPILER_RT_ABI Lcomplex __divtc3(long double __a, long double __b,
long double __c, long double __d) {
int __ilogbw = 0;
long double __logbw =
__compiler_rt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d)));
__compiler_rt_logbl(__compiler_rt_fmaxl(crt_fabsl(__c), crt_fabsl(__d)));
if (crt_isfinite(__logbw)) {
__ilogbw = (int)__logbw;
__c = crt_scalbnl(__c, -__ilogbw);
__d = crt_scalbnl(__d, -__ilogbw);
__c = __compiler_rt_scalbnl(__c, -__ilogbw);
__d = __compiler_rt_scalbnl(__d, -__ilogbw);
}
long double __denom = __c * __c + __d * __d;
Lcomplex z;
COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
COMPLEX_REAL(z) =
__compiler_rt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
COMPLEX_IMAGINARY(z) =
crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
__compiler_rt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) {
if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) {
COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a;
Expand Down
95 changes: 93 additions & 2 deletions compiler-rt/lib/builtins/fp_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -299,28 +299,119 @@ static __inline fp_t __compiler_rt_logbX(fp_t x) {
return exp - exponentBias - shift; // Unbias exponent
}
}

// Generic scalbn: returns x scaled by 2**y, computed by adjusting the exponent
// field of x's bit representation instead of calling into libm.
//
// Avoid using scalbn from libm. Unlike libc/libm scalbn, this function never
// sets errno on underflow/overflow.
static __inline fp_t __compiler_rt_scalbnX(fp_t x, int y) {
const rep_t rep = toRep(x);
// Biased exponent field extracted from the representation.
int exp = (rep & exponentMask) >> significandBits;

if (x == 0.0 || exp == maxExponent)
return x; // +/- 0.0, NaN, or inf: return x

// Normalize subnormal input.
rep_t sig = rep & significandMask;
if (exp == 0) {
// NOTE(review): normalize() is defined outside this view; presumably it
// shifts sig until the implicit bit is set and returns the matching
// exponent adjustment -- confirm against its definition.
exp += normalize(&sig);
sig &= ~implicitBit; // clear the implicit bit again
}

// Apply the requested scale to the exponent, watching for int overflow.
if (__builtin_sadd_overflow(exp, y, &exp)) {
// Saturate the exponent, which will guarantee an underflow/overflow below.
exp = (y >= 0) ? INT_MAX : INT_MIN;
}

// Return this value: [+/-] 1.sig * 2 ** (exp - exponentBias).
const rep_t sign = rep & signBit;
if (exp >= maxExponent) {
// Overflow, which could produce infinity or the largest-magnitude value,
// depending on the rounding mode.
// The operand has exponent field maxExponent - 1 and a zero significand;
// the overflow itself is produced by the floating-point multiply by 2.
return fromRep(sign | ((rep_t)(maxExponent - 1) << significandBits)) * 2.0f;
} else if (exp <= 0) {
// Subnormal or underflow. Use floating-point multiply to handle truncation
// correctly.
// tmp keeps x's sign and significand but with an exponent field of 1.
fp_t tmp = fromRep(sign | (REP_C(1) << significandBits) | sig);
// Build the scale factor's exponent field, clamped so it is never below 1.
exp += exponentBias - 1;
if (exp < 1)
exp = 1;
tmp *= fromRep((rep_t)exp << significandBits);
return tmp;
} else
// In-range result: reassemble sign, new exponent field and significand.
return fromRep(sign | ((rep_t)exp << significandBits) | sig);
}

// Inline stand-in for fmax so that libm is not needed.
static __inline fp_t __compiler_rt_fmaxX(fp_t x, fp_t y) {
  // A NaN first argument always yields the second argument, so two NaNs
  // return y.
  if (crt_isnan(x))
    return y;
  // y wins only when it is strictly greater. A NaN y compares false and so
  // yields x, as do equal operands such as +0/-0.
  return (x < y) ? y : x;
}

#endif

#if defined(SINGLE_PRECISION)

// SINGLE_PRECISION name for the generic helper: forwards to
// __compiler_rt_logbX.
static __inline fp_t __compiler_rt_logbf(fp_t x) {
return __compiler_rt_logbX(x);
}
// SINGLE_PRECISION name for the generic helper: forwards x and the scale y to
// __compiler_rt_scalbnX.
static __inline fp_t __compiler_rt_scalbnf(fp_t x, int y) {
return __compiler_rt_scalbnX(x, y);
}
// SINGLE_PRECISION maximum of x and y, chosen per target at compile time.
static __inline fp_t __compiler_rt_fmaxf(fp_t x, fp_t y) {
#if !defined(__aarch64__)
  // __builtin_fmaxf frequently turns into a libm call, so use the inline
  // generic implementation instead.
  return __compiler_rt_fmaxX(x, y);
#else
  // On AArch64 __builtin_fmaxf turns into an fmaxnm instruction.
  return __builtin_fmaxf(x, y);
#endif
}

#elif defined(DOUBLE_PRECISION)

// DOUBLE_PRECISION name for the generic helper: forwards to
// __compiler_rt_logbX.
static __inline fp_t __compiler_rt_logb(fp_t x) {
return __compiler_rt_logbX(x);
}
// DOUBLE_PRECISION name for the generic helper: forwards x and the scale y to
// __compiler_rt_scalbnX.
static __inline fp_t __compiler_rt_scalbn(fp_t x, int y) {
return __compiler_rt_scalbnX(x, y);
}
// DOUBLE_PRECISION maximum of x and y, chosen per target at compile time.
static __inline fp_t __compiler_rt_fmax(fp_t x, fp_t y) {
#if !defined(__aarch64__)
  // __builtin_fmax frequently turns into a libm call, so use the inline
  // generic implementation instead.
  return __compiler_rt_fmaxX(x, y);
#else
  // On AArch64 __builtin_fmax turns into an fmaxnm instruction.
  return __builtin_fmax(x, y);
#endif
}

#elif defined(QUAD_PRECISION)

#if defined(CRT_LDBL_128BIT)
// QUAD_PRECISION name used when CRT_LDBL_128BIT is defined: forwards to the
// generic __compiler_rt_logbX.
static __inline fp_t __compiler_rt_logbl(fp_t x) {
return __compiler_rt_logbX(x);
}
// QUAD_PRECISION name used when CRT_LDBL_128BIT is defined: forwards x and the
// scale y to the generic __compiler_rt_scalbnX.
static __inline fp_t __compiler_rt_scalbnl(fp_t x, int y) {
return __compiler_rt_scalbnX(x, y);
}
// QUAD_PRECISION name used when CRT_LDBL_128BIT is defined: forwards to the
// generic __compiler_rt_fmaxX.
static __inline fp_t __compiler_rt_fmaxl(fp_t x, fp_t y) {
return __compiler_rt_fmaxX(x, y);
}
#else
// The generic implementation only works for ieee754 floating point. For other
// floating point types, continue to rely on the libm implementation for now.
// Fallback long double logb: forwards to crt_logbl instead of the generic
// bit-manipulating implementation.
static __inline long double __compiler_rt_logbl(long double x) {
return crt_logbl(x);
}
#endif
#endif
// Fallback long double scalbn: forwards x and the scale y to crt_scalbnl.
// NOTE(review): this appears to be the non-CRT_LDBL_128BIT variant, judging by
// the closing "#endif // CRT_LDBL_128BIT" -- confirm against the full header.
static __inline long double __compiler_rt_scalbnl(long double x, int y) {
return crt_scalbnl(x, y);
}
// Fallback long double fmax: forwards both operands to crt_fmaxl.
// NOTE(review): this appears to be the non-CRT_LDBL_128BIT variant, judging by
// the closing "#endif // CRT_LDBL_128BIT" -- confirm against the full header.
static __inline long double __compiler_rt_fmaxl(long double x, long double y) {
return crt_fmaxl(x, y);
}
#endif // CRT_LDBL_128BIT

#endif // *_PRECISION

#endif // FP_LIB_HEADER
13 changes: 13 additions & 0 deletions compiler-rt/lib/builtins/int_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,19 @@ int __inline __builtin_clzll(uint64_t value) {
#endif

#define __builtin_clzl __builtin_clzll

// Stand-in for the compiler builtin of the same name; this section is only
// compiled for MSVC without clang.
//
// Adds x and y. The wrapped (two's-complement) sum is always stored in
// *result, matching the real builtin, which stores the truncated result
// whether or not overflow occurred. Returns true if the exact sum does not
// fit in an int, false otherwise.
bool __inline __builtin_sadd_overflow(int x, int y, int *result) {
  // Add in unsigned arithmetic, where wraparound is well defined, then convert
  // back to int to obtain the wrapped signed sum.
  const int sum = (int)((unsigned int)x + (unsigned int)y);
  *result = sum;
  // Operands of opposite sign can never overflow. Operands of the same sign
  // overflowed exactly when the sum's sign differs from theirs.
  return ((x < 0) == (y < 0)) && ((sum < 0) != (x < 0));
}

#endif // defined(_MSC_VER) && !defined(__clang__)

#endif // INT_LIB_H
8 changes: 0 additions & 8 deletions compiler-rt/lib/builtins/int_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,8 @@
#endif

#if defined(_MSC_VER) && !defined(__clang__)
#define crt_fmax(x, y) __max((x), (y))
#define crt_fmaxf(x, y) __max((x), (y))
#define crt_fmaxl(x, y) __max((x), (y))
#else
#define crt_fmax(x, y) __builtin_fmax((x), (y))
#define crt_fmaxf(x, y) __builtin_fmaxf((x), (y))
#define crt_fmaxl(x, y) __builtin_fmaxl((x), (y))
#endif

Expand All @@ -94,12 +90,8 @@
#endif

#if defined(_MSC_VER) && !defined(__clang__)
#define crt_scalbn(x, y) scalbn((x), (y))
#define crt_scalbnf(x, y) scalbnf((x), (y))
#define crt_scalbnl(x, y) scalbnl((x), (y))
#else
#define crt_scalbn(x, y) __builtin_scalbn((x), (y))
#define crt_scalbnf(x, y) __builtin_scalbnf((x), (y))
#define crt_scalbnl(x, y) __builtin_scalbnl((x), (y))
#endif

Expand Down
19 changes: 10 additions & 9 deletions compiler-rt/lib/builtins/ppc/divtc3.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,16 @@ long double _Complex __divtc3(long double a, long double b, long double c,

int ilogbw = 0;
const double logbw =
__compiler_rt_logb(crt_fmax(crt_fabs(cDD.s.hi), crt_fabs(dDD.s.hi)));
__compiler_rt_logb(__compiler_rt_fmax(crt_fabs(cDD.s.hi),
crt_fabs(dDD.s.hi)));

if (crt_isfinite(logbw)) {
ilogbw = (int)logbw;

cDD.s.hi = crt_scalbn(cDD.s.hi, -ilogbw);
cDD.s.lo = crt_scalbn(cDD.s.lo, -ilogbw);
dDD.s.hi = crt_scalbn(dDD.s.hi, -ilogbw);
dDD.s.lo = crt_scalbn(dDD.s.lo, -ilogbw);
cDD.s.hi = __compiler_rt_scalbn(cDD.s.hi, -ilogbw);
cDD.s.lo = __compiler_rt_scalbn(cDD.s.lo, -ilogbw);
dDD.s.hi = __compiler_rt_scalbn(dDD.s.hi, -ilogbw);
dDD.s.lo = __compiler_rt_scalbn(dDD.s.lo, -ilogbw);
}

const long double denom =
Expand All @@ -48,10 +49,10 @@ long double _Complex __divtc3(long double a, long double b, long double c,
DD real = {.ld = __gcc_qdiv(realNumerator, denom)};
DD imag = {.ld = __gcc_qdiv(imagNumerator, denom)};

real.s.hi = crt_scalbn(real.s.hi, -ilogbw);
real.s.lo = crt_scalbn(real.s.lo, -ilogbw);
imag.s.hi = crt_scalbn(imag.s.hi, -ilogbw);
imag.s.lo = crt_scalbn(imag.s.lo, -ilogbw);
real.s.hi = __compiler_rt_scalbn(real.s.hi, -ilogbw);
real.s.lo = __compiler_rt_scalbn(real.s.lo, -ilogbw);
imag.s.hi = __compiler_rt_scalbn(imag.s.hi, -ilogbw);
imag.s.lo = __compiler_rt_scalbn(imag.s.lo, -ilogbw);

if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi)) {
DD aDD = {.ld = a};
Expand Down
41 changes: 41 additions & 0 deletions compiler-rt/test/builtins/Unit/compiler_rt_fmax_test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// RUN: %clang_builtins %s %librt -o %t && %run %t

#define DOUBLE_PRECISION
#include <fenv.h>
#include <math.h>
#include <stdio.h>
#include "fp_lib.h"

// Compares __compiler_rt_fmax(x, y) with libm's fmax(x, y).
// Returns 0 when the two agree and 1 (after printing a diagnostic) otherwise.
int test__compiler_rt_fmax(fp_t x, fp_t y) {
  const fp_t actual = __compiler_rt_fmax(x, y);
  const fp_t expected = fmax(x, y);

  // Equal values pass; this also treats +0 and -0 as the same result.
  if (actual == expected)
    return 0;
  // Two NaNs pass regardless of their sign or payload.
  if (crt_isnan(actual) && crt_isnan(expected))
    return 0;

  printf("error: in __compiler_rt_fmax(%a [%llX], %a [%llX]) = %a [%llX] "
         "!= %a [%llX]\n",
         x, (unsigned long long)toRep(x),
         y, (unsigned long long)toRep(y),
         actual, (unsigned long long)toRep(actual),
         expected, (unsigned long long)toRep(expected));
  return 1;
}

// Operand values for the test; main() feeds every ordered pair of entries to
// test__compiler_rt_fmax. Covers NaNs and infinities of both signs, signed
// zeros, small integers, subnormals, and a few nearby non-integer values.
fp_t cases[] = {
-NAN, NAN, -INFINITY, INFINITY, -0.0, 0.0, -1, 1, -2, 2,
-0x1.0p-1023, 0x1.0p-1023, -0x1.0p-1024, 0x1.0p-1024, // subnormals
-1.001, 1.001, -1.002, 1.002,
};

// Runs test__compiler_rt_fmax on every ordered pair drawn from cases[].
// Exits with 1 at the first mismatch, or 0 if every pair agrees.
int main() {
  const unsigned count = sizeof(cases) / sizeof(cases[0]);
  for (unsigned lhs = 0; lhs < count; ++lhs) {
    for (unsigned rhs = 0; rhs < count; ++rhs) {
      if (test__compiler_rt_fmax(cases[lhs], cases[rhs]) != 0)
        return 1;
    }
  }
  return 0;
}
39 changes: 39 additions & 0 deletions compiler-rt/test/builtins/Unit/compiler_rt_fmaxf_test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// RUN: %clang_builtins %s %librt -o %t && %run %t

#define SINGLE_PRECISION
#include <fenv.h>
#include <math.h>
#include <stdio.h>
#include "fp_lib.h"

// Compares __compiler_rt_fmaxf(x, y) with libm's fmaxf(x, y).
// Returns 0 when the two agree and 1 (after printing a diagnostic) otherwise.
int test__compiler_rt_fmaxf(fp_t x, fp_t y) {
  fp_t crt_value = __compiler_rt_fmaxf(x, y);
  fp_t libm_value = fmaxf(x, y);
  // Consider +0 and -0 equal, and also disregard the sign/payload of two NaNs.
  if (crt_value != libm_value &&
      !(crt_isnan(crt_value) && crt_isnan(libm_value))) {
    // %X requires an unsigned int argument. toRep's return type is declared
    // outside this file, so cast explicitly rather than relying on it being
    // unsigned int on every target.
    printf("error: in __compiler_rt_fmaxf(%a [%X], %a [%X]) = %a [%X] "
           "!= %a [%X]\n",
           x, (unsigned int)toRep(x),
           y, (unsigned int)toRep(y),
           crt_value, (unsigned int)toRep(crt_value),
           libm_value, (unsigned int)toRep(libm_value));
    return 1;
  }
  return 0;
}

// Operand values for the test; main() feeds every ordered pair of entries to
// test__compiler_rt_fmaxf. Covers NaNs and infinities of both signs, signed
// zeros, small integers, subnormals, and a few nearby non-integer values.
fp_t cases[] = {
-NAN, NAN, -INFINITY, INFINITY, -0.0, 0.0, -1, 1, -2, 2,
-0x1.0p-127, 0x1.0p-127, -0x1.0p-128, 0x1.0p-128, // subnormals
-1.001, 1.001, -1.002, 1.002,
};

// Runs test__compiler_rt_fmaxf on every ordered pair drawn from cases[].
// Exits with 1 at the first mismatch, or 0 if every pair agrees.
int main() {
  const unsigned count = sizeof(cases) / sizeof(cases[0]);
  for (unsigned lhs = 0; lhs < count; ++lhs) {
    for (unsigned rhs = 0; rhs < count; ++rhs) {
      if (test__compiler_rt_fmaxf(cases[lhs], cases[rhs]) != 0)
        return 1;
    }
  }
  return 0;
}
Loading

0 comments on commit d202201

Please sign in to comment.