86 changes: 42 additions & 44 deletions compiler-rt/lib/builtins/arm/sync-ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,50 +14,48 @@

#include "../assembly.h"

/*
 * SYNC_OP_4(op) - emit the Thumb function __sync_fetch_and_<op> operating on
 * a 32-bit memory location.
 *
 * r0 is used as the address on entry and is copied to r12 so that r0 can
 * receive the value loaded by ldrex; that loaded value is what remains in r0
 * when the function returns.  `op(r2, r0, r1)` must expand to instructions
 * that compute the value to store into r2 from the loaded value (r0) and r1.
 * The ldrex/strex sequence is retried until strex reports success (r3 == 0).
 * A dmb is issued before the loop and again after it.
 */
#define SYNC_OP_4(op) \
  .p2align 2; \
  .thumb; \
  .syntax unified; \
  DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_##op) \
  dmb; \
  mov r12, r0; \
  LOCAL_LABEL(tryatomic_##op): \
  ldrex r0, [r12]; \
  op(r2, r0, r1); \
  strex r3, r2, [r12]; \
  cmp r3, #0; \
  bne LOCAL_LABEL(tryatomic_##op); \
  dmb; \
  bx lr

/*
 * SYNC_OP_8(op) - emit the Thumb function __sync_fetch_and_<op> operating on
 * a 64-bit memory location.
 *
 * r0 is used as the address on entry and is copied to r12; ldrexd then loads
 * the current 64-bit value into r0/r1, which is what remains there on return.
 * `op(r4, r5, r0, r1, r2, r3)` must expand to instructions that compute the
 * value to store into r4/r5 from the loaded value (r0/r1) and r2/r3.
 * r4-r6 are saved on entry and restored on exit because the loop uses them
 * (r6 receives the strexd status).  The loop retries until strexd reports
 * success (r6 == 0); a dmb is issued before the loop and again after it.
 *
 * The mnemonic and the register list of push/pop are kept separated by a
 * space ("push {...}"), as in the hand-written form of this macro.
 */
#define SYNC_OP_8(op) \
  .p2align 2; \
  .thumb; \
  .syntax unified; \
  DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_##op) \
  push {r4, r5, r6, lr}; \
  dmb; \
  mov r12, r0; \
  LOCAL_LABEL(tryatomic_##op): \
  ldrexd r0, r1, [r12]; \
  op(r4, r5, r0, r1, r2, r3); \
  strexd r6, r4, r5, [r12]; \
  cmp r6, #0; \
  bne LOCAL_LABEL(tryatomic_##op); \
  dmb; \
  pop {r4, r5, r6, pc}

/*
 * MINMAX_4(rD, rN, rM, cmp_kind) - 32-bit conditional select.
 *
 * Compares rN with rM, sets rD to rM, and then overwrites rD with rN when
 * condition code `cmp_kind` holds for that comparison.  The plain `mov`
 * sits between the `cmp` and the IT block, so the selection relies on it
 * leaving the condition flags untouched.
 */
#define MINMAX_4(rD, rN, rM, cmp_kind) \
  cmp rN, rM; \
  mov rD, rM; \
  it cmp_kind; \
  mov##cmp_kind rD, rN

/*
 * MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, cmp_kind) - 64-bit
 * conditional select.
 *
 * Compares the register pair rN with the pair rM (cmp on the low words,
 * subtract-with-carry on the high words), sets rD to rM, and then overwrites
 * rD with rN when condition code `cmp_kind` holds for that comparison.
 *
 * The high-word subtraction is only needed for its flags.  Its result is
 * written to rD_HI, which is overwritten by the following mov anyway, so
 * that rN_HI still holds its original value when it is conditionally moved
 * into rD_HI.  The two-operand form `sbcs rN_HI, rM_HI` would store the
 * difference into rN_HI and corrupt the selected value.
 *
 * Requirement: rD_HI must be a different register from rN_LO, rN_HI, rM_LO
 * and rM_HI.
 * NOTE(review): confirm every user of this macro passes distinct
 * destination registers.
 */
#define MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, cmp_kind) \
  cmp rN_LO, rM_LO; \
  sbcs rD_HI, rN_HI, rM_HI; \
  mov rD_LO, rM_LO; \
  mov rD_HI, rM_HI; \
  itt cmp_kind; \
  mov##cmp_kind rD_LO, rN_LO; \
  mov##cmp_kind rD_HI, rN_HI
40 changes: 19 additions & 21 deletions compiler-rt/lib/builtins/ashldi3.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,26 @@

/* Precondition: 0 <= b < bits_in_dword */

/* Returns: a << b, computed one word at a time. */
COMPILER_RT_ABI di_int __ashldi3(di_int a, si_int b) {
  const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
  dwords input;
  dwords result;
  input.all = a;
  if (b & bits_in_word) {
    /* bits_in_word <= b < bits_in_dword: the low word moves entirely into
     * the high word and the low word becomes zero. */
    result.s.low = 0;
    result.s.high = input.s.low << (b - bits_in_word);
  } else {
    /* 0 <= b < bits_in_word */
    /* b == 0 is returned early; otherwise the right shift below would be by
     * a full word width. */
    if (b == 0)
      return a;
    result.s.low = input.s.low << b;
    /* Shift the high word as an unsigned value: left-shifting a negative
     * signed value is undefined behavior in C.
     * NOTE(review): s.high is presumably the signed half of dwords; the cast
     * changes nothing if it is already unsigned. */
    result.s.high =
        ((su_int)input.s.high << b) | (input.s.low >> (bits_in_word - b));
  }
  return result.all;
}

#if defined(__ARM_EABI__)
/* ARM EABI entry point, declared through COMPILER_RT_ALIAS(__ashldi3). */
AEABI_RTABI di_int __aeabi_llsl(di_int a, si_int b)
    COMPILER_RT_ALIAS(__ashldi3);
#endif
37 changes: 17 additions & 20 deletions compiler-rt/lib/builtins/ashlti3.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,26 +19,23 @@

/* Precondition: 0 <= b < bits_in_tword */

/* Returns: a << b, computed one double word at a time. */
COMPILER_RT_ABI ti_int __ashlti3(ti_int a, si_int b) {
  const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
  twords input;
  twords result;
  input.all = a;
  if (b & bits_in_dword) {
    /* bits_in_dword <= b < bits_in_tword: the low half moves entirely into
     * the high half and the low half becomes zero. */
    result.s.low = 0;
    result.s.high = input.s.low << (b - bits_in_dword);
  } else {
    /* 0 <= b < bits_in_dword */
    /* b == 0 is returned early; otherwise the right shift below would be by
     * a full double-word width. */
    if (b == 0)
      return a;
    result.s.low = input.s.low << b;
    /* Shift the high half as an unsigned value: left-shifting a negative
     * signed value is undefined behavior in C.
     * NOTE(review): s.high is presumably the signed half of twords; the cast
     * changes nothing if it is already unsigned. */
    result.s.high =
        ((du_int)input.s.high << b) | (input.s.low >> (bits_in_dword - b));
  }
  return result.all;
}

#endif /* CRT_HAS_128BIT */
42 changes: 20 additions & 22 deletions compiler-rt/lib/builtins/ashrdi3.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,29 +17,27 @@

/* Precondition: 0 <= b < bits_in_dword */

/* Returns: arithmetic a >> b, computed one word at a time. */
COMPILER_RT_ABI di_int __ashrdi3(di_int a, si_int b) {
  const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
  dwords input;
  dwords result;
  input.all = a;
  if (b & bits_in_word) {
    /* bits_in_word <= b < bits_in_dword */
    /* result.s.high = input.s.high < 0 ? -1 : 0 */
    result.s.high = input.s.high >> (bits_in_word - 1);
    result.s.low = input.s.high >> (b - bits_in_word);
  } else {
    /* 0 <= b < bits_in_word */
    /* b == 0 is returned early; otherwise the left shift below would be by
     * a full word width. */
    if (b == 0)
      return a;
    result.s.high = input.s.high >> b;
    /* The bits moving from the high word into the low word are shifted as an
     * unsigned value: left-shifting a negative signed value is undefined
     * behavior in C. */
    result.s.low =
        ((su_int)input.s.high << (bits_in_word - b)) | (input.s.low >> b);
  }
  return result.all;
}

#if defined(__ARM_EABI__)
/* ARM EABI entry point, declared through COMPILER_RT_ALIAS(__ashrdi3). */
AEABI_RTABI di_int __aeabi_lasr(di_int a, si_int b)
    COMPILER_RT_ALIAS(__ashrdi3);
#endif
39 changes: 18 additions & 21 deletions compiler-rt/lib/builtins/ashrti3.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,24 @@

/* Precondition: 0 <= b < bits_in_tword */

/* Returns: arithmetic a >> b, computed one double word at a time. */
COMPILER_RT_ABI ti_int __ashrti3(ti_int a, si_int b) {
  const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
  twords input;
  twords result;
  input.all = a;
  if (b & bits_in_dword) {
    /* bits_in_dword <= b < bits_in_tword */
    /* result.s.high = input.s.high < 0 ? -1 : 0 */
    result.s.high = input.s.high >> (bits_in_dword - 1);
    result.s.low = input.s.high >> (b - bits_in_dword);
  } else {
    /* 0 <= b < bits_in_dword */
    /* b == 0 is returned early; otherwise the left shift below would be by
     * a full double-word width. */
    if (b == 0)
      return a;
    result.s.high = input.s.high >> b;
    /* The bits moving from the high half into the low half are shifted as an
     * unsigned value: left-shifting a negative signed value is undefined
     * behavior in C. */
    result.s.low =
        ((du_int)input.s.high << (bits_in_dword - b)) | (input.s.low >> b);
  }
  return result.all;
}

#endif /* CRT_HAS_128BIT */
269 changes: 133 additions & 136 deletions compiler-rt/lib/builtins/atomic.c

Large diffs are not rendered by default.

9 changes: 4 additions & 5 deletions compiler-rt/lib/builtins/bswapsi2.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,8 @@
#include "int_lib.h"

/* Returns: u with the order of its four bytes reversed. */
COMPILER_RT_ABI uint32_t __bswapsi2(uint32_t u) {
  return ((((u)&0xff000000) >> 24) | /* byte 3 -> byte 0 */
          (((u)&0x00ff0000) >> 8) |  /* byte 2 -> byte 1 */
          (((u)&0x0000ff00) << 8) |  /* byte 1 -> byte 2 */
          (((u)&0x000000ff) << 24)); /* byte 0 -> byte 3 */
}
215 changes: 106 additions & 109 deletions compiler-rt/lib/builtins/clear_cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include <stddef.h>

#if __APPLE__
#include <libkern/OSCacheControl.h>
#include <libkern/OSCacheControl.h>
#endif

#if defined(_WIN32)
Expand All @@ -24,73 +24,71 @@ uintptr_t GetCurrentProcess(void);
#endif

#if defined(__FreeBSD__) && defined(__arm__)
#include <sys/types.h>
#include <machine/sysarch.h>
#include <machine/sysarch.h>
#include <sys/types.h>
#endif

#if defined(__NetBSD__) && defined(__arm__)
#include <machine/sysarch.h>
#include <machine/sysarch.h>
#endif

#if defined(__OpenBSD__) && defined(__mips__)
#include <sys/types.h>
#include <machine/sysarch.h>
#include <machine/sysarch.h>
#include <sys/types.h>
#endif

#if defined(__linux__) && defined(__mips__)
#include <sys/cachectl.h>
#include <sys/syscall.h>
#include <unistd.h>
#if defined(__ANDROID__) && defined(__LP64__)
/*
 * clear_mips_cache - Invalidates instruction cache for Mips.
 *
 * Walks the range [Addr, Addr + Size) issuing `synci` at every step of the
 * size read from hardware register $1, then issues `sync` and returns
 * through `jr.hb`.  Nothing is done to the caches when Size is zero or when
 * the reported step size is zero.
 *
 * Both operands are read-write ("+r"): the asm turns Size into the end
 * address and advances Addr through the range.
 */
static void clear_mips_cache(const void *Addr, size_t Size) {
  __asm__ volatile(
      ".set push\n"
      ".set noreorder\n"
      ".set noat\n"
      "beq %[Size], $zero, 20f\n"          /* If size == 0, branch around. */
      "nop\n"
      "daddu %[Size], %[Addr], %[Size]\n"  /* Calculate end address + 1 */
      "rdhwr $v0, $1\n"                    /* Get step size for SYNCI.
                                              $1 is $HW_SYNCI_Step */
      "beq $v0, $zero, 20f\n"              /* If no caches require
                                              synchronization, branch
                                              around. */
      "nop\n"
      "10:\n"
      "synci 0(%[Addr])\n"                 /* Synchronize all caches around
                                              address. */
      "daddu %[Addr], %[Addr], $v0\n"      /* Add step size. */
      "sltu $at, %[Addr], %[Size]\n"       /* Compare current with end
                                              address. */
      "bne $at, $zero, 10b\n"              /* Branch if more to do. */
      "nop\n"
      "sync\n"                             /* Clear memory hazards. */
      "20:\n"
      "bal 30f\n"
      "nop\n"
      "30:\n"
      "daddiu $ra, $ra, 12\n"              /* $ra has a value of $pc here.
                                              Add offset of 12 to point to the
                                              instruction after the last nop.
                                            */
      "jr.hb $ra\n"                        /* Return, clearing instruction
                                              hazards. */
      "nop\n"
      ".set pop\n"
      : [Addr] "+r"(Addr), [Size] "+r"(Size)
      :
      : "at", "ra", "v0", "memory");
}
#endif
#endif

/*
 * The compiler generates calls to __clear_cache() when creating
 * trampoline functions on the stack for use with nested functions.
 * It is expected to invalidate the instruction cache for the
 * specified range.
 */

Expand All @@ -101,56 +99,55 @@ void __clear_cache(void *start, void *end) {
* so there is nothing to do
*/
#elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__))
FlushInstructionCache(GetCurrentProcess(), start, end - start);
FlushInstructionCache(GetCurrentProcess(), start, end - start);
#elif defined(__arm__) && !defined(__APPLE__)
#if defined(__FreeBSD__) || defined(__NetBSD__)
struct arm_sync_icache_args arg;

arg.addr = (uintptr_t)start;
arg.len = (uintptr_t)end - (uintptr_t)start;

sysarch(ARM_SYNC_ICACHE, &arg);
#elif defined(__linux__)
/*
* We used to include asm/unistd.h for the __ARM_NR_cacheflush define, but
* it also brought many other unused defines, as well as a dependency on
* kernel headers to be installed.
*
* This value is stable at least since Linux 3.13 and should remain so for
* compatibility reasons, warranting its redefinition here.
*/
#define __ARM_NR_cacheflush 0x0f0002
register int start_reg __asm("r0") = (int) (intptr_t) start;
const register int end_reg __asm("r1") = (int) (intptr_t) end;
const register int flags __asm("r2") = 0;
const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush;
__asm __volatile("svc 0x0"
: "=r"(start_reg)
: "r"(syscall_nr), "r"(start_reg), "r"(end_reg),
"r"(flags));
assert(start_reg == 0 && "Cache flush syscall failed.");
#else
compilerrt_abort();
#endif
#if defined(__FreeBSD__) || defined(__NetBSD__)
struct arm_sync_icache_args arg;

arg.addr = (uintptr_t)start;
arg.len = (uintptr_t)end - (uintptr_t)start;

sysarch(ARM_SYNC_ICACHE, &arg);
#elif defined(__linux__)
/*
* We used to include asm/unistd.h for the __ARM_NR_cacheflush define, but
* it also brought many other unused defines, as well as a dependency on
* kernel headers to be installed.
*
* This value is stable at least since Linux 3.13 and should remain so for
* compatibility reasons, warranting its redefinition here.
*/
#define __ARM_NR_cacheflush 0x0f0002
register int start_reg __asm("r0") = (int)(intptr_t)start;
const register int end_reg __asm("r1") = (int)(intptr_t)end;
const register int flags __asm("r2") = 0;
const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush;
__asm __volatile("svc 0x0"
: "=r"(start_reg)
: "r"(syscall_nr), "r"(start_reg), "r"(end_reg), "r"(flags));
assert(start_reg == 0 && "Cache flush syscall failed.");
#else
compilerrt_abort();
#endif
#elif defined(__linux__) && defined(__mips__)
const uintptr_t start_int = (uintptr_t) start;
const uintptr_t end_int = (uintptr_t) end;
#if defined(__ANDROID__) && defined(__LP64__)
// Call synci implementation for short address range.
const uintptr_t address_range_limit = 256;
if ((end_int - start_int) <= address_range_limit) {
clear_mips_cache(start, (end_int - start_int));
} else {
syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE);
}
#else
syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE);
#endif
const uintptr_t start_int = (uintptr_t)start;
const uintptr_t end_int = (uintptr_t)end;
#if defined(__ANDROID__) && defined(__LP64__)
// Call synci implementation for short address range.
const uintptr_t address_range_limit = 256;
if ((end_int - start_int) <= address_range_limit) {
clear_mips_cache(start, (end_int - start_int));
} else {
syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE);
}
#else
syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE);
#endif
#elif defined(__mips__) && defined(__OpenBSD__)
cacheflush(start, (uintptr_t)end - (uintptr_t)start, BCACHE);
#elif defined(__aarch64__) && !defined(__APPLE__)
uint64_t xstart = (uint64_t)(uintptr_t) start;
uint64_t xend = (uint64_t)(uintptr_t) end;
uint64_t xstart = (uint64_t)(uintptr_t)start;
uint64_t xend = (uint64_t)(uintptr_t)end;
uint64_t addr;

// Get Cache Type Info
Expand All @@ -164,15 +161,15 @@ void __clear_cache(void *start, void *end) {
const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15);
for (addr = xstart & ~(dcache_line_size - 1); addr < xend;
addr += dcache_line_size)
__asm __volatile("dc cvau, %0" :: "r"(addr));
__asm __volatile("dc cvau, %0" ::"r"(addr));
__asm __volatile("dsb ish");

const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15);
for (addr = xstart & ~(icache_line_size - 1); addr < xend;
addr += icache_line_size)
__asm __volatile("ic ivau, %0" :: "r"(addr));
__asm __volatile("ic ivau, %0" ::"r"(addr));
__asm __volatile("isb sy");
#elif defined (__powerpc64__)
#elif defined(__powerpc64__)
const size_t line_size = 32;
const size_t len = (uintptr_t)end - (uintptr_t)start;

Expand All @@ -188,11 +185,11 @@ void __clear_cache(void *start, void *end) {
__asm__ volatile("icbi 0, %0" : : "r"(line));
__asm__ volatile("isync");
#else
#if __APPLE__
/* On Darwin, sys_icache_invalidate() provides this functionality */
sys_icache_invalidate(start, end-start);
#else
compilerrt_abort();
#endif
#if __APPLE__
/* On Darwin, sys_icache_invalidate() provides this functionality */
sys_icache_invalidate(start, end - start);
#else
compilerrt_abort();
#endif
#endif
}
17 changes: 7 additions & 10 deletions compiler-rt/lib/builtins/clzdi2.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
/* Returns: the number of leading 0-bits */

#if !defined(__clang__) && \
((defined(__sparc__) && defined(__arch64__)) || \
defined(__mips64) || \
((defined(__sparc__) && defined(__arch64__)) || defined(__mips64) || \
(defined(__riscv) && __SIZEOF_POINTER__ >= 8))
/* On 64-bit architectures with neither a native clz instruction nor a native
* ctz instruction, gcc resolves __builtin_clz to __clzdi2 rather than
Expand All @@ -28,12 +27,10 @@ extern si_int __clzsi2(si_int);

/* Precondition: a != 0 */

/* Returns: the number of leading 0-bits of the two-word value a. */
COMPILER_RT_ABI si_int __clzdi2(di_int a) {
  dwords x;
  x.all = a;
  /* f is all ones when the high word is zero, and 0 otherwise. */
  const si_int f = -(x.s.high == 0);
  /* Branch-free select: count in the low word (plus one full word of
   * leading zeros) when the high word is zero, else count in the high
   * word. */
  return __builtin_clz((x.s.high & ~f) | (x.s.low & f)) +
         (f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
}
62 changes: 30 additions & 32 deletions compiler-rt/lib/builtins/clzsi2.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,36 +17,34 @@

/* Precondition: a != 0 */

/*
 * Returns: the number of leading 0-bits of a.
 *
 * Branch-free binary search: each step tests whether the upper half of the
 * remaining bit range is empty, adds the width of that half to the running
 * count r if so, and otherwise shifts the upper half down into the lower
 * half.
 */
COMPILER_RT_ABI si_int __clzsi2(si_int a) {
  su_int x = (su_int)a;
  si_int t = ((x & 0xFFFF0000) == 0) << 4; /* if (x is small) t = 16 else 0 */
  x >>= 16 - t;                            /* x = [0 - 0xFFFF] */
  su_int r = t;                            /* r = [0, 16] */
  /* return r + clz(x) */
  t = ((x & 0xFF00) == 0) << 3;
  x >>= 8 - t; /* x = [0 - 0xFF] */
  r += t;      /* r = [0, 8, 16, 24] */
  /* return r + clz(x) */
  t = ((x & 0xF0) == 0) << 2;
  x >>= 4 - t; /* x = [0 - 0xF] */
  r += t;      /* r = [0, 4, 8, 12, 16, 20, 24, 28] */
  /* return r + clz(x) */
  t = ((x & 0xC) == 0) << 1;
  x >>= 2 - t; /* x = [0 - 3] */
  r += t;      /* r = [0 - 30] and is even */
  /* return r + clz(x) */
  /*     switch (x)
   *     {
   *     case 0:
   *         return r + 2;
   *     case 1:
   *         return r + 1;
   *     case 2:
   *     case 3:
   *         return r;
   *     }
   */
  return r + ((2 - x) & -((x & 2) == 0));
}
14 changes: 6 additions & 8 deletions compiler-rt/lib/builtins/clzti2.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,12 @@

/* Precondition: a != 0 */

/* Returns: the number of leading 0-bits of the two-double-word value a. */
COMPILER_RT_ABI si_int __clzti2(ti_int a) {
  twords x;
  x.all = a;
  /* f is all ones when the high half is zero, and 0 otherwise. */
  const di_int f = -(x.s.high == 0);
  /* Branch-free select: count in the low half (plus one full double word of
   * leading zeros) when the high half is zero, else count in the high
   * half. */
  return __builtin_clzll((x.s.high & ~f) | (x.s.low & f)) +
         ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT)));
}

#endif /* CRT_HAS_128BIT */
49 changes: 22 additions & 27 deletions compiler-rt/lib/builtins/cmpdi2.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,37 +14,32 @@
#include "int_lib.h"

/* Returns: if (a <  b) returns 0
 *          if (a == b) returns 1
 *          if (a >  b) returns 2
 */

/*
 * Three-way comparison of a and b: returns 0, 1 or 2 for a < b, a == b and
 * a > b respectively.  The high words are compared first; the low words
 * only decide the result when the high words are equal.
 */
COMPILER_RT_ABI si_int __cmpdi2(di_int a, di_int b) {
  dwords x;
  x.all = a;
  dwords y;
  y.all = b;
  if (x.s.high < y.s.high)
    return 0;
  if (x.s.high > y.s.high)
    return 2;
  if (x.s.low < y.s.low)
    return 0;
  if (x.s.low > y.s.low)
    return 2;
  return 1;
}

#ifdef __ARM_EABI__
/* Returns: if (a <  b) returns -1
 *          if (a == b) returns 0
 *          if (a >  b) returns 1
 *
 * Obtained by shifting the 0/1/2 result of __cmpdi2 down by one.
 */
COMPILER_RT_ABI si_int __aeabi_lcmp(di_int a, di_int b) {
  return __cmpdi2(a, b) - 1;
}
#endif

30 changes: 14 additions & 16 deletions compiler-rt/lib/builtins/cmpti2.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,20 @@
* if (a > b) returns 2
*/

/*
 * Three-way comparison of a and b: returns 0, 1 or 2 for a < b, a == b and
 * a > b respectively.  The high halves are compared first; the low halves
 * only decide the result when the high halves are equal.
 */
COMPILER_RT_ABI si_int __cmpti2(ti_int a, ti_int b) {
  twords x;
  x.all = a;
  twords y;
  y.all = b;
  if (x.s.high < y.s.high)
    return 0;
  if (x.s.high > y.s.high)
    return 2;
  if (x.s.low < y.s.low)
    return 0;
  if (x.s.low > y.s.low)
    return 2;
  return 1;
}

#endif /* CRT_HAS_128BIT */
162 changes: 78 additions & 84 deletions compiler-rt/lib/builtins/comparedf2.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,44 +39,46 @@
#define DOUBLE_PRECISION
#include "fp_lib.h"

/* Result codes of __ledf2.  LE_UNORDERED deliberately shares the value of
 * LE_GREATER. */
enum LE_RESULT { LE_LESS = -1, LE_EQUAL = 0, LE_GREATER = 1, LE_UNORDERED = 1 };

/*
 * Compares a and b using only integer operations on their representations.
 *
 * Returns LE_LESS, LE_EQUAL or LE_GREATER for a < b, a == b and a > b, and
 * LE_UNORDERED when either operand is NaN.  Two zeros compare equal
 * regardless of sign.
 */
COMPILER_RT_ABI enum LE_RESULT __ledf2(fp_t a, fp_t b) {

  const srep_t aInt = toRep(a);
  const srep_t bInt = toRep(b);
  const rep_t aAbs = aInt & absMask;
  const rep_t bAbs = bInt & absMask;

  // If either a or b is NaN, they are unordered.
  if (aAbs > infRep || bAbs > infRep)
    return LE_UNORDERED;

  // If a and b are both zeros, they are equal.
  if ((aAbs | bAbs) == 0)
    return LE_EQUAL;

  // If at least one of a and b is positive, we get the same result comparing
  // a and b as signed integers as we would with a floating-point compare.
  if ((aInt & bInt) >= 0) {
    if (aInt < bInt)
      return LE_LESS;
    else if (aInt == bInt)
      return LE_EQUAL;
    else
      return LE_GREATER;
  }

  // Otherwise, both are negative, so we need to flip the sense of the
  // comparison to get the correct result.  (This assumes a twos- or ones-
  // complement integer representation; if integers are represented in a
  // sign-magnitude representation, then this flip is incorrect).
  else {
    if (aInt > bInt)
      return LE_LESS;
    else if (aInt == bInt)
      return LE_EQUAL;
    else
      return LE_GREATER;
  }
}

#if defined(__ELF__)
Expand All @@ -85,67 +87,59 @@ FNALIAS(__cmpdf2, __ledf2);
#endif

/* Result codes of __gedf2.  GE_UNORDERED deliberately shares the value of
 * GE_LESS. */
enum GE_RESULT {
  GE_LESS = -1,
  GE_EQUAL = 0,
  GE_GREATER = 1,
  GE_UNORDERED = -1 // Note: different from LE_UNORDERED
};

/*
 * Compares a and b using only integer operations on their representations.
 *
 * Returns GE_LESS, GE_EQUAL or GE_GREATER for a < b, a == b and a > b, and
 * GE_UNORDERED when either operand is NaN.  Two zeros compare equal
 * regardless of sign.
 */
COMPILER_RT_ABI enum GE_RESULT __gedf2(fp_t a, fp_t b) {

  const srep_t aInt = toRep(a);
  const srep_t bInt = toRep(b);
  const rep_t aAbs = aInt & absMask;
  const rep_t bAbs = bInt & absMask;

  // NaN operand: unordered.
  if (aAbs > infRep || bAbs > infRep)
    return GE_UNORDERED;
  // Both operands are zeros (of either sign): equal.
  if ((aAbs | bAbs) == 0)
    return GE_EQUAL;
  if ((aInt & bInt) >= 0) {
    // At least one operand is non-negative: signed integer order applies.
    if (aInt < bInt)
      return GE_LESS;
    else if (aInt == bInt)
      return GE_EQUAL;
    else
      return GE_GREATER;
  } else {
    // Both operands are negative: the integer order is reversed.
    if (aInt > bInt)
      return GE_LESS;
    else if (aInt == bInt)
      return GE_EQUAL;
    else
      return GE_GREATER;
  }
}

/* Returns nonzero when a or b is NaN, i.e. when the magnitude bits of its
 * representation are greater than infRep; returns 0 otherwise. */
COMPILER_RT_ABI int __unorddf2(fp_t a, fp_t b) {
  const rep_t aAbs = toRep(a) & absMask;
  const rep_t bAbs = toRep(b) & absMask;
  return aAbs > infRep || bAbs > infRep;
}

// The following are alternative names for the preceding routines.

/* Alternative name for __ledf2: forwards both operands and returns its
 * result unchanged. */
COMPILER_RT_ABI enum LE_RESULT __eqdf2(fp_t a, fp_t b) { return __ledf2(a, b); }

/* Alternative name for __ledf2: forwards both operands and returns its
 * result unchanged. */
COMPILER_RT_ABI enum LE_RESULT __ltdf2(fp_t a, fp_t b) { return __ledf2(a, b); }

/* Alternative name for __ledf2: forwards both operands and returns its
 * result unchanged. */
COMPILER_RT_ABI enum LE_RESULT __nedf2(fp_t a, fp_t b) { return __ledf2(a, b); }

/* Alternative name for __gedf2: forwards both operands and returns its
 * result unchanged. */
COMPILER_RT_ABI enum GE_RESULT __gtdf2(fp_t a, fp_t b) { return __gedf2(a, b); }

#if defined(__ARM_EABI__)
#if defined(COMPILER_RT_ARMHF_TARGET)
/* Variant used when COMPILER_RT_ARMHF_TARGET is defined: a real function
 * that forwards to __unorddf2 instead of an alias declaration. */
AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) { return __unorddf2(a, b); }
#else
AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) COMPILER_RT_ALIAS(__unorddf2);
#endif
Expand Down
162 changes: 78 additions & 84 deletions compiler-rt/lib/builtins/comparesf2.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,44 +39,46 @@
#define SINGLE_PRECISION
#include "fp_lib.h"

/* Result codes of __lesf2.  LE_UNORDERED deliberately shares the value of
 * LE_GREATER. */
enum LE_RESULT { LE_LESS = -1, LE_EQUAL = 0, LE_GREATER = 1, LE_UNORDERED = 1 };

/*
 * Compares a and b using only integer operations on their representations.
 *
 * Returns LE_LESS, LE_EQUAL or LE_GREATER for a < b, a == b and a > b, and
 * LE_UNORDERED when either operand is NaN.  Two zeros compare equal
 * regardless of sign.
 */
COMPILER_RT_ABI enum LE_RESULT __lesf2(fp_t a, fp_t b) {

  const srep_t aInt = toRep(a);
  const srep_t bInt = toRep(b);
  const rep_t aAbs = aInt & absMask;
  const rep_t bAbs = bInt & absMask;

  // If either a or b is NaN, they are unordered.
  if (aAbs > infRep || bAbs > infRep)
    return LE_UNORDERED;

  // If a and b are both zeros, they are equal.
  if ((aAbs | bAbs) == 0)
    return LE_EQUAL;

  // If at least one of a and b is positive, we get the same result comparing
  // a and b as signed integers as we would with a floating-point compare.
  if ((aInt & bInt) >= 0) {
    if (aInt < bInt)
      return LE_LESS;
    else if (aInt == bInt)
      return LE_EQUAL;
    else
      return LE_GREATER;
  }

  // Otherwise, both are negative, so we need to flip the sense of the
  // comparison to get the correct result.  (This assumes a twos- or ones-
  // complement integer representation; if integers are represented in a
  // sign-magnitude representation, then this flip is incorrect).
  else {
    if (aInt > bInt)
      return LE_LESS;
    else if (aInt == bInt)
      return LE_EQUAL;
    else
      return LE_GREATER;
  }
}

#if defined(__ELF__)
Expand All @@ -85,67 +87,59 @@ FNALIAS(__cmpsf2, __lesf2);
#endif

/* Result codes of __gesf2.  GE_UNORDERED deliberately shares the value of
 * GE_LESS. */
enum GE_RESULT {
  GE_LESS = -1,
  GE_EQUAL = 0,
  GE_GREATER = 1,
  GE_UNORDERED = -1 // Note: different from LE_UNORDERED
};

/*
 * Compares a and b using only integer operations on their representations.
 *
 * Returns GE_LESS, GE_EQUAL or GE_GREATER for a < b, a == b and a > b, and
 * GE_UNORDERED when either operand is NaN.  Two zeros compare equal
 * regardless of sign.
 */
COMPILER_RT_ABI enum GE_RESULT __gesf2(fp_t a, fp_t b) {

  const srep_t aInt = toRep(a);
  const srep_t bInt = toRep(b);
  const rep_t aAbs = aInt & absMask;
  const rep_t bAbs = bInt & absMask;

  // NaN operand: unordered.
  if (aAbs > infRep || bAbs > infRep)
    return GE_UNORDERED;
  // Both operands are zeros (of either sign): equal.
  if ((aAbs | bAbs) == 0)
    return GE_EQUAL;
  if ((aInt & bInt) >= 0) {
    // At least one operand is non-negative: signed integer order applies.
    if (aInt < bInt)
      return GE_LESS;
    else if (aInt == bInt)
      return GE_EQUAL;
    else
      return GE_GREATER;
  } else {
    // Both operands are negative: the integer order is reversed.
    if (aInt > bInt)
      return GE_LESS;
    else if (aInt == bInt)
      return GE_EQUAL;
    else
      return GE_GREATER;
  }
}

/* Returns nonzero when a or b is NaN, i.e. when the magnitude bits of its
 * representation are greater than infRep; returns 0 otherwise. */
COMPILER_RT_ABI int __unordsf2(fp_t a, fp_t b) {
  const rep_t aAbs = toRep(a) & absMask;
  const rep_t bAbs = toRep(b) & absMask;
  return aAbs > infRep || bAbs > infRep;
}

// The following are alternative names for the preceding routines.

/* Alternative name for __lesf2: forwards both operands and returns its
 * result unchanged. */
COMPILER_RT_ABI enum LE_RESULT __eqsf2(fp_t a, fp_t b) { return __lesf2(a, b); }

/* Alternative name for __lesf2: forwards both operands and returns its
 * result unchanged. */
COMPILER_RT_ABI enum LE_RESULT __ltsf2(fp_t a, fp_t b) { return __lesf2(a, b); }

/* Alternative name for __lesf2: forwards both operands and returns its
 * result unchanged. */
COMPILER_RT_ABI enum LE_RESULT __nesf2(fp_t a, fp_t b) { return __lesf2(a, b); }

/* Alternative name for __gesf2: forwards both operands and returns its
 * result unchanged. */
COMPILER_RT_ABI enum GE_RESULT __gtsf2(fp_t a, fp_t b) { return __gesf2(a, b); }

#if defined(__ARM_EABI__)
#if defined(COMPILER_RT_ARMHF_TARGET)
// Variant used when COMPILER_RT_ARMHF_TARGET is defined: a real function that
// forwards to __unordsf2 (the other branch uses an alias instead).
AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) { return __unordsf2(a, b); }
#else
AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) COMPILER_RT_ALIAS(__unordsf2);
#endif
Expand Down
138 changes: 70 additions & 68 deletions compiler-rt/lib/builtins/comparetf2.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,42 +40,44 @@
#include "fp_lib.h"

#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
// Result codes for __letf2. LE_UNORDERED deliberately shares the value of
// LE_GREATER (1).
enum LE_RESULT { LE_LESS = -1, LE_EQUAL = 0, LE_GREATER = 1, LE_UNORDERED = 1 };

// Three-way comparison of a and b, returning LE_UNORDERED when either operand
// is a NaN.
COMPILER_RT_ABI enum LE_RESULT __letf2(fp_t a, fp_t b) {

  const srep_t aInt = toRep(a);
  const srep_t bInt = toRep(b);
  const rep_t aAbs = aInt & absMask;
  const rep_t bAbs = bInt & absMask;

  // If either a or b is NaN, they are unordered.
  if (aAbs > infRep || bAbs > infRep)
    return LE_UNORDERED;

  // If a and b are both zeros, they are equal.
  if ((aAbs | bAbs) == 0)
    return LE_EQUAL;

  // If at least one of a and b is positive, we get the same result comparing
  // a and b as signed integers as we would with a floating-point compare.
  if ((aInt & bInt) >= 0) {
    if (aInt < bInt)
      return LE_LESS;
    else if (aInt == bInt)
      return LE_EQUAL;
    else
      return LE_GREATER;
  } else {
    // Otherwise, both are negative, so we need to flip the sense of the
    // comparison to get the correct result.  (This assumes a twos- or ones-
    // complement integer representation; if integers are represented in a
    // sign-magnitude representation, then this flip is incorrect).
    if (aInt > bInt)
      return LE_LESS;
    else if (aInt == bInt)
      return LE_EQUAL;
    else
      return LE_GREATER;
  }
}

#if defined(__ELF__)
Expand All @@ -84,54 +86,54 @@ FNALIAS(__cmptf2, __letf2);
#endif

// Result codes for __getf2.
enum GE_RESULT {
  GE_LESS = -1,
  GE_EQUAL = 0,
  GE_GREATER = 1,
  GE_UNORDERED = -1 // Note: different from LE_UNORDERED
};

// Three-way comparison of a and b, returning GE_UNORDERED when either operand
// is a NaN.
COMPILER_RT_ABI enum GE_RESULT __getf2(fp_t a, fp_t b) {

  const srep_t aInt = toRep(a);
  const srep_t bInt = toRep(b);
  const rep_t aAbs = aInt & absMask;
  const rep_t bAbs = bInt & absMask;

  // A magnitude above the infinity encoding means NaN.
  if (aAbs > infRep || bAbs > infRep)
    return GE_UNORDERED;
  // Both operands are zero (of either sign).
  if ((aAbs | bAbs) == 0)
    return GE_EQUAL;
  if ((aInt & bInt) >= 0) {
    // Signed integer ordering of the encodings applies directly.
    if (aInt < bInt)
      return GE_LESS;
    else if (aInt == bInt)
      return GE_EQUAL;
    else
      return GE_GREATER;
  } else {
    // Both encodings have the sign bit set: the ordering is reversed.
    if (aInt > bInt)
      return GE_LESS;
    else if (aInt == bInt)
      return GE_EQUAL;
    else
      return GE_GREATER;
  }
}

// Returns nonzero when the magnitude bits of either operand exceed the
// infinity encoding (i.e. the operand is a NaN), zero otherwise.
COMPILER_RT_ABI int __unordtf2(fp_t a, fp_t b) {
  const rep_t aAbs = toRep(a) & absMask;
  const rep_t bAbs = toRep(b) & absMask;
  return aAbs > infRep || bAbs > infRep;
}

// The following are alternative names for the preceding routines.

// Alternative name: forwards directly to __letf2.
COMPILER_RT_ABI enum LE_RESULT __eqtf2(fp_t a, fp_t b) { return __letf2(a, b); }

// Alternative name: forwards directly to __letf2.
COMPILER_RT_ABI enum LE_RESULT __lttf2(fp_t a, fp_t b) { return __letf2(a, b); }

// Alternative name: forwards directly to __letf2.
COMPILER_RT_ABI enum LE_RESULT __netf2(fp_t a, fp_t b) { return __letf2(a, b); }

// Alternative name: forwards directly to __getf2.
COMPILER_RT_ABI enum GE_RESULT __gttf2(fp_t a, fp_t b) { return __getf2(a, b); }

#endif
46 changes: 22 additions & 24 deletions compiler-rt/lib/builtins/cpu_model.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//

#if (defined(__i386__) || defined(_M_IX86) || \
defined(__x86_64__) || defined(_M_X64)) && \
#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
defined(_M_X64)) && \
(defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))

#include <assert.h>
Expand Down Expand Up @@ -267,11 +267,11 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
}
}

static void
getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
unsigned Brand_id, unsigned Features,
unsigned Features2, unsigned *Type,
unsigned *Subtype) {
static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
unsigned Brand_id,
unsigned Features,
unsigned Features2, unsigned *Type,
unsigned *Subtype) {
if (Brand_id != 0)
return;
switch (Family) {
Expand All @@ -297,7 +297,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
// As found in a Summer 2010 model iMac.
case 0x1f:
case 0x2e: // Nehalem EX
case 0x2e: // Nehalem EX
*Type = INTEL_COREI7; // "nehalem"
*Subtype = INTEL_COREI7_NEHALEM;
break;
Expand All @@ -315,7 +315,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Subtype = INTEL_COREI7_SANDYBRIDGE;
break;
case 0x3a:
case 0x3e: // Ivy Bridge EP
case 0x3e: // Ivy Bridge EP
*Type = INTEL_COREI7; // "ivybridge"
*Subtype = INTEL_COREI7_IVYBRIDGE;
break;
Expand All @@ -339,10 +339,10 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break;

// Skylake:
case 0x4e: // Skylake mobile
case 0x5e: // Skylake desktop
case 0x8e: // Kaby Lake mobile
case 0x9e: // Kaby Lake desktop
case 0x4e: // Skylake mobile
case 0x5e: // Skylake desktop
case 0x8e: // Kaby Lake mobile
case 0x9e: // Kaby Lake desktop
*Type = INTEL_COREI7; // "skylake"
*Subtype = INTEL_COREI7_SKYLAKE;
break;
Expand Down Expand Up @@ -398,7 +398,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,

default: // Unknown family 6 CPU.
break;
break;
break;
}
default:
break; // Unknown.
Expand Down Expand Up @@ -474,12 +474,12 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
unsigned Features2 = 0;
unsigned EAX, EBX;

// Sets feature bit F: values 0-31 go into Features, values 32-63 go into
// Features2 (as bit F - 32); values of 64 or more are ignored. F is
// parenthesized at every use so expression arguments expand safely.
#define setFeature(F)                                                          \
  do {                                                                         \
    if ((F) < 32)                                                              \
      Features |= 1U << ((F) & 0x1f);                                          \
    else if ((F) < 64)                                                         \
      Features2 |= 1U << (((F) - 32) & 0x1f);                                  \
  } while (0)

if ((EDX >> 15) & 1)
Expand Down Expand Up @@ -618,8 +618,7 @@ unsigned int __cpu_features2;
the priority set. However, it still runs after ifunc initializers and
needs to be called explicitly there. */

int CONSTRUCTOR_ATTRIBUTE
__cpu_indicator_init(void) {
int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
unsigned EAX, EBX, ECX, EDX;
unsigned MaxLeaf = 5;
unsigned Vendor;
Expand Down Expand Up @@ -651,8 +650,7 @@ __cpu_indicator_init(void) {
if (Vendor == SIG_INTEL) {
/* Get CPU type. */
getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
Features2,
&(__cpu_model.__cpu_type),
Features2, &(__cpu_model.__cpu_type),
&(__cpu_model.__cpu_subtype));
__cpu_model.__cpu_vendor = VENDOR_INTEL;
} else if (Vendor == SIG_AMD) {
Expand Down
17 changes: 7 additions & 10 deletions compiler-rt/lib/builtins/ctzdi2.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
/* Returns: the number of trailing 0-bits */

#if !defined(__clang__) && \
((defined(__sparc__) && defined(__arch64__)) || \
defined(__mips64) || \
((defined(__sparc__) && defined(__arch64__)) || defined(__mips64) || \
(defined(__riscv) && __SIZEOF_POINTER__ >= 8))
/* On 64-bit architectures with neither a native clz instruction nor a native
* ctz instruction, gcc resolves __builtin_ctz to __ctzdi2 rather than
Expand All @@ -28,12 +27,10 @@ extern si_int __ctzsi2(si_int);

/* Precondition: a != 0 */

/* Returns: the number of trailing 0-bits
 * Precondition: a != 0
 *
 * f is all-ones when the low word is zero and 0 otherwise; it selects the
 * high word (and adds the width of one word) in that case, the low word
 * otherwise.
 */
COMPILER_RT_ABI si_int __ctzdi2(di_int a) {
  dwords x;
  x.all = a;
  const si_int f = -(x.s.low == 0);
  return __builtin_ctz((x.s.high & f) | (x.s.low & ~f)) +
         (f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
}
69 changes: 34 additions & 35 deletions compiler-rt/lib/builtins/ctzsi2.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,40 +17,39 @@

/* Precondition: a != 0 */

/* Returns the number of trailing 0-bits of a, found by a branch-less binary
 * search over 16-, 8-, 4- and 2-bit groups.
 * Precondition: a != 0
 */
COMPILER_RT_ABI si_int __ctzsi2(si_int a) {
  su_int x = (su_int)a;
  /* if (x has no small bits) t = 16 else 0 */
  si_int t = ((x & 0x0000FFFF) == 0) << 4;
  x >>= t;      /* x = [0 - 0xFFFF] + higher garbage bits */
  su_int r = t; /* r = [0, 16]  */
  /* return r + ctz(x) */
  t = ((x & 0x00FF) == 0) << 3;
  x >>= t; /* x = [0 - 0xFF] + higher garbage bits */
  r += t;  /* r = [0, 8, 16, 24] */
  /* return r + ctz(x) */
  t = ((x & 0x0F) == 0) << 2;
  x >>= t; /* x = [0 - 0xF] + higher garbage bits */
  r += t;  /* r = [0, 4, 8, 12, 16, 20, 24, 28] */
  /* return r + ctz(x) */
  t = ((x & 0x3) == 0) << 1;
  x >>= t;
  x &= 3; /* x = [0 - 3] */
  r += t; /* r = [0 - 30] and is even */
  /* return r + ctz(x) */

  /* The branch-less return statement below is equivalent
   * to the following switch statement:
   *     switch (x)
   *     {
   *     case 0:
   *         return r + 2;
   *     case 2:
   *         return r + 1;
   *     case 1:
   *     case 3:
   *         return r;
   *     }
   */
  return r + ((2 - (x >> 1)) & -((x & 1) == 0));
}
14 changes: 6 additions & 8 deletions compiler-rt/lib/builtins/ctzti2.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,12 @@

/* Precondition: a != 0 */

/* Returns the number of trailing 0-bits of a.
 * Precondition: a != 0
 *
 * f is all-ones when the low half is zero and 0 otherwise; it selects the
 * high half (and adds the width of one half) in that case, the low half
 * otherwise.
 */
COMPILER_RT_ABI si_int __ctzti2(ti_int a) {
  twords x;
  x.all = a;
  const di_int f = -(x.s.low == 0);
  return __builtin_ctzll((x.s.high & f) | (x.s.low & ~f)) +
         ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT)));
}

#endif /* CRT_HAS_128BIT */
71 changes: 32 additions & 39 deletions compiler-rt/lib/builtins/divdc3.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,44 +18,37 @@

/* Returns: the quotient of (a + ib) / (c + id) */

/* Returns: the quotient of (a + ib) / (c + id)
 *
 * The divisor is first scaled by the exponent of its larger component (when
 * that exponent is finite) and the result is scaled back afterwards. When
 * both parts of the plain result are NaN, the special cases below substitute
 * infinities or zeros.
 */
COMPILER_RT_ABI Dcomplex __divdc3(double __a, double __b, double __c,
                                  double __d) {
  int __ilogbw = 0;
  double __logbw = __compiler_rt_logb(crt_fmax(crt_fabs(__c), crt_fabs(__d)));
  if (crt_isfinite(__logbw)) {
    __ilogbw = (int)__logbw;
    __c = crt_scalbn(__c, -__ilogbw);
    __d = crt_scalbn(__d, -__ilogbw);
  }
  double __denom = __c * __c + __d * __d;
  Dcomplex z;
  COMPLEX_REAL(z) = crt_scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
  COMPLEX_IMAGINARY(z) =
      crt_scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
  if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) {
    if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) {
      /* Zero divisor, dividend not entirely NaN. */
      COMPLEX_REAL(z) = crt_copysign(CRT_INFINITY, __c) * __a;
      COMPLEX_IMAGINARY(z) = crt_copysign(CRT_INFINITY, __c) * __b;
    } else if ((crt_isinf(__a) || crt_isinf(__b)) && crt_isfinite(__c) &&
               crt_isfinite(__d)) {
      /* Infinite dividend, finite divisor. */
      __a = crt_copysign(crt_isinf(__a) ? 1.0 : 0.0, __a);
      __b = crt_copysign(crt_isinf(__b) ? 1.0 : 0.0, __b);
      COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
      COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
    } else if (crt_isinf(__logbw) && __logbw > 0.0 && crt_isfinite(__a) &&
               crt_isfinite(__b)) {
      /* Finite dividend, infinite divisor. */
      __c = crt_copysign(crt_isinf(__c) ? 1.0 : 0.0, __c);
      __d = crt_copysign(crt_isinf(__d) ? 1.0 : 0.0, __d);
      COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d);
      COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
    }
  }
  return z;
}
348 changes: 178 additions & 170 deletions compiler-rt/lib/builtins/divdf3.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,186 +18,194 @@
#define DOUBLE_PRECISION
#include "fp_lib.h"

// Soft-float division a / b. Special operands (zero, denormal, infinity, NaN)
// are handled up front; the quotient is then formed by multiplying a by a
// Newton-Raphson reciprocal of b and rounded using the residual. Denormal
// results are flushed to zero.
COMPILER_RT_ABI fp_t __divdf3(fp_t a, fp_t b) {

  const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
  const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
  const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;

  rep_t aSignificand = toRep(a) & significandMask;
  rep_t bSignificand = toRep(b) & significandMask;
  int scale = 0;

  // Detect if a or b is zero, denormal, infinity, or NaN.
  if (aExponent - 1U >= maxExponent - 1U ||
      bExponent - 1U >= maxExponent - 1U) {

    const rep_t aAbs = toRep(a) & absMask;
    const rep_t bAbs = toRep(b) & absMask;

    // NaN / anything = qNaN
    if (aAbs > infRep)
      return fromRep(toRep(a) | quietBit);
    // anything / NaN = qNaN
    if (bAbs > infRep)
      return fromRep(toRep(b) | quietBit);

    if (aAbs == infRep) {
      // infinity / infinity = NaN
      if (bAbs == infRep)
        return fromRep(qnanRep);
      // infinity / anything else = +/- infinity
      else
        return fromRep(aAbs | quotientSign);
    }

    // anything else / infinity = +/- 0
    if (bAbs == infRep)
      return fromRep(quotientSign);

    if (!aAbs) {
      // zero / zero = NaN
      if (!bAbs)
        return fromRep(qnanRep);
      // zero / anything else = +/- zero
      else
        return fromRep(quotientSign);
    }
    // anything else / zero = +/- infinity
    if (!bAbs)
      return fromRep(infRep | quotientSign);

    // one or both of a or b is denormal, the other (if applicable) is a
    // normal number.  Renormalize one or both of a and b, and set scale to
    // include the necessary exponent adjustment.
    if (aAbs < implicitBit)
      scale += normalize(&aSignificand);
    if (bAbs < implicitBit)
      scale -= normalize(&bSignificand);
  }

  // Or in the implicit significand bit.  (If we fell through from the
  // denormal path it was already set by normalize( ), but setting it twice
  // won't hurt anything.)
  aSignificand |= implicitBit;
  bSignificand |= implicitBit;
  int quotientExponent = aExponent - bExponent + scale;

  // Align the significand of b as a Q31 fixed-point number in the range
  // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax
  // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2.  This
  // is accurate to about 3.5 binary digits.
  const uint32_t q31b = bSignificand >> 21;
  uint32_t recip32 = UINT32_C(0x7504f333) - q31b;

  // Now refine the reciprocal estimate using a Newton-Raphson iteration:
  //
  //     x1 = x0 * (2 - x0 * b)
  //
  // This doubles the number of correct binary digits in the approximation
  // with each iteration, so after three iterations, we have about 28 binary
  // digits of accuracy.
  uint32_t correction32;
  correction32 = -((uint64_t)recip32 * q31b >> 32);
  recip32 = (uint64_t)recip32 * correction32 >> 31;
  correction32 = -((uint64_t)recip32 * q31b >> 32);
  recip32 = (uint64_t)recip32 * correction32 >> 31;
  correction32 = -((uint64_t)recip32 * q31b >> 32);
  recip32 = (uint64_t)recip32 * correction32 >> 31;

  // recip32 might have overflowed to exactly zero in the preceding
  // computation if the high word of b is exactly 1.0.  This would sabotage
  // the full-width final stage of the computation that follows, so we adjust
  // recip32 downward by one bit.
  recip32--;

  // We need to perform one more iteration to get us to 56 binary digits;
  // The last iteration needs to happen with extra precision.
  const uint32_t q63blo = bSignificand << 11;
  uint64_t correction, reciprocal;
  correction = -((uint64_t)recip32 * q31b + ((uint64_t)recip32 * q63blo >> 32));
  uint32_t cHi = correction >> 32;
  uint32_t cLo = correction;
  reciprocal = (uint64_t)recip32 * cHi + ((uint64_t)recip32 * cLo >> 32);

  // We already adjusted the 32-bit estimate, now we need to adjust the final
  // 64-bit reciprocal estimate downward to ensure that it is strictly smaller
  // than the infinitely precise exact reciprocal.  Because the computation
  // of the Newton-Raphson step is truncating at every step, this adjustment
  // is small; most of the work is already done.
  reciprocal -= 2;

  // The numerical reciprocal is accurate to within 2^-56, lies in the
  // interval [0.5, 1.0), and is strictly smaller than the true reciprocal
  // of b.  Multiplying a by this reciprocal thus gives a numerical q = a/b
  // in Q53 with the following properties:
  //
  //    1. q < a/b
  //    2. q is in the interval [0.5, 2.0)
  //    3. the error in q is bounded away from 2^-53 (actually, we have a
  //       couple of bits to spare, but this is all we need).

  // We need a 64 x 64 multiply high to compute q, which isn't a basic
  // operation in C, so we need to be a little bit fussy.
  rep_t quotient, quotientLo;
  wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo);

  // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
  // In either case, we are going to compute a residual of the form
  //
  //     r = a - q*b
  //
  // We know from the construction of q that r satisfies:
  //
  //     0 <= r < ulp(q)*b
  //
  // if r is greater than 1/2 ulp(q)*b, then q rounds up.  Otherwise, we
  // already have the correct result.  The exact halfway case cannot occur.
  // We also take this time to right shift quotient if it falls in the [1,2)
  // range and adjust the exponent accordingly.
  rep_t residual;
  if (quotient < (implicitBit << 1)) {
    residual = (aSignificand << 53) - quotient * bSignificand;
    quotientExponent--;
  } else {
    quotient >>= 1;
    residual = (aSignificand << 52) - quotient * bSignificand;
  }

  const int writtenExponent = quotientExponent + exponentBias;

  if (writtenExponent >= maxExponent) {
    // If we have overflowed the exponent, return infinity.
    return fromRep(infRep | quotientSign);
  }

  else if (writtenExponent < 1) {
    if (writtenExponent == 0) {
      // Check whether the rounded result is normal.
      const bool round = (residual << 1) > bSignificand;
      // Clear the implicit bit.
      rep_t absResult = quotient & significandMask;
      // Round.
      absResult += round;
      if (absResult & ~significandMask) {
        // The rounded result is normal; return it.
        return fromRep(absResult | quotientSign);
      }
    }
    // Flush denormals to zero.  In the future, it would be nice to add
    // code to round them correctly.
    return fromRep(quotientSign);
  }

  else {
    const bool round = (residual << 1) > bSignificand;
    // Clear the implicit bit
    rep_t absResult = quotient & significandMask;
    // Insert the exponent
    absResult |= (rep_t)writtenExponent << significandBits;
    // Round
    absResult += round;
    // Insert the sign and return
    const double result = fromRep(absResult | quotientSign);
    return result;
  }
}

#if defined(__ARM_EABI__)
#if defined(COMPILER_RT_ARMHF_TARGET)
// Variant used when COMPILER_RT_ARMHF_TARGET is defined: a real function that
// forwards to __divdf3 (the other branch uses an alias instead).
AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) { return __divdf3(a, b); }
#else
AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) COMPILER_RT_ALIAS(__divdf3);
#endif
Expand Down
19 changes: 9 additions & 10 deletions compiler-rt/lib/builtins/divdi3.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,13 @@

/* Returns: a / b */

/* Returns: a / b
 *
 * The operand magnitudes are computed in du_int, so negating the most
 * negative di_int value wraps instead of overflowing a signed type (which
 * would be undefined behavior).
 */
COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b) {
  const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
  di_int s_a = a >> bits_in_dword_m1;           /* s_a = a < 0 ? -1 : 0 */
  di_int s_b = b >> bits_in_dword_m1;           /* s_b = b < 0 ? -1 : 0 */
  du_int a_u = (du_int)(a ^ s_a) - (du_int)s_a; /* negate if s_a == -1 */
  du_int b_u = (du_int)(b ^ s_b) - (du_int)s_b; /* negate if s_b == -1 */
  s_a ^= s_b;                                   /* sign of quotient */
  return (__udivmoddi4(a_u, b_u, (du_int *)0) ^ s_a) -
         s_a; /* negate if s_a == -1 */
}
8 changes: 3 additions & 5 deletions compiler-rt/lib/builtins/divmoddi4.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,8 @@

/* Returns: a / b, *rem = a % b */

/* Returns: a / b, *rem = a % b
 *
 * The quotient comes from __divdi3; the remainder is derived from it as
 * a - (quotient * b). rem is written unconditionally.
 */
COMPILER_RT_ABI di_int __divmoddi4(di_int a, di_int b, di_int *rem) {
  di_int d = __divdi3(a, b);
  *rem = a - (d * b);
  return d;
}
12 changes: 4 additions & 8 deletions compiler-rt/lib/builtins/divmodsi4.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,8 @@

/* Returns: a / b, *rem = a % b */

/* Returns: a / b, *rem = a % b
 *
 * The quotient comes from __divsi3; the remainder is derived from it as
 * a - (quotient * b). rem is written unconditionally.
 */
COMPILER_RT_ABI si_int __divmodsi4(si_int a, si_int b, si_int *rem) {
  si_int d = __divsi3(a, b);
  *rem = a - (d * b);
  return d;
}


72 changes: 32 additions & 40 deletions compiler-rt/lib/builtins/divsc3.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,45 +18,37 @@

/* Returns: the quotient of (a + ib) / (c + id) */

/* Returns: the quotient of (a + ib) / (c + id)
 *
 * The divisor is first scaled by the exponent of its larger component (when
 * that exponent is finite) and the result is scaled back afterwards. When
 * both parts of the plain result are NaN, the special cases below substitute
 * infinities or zeros.
 */
COMPILER_RT_ABI Fcomplex __divsc3(float __a, float __b, float __c, float __d) {
  int __ilogbw = 0;
  float __logbw =
      __compiler_rt_logbf(crt_fmaxf(crt_fabsf(__c), crt_fabsf(__d)));
  if (crt_isfinite(__logbw)) {
    __ilogbw = (int)__logbw;
    __c = crt_scalbnf(__c, -__ilogbw);
    __d = crt_scalbnf(__d, -__ilogbw);
  }
  float __denom = __c * __c + __d * __d;
  Fcomplex z;
  COMPLEX_REAL(z) = crt_scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw);
  COMPLEX_IMAGINARY(z) =
      crt_scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw);
  if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) {
    if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b))) {
      /* Zero divisor, dividend not entirely NaN. */
      COMPLEX_REAL(z) = crt_copysignf(CRT_INFINITY, __c) * __a;
      COMPLEX_IMAGINARY(z) = crt_copysignf(CRT_INFINITY, __c) * __b;
    } else if ((crt_isinf(__a) || crt_isinf(__b)) && crt_isfinite(__c) &&
               crt_isfinite(__d)) {
      /* Infinite dividend, finite divisor. */
      __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a);
      __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b);
      COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
      COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
    } else if (crt_isinf(__logbw) && __logbw > 0 && crt_isfinite(__a) &&
               crt_isfinite(__b)) {
      /* Finite dividend, infinite divisor. */
      __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c);
      __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d);
      COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d);
      COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d);
    }
  }
  return z;
}
314 changes: 161 additions & 153 deletions compiler-rt/lib/builtins/divsf3.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,170 +18,178 @@
#define SINGLE_PRECISION
#include "fp_lib.h"

// Soft-float single-precision division: returns a / b.
//
// Special operands (NaN, infinity, zero, denormal) are handled up front.
// Otherwise the quotient is obtained by multiplying a's significand with a
// Newton-Raphson refined reciprocal of b's significand, then corrected by one
// ulp if the residual shows the truncated quotient must round up.
//
// Results whose exponent overflows become +/- infinity; results that would be
// denormal are flushed to +/- zero.
COMPILER_RT_ABI fp_t __divsf3(fp_t a, fp_t b) {
  const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
  const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
  const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;

  rep_t aSignificand = toRep(a) & significandMask;
  rep_t bSignificand = toRep(b) & significandMask;
  int scale = 0;

  // Detect if a or b is zero, denormal, infinity, or NaN. The unsigned
  // subtraction wraps an exponent of 0 around to a huge value, so a single
  // comparison catches both the all-zeros and the all-ones exponent.
  if (aExponent - 1U >= maxExponent - 1U ||
      bExponent - 1U >= maxExponent - 1U) {

    const rep_t aAbs = toRep(a) & absMask;
    const rep_t bAbs = toRep(b) & absMask;

    // NaN / anything = qNaN
    if (aAbs > infRep)
      return fromRep(toRep(a) | quietBit);
    // anything / NaN = qNaN
    if (bAbs > infRep)
      return fromRep(toRep(b) | quietBit);

    if (aAbs == infRep) {
      // infinity / infinity = NaN
      if (bAbs == infRep)
        return fromRep(qnanRep);
      // infinity / anything else = +/- infinity
      else
        return fromRep(aAbs | quotientSign);
    }

    // anything else / infinity = +/- 0
    if (bAbs == infRep)
      return fromRep(quotientSign);

    if (!aAbs) {
      // zero / zero = NaN
      if (!bAbs)
        return fromRep(qnanRep);
      // zero / anything else = +/- zero
      else
        return fromRep(quotientSign);
    }
    // anything else / zero = +/- infinity
    if (!bAbs)
      return fromRep(infRep | quotientSign);

    // One or both of a or b is denormal, the other (if applicable) is a
    // normal number. Renormalize one or both of a and b, and set scale to
    // include the necessary exponent adjustment.
    if (aAbs < implicitBit)
      scale += normalize(&aSignificand);
    if (bAbs < implicitBit)
      scale -= normalize(&bSignificand);
  }

  // Or in the implicit significand bit. (If we fell through from the
  // denormal path it was already set by normalize(), but setting it twice
  // won't hurt anything.)
  aSignificand |= implicitBit;
  bSignificand |= implicitBit;
  int quotientExponent = aExponent - bExponent + scale;

  // Align the significand of b as a Q31 fixed-point number in the range
  // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax
  // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
  // is accurate to about 3.5 binary digits.
  uint32_t q31b = bSignificand << 8;
  uint32_t reciprocal = UINT32_C(0x7504f333) - q31b;

  // Now refine the reciprocal estimate using a Newton-Raphson iteration:
  //
  //     x1 = x0 * (2 - x0 * b)
  //
  // This doubles the number of correct binary digits in the approximation
  // with each iteration, so after three iterations, we have about 28 binary
  // digits of accuracy.
  uint32_t correction;
  correction = -((uint64_t)reciprocal * q31b >> 32);
  reciprocal = (uint64_t)reciprocal * correction >> 31;
  correction = -((uint64_t)reciprocal * q31b >> 32);
  reciprocal = (uint64_t)reciprocal * correction >> 31;
  correction = -((uint64_t)reciprocal * q31b >> 32);
  reciprocal = (uint64_t)reciprocal * correction >> 31;

  // Exhaustive testing shows that the error in reciprocal after three steps
  // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our
  // expectations. We bump the reciprocal by a tiny value to force the error
  // to be strictly positive (in the range [0x1.4fdfp-37,0x1.287246p-29], to
  // be specific). This also causes 1/1 to give a sensible approximation
  // instead of zero (due to overflow).
  reciprocal -= 2;

  // The numerical reciprocal is accurate to within 2^-28, lies in the
  // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller
  // than the true reciprocal of b. Multiplying a by this reciprocal thus
  // gives a numerical q = a/b in Q24 with the following properties:
  //
  //    1. q < a/b
  //    2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0)
  //    3. the error in q is at most 2^-24 + 2^-27 -- the 2^-24 term comes
  //       from the fact that we truncate the product, and the 2^-27 term
  //       is the error in the reciprocal of b scaled by the maximum
  //       possible value of a. As a consequence of this error bound,
  //       either q or nextafter(q) is the correctly rounded result.
  rep_t quotient = (uint64_t)reciprocal * (aSignificand << 1) >> 32;

  // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
  // In either case, we are going to compute a residual of the form
  //
  //     r = a - q*b
  //
  // We know from the construction of q that r satisfies:
  //
  //     0 <= r < ulp(q)*b
  //
  // If r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we
  // already have the correct result. The exact halfway case cannot occur.
  // We also take this time to right shift quotient if it falls in the [1,2)
  // range and adjust the exponent accordingly.
  rep_t residual;
  if (quotient < (implicitBit << 1)) {
    residual = (aSignificand << 24) - quotient * bSignificand;
    quotientExponent--;
  } else {
    quotient >>= 1;
    residual = (aSignificand << 23) - quotient * bSignificand;
  }

  const int writtenExponent = quotientExponent + exponentBias;

  if (writtenExponent >= maxExponent) {
    // If we have overflowed the exponent, return infinity.
    return fromRep(infRep | quotientSign);
  }

  else if (writtenExponent < 1) {
    if (writtenExponent == 0) {
      // Check whether the rounded result is normal.
      const bool round = (residual << 1) > bSignificand;
      // Clear the implicit bit.
      rep_t absResult = quotient & significandMask;
      // Round.
      absResult += round;
      // A carry out of the significand field means rounding pushed the
      // value up into the smallest normal number.
      if (absResult & ~significandMask) {
        // The rounded result is normal; return it.
        return fromRep(absResult | quotientSign);
      }
    }
    // Flush denormals to zero. In the future, it would be nice to add
    // code to round them correctly.
    return fromRep(quotientSign);
  }

  else {
    const bool round = (residual << 1) > bSignificand;
    // Clear the implicit bit.
    rep_t absResult = quotient & significandMask;
    // Insert the exponent.
    absResult |= (rep_t)writtenExponent << significandBits;
    // Round.
    absResult += round;
    // Insert the sign and return.
    return fromRep(absResult | quotientSign);
  }
}

#if defined(__ARM_EABI__)
#if defined(COMPILER_RT_ARMHF_TARGET)
// ARM EABI name for single-precision division. On COMPILER_RT_ARMHF_TARGET
// builds this is a wrapper function that forwards to __divsf3; the #else
// branch uses an alias instead.
AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) { return __divsf3(a, b); }
#else
AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) COMPILER_RT_ALIAS(__divsf3);
#endif
Expand Down
30 changes: 14 additions & 16 deletions compiler-rt/lib/builtins/divsi3.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,20 @@

/* Returns: a / b */

/*
 * Signed 32-bit division implemented on top of unsigned division.
 *
 * Both operands are converted to their magnitudes, divided as unsigned
 * values, and the sign of the quotient is re-applied afterwards. The
 * magnitude and negation steps are carried out in su_int arithmetic so that
 * the most negative value does not trigger signed overflow.
 */
COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b) {
  const int bits_in_word_m1 = (int)(sizeof(si_int) * CHAR_BIT) - 1;
  si_int s_a = a >> bits_in_word_m1; /* s_a = a < 0 ? -1 : 0 */
  si_int s_b = b >> bits_in_word_m1; /* s_b = b < 0 ? -1 : 0 */
  /* (x ^ s) - s negates x when s == -1 and leaves it alone when s == 0. */
  su_int a_u = ((su_int)a ^ (su_int)s_a) - (su_int)s_a; /* |a| */
  su_int b_u = ((su_int)b ^ (su_int)s_b) - (su_int)s_b; /* |b| */
  s_a ^= s_b;                                           /* sign of quotient */
  /*
   * On CPUs without unsigned hardware division support,
   * this calls __udivsi3 (the operands are su_int).
   * On CPUs with unsigned hardware division support,
   * this uses the unsigned division instruction.
   */
  su_int q_u = a_u / b_u;
  return (si_int)((q_u ^ (su_int)s_a) - (su_int)s_a); /* negate if s_a == -1 */
}

#if defined(__ARM_EABI__)
Expand Down
73 changes: 33 additions & 40 deletions compiler-rt/lib/builtins/divtc3.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,45 +18,38 @@

/* Returns: the quotient of (a + ib) / (c + id) */

/*
 * Long-double complex division: (__a + i*__b) / (__c + i*__d).
 *
 * The divisor is first scaled by a power of two taken from the larger of
 * |__c| and |__d|; the same scaling is then applied to the quotient. If both
 * parts of the straightforward result are NaN, three special cases are
 * re-evaluated so that the result is an infinity or a zero instead.
 */
COMPILER_RT_ABI Lcomplex __divtc3(long double __a, long double __b,
                                  long double __c, long double __d) {
  int __ilogbw = 0;
  long double __logbw =
      __compiler_rt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d)));
  /* Only rescale when the exponent is finite (it is not for a zero, infinite
   * or NaN divisor); __ilogbw stays 0 otherwise. */
  if (crt_isfinite(__logbw)) {
    __ilogbw = (int)__logbw;
    __c = crt_scalbnl(__c, -__ilogbw);
    __d = crt_scalbnl(__d, -__ilogbw);
  }
  long double __denom = __c * __c + __d * __d;
  Lcomplex z;
  COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
  COMPLEX_IMAGINARY(z) =
      crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
  if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) {
    if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) {
      /* Non-NaN numerator over a zero divisor: infinity signed like __c. */
      COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a;
      COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b;
    } else if ((crt_isinf(__a) || crt_isinf(__b)) && crt_isfinite(__c) &&
               crt_isfinite(__d)) {
      /* Infinite numerator over a finite divisor: replace each infinite part
       * by +/-1 and each other part by +/-0, then scale by infinity. */
      __a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a);
      __b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b);
      COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
      COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
    } else if (crt_isinf(__logbw) && __logbw > 0.0 && crt_isfinite(__a) &&
               crt_isfinite(__b)) {
      /* Finite numerator over an infinite divisor: the result is a zero whose
       * signs come from the boxed divisor. */
      __c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c);
      __d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d);
      COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d);
      COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
    }
  }
  return z;
}
Loading