diff --git a/CMakeLists.txt b/CMakeLists.txt index 99b3be19..637d105a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -88,11 +88,6 @@ set(COSTOVERRIDE_SVENOFMA 10) cmake_minimum_required(VERSION 3.4.3) -# Set to NEW when updating cmake_minimum_required to VERSION >= 3.7.2 -if(${CMAKE_VERSION} VERSION_GREATER "3.7.1") - cmake_policy(SET CMP0066 OLD) -endif() - if(${CMAKE_VERSION} VERSION_GREATER "3.14.99") cmake_policy(SET CMP0091 NEW) endif() diff --git a/Jenkinsfile b/Jenkinsfile index 3f2ec6b0..259f243f 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -53,7 +53,7 @@ pipeline { rm -rf build mkdir build cd build - cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DENFORCE_TESTER3=TRUE -DFORCE_AAVPCS=On -DENABLE_GNUABI=On -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENABLE_CUDA=TRUE -DENFORCE_CUDA=TRUE -DENFORCE_SVE=TRUE -DEMULATOR=qemu-aarch64 .. + cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DENFORCE_TESTER3=TRUE -DFORCE_AAVPCS=On -DENABLE_GNUABI=On -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=FALSE -DENABLE_CUDA=TRUE -DENFORCE_CUDA=TRUE -DENFORCE_SVE=TRUE -DEMULATOR=qemu-aarch64 .. ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE diff --git a/src/arch/helperpurec_scalar.h b/src/arch/helperpurec_scalar.h index 346bf2ed..0a207c6e 100644 --- a/src/arch/helperpurec_scalar.h +++ b/src/arch/helperpurec_scalar.h @@ -123,7 +123,7 @@ typedef struct { #if defined(ENABLEFLOAT128) && CONFIG != 3 typedef __float128 vargquad; -#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__) && CONFIG != 3 +#elif defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ == 16 && defined(__aarch64__) && CONFIG != 3 typedef long double vargquad; #else typedef vquad vargquad; @@ -444,19 +444,19 @@ static INLINE vargquad cast_aq_vq(vquad vq) { #else static vquad loadu_vq_p(void *p) { vquad vq; - memcpy(&vq, p, 16); + memcpy(&vq, p, sizeof(vq)); return vq; } static INLINE vquad cast_vq_aq(vargquad aq) { vquad vq; - memcpy(&vq, &aq, 16); + memcpy(&vq, &aq, sizeof(vq)); return vq; } static INLINE vargquad cast_aq_vq(vquad vq) { vargquad aq; - memcpy(&aq, &vq, 16); + memcpy(&aq, &vq, sizeof(aq)); return aq; } #endif diff --git a/src/common/misc.h b/src/common/misc.h index 2f4c3dda..59d53874 100644 --- a/src/common/misc.h +++ b/src/common/misc.h @@ -150,10 +150,6 @@ #define stringify(s) stringify_(s) #define stringify_(s) #s -#if !defined(SLEEF_GENHEADER) -typedef long double longdouble; -#endif - #if !defined(Sleef_double2_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_double2_DEFINED typedef struct { @@ -168,19 +164,12 @@ typedef struct { } Sleef_float2; #endif -#if !defined(Sleef_longdouble2_DEFINED) && !defined(SLEEF_GENHEADER) -#define Sleef_longdouble2_DEFINED -typedef struct { - long double x, y; -} Sleef_longdouble2; -#endif - #if !defined(Sleef_quad_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quad_DEFINED #if defined(ENABLEFLOAT128) typedef __float128 Sleef_quad; #define SLEEF_QUAD_C(x) (x ## Q) -#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__) +#elif defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ == 16 && defined(__aarch64__) typedef long double Sleef_quad; #define SLEEF_QUAD_C(x) (x ## L) #else @@ -188,50 +177,6 @@ typedef struct { uint64_t x, y; } Sleef_quad; #endif #endif -#if !defined(Sleef_quad1_DEFINED) && !defined(SLEEF_GENHEADER) -#define Sleef_quad1_DEFINED -typedef union { - struct { - Sleef_quad x; - }; - Sleef_quad s[1]; -} Sleef_quad1; -#endif - -#if !defined(Sleef_quad2_DEFINED) && !defined(SLEEF_GENHEADER) -#define Sleef_quad2_DEFINED -typedef union { - struct { - Sleef_quad x, y; - }; - Sleef_quad s[2]; -} Sleef_quad2; -#endif - -#if !defined(Sleef_quad4_DEFINED) && !defined(SLEEF_GENHEADER) -#define Sleef_quad4_DEFINED -typedef union { - struct { - Sleef_quad x, y, z, w; - }; - Sleef_quad s[4]; -} Sleef_quad4; -#endif - -#if !defined(Sleef_quad8_DEFINED) && !defined(SLEEF_GENHEADER) -#define Sleef_quad8_DEFINED -typedef union { - Sleef_quad s[8]; -} Sleef_quad8; -#endif - -#if defined(__ARM_FEATURE_SVE) && !defined(Sleef_quadx_DEFINED) && !defined(SLEEF_GENHEADER) -#define Sleef_quadx_DEFINED -typedef union { - Sleef_quad s[32]; -} Sleef_quadx; -#endif - // #if (defined (__GNUC__) || defined (__clang__) || defined(__INTEL_COMPILER)) && !defined(_MSC_VER) diff --git a/src/dft/CMakeLists.txt b/src/dft/CMakeLists.txt index c558c236..77a2eed6 100644 --- a/src/dft/CMakeLists.txt +++ b/src/dft/CMakeLists.txt @@ -12,16 +12,14 @@ if (SLEEFDFT_MAXBUTWIDTH GREATER 7) endif() option(SLEEFDFT_ENABLE_STREAM "Streaming instructions are utilized in DFT." OFF) -option(SLEEFDFT_ENABLE_LONGDOUBLE "Long double routines will be compiled in." OFF) -option(SLEEFDFT_ENABLE_QUAD "Quad precision routines will be compiled in." OFF) # Settings # Constants definition -set(LISTSHORTTYPENAME "dp" "sp" "ld" "qp") -set(LISTLONGTYPENAME "double" "float" "longdouble" "Sleef_quad") -set(LISTTYPEID "1" "2" "3" "4") +set(LISTSHORTTYPENAME "dp" "sp") +set(LISTLONGTYPENAME "double" "float") +set(LISTTYPEID "1" "2") set(MACRODEF_vecextdp BASETYPEID=1 ENABLE_VECEXT CONFIG=1) set(CFLAGS_vecextdp ${FLAGS_ENABLE_VECEXT}) @@ -105,22 +103,6 @@ if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)") set(ISALIST_DP vecextdp) endif(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)") -if (COMPILER_SUPPORTS_LONG_DOUBLE AND SLEEFDFT_ENABLE_LONGDOUBLE) - set(LIST_SUPPORTED_FPTYPE ${LIST_SUPPORTED_FPTYPE} 2) - set(ISALIST_QP purecld) - if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)") - set(ISALIST_LD vecextld) - endif(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)") -endif(COMPILER_SUPPORTS_LONG_DOUBLE AND SLEEFDFT_ENABLE_LONGDOUBLE) - -if (COMPILER_SUPPORTS_FLOAT128 AND SLEEFDFT_ENABLE_QUAD) - set(LIST_SUPPORTED_FPTYPE ${LIST_SUPPORTED_FPTYPE} 3) - set(ISALIST_QP purecqp) - if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)") - set(ISALIST_QP vecextqp) - endif(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)") -endif(COMPILER_SUPPORTS_FLOAT128 AND SLEEFDFT_ENABLE_QUAD) - # List all available vector data types if (COMPILER_SUPPORTS_SSE4) @@ -204,14 +186,6 @@ else() set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS} ENABLE_STREAM=0) endif() -if (COMPILER_SUPPORTS_FLOAT128 AND NOT (SLEEF_ARCH_32BIT AND SLEEF_ARCH_X86)) - set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS} ENABLEFLOAT128) -endif() - -if (COMPILER_SUPPORTS_LONG_DOUBLE AND NOT (SLEEF_ARCH_32BIT AND SLEEF_ARCH_X86)) - set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS} ENABLE_LONGDOUBLE) -endif() - if(COMPILER_SUPPORTS_OPENMP) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") endif(COMPILER_SUPPORTS_OPENMP) diff --git a/src/dft/dft.c b/src/dft/dft.c index a58cb64a..f3b704eb 100644 --- a/src/dft/dft.c +++ b/src/dft/dft.c @@ -68,50 +68,6 @@ typedef Sleef_float2 sc_t; #define BUTB butb_float #define SINCOSPI Sleef_sincospif_u05 #include "dispatchsp.h" -#elif BASETYPEID == 3 -typedef long double real; -typedef Sleef_longdouble2 sc_t; -#define BASETYPESTRING "long double" -#define MAGIC 0x14142135 -#define MAGIC2D 0x26457513 -#define INIT SleefDFT_longdouble_init1d -#define EXECUTE SleefDFT_longdouble_execute -#define INIT2D SleefDFT_longdouble_init2d -#define CTBL ctbl_longdouble -#define REALSUB0 realSub0_longdouble -#define REALSUB1 realSub1_longdouble -#define GETINT getInt_longdouble -#define GETPTR getPtr_longdouble -#define DFTF dftf_longdouble -#define DFTB dftb_longdouble -#define TBUTF tbutf_longdouble -#define TBUTB tbutb_longdouble -#define BUTF butf_longdouble -#define BUTB butb_longdouble -#define SINCOSPI Sleef_sincospil_u05 -#include "dispatchld.h" -#elif BASETYPEID == 4 -typedef Sleef_quad real; -typedef Sleef_quad2 sc_t; -#define BASETYPESTRING "Sleef_quad" -#define MAGIC 0x33166247 -#define MAGIC2D 0x36055512 -#define INIT SleefDFT_quad_init1d -#define EXECUTE SleefDFT_quad_execute -#define INIT2D SleefDFT_quad_init2d -#define CTBL ctbl_Sleef_quad -#define REALSUB0 realSub0_Sleef_quad -#define REALSUB1 realSub1_Sleef_quad -#define GETINT getInt_Sleef_quad -#define GETPTR getPtr_Sleef_quad -#define DFTF dftf_Sleef_quad -#define DFTB dftb_Sleef_quad -#define TBUTF tbutf_Sleef_quad -#define TBUTB tbutb_Sleef_quad -#define BUTF butf_Sleef_quad -#define BUTB butb_Sleef_quad -#define SINCOSPI Sleef_sincospiq_u05 -#include "dispatchqp.h" #else #error No BASETYPEID specified #endif @@ -121,47 +77,6 @@ typedef Sleef_quad2 sc_t; // -#if BASETYPEID == 4 -real CTBL[] = { - 0.7071067811865475243818940365159164684883Q, -0.7071067811865475243818940365159164684883Q, - 0.9238795325112867561014214079495587839119Q, -0.382683432365089771723257530688933059082Q, - 0.382683432365089771723257530688933059082Q, -0.9238795325112867561014214079495587839119Q, -#if MAXBUTWIDTH >= 5 - 0.9807852804032304491190993878113602022495Q, -0.1950903220161282678433729148581576851029Q, - 0.5555702330196022247573058028269343822103Q, -0.8314696123025452370808655033762590846891Q, - 0.8314696123025452370808655033762590846891Q, -0.5555702330196022247573058028269343822103Q, - 0.1950903220161282678433729148581576851029Q, -0.9807852804032304491190993878113602022495Q, -#endif -#if MAXBUTWIDTH >= 6 - 0.9951847266721968862310254699821143731242Q, -0.09801714032956060199569840382660679267701Q, - 0.6343932841636454982026105398063009488396Q, -0.7730104533627369607965383602188325085081Q, - 0.881921264348355029715105513066220055407Q, -0.4713967368259976485449225247492677226546Q, - 0.2902846772544623676448431737195932100803Q, -0.9569403357322088649310892760624369657307Q, - 0.9569403357322088649310892760624369657307Q, -0.2902846772544623676448431737195932100803Q, - 0.4713967368259976485449225247492677226546Q, -0.881921264348355029715105513066220055407Q, - 0.7730104533627369607965383602188325085081Q, -0.6343932841636454982026105398063009488396Q, - 0.09801714032956060199569840382660679267701Q, -0.9951847266721968862310254699821143731242Q, -#endif -#if MAXBUTWIDTH >= 7 - 0.9987954562051723927007702841240899260811Q, -0.04906767432741801425355085940205324135377Q, - 0.6715589548470184006194634573905233310143Q, -0.7409511253549590911932944126139233276263Q, - 0.9039892931234433315823215138173907234886Q, -0.427555093430282094315230886905077056781Q, - 0.336889853392220050702686798271834334173Q, -0.9415440651830207783906830087961026265475Q, - 0.9700312531945439926159106824865574481009Q, -0.2429801799032638899447731489766866275204Q, - 0.5141027441932217266072797923204262815489Q, -0.8577286100002720698929313536407192941624Q, - 0.8032075314806449097991200569701675249235Q, -0.5956993044924333434615715265891822127742Q, - 0.1467304744553617516588479505190711904561Q, -0.9891765099647809734561415551112872890371Q, - 0.9891765099647809734561415551112872890371Q, -0.1467304744553617516588479505190711904561Q, - 0.5956993044924333434615715265891822127742Q, -0.8032075314806449097991200569701675249235Q, - 0.8577286100002720698929313536407192941624Q, -0.5141027441932217266072797923204262815489Q, - 0.2429801799032638899447731489766866275204Q, -0.9700312531945439926159106824865574481009Q, - 0.9415440651830207783906830087961026265475Q, -0.336889853392220050702686798271834334173Q, - 0.427555093430282094315230886905077056781Q, -0.9039892931234433315823215138173907234886Q, - 0.7409511253549590911932944126139233276263Q, -0.6715589548470184006194634573905233310143Q, - 0.04906767432741801425355085940205324135377Q, -0.9987954562051723927007702841240899260811Q, -#endif -}; -#else real CTBL[] = { 0.7071067811865475243818940365159164684883L, -0.7071067811865475243818940365159164684883L, 0.9238795325112867561014214079495587839119L, -0.382683432365089771723257530688933059082L, @@ -201,7 +116,6 @@ real CTBL[] = { 0.04906767432741801425355085940205324135377L, -0.9987954562051723927007702841240899260811L, #endif }; -#endif #ifndef ENABLE_STREAM #error ENABLE_STREAM not defined diff --git a/src/dft/dftcommon.c b/src/dft/dftcommon.c index 33eedb38..bea37950 100644 --- a/src/dft/dftcommon.c +++ b/src/dft/dftcommon.c @@ -29,13 +29,9 @@ #define MAGIC_FLOAT 0x31415926 #define MAGIC_DOUBLE 0x27182818 -#define MAGIC_LONGDOUBLE 0x14142135 -#define MAGIC_QUAD 0x33166247 #define MAGIC2D_FLOAT 0x22360679 #define MAGIC2D_DOUBLE 0x17320508 -#define MAGIC2D_LONGDOUBLE 0x26457513 -#define MAGIC2D_QUAD 0x36055512 const char *configStr[] = { "ST", "ST stream", "MT", "MT stream" }; @@ -76,7 +72,7 @@ static int parsePathStr(char *p, int *path, int *config, int pathLenMax, int log } EXPORT void SleefDFT_setPath(SleefDFT *p, char *pathStr) { - assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE || p->magic == MAGIC_LONGDOUBLE || p->magic == MAGIC_QUAD)); + assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE)); int path[32], config[32]; int pathLen = parsePathStr(pathStr, path, config, 31, p->log2len); @@ -116,7 +112,7 @@ void freeTables(SleefDFT *p) { } EXPORT void SleefDFT_dispose(SleefDFT *p) { - if (p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE || p->magic == MAGIC2D_LONGDOUBLE || p->magic == MAGIC2D_QUAD)) { + if (p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE)) { Sleef_free(p->tBuf); SleefDFT_dispose(p->instH); if (p->hlen != p->vlen) SleefDFT_dispose(p->instV); @@ -126,7 +122,7 @@ EXPORT void SleefDFT_dispose(SleefDFT *p) { return; } - assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE || p->magic == MAGIC_LONGDOUBLE || p->magic == MAGIC_QUAD)); + assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE)); if (p->log2len <= 1) { p->magic = 0; @@ -322,7 +318,7 @@ static void planMap_putU64(uint64_t key, uint64_t value) { } int PlanManager_loadMeasurementResultsP(SleefDFT *p, int pathCat) { - assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE || p->magic == MAGIC_LONGDOUBLE || p->magic == MAGIC_QUAD)); + assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE)); initPlanMapLock(); @@ -357,7 +353,7 @@ int PlanManager_loadMeasurementResultsP(SleefDFT *p, int pathCat) { } void PlanManager_saveMeasurementResultsP(SleefDFT *p, int pathCat) { - assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE || p->magic == MAGIC_LONGDOUBLE || p->magic == MAGIC_QUAD)); + assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE)); initPlanMapLock(); @@ -388,7 +384,7 @@ void PlanManager_saveMeasurementResultsP(SleefDFT *p, int pathCat) { } int PlanManager_loadMeasurementResultsT(SleefDFT *p) { - assert(p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE || p->magic == MAGIC2D_LONGDOUBLE || p->magic == MAGIC2D_QUAD)); + assert(p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE)); initPlanMapLock(); @@ -409,7 +405,7 @@ int PlanManager_loadMeasurementResultsT(SleefDFT *p) { } void PlanManager_saveMeasurementResultsT(SleefDFT *p) { - assert(p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE || p->magic == MAGIC2D_LONGDOUBLE || p->magic == MAGIC2D_QUAD)); + assert(p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE)); initPlanMapLock(); diff --git a/src/dft/vectortype.h b/src/dft/vectortype.h index c7fdb74d..98b9b8c5 100644 --- a/src/dft/vectortype.h +++ b/src/dft/vectortype.h @@ -133,76 +133,6 @@ static INLINE void stream(real *ptr, int offset, real2 v) { vstream_v_p_vf(&ptr[ static INLINE void scatter(real *ptr, int offset, int step, real2 v) { vscatter2_v_p_i_i_vf(ptr, offset, step, v); } static INLINE void scstream(real *ptr, int offset, int step, real2 v) { vsscatter2_v_p_i_i_vf(ptr, offset, step, v); } -static INLINE void prefetch(real *ptr, int offset) { vprefetch_v_p(&ptr[2*offset]); } -#elif BASETYPEID == 3 -#define LOG2VECWIDTH (LOG2VECTLENDP-1) -#define VECWIDTH (1 << LOG2VECWIDTH) - -typedef long double real; -typedef vlongdouble real2; - -static int available(int name) { return vavailability_i(name); } - -static INLINE real2 uminus(real2 d0) { return vneg_vl_vl(d0); } -static INLINE real2 uplusminus(real2 d0) { return vposneg_vl_vl(d0); } -static INLINE real2 uminusplus(real2 d0) { return vnegpos_vl_vl(d0); } - -static INLINE real2 plus(real2 d0, real2 d1) { return vadd_vl_vl_vl(d0, d1); } -static INLINE real2 minus(real2 d0, real2 d1) { return vsub_vl_vl_vl(d0, d1); } -static INLINE real2 minusplus(real2 d0, real2 d1) { return vsubadd_vl_vl_vl(d0, d1); } -static INLINE real2 times(real2 d0, real2 d1) { return vmul_vl_vl_vl(d0, d1); } -static INLINE real2 ctimes(real2 d0, real d) { return vmul_vl_vl_vl(d0, vcast_vl_l(d)); } -static INLINE real2 timesminusplus(real2 d0, real2 d2, real2 d1) { return vmlsubadd_vl_vl_vl_vl(d0, d2, d1); } -static INLINE real2 ctimesminusplus(real2 d0, real c, real2 d1) { return vmlsubadd_vl_vl_vl_vl(d0, vcast_vl_l(c), d1); } - -static INLINE real2 reverse(real2 d0) { return vrev21_vl_vl(d0); } -static INLINE real2 reverse2(real2 d0) { return vreva2_vl_vl(d0); } - -static INLINE real2 loadc(real c) { return vcast_vl_l(c); } - -static INLINE real2 load(const real *ptr, int offset) { return vload_vl_p(&ptr[2*offset]); } -static INLINE real2 loadu(const real *ptr, int offset) { return vloadu_vl_p(&ptr[2*offset]); } -static INLINE void store(real *ptr, int offset, real2 v) { vstore_v_p_vl(&ptr[2*offset], v); } -static INLINE void storeu(real *ptr, int offset, real2 v) { vstoreu_v_p_vl(&ptr[2*offset], v); } -static INLINE void stream(real *ptr, int offset, real2 v) { vstream_v_p_vl(&ptr[2*offset], v); } -static INLINE void scatter(real *ptr, int offset, int step, real2 v) { vscatter2_v_p_i_i_vl(ptr, offset, step, v); } -static INLINE void scstream(real *ptr, int offset, int step, real2 v) { vsscatter2_v_p_i_i_vl(ptr, offset, step, v); } - -static INLINE void prefetch(real *ptr, int offset) { vprefetch_v_p(&ptr[2*offset]); } -#elif BASETYPEID == 4 -#define LOG2VECWIDTH (LOG2VECTLENDP-1) -#define VECWIDTH (1 << LOG2VECWIDTH) - -typedef Sleef_quad real; -typedef vquad real2; - -static int available(int name) { return vavailability_i(name); } - -static INLINE real2 uminus(real2 d0) { return vneg_vq_vq(d0); } -static INLINE real2 uplusminus(real2 d0) { return vposneg_vq_vq(d0); } -static INLINE real2 uminusplus(real2 d0) { return vnegpos_vq_vq(d0); } - -static INLINE real2 plus(real2 d0, real2 d1) { return vadd_vq_vq_vq(d0, d1); } -static INLINE real2 minus(real2 d0, real2 d1) { return vsub_vq_vq_vq(d0, d1); } -static INLINE real2 minusplus(real2 d0, real2 d1) { return vsubadd_vq_vq_vq(d0, d1); } -static INLINE real2 times(real2 d0, real2 d1) { return vmul_vq_vq_vq(d0, d1); } -static INLINE real2 ctimes(real2 d0, real d) { return vmul_vq_vq_vq(d0, vcast_vq_q(d)); } -static INLINE real2 timesminusplus(real2 d0, real2 d2, real2 d1) { return vmlsubadd_vq_vq_vq_vq(d0, d2, d1); } -static INLINE real2 ctimesminusplus(real2 d0, real c, real2 d1) { return vmlsubadd_vq_vq_vq_vq(d0, vcast_vq_q(c), d1); } - -static INLINE real2 reverse(real2 d0) { return vrev21_vq_vq(d0); } -static INLINE real2 reverse2(real2 d0) { return vreva2_vq_vq(d0); } - -static INLINE real2 loadc(real c) { return vcast_vq_q(c); } - -static INLINE real2 load(const real *ptr, int offset) { return vload_vq_p(&ptr[2*offset]); } -static INLINE real2 loadu(const real *ptr, int offset) { return vloadu_vq_p(&ptr[2*offset]); } -static INLINE void store(real *ptr, int offset, real2 v) { vstore_v_p_vq(&ptr[2*offset], v); } -static INLINE void storeu(real *ptr, int offset, real2 v) { vstoreu_v_p_vq(&ptr[2*offset], v); } -static INLINE void stream(real *ptr, int offset, real2 v) { vstream_v_p_vq(&ptr[2*offset], v); } -static INLINE void scatter(real *ptr, int offset, int step, real2 v) { vscatter2_v_p_i_i_vq(ptr, offset, step, v); } -static INLINE void scstream(real *ptr, int offset, int step, real2 v) { vsscatter2_v_p_i_i_vq(ptr, offset, step, v); } - static INLINE void prefetch(real *ptr, int offset) { vprefetch_v_p(&ptr[2*offset]); } #else #error No BASETYPEID specified diff --git a/src/libm-tester/CMakeLists.txt b/src/libm-tester/CMakeLists.txt index 49f72a86..ac4877d3 100644 --- a/src/libm-tester/CMakeLists.txt +++ b/src/libm-tester/CMakeLists.txt @@ -70,7 +70,7 @@ if (NOT LIBRT) set(LIBRT "") endif() -set(CMAKE_C_FLAGS "${ORG_CMAKE_C_FLAGS} ${SLEEF_C_FLAGS}") +set(CMAKE_C_FLAGS "${ORG_CMAKE_C_FLAGS} ${SLEEF_C_FLAGS} ${FLAGS_NOSTRICTALIASING}") set(COMMON_TARGET_PROPERTIES C_STANDARD 99 # -std=gnu99 diff --git a/src/libm-tester/iutcuda.cu b/src/libm-tester/iutcuda.cu index 0f37361c..500a6e28 100644 --- a/src/libm-tester/iutcuda.cu +++ b/src/libm-tester/iutcuda.cu @@ -373,7 +373,7 @@ int main(int argc, char **argv) { fflush(stdout); char buf[BUFSIZE]; - fgets(buf, BUFSIZE-1, stdin); + if (fgets(buf, BUFSIZE-1, stdin)) {} while(!feof(stdin)) { func_d_d("sin", xsin); diff --git a/src/libm-tester/iutsimd.c b/src/libm-tester/iutsimd.c index 07165fe6..48238c9b 100644 --- a/src/libm-tester/iutsimd.c +++ b/src/libm-tester/iutsimd.c @@ -224,8 +224,6 @@ typedef Sleef___m128_2 vfloat2; #if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include "helpersve.h" -typedef Sleef_svfloat64_t_2 vdouble2; -typedef Sleef_svfloat32_t_2 vfloat2; #endif #endif @@ -234,8 +232,6 @@ typedef Sleef_svfloat32_t_2 vfloat2; #if !defined(USE_INLINE_HEADER) #define CONFIG 2 #include "helpersve.h" -typedef Sleef_svfloat64_t_2 vdouble2; -typedef Sleef_svfloat32_t_2 vfloat2; #endif #endif diff --git a/src/libm-tester/iutsimdmain.c b/src/libm-tester/iutsimdmain.c index aba65f8b..89adc57c 100644 --- a/src/libm-tester/iutsimdmain.c +++ b/src/libm-tester/iutsimdmain.c @@ -9,20 +9,20 @@ #include #include -static jmp_buf sigjmp; - -int do_test(int argc, char **argv); -int check_featureDP(double d); -int check_featureSP(float d); - #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__MINGW64__) +static jmp_buf sigjmp; #define SETJMP(x) setjmp(x) #define LONGJMP longjmp #else +static sigjmp_buf sigjmp; #define SETJMP(x) sigsetjmp(x, 1) #define LONGJMP siglongjmp #endif +int do_test(int argc, char **argv); +int check_featureDP(double d); +int check_featureSP(float d); + static void sighandler(int signum) { LONGJMP(sigjmp, 1); } diff --git a/src/libm-tester/tester2ld.c b/src/libm-tester/tester2ld.c deleted file mode 100644 index 32e50238..00000000 --- a/src/libm-tester/tester2ld.c +++ /dev/null @@ -1,629 +0,0 @@ -// Copyright Naoki Shibata and contributors 2010 - 2021. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "misc.h" - -#ifdef ENABLE_SYS_getrandom -#define _GNU_SOURCE -#include -#include -#include -#endif - -#include "sleef.h" - -#define DORENAME -#include "rename.h" - -#define DENORMAL_LDBL_MIN (3.6451995318824746025284059336194e-4951L) -#define XLDBL_MIN (3.3621031431120935062626778173218e-4932L) - -#ifndef M_PIl -#define M_PIl 3.141592653589793238462643383279502884L -#endif - -#ifndef M_PI_4l -#define M_PI_4l .785398163397448309615660845819875721049292L -#endif - -#define POSITIVE_INFINITY INFINITY -#define NEGATIVE_INFINITY (-INFINITY) - -int isnumberl(long double x) { return x != INFINITYl && x != -INFINITYl && x == x; } -int isPlusZerol(long double x) { return x == 0 && copysignl(1, x) == 1; } -int isMinusZerol(long double x) { return x == 0 && copysignl(1, x) == -1; } - -mpfr_t fra, frb, frc, frd; - -double countULP(long double d, mpfr_t c) { - long double c2 = mpfr_get_ld(c, GMP_RNDN); - if (c2 == 0 && d != 0) return 10000; - //if (isPlusZerol(c2) && !isPlusZerol(d)) return 10003; - //if (isMinusZerol(c2) && !isMinusZerol(d)) return 10004; - if (isnanl(c2) && isnanl(d)) return 0; - if (isnanl(c2) || isnanl(d)) return 10001; - if (c2 == POSITIVE_INFINITY && d == POSITIVE_INFINITY) return 0; - if (c2 == NEGATIVE_INFINITY && d == NEGATIVE_INFINITY) return 0; - if (!isnumberl(c2) && !isnumberl(d)) return 0; - - int e; - frexpl(mpfr_get_ld(c, GMP_RNDN), &e); - mpfr_set_ld(frb, fmaxl(ldexpl(1.0, e-64), DENORMAL_LDBL_MIN), GMP_RNDN); - - mpfr_set_ld(frd, d, GMP_RNDN); - mpfr_sub(fra, frd, c, GMP_RNDN); - mpfr_div(fra, fra, frb, GMP_RNDN); - double u = fabs(mpfr_get_d(fra, GMP_RNDN)); - - return u; -} - -double countULP2(long double d, mpfr_t c) { - long double c2 = mpfr_get_ld(c, GMP_RNDN); - if (c2 == 0 && d != 0) return 10000; - //if (isPlusZerol(c2) && !isPlusZerol(d)) return 10003; - //if (isMinusZerol(c2) && !isMinusZerol(d)) return 10004; - if (isnanl(c2) && isnanl(d)) return 0; - if (isnanl(c2) || isnanl(d)) return 10001; - if (c2 == POSITIVE_INFINITY && d == POSITIVE_INFINITY) return 0; - if (c2 == NEGATIVE_INFINITY && d == NEGATIVE_INFINITY) return 0; - if (!isnumberl(c2) && !isnumberl(d)) return 0; - - int e; - frexpl(mpfr_get_ld(c, GMP_RNDN), &e); - mpfr_set_ld(frb, fmaxl(ldexpl(1.0, e-64), LDBL_MIN), GMP_RNDN); - - mpfr_set_ld(frd, d, GMP_RNDN); - mpfr_sub(fra, frd, c, GMP_RNDN); - mpfr_div(fra, fra, frb, GMP_RNDN); - double u = fabs(mpfr_get_d(fra, GMP_RNDN)); - - return u; -} - -typedef union { - long double d; - __int128 u128; -} conv_t; - -long double rnd() { - conv_t c; - switch(random() & 15) { - case 0: return INFINITY; - case 1: return -INFINITY; - } -#ifdef ENABLE_SYS_getrandom - syscall(SYS_getrandom, &c.u128, sizeof(c.u128), 0); -#else - c.u128 = random() | ((__int128)random() << 31) | ((__int128)random() << (31*2)) | ((__int128)random() << (31*3)) | ((__int128)random() << (31*4)); -#endif - return c.d; -} - -long double rnd_fr() { - conv_t c; - do { -#ifdef ENABLE_SYS_getrandom - syscall(SYS_getrandom, &c.u128, sizeof(c.u128), 0); -#else - c.u128 = random() | ((__int128)random() << 31) | ((__int128)random() << (31*2)) | ((__int128)random() << (31*3)) | ((__int128)random() << (31*4)); -#endif - } while(!isnumberl(c.d)); - return c.d; -} - -long double rnd_zo() { - conv_t c; - do { -#ifdef ENABLE_SYS_getrandom - syscall(SYS_getrandom, &c.u128, sizeof(c.u128), 0); -#else - c.u128 = random() | ((__int128)random() << 31) | ((__int128)random() << (31*2)) | ((__int128)random() << (31*3)) | ((__int128)random() << (31*4)); -#endif - } while(!isnumberl(c.d) || c.d < -1 || 1 < c.d); - return c.d; -} - -void sinpifr(mpfr_t ret, long double d) { - mpfr_t frpi, frd; - mpfr_inits(frpi, frd, NULL); - - mpfr_const_pi(frpi, GMP_RNDN); - mpfr_set_d(frd, 1.0, GMP_RNDN); - mpfr_mul(frpi, frpi, frd, GMP_RNDN); - mpfr_set_ld(frd, d, GMP_RNDN); - mpfr_mul(frd, frpi, frd, GMP_RNDN); - mpfr_sin(ret, frd, GMP_RNDN); - - mpfr_clears(frpi, frd, NULL); -} - -void cospifr(mpfr_t ret, long double d) { - mpfr_t frpi, frd; - mpfr_inits(frpi, frd, NULL); - - mpfr_const_pi(frpi, GMP_RNDN); - mpfr_set_d(frd, 1.0, GMP_RNDN); - mpfr_mul(frpi, frpi, frd, GMP_RNDN); - mpfr_set_ld(frd, d, GMP_RNDN); - mpfr_mul(frd, frpi, frd, GMP_RNDN); - mpfr_cos(ret, frd, GMP_RNDN); - - mpfr_clears(frpi, frd, NULL); -} - -int main(int argc,char **argv) -{ - mpfr_t frw, frx, fry, frz; - - mpfr_set_default_prec(256); - mpfr_inits(fra, frb, frc, frd, frw, frx, fry, frz, NULL); - - conv_t cd; - long double d, t, d2, zo; - - int cnt, ecnt = 0; - - srandom(time(NULL)); - -#if 0 - cd.d = M_PIl; - mpfr_set_ld(frx, cd.d, GMP_RNDN); - cd.u128 += 3; - printf("%g\n", countULP2(cd.d, frx)); -#endif - - const long double rangemax = 1e+9; - - for(cnt = 0;ecnt < 1000;cnt++) { - switch(cnt & 7) { - case 0: - d = rnd(); - d2 = rnd(); - zo = rnd(); - break; - case 1: - cd.d = rint((2 * (double)random() / RAND_MAX - 1) * 1e+10) * M_PI_4; - cd.u128 += (random() & 0xff) - 0x7f; - d = cd.d; - d2 = rnd(); - zo = rnd(); - break; - default: - d = rnd_fr(); - d2 = rnd_fr(); - zo = rnd_zo(); - break; - } - - Sleef_longdouble2 sc = xsincospil_u05(d); - Sleef_longdouble2 sc2 = xsincospil_u35(d); - - { - const double rangemax2 = 1e+9; - - sinpifr(frx, d); - - double u0 = countULP2(t = sc.x, frx); - - if (u0 != 0 && ((fabsl(d) <= rangemax2 && u0 > 0.505) || fabsl(t) > 1 || !isnumberl(t))) { - printf("Pure C sincospil_u05 sin arg=%.30Lg ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP2(t = sc2.x, frx); - - if (u1 != 0 && ((fabsl(d) <= rangemax2 && u1 > 1.5) || fabsl(t) > 1 || !isnumberl(t))) { - printf("Pure C sincospil_u35 sin arg=%.30Lg ulp=%.20g\n", d, u1); - fflush(stdout); ecnt++; - } - } - - { - const double rangemax2 = 1e+9; - - cospifr(frx, d); - - double u0 = countULP2(t = sc.y, frx); - - if (u0 != 0 && ((fabsl(d) <= rangemax2 && u0 > 0.505) || fabsl(t) > 1 || !isnumberl(t))) { - printf("Pure C sincospil_u05 cos arg=%.30Lg ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP2(t = sc.y, frx); - - if (u1 != 0 && ((fabsl(d) <= rangemax2 && u1 > 1.5) || fabsl(t) > 1 || !isnumberl(t))) { - printf("Pure C sincospil_u35 cos arg=%.30Lg ulp=%.20g\n", d, u1); - fflush(stdout); ecnt++; - } - } - -#if 0 - double2 sc = xsincos(d); - double2 sc2 = xsincos_u1(d); - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_sin(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xsin(d), frx); - - if ((fabsl(d) <= rangemax && u0 > 3.5) || fabsl(t) > 1 || !isnumberl(t)) { - printf("Pure C sin arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP(sc.x, frx); - - if ((fabsl(d) <= rangemax && u1 > 3.5) || fabsl(t) > 1 || !isnumberl(t)) { - printf("Pure C sincos sin arg=%.20g ulp=%.20g\n", d, u1); - fflush(stdout); ecnt++; - } - - double u2 = countULP(t = xsin_u1(d), frx); - - if ((fabsl(d) <= rangemax && u2 > 1) || fabsl(t) > 1 || !isnumberl(t)) { - printf("Pure C sin_u1 arg=%.20g ulp=%.20g\n", d, u2); - fflush(stdout); ecnt++; - } - - double u3 = countULP(t = sc2.x, frx); - - if ((fabsl(d) <= rangemax && u3 > 1) || fabsl(t) > 1 || !isnumberl(t)) { - printf("Pure C sincos_u1 sin arg=%.20g ulp=%.20g\n", d, u3); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_cos(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xcos(d), frx); - - if ((fabsl(d) <= rangemax && u0 > 3.5) || fabsl(t) > 1 || !isnumberl(t)) { - printf("Pure C cos arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP(t = sc.y, frx); - - if ((fabsl(d) <= rangemax && u1 > 3.5) || fabsl(t) > 1 || !isnumberl(t)) { - printf("Pure C sincos cos arg=%.20g ulp=%.20g\n", d, u1); - fflush(stdout); ecnt++; - } - - double u2 = countULP(t = xcos_u1(d), frx); - - if ((fabsl(d) <= rangemax && u2 > 1) || fabsl(t) > 1 || !isnumberl(t)) { - printf("Pure C cos_u1 arg=%.20g ulp=%.20g\n", d, u2); - fflush(stdout); ecnt++; - } - - double u3 = countULP(t = sc2.y, frx); - - if ((fabsl(d) <= rangemax && u3 > 1) || fabsl(t) > 1 || !isnumberl(t)) { - printf("Pure C sincos_u1 cos arg=%.20g ulp=%.20g\n", d, u3); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_tan(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xtan(d), frx); - - if ((fabsl(d) < 1e+7 && u0 > 3.5) || (fabsl(d) <= rangemax && u0 > 5) || isnan(t)) { - printf("Pure C tan arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP(t = xtan_u1(d), frx); - - if ((fabsl(d) <= rangemax && u1 > 1) || isnan(t)) { - printf("Pure C tan_u1 arg=%.20g ulp=%.20g\n", d, u1); - fflush(stdout); ecnt++; - } - } - - d = rnd_fr(); - double d2 = rnd_fr(), zo = rnd_zo(); - - { - mpfr_set_d(frx, fabsl(d), GMP_RNDN); - mpfr_log(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xlog(fabsl(d)), frx); - - if (u0 > 3.5) { - printf("Pure C log arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP(t = xlog_u1(fabsl(d)), frx); - - if (u1 > 1) { - printf("Pure C log_u1 arg=%.20g ulp=%.20g\n", d, u1); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, fabsl(d), GMP_RNDN); - mpfr_log10(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xlog10(fabsl(d)), frx); - - if (u0 > 1) { - printf("Pure C log10 arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_log1p(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xlog1p(d), frx); - - if ((-1 <= d && d <= 1e+307 && u0 > 1) || - (d < -1 && !isnan(t)) || - (d > 1e+307 && !(u0 <= 1 || isinf(t)))) { - printf("Pure C log1p arg=%.20g ulp=%.20g\n", d, u0); - printf("%g\n", t); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_exp(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xexp(d), frx); - - if (u0 > 1) { - printf("Pure C exp arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_exp2(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xexp2(d), frx); - - if (u0 > 1) { - printf("Pure C exp2 arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_exp10(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xexp10(d), frx); - - if (u0 > 1) { - printf("Pure C exp10 arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_expm1(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xexpm1(d), frx); - - if (u0 > 1) { - printf("Pure C expm1 arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_set_d(fry, d2, GMP_RNDN); - mpfr_pow(frx, fry, frx, GMP_RNDN); - - double u0 = countULP(t = xpow(d2, d), frx); - - if (u0 > 1) { - printf("Pure C pow arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_cbrt(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xcbrt(d), frx); - - if (u0 > 3.5) { - printf("Pure C cbrt arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP(t = xcbrt_u1(d), frx); - - if (u1 > 1) { - printf("Pure C cbrt_u1 arg=%.20g ulp=%.20g\n", d, u1); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, zo, GMP_RNDN); - mpfr_asin(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xasin(zo), frx); - - if (u0 > 3.5) { - printf("Pure C asin arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP(t = xasin_u1(zo), frx); - - if (u1 > 1) { - printf("Pure C asin_u1 arg=%.20g ulp=%.20g\n", d, u1); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, zo, GMP_RNDN); - mpfr_acos(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xacos(zo), frx); - - if (u0 > 3.5) { - printf("Pure C acos arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP(t = xacos_u1(zo), frx); - - if (u1 > 1) { - printf("Pure C acos_u1 arg=%.20g ulp=%.20g\n", d, u1); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_atan(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xatan(d), frx); - - if (u0 > 3.5) { - printf("Pure C atan arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP(t = xatan_u1(d), frx); - - if (u1 > 1) { - printf("Pure C atan_u1 arg=%.20g ulp=%.20g\n", d, u1); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_set_d(fry, d2, GMP_RNDN); - mpfr_atan2(frx, fry, frx, GMP_RNDN); - - double u0 = countULP(t = xatan2(d2, d), frx); - - if (u0 > 3.5) { - printf("Pure C atan2 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP2(t = xatan2_u1(d2, d), frx); - - if (u1 > 1) { - printf("Pure C atan2_u1 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u1); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_sinh(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xsinh(d), frx); - - if ((fabsl(d) <= 709 && u0 > 1) || - (d > 709 && !(u0 <= 1 || (isinf(t) && t > 0))) || - (d < -709 && !(u0 <= 1 || (isinf(t) && t < 0)))) { - printf("Pure C sinh arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_cosh(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xcosh(d), frx); - - if ((fabsl(d) <= 709 && u0 > 1) || !(u0 <= 1 || (isinf(t) && t > 0))) { - printf("Pure C cosh arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_tanh(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xtanh(d), frx); - - if (u0 > 1) { - printf("Pure C tanh arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_asinh(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xasinh(d), frx); - - if ((fabsl(d) < sqrt(DBL_MAX) && u0 > 1) || - (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || - (d <= -sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t < 0)))) { - printf("Pure C asinh arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_acosh(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xacosh(d), frx); - - if ((fabsl(d) < sqrt(DBL_MAX) && u0 > 1) || - (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || - (d <= -sqrt(DBL_MAX) && !isnan(t))) { - printf("Pure C acosh arg=%.20g ulp=%.20g\n", d, u0); - printf("%.20g\n", t); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_atanh(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xatanh(d), frx); - - if (u0 > 1) { - printf("Pure C atanh arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } -#endif - } -} diff --git a/src/libm-tester/tester2qp.c b/src/libm-tester/tester2qp.c deleted file mode 100644 index a0a071e1..00000000 --- a/src/libm-tester/tester2qp.c +++ /dev/null @@ -1,609 +0,0 @@ -// Copyright Naoki Shibata and contributors 2010 - 2021. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#define _GNU_SOURCE -#include -#include -#include - -#include "sleef.h" - -#include "f128util.h" - -#define DORENAME -#include "rename.h" - -#define POSITIVE_INFINITY INFINITY -#define NEGATIVE_INFINITY (-INFINITY) - -int isnumberq(Sleef_quad x) { return !isinfq(x) && !isnanq(x); } -int isPlusZeroq(Sleef_quad x) { return x == 0 && copysignq(1, x) == 1; } -int isMinusZeroq(Sleef_quad x) { return x == 0 && copysignq(1, x) == -1; } - -mpfr_t fra, frb, frc, frd; - -double countULP(Sleef_quad d, mpfr_t c) { - Sleef_quad c2 = mpfr_get_f128(c, GMP_RNDN); - if (c2 == 0 && d != 0) return 10000; - //if (isPlusZeroq(c2) && !isPlusZeroq(d)) return 10003; - //if (isMinusZeroq(c2) && !isMinusZeroq(d)) return 10004; - if (isnanq(c2) && isnanq(d)) return 0; - if (isnanq(c2) || isnanq(d)) return 10001; - if (c2 == POSITIVE_INFINITY && d == POSITIVE_INFINITY) return 0; - if (c2 == NEGATIVE_INFINITY && d == NEGATIVE_INFINITY) return 0; - if (!isnumberq(c2) && !isnumberq(d)) return 0; - - int e; - frexpq(mpfr_get_f128(c, GMP_RNDN), &e); - mpfr_set_f128(frb, fmaxq(ldexpq(1.0, e-113), FLT128_DENORM_MIN), GMP_RNDN); - - mpfr_set_f128(frd, d, GMP_RNDN); - mpfr_sub(fra, frd, c, GMP_RNDN); - mpfr_div(fra, fra, frb, GMP_RNDN); - double u = fabs(mpfr_get_d(fra, GMP_RNDN)); - - return u; -} - -double countULP2(Sleef_quad d, mpfr_t c) { - Sleef_quad c2 = mpfr_get_f128(c, GMP_RNDN); - if (c2 == 0 && d != 0) return 10000; - //if (isPlusZeroq(c2) && !isPlusZeroq(d)) return 10003; - //if (isMinusZeroq(c2) && !isMinusZeroq(d)) return 10004; - if (isnanq(c2) && isnanq(d)) return 0; - if (isnanq(c2) || isnanq(d)) return 10001; - if (c2 == POSITIVE_INFINITY && d == POSITIVE_INFINITY) return 0; - if (c2 == NEGATIVE_INFINITY && d == NEGATIVE_INFINITY) return 0; - if (!isnumberq(c2) && !isnumberq(d)) return 0; - - int e; - frexpq(mpfr_get_f128(c, GMP_RNDN), &e); - mpfr_set_f128(frb, fmaxq(ldexpq(1.0, e-113), FLT128_MIN), GMP_RNDN); - - mpfr_set_f128(frd, d, GMP_RNDN); - mpfr_sub(fra, frd, c, GMP_RNDN); - mpfr_div(fra, fra, frb, GMP_RNDN); - double u = fabs(mpfr_get_d(fra, GMP_RNDN)); - - return u; -} - -typedef union { - Sleef_quad d; - __int128 u128; - uint64_t u[2]; -} conv_t; - -Sleef_quad rnd() { - conv_t c; - switch(random() & 15) { - case 0: return INFINITY; - case 1: return -INFINITY; - } - syscall(SYS_getrandom, &c.u128, sizeof(c.u128), 0); - return c.d; -} - -Sleef_quad rnd_fr() { - conv_t c; - do { - syscall(SYS_getrandom, &c.u128, sizeof(c.u128), 0); - } while(!isnumberq(c.d)); - return c.d; -} - -Sleef_quad rnd_zo() { - conv_t c; - do { - syscall(SYS_getrandom, &c.u128, sizeof(c.u128), 0); - } while(!isnumberq(c.d) || c.d < -1 || 1 < c.d); - return c.d; -} - -void sinpifr(mpfr_t ret, Sleef_quad d) { - mpfr_t frpi, frd; - mpfr_inits(frpi, frd, NULL); - - mpfr_const_pi(frpi, GMP_RNDN); - mpfr_set_d(frd, 1.0, GMP_RNDN); - mpfr_mul(frpi, frpi, frd, GMP_RNDN); - mpfr_set_f128(frd, d, GMP_RNDN); - mpfr_mul(frd, frpi, frd, GMP_RNDN); - mpfr_sin(ret, frd, GMP_RNDN); - - mpfr_clears(frpi, frd, NULL); -} - -void cospifr(mpfr_t ret, Sleef_quad d) { - mpfr_t frpi, frd; - mpfr_inits(frpi, frd, NULL); - - mpfr_const_pi(frpi, GMP_RNDN); - mpfr_set_d(frd, 1.0, GMP_RNDN); - mpfr_mul(frpi, frpi, frd, GMP_RNDN); - mpfr_set_f128(frd, d, GMP_RNDN); - mpfr_mul(frd, frpi, frd, GMP_RNDN); - mpfr_cos(ret, frd, GMP_RNDN); - - mpfr_clears(frpi, frd, NULL); -} - -int main(int argc,char **argv) -{ - mpfr_t frw, frx, fry, frz; - - mpfr_set_default_prec(2048); - mpfr_inits(fra, frb, frc, frd, frw, frx, fry, frz, NULL); - - conv_t cd; - Sleef_quad d, t, d2, zo; - - int cnt, ecnt = 0; - - srandom(time(NULL)); - -#if 0 - cd.d = M_PIq; - mpfr_set_f128(frx, cd.d, GMP_RNDN); - cd.u128 += 3; - printf("%g\n", countULP2(cd.d, frx)); -#endif - - const Sleef_quad rangemax = 1e+9; - - for(cnt = 0;ecnt < 1000;cnt++) { - switch(cnt & 7) { - case 0: - d = rnd(); - d2 = rnd(); - zo = rnd(); - break; - case 1: - cd.d = rint((2 * (double)random() / RAND_MAX - 1) * 1e+10) * M_PI_4; - cd.u128 += (random() & 0xff) - 0x7f; - d = cd.d; - d2 = rnd(); - zo = rnd(); - break; - default: - d = rnd_fr(); - d2 = rnd_fr(); - zo = rnd_zo(); - break; - } - - Sleef_quad2 sc = xsincospiq_u05(d); - Sleef_quad2 sc2 = xsincospiq_u35(d); - - { - const double rangemax2 = 1e+9; - - sinpifr(frx, d); - - double u0 = countULP2(t = sc.x, frx); - - if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.505) || fabs(t) > 1 || !isnumberq(t))) { - printf("Pure C sincospiq_u05 sin arg="); printf128(d); printf(" ulp=%.20g\n", u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP2(t = sc2.x, frx); - - if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 2.0) || fabs(t) > 1 || !isnumberq(t))) { - printf("Pure C sincospiq_u35 sin arg=%.30Lg ulp=%.20g\n", (long double)d, u1); - fflush(stdout); ecnt++; - } - - } - - { - const double rangemax2 = 1e+9; - - cospifr(frx, d); - - double u0 = countULP2(t = sc.y, frx); - - if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.505) || fabs(t) > 1 || !isnumberq(t))) { - printf("Pure C sincospiq_u05 cos arg=%.30Lg ulp=%.20g\n", (long double)d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP2(t = sc.y, frx); - - if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 2.0) || fabs(t) > 1 || !isnumberq(t))) { - printf("Pure C sincospiq_u35 cos arg=%.30Lg ulp=%.20g\n", (long double)d, u1); - fflush(stdout); ecnt++; - } - - } - -#if 0 - double2 sc = xsincos(d); - double2 sc2 = xsincos_u1(d); - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_sin(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xsin(d), frx); - - if ((fabs(d) <= rangemax && u0 > 3.5) || fabs(t) > 1 || !isnumberq(t)) { - printf("Pure C sin arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP(sc.x, frx); - - if ((fabs(d) <= rangemax && u1 > 3.5) || fabs(t) > 1 || !isnumberq(t)) { - printf("Pure C sincos sin arg=%.20g ulp=%.20g\n", d, u1); - fflush(stdout); ecnt++; - } - - double u2 = countULP(t = xsin_u1(d), frx); - - if ((fabs(d) <= rangemax && u2 > 1) || fabs(t) > 1 || !isnumberq(t)) { - printf("Pure C sin_u1 arg=%.20g ulp=%.20g\n", d, u2); - fflush(stdout); ecnt++; - } - - double u3 = countULP(t = sc2.x, frx); - - if ((fabs(d) <= rangemax && u3 > 1) || fabs(t) > 1 || !isnumberq(t)) { - printf("Pure C sincos_u1 sin arg=%.20g ulp=%.20g\n", d, u3); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_cos(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xcos(d), frx); - - if ((fabs(d) <= rangemax && u0 > 3.5) || fabs(t) > 1 || !isnumberq(t)) { - printf("Pure C cos arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP(t = sc.y, frx); - - if ((fabs(d) <= rangemax && u1 > 3.5) || fabs(t) > 1 || !isnumberq(t)) { - printf("Pure C sincos cos arg=%.20g ulp=%.20g\n", d, u1); - fflush(stdout); ecnt++; - } - - double u2 = countULP(t = xcos_u1(d), frx); - - if ((fabs(d) <= rangemax && u2 > 1) || fabs(t) > 1 || !isnumberq(t)) { - printf("Pure C cos_u1 arg=%.20g ulp=%.20g\n", d, u2); - fflush(stdout); ecnt++; - } - - double u3 = countULP(t = sc2.y, frx); - - if ((fabs(d) <= rangemax && u3 > 1) || fabs(t) > 1 || !isnumberq(t)) { - printf("Pure C sincos_u1 cos arg=%.20g ulp=%.20g\n", d, u3); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_tan(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xtan(d), frx); - - if ((fabs(d) < 1e+7 && u0 > 3.5) || (fabs(d) <= rangemax && u0 > 5) || isnan(t)) { - printf("Pure C tan arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP(t = xtan_u1(d), frx); - - if ((fabs(d) <= rangemax && u1 > 1) || isnan(t)) { - printf("Pure C tan_u1 arg=%.20g ulp=%.20g\n", d, u1); - fflush(stdout); ecnt++; - } - } - - d = rnd_fr(); - double d2 = rnd_fr(), zo = rnd_zo(); - - { - mpfr_set_d(frx, fabs(d), GMP_RNDN); - mpfr_log(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xlog(fabs(d)), frx); - - if (u0 > 3.5) { - printf("Pure C log arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP(t = xlog_u1(fabs(d)), frx); - - if (u1 > 1) { - printf("Pure C log_u1 arg=%.20g ulp=%.20g\n", d, u1); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, fabs(d), GMP_RNDN); - mpfr_log10(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xlog10(fabs(d)), frx); - - if (u0 > 1) { - printf("Pure C log10 arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_log1p(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xlog1p(d), frx); - - if ((-1 <= d && d <= 1e+307 && u0 > 1) || - (d < -1 && !isnan(t)) || - (d > 1e+307 && !(u0 <= 1 || isinf(t)))) { - printf("Pure C log1p arg=%.20g ulp=%.20g\n", d, u0); - printf("%g\n", t); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_exp(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xexp(d), frx); - - if (u0 > 1) { - printf("Pure C exp arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_exp2(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xexp2(d), frx); - - if (u0 > 1) { - printf("Pure C exp2 arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_exp10(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xexp10(d), frx); - - if (u0 > 1) { - printf("Pure C exp10 arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_expm1(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xexpm1(d), frx); - - if (u0 > 1) { - printf("Pure C expm1 arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_set_d(fry, d2, GMP_RNDN); - mpfr_pow(frx, fry, frx, GMP_RNDN); - - double u0 = countULP(t = xpow(d2, d), frx); - - if (u0 > 1) { - printf("Pure C pow arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_cbrt(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xcbrt(d), frx); - - if (u0 > 3.5) { - printf("Pure C cbrt arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP(t = xcbrt_u1(d), frx); - - if (u1 > 1) { - printf("Pure C cbrt_u1 arg=%.20g ulp=%.20g\n", d, u1); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, zo, GMP_RNDN); - mpfr_asin(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xasin(zo), frx); - - if (u0 > 3.5) { - printf("Pure C asin arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP(t = xasin_u1(zo), frx); - - if (u1 > 1) { - printf("Pure C asin_u1 arg=%.20g ulp=%.20g\n", d, u1); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, zo, GMP_RNDN); - mpfr_acos(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xacos(zo), frx); - - if (u0 > 3.5) { - printf("Pure C acos arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP(t = xacos_u1(zo), frx); - - if (u1 > 1) { - printf("Pure C acos_u1 arg=%.20g ulp=%.20g\n", d, u1); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_atan(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xatan(d), frx); - - if (u0 > 3.5) { - printf("Pure C atan arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP(t = xatan_u1(d), frx); - - if (u1 > 1) { - printf("Pure C atan_u1 arg=%.20g ulp=%.20g\n", d, u1); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_set_d(fry, d2, GMP_RNDN); - mpfr_atan2(frx, fry, frx, GMP_RNDN); - - double u0 = countULP(t = xatan2(d2, d), frx); - - if (u0 > 3.5) { - printf("Pure C atan2 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); - fflush(stdout); ecnt++; - } - - double u1 = countULP2(t = xatan2_u1(d2, d), frx); - - if (u1 > 1) { - printf("Pure C atan2_u1 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u1); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_sinh(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xsinh(d), frx); - - if ((fabs(d) <= 709 && u0 > 1) || - (d > 709 && !(u0 <= 1 || (isinf(t) && t > 0))) || - (d < -709 && !(u0 <= 1 || (isinf(t) && t < 0)))) { - printf("Pure C sinh arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_cosh(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xcosh(d), frx); - - if ((fabs(d) <= 709 && u0 > 1) || !(u0 <= 1 || (isinf(t) && t > 0))) { - printf("Pure C cosh arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_tanh(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xtanh(d), frx); - - if (u0 > 1) { - printf("Pure C tanh arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_asinh(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xasinh(d), frx); - - if ((fabs(d) < sqrt(DBL_MAX) && u0 > 1) || - (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || - (d <= -sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t < 0)))) { - printf("Pure C asinh arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_acosh(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xacosh(d), frx); - - if ((fabs(d) < sqrt(DBL_MAX) && u0 > 1) || - (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || - (d <= -sqrt(DBL_MAX) && !isnan(t))) { - printf("Pure C acosh arg=%.20g ulp=%.20g\n", d, u0); - printf("%.20g\n", t); - fflush(stdout); ecnt++; - } - } - - { - mpfr_set_d(frx, d, GMP_RNDN); - mpfr_atanh(frx, frx, GMP_RNDN); - - double u0 = countULP(t = xatanh(d), frx); - - if (u0 > 1) { - printf("Pure C atanh arg=%.20g ulp=%.20g\n", d, u0); - fflush(stdout); ecnt++; - } - } -#endif - } -} diff --git a/src/libm-tester/testerutil.c b/src/libm-tester/testerutil.c index c4c73c3f..d1df8508 100644 --- a/src/libm-tester/testerutil.c +++ b/src/libm-tester/testerutil.c @@ -45,7 +45,7 @@ int isMinusZero(double x) { return x == 0 && copysign(1, x) == -1; } double sign(double d) { return d < 0 ? -1 : 1; } int xisnan(double x) { return x != x; } -int isnumberf(float x) { return !isinff(x) && !isnanf(x); } +int isnumberf(float x) { return !isinf(x) && !isnan(x); } int isPlusZerof(float x) { return x == 0 && copysignf(1, x) == 1; } int isMinusZerof(float x) { return x == 0 && copysignf(1, x) == -1; } float signf(float d) { return d < 0 ? -1 : 1; } diff --git a/src/libm/CMakeLists.txt b/src/libm/CMakeLists.txt index 53933298..b86d6031 100644 --- a/src/libm/CMakeLists.txt +++ b/src/libm/CMakeLists.txt @@ -347,11 +347,6 @@ endif() # Original sleef sources set(STANDARD_SOURCES sleefdp.c sleefsp.c rempitab.c) -# Check for different precision support and add sources accordingly -if(COMPILER_SUPPORTS_LONG_DOUBLE) - list(APPEND STANDARD_SOURCES sleefld.c) -endif() - add_library(${TARGET_LIBSLEEF} ${STANDARD_SOURCES}) add_dependencies(${TARGET_LIBSLEEF} ${TARGET_HEADERS}) set_target_properties(${TARGET_LIBSLEEF} PROPERTIES @@ -365,13 +360,6 @@ target_compile_definitions(${TARGET_LIBSLEEF} PRIVATE DORENAME=1 ${COMMON_TARGET_DEFINITIONS} ) -if(COMPILER_SUPPORTS_FLOAT128) - # TODO: Not supported for LLVM bitcode gen as it has a specific compilation flags - target_sources(${TARGET_LIBSLEEF} PRIVATE sleefqp.c) - target_compile_definitions(${TARGET_LIBSLEEF} - PRIVATE ENABLEFLOAT128=1 ${COMMON_TARGET_DEFINITIONS}) -endif() - if(COMPILER_SUPPORTS_BUILTIN_MATH) target_compile_definitions(${TARGET_LIBSLEEF} PRIVATE ENABLE_BUILTIN_MATH=1) endif() diff --git a/src/libm/sleefdp.c b/src/libm/sleefdp.c index 64bb90ca..d251d4d2 100644 --- a/src/libm/sleefdp.c +++ b/src/libm/sleefdp.c @@ -35,21 +35,15 @@ extern const double Sleef_rempitabdp[]; #include "estrin.h" static INLINE CONST int64_t doubleToRawLongBits(double d) { - union { - double f; - int64_t i; - } tmp; - tmp.f = d; - return tmp.i; + int64_t ret; + memcpy(&ret, &d, sizeof(ret)); + return ret; } static INLINE CONST double longBitsToDouble(int64_t i) { - union { - double f; - int64_t i; - } tmp; - tmp.i = i; - return tmp.f; + double ret; + memcpy(&ret, &i, sizeof(ret)); + return ret; } static INLINE CONST double fabsk(double x) { @@ -2334,57 +2328,59 @@ EXPORT CONST double xhypot_u35(double x, double y) { } EXPORT CONST double xnextafter(double x, double y) { - union { - double f; - int64_t i; - } cx; + double cxf; + int64_t cxi; x = x == 0 ? mulsign(0, y) : x; - cx.f = x; - int c = (cx.i < 0) == (y < x); - if (c) cx.i = -(cx.i ^ (UINT64_C(1) << 63)); + cxf = x; + memcpy(&cxi, &cxf, sizeof(cxi)); + + int c = (cxi < 0) == (y < x); + if (c) cxi = -(cxi ^ (UINT64_C(1) << 63)); - if (x != y) cx.i--; + if (x != y) cxi--; - if (c) cx.i = -(cx.i ^ (UINT64_C(1) << 63)); + if (c) cxi = -(cxi ^ (UINT64_C(1) << 63)); - if (cx.f == 0 && x != 0) cx.f = mulsign(0, x); - if (x == 0 && y == 0) cx.f = y; - if (xisnan(x) || xisnan(y)) cx.f = SLEEF_NAN; + memcpy(&cxf, &cxi, sizeof(cxf)); + if (cxf == 0 && x != 0) cxf = mulsign(0, x); + if (x == 0 && y == 0) cxf = y; + if (xisnan(x) || xisnan(y)) cxf = SLEEF_NAN; - return cx.f; + return cxf; } EXPORT CONST double xfrfrexp(double x) { - union { - double f; - uint64_t u; - } cx; + double cxf; + uint64_t cxu; if (fabsk(x) < DBL_MIN) x *= (UINT64_C(1) << 63); - cx.f = x; - cx.u &= ~UINT64_C(0x7ff0000000000000); - cx.u |= UINT64_C(0x3fe0000000000000); + cxf = x; + memcpy(&cxu, &cxf, sizeof(cxu)); - if (xisinf(x)) cx.f = mulsign(SLEEF_INFINITY, x); - if (x == 0) cx.f = x; + cxu &= ~UINT64_C(0x7ff0000000000000); + cxu |= UINT64_C(0x3fe0000000000000); + + memcpy(&cxf, &cxu, sizeof(cxf)); + if (xisinf(x)) cxf = mulsign(SLEEF_INFINITY, x); + if (x == 0) cxf = x; - return cx.f; + return cxf; } EXPORT CONST int xexpfrexp(double x) { - union { - double f; - uint64_t u; - } cx; + double cxf; + uint64_t cxu; int ret = 0; if (fabsk(x) < DBL_MIN) { x *= (UINT64_C(1) << 63); ret = -63; } - cx.f = x; - ret += (int32_t)(((cx.u >> 52) & 0x7ff)) - 0x3fe; + cxf = x; + memcpy(&cxu, &cxf, sizeof(cxu)); + + ret += (int32_t)(((cxu >> 52) & 0x7ff)) - 0x3fe; if (x == 0 || xisnan(x) || xisinf(x)) ret = 0; diff --git a/src/libm/sleefinline_cuda_header.h.org b/src/libm/sleefinline_cuda_header.h.org index 846fb7fd..abb729ba 100644 --- a/src/libm/sleefinline_cuda_header.h.org +++ b/src/libm/sleefinline_cuda_header.h.org @@ -7,6 +7,14 @@ // Use --fmad=false option to compile this file // Include cmath, cfloat and cstdint before including this file +#ifndef SLEEF_FP_ILOGB0 +#define SLEEF_FP_ILOGB0 ((int)-2147483648) +#endif + +#ifndef SLEEF_FP_ILOGBNAN +#define SLEEF_FP_ILOGBNAN ((int)2147483647) +#endif + __device__ const double Sleef_rempitabdp[] = { 0.15915494309189531785, 1.7916237278037667488e-17, 2.5454160968749269937e-33, 2.1132476107887107169e-49, 0.03415494309189533173, 4.0384494702232122736e-18, 1.0046721413651383112e-33, 2.1132476107887107169e-49, diff --git a/src/libm/sleefinline_header.h.org b/src/libm/sleefinline_header.h.org index d4e84205..8e6a784c 100644 --- a/src/libm/sleefinline_header.h.org +++ b/src/libm/sleefinline_header.h.org @@ -20,6 +20,14 @@ #pragma fp_contract (off) #endif +#ifndef SLEEF_FP_ILOGB0 +#define SLEEF_FP_ILOGB0 ((int)-2147483648) +#endif + +#ifndef SLEEF_FP_ILOGBNAN +#define SLEEF_FP_ILOGBNAN ((int)2147483647) +#endif + #ifndef __SLEEF_REMPITAB__ #define __SLEEF_REMPITAB__ const double Sleef_rempitabdp[] = { diff --git a/src/libm/sleefld.c b/src/libm/sleefld.c deleted file mode 100644 index 3cba5162..00000000 --- a/src/libm/sleefld.c +++ /dev/null @@ -1,419 +0,0 @@ -// Copyright Naoki Shibata and contributors 2010 - 2021. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -// Always use -ffp-contract=off option to compile SLEEF. - -#include - -#include -#include -#include - -#include "misc.h" - -#ifdef DORENAME -#include "rename.h" -#endif - -#if (defined(_MSC_VER)) -#pragma fp_contract (off) -#endif - -static INLINE CONST long double mlal(long double x, long double y, long double z) { return x * y + z; } -static INLINE CONST long double xrintl(long double x) { return x < 0 ? (int)(x - 0.5) : (int)(x + 0.5); } -static INLINE CONST int64_t xceill(long double x) { return (int64_t)x + (x < 0 ? 0 : 1); } -static INLINE CONST long double xtruncl(long double x) { return (long double)(int)x; } - -static INLINE CONST int xisnanl(long double x) { return x != x; } -static INLINE CONST int xisinfl(long double x) { return x == SLEEF_INFINITYl || x == -SLEEF_INFINITYl; } -static INLINE CONST int xisminfl(long double x) { return x == -SLEEF_INFINITYl; } -static INLINE CONST int xispinfl(long double x) { return x == SLEEF_INFINITYl; } - -static INLINE CONST long double xfabsl(long double x) { return x >= 0 ? x : -x; } - -// - -#ifndef NDEBUG -static int checkfp(long double x) { - if (xisinfl(x) || xisnanl(x)) return 1; - return 0; -} -#endif - -static INLINE CONST long double upperl(long double d) { - union { - long double ld; - uint32_t u[4]; - } cnv; - - cnv.ld = d; - cnv.u[0] = 0; - return cnv.ld; -} - -static INLINE CONST Sleef_longdouble2 dl(long double h, long double l) { - Sleef_longdouble2 ret; - ret.x = h; ret.y = l; - return ret; -} - -static INLINE CONST Sleef_longdouble2 dlnormalize_l2_l2(Sleef_longdouble2 t) { - Sleef_longdouble2 s; - - s.x = t.x + t.y; - s.y = t.x - s.x + t.y; - - return s; -} - -static INLINE CONST Sleef_longdouble2 dlscale_l2_l2_l(Sleef_longdouble2 d, long double s) { - Sleef_longdouble2 r; - - r.x = d.x * s; - r.y = d.y * s; - - return r; -} - -static INLINE CONST Sleef_longdouble2 dlneg_l2_l2(Sleef_longdouble2 d) { - Sleef_longdouble2 r; - - r.x = -d.x; - r.y = -d.y; - - return r; -} - -static INLINE CONST Sleef_longdouble2 dladd_l2_l_l(long double x, long double y) { - // |x| >= |y| - - Sleef_longdouble2 r; - -#ifndef NDEBUG - if (!(checkfp(x) || checkfp(y) || xfabsl(x) >= xfabsl(y))) { - fprintf(stderr, "[dladd_l2_l_l : %Lg, %Lg]\n", x, y); - fflush(stderr); - } -#endif - - r.x = x + y; - r.y = x - r.x + y; - - return r; -} - -static INLINE CONST Sleef_longdouble2 dladd2_l2_l_l(long double x, long double y) { - Sleef_longdouble2 r; - - r.x = x + y; - long double v = r.x - x; - r.y = (x - (r.x - v)) + (y - v); - - return r; -} - -static INLINE CONST Sleef_longdouble2 dladd_l2_l2_l(Sleef_longdouble2 x, long double y) { - // |x| >= |y| - - Sleef_longdouble2 r; - -#ifndef NDEBUG - if (!(checkfp(x.x) || checkfp(y) || xfabsl(x.x) >= xfabsl(y))) { - fprintf(stderr, "[dladd_l2_l2_l : %Lg %Lg]\n", x.x, y); - fflush(stderr); - } -#endif - - r.x = x.x + y; - r.y = x.x - r.x + y + x.y; - - return r; -} - -static INLINE CONST Sleef_longdouble2 dladd2_l2_l2_l(Sleef_longdouble2 x, long double y) { - // |x| >= |y| - - Sleef_longdouble2 r; - - r.x = x.x + y; - long double v = r.x - x.x; - r.y = (x.x - (r.x - v)) + (y - v); - r.y += x.y; - - return r; -} - -static INLINE CONST Sleef_longdouble2 dladd_l2_l_l2(long double x, Sleef_longdouble2 y) { - // |x| >= |y| - - Sleef_longdouble2 r; - -#ifndef NDEBUG - if (!(checkfp(x) || checkfp(y.x) || xfabsl(x) >= xfabsl(y.x))) { - fprintf(stderr, "[dladd_l2_l_l2 : %Lg %Lg]\n", x, y.x); - fflush(stderr); - } -#endif - - r.x = x + y.x; - r.y = x - r.x + y.x + y.y; - - return r; -} - -static INLINE CONST Sleef_longdouble2 dladd2_l2_l_l2(long double x, Sleef_longdouble2 y) { - Sleef_longdouble2 r; - - r.x = x + y.x; - long double v = r.x - x; - r.y = (x - (r.x - v)) + (y.x - v) + y.y; - - return r; -} - -static INLINE CONST Sleef_longdouble2 dladd_l2_l2_l2(Sleef_longdouble2 x, Sleef_longdouble2 y) { - // |x| >= |y| - - Sleef_longdouble2 r; - -#ifndef NDEBUG - if (!(checkfp(x.x) || checkfp(y.x) || xfabsl(x.x) >= xfabsl(y.x))) { - fprintf(stderr, "[dladd_l2_l2_l2 : %Lg %Lg]\n", x.x, y.x); - fflush(stderr); - } -#endif - - r.x = x.x + y.x; - r.y = x.x - r.x + y.x + x.y + y.y; - - return r; -} - -static INLINE CONST Sleef_longdouble2 dladd2_l2_l2_l2(Sleef_longdouble2 x, Sleef_longdouble2 y) { - Sleef_longdouble2 r; - - r.x = x.x + y.x; - long double v = r.x - x.x; - r.y = (x.x - (r.x - v)) + (y.x - v); - r.y += x.y + y.y; - - return r; -} - -static INLINE CONST Sleef_longdouble2 dlsub_l2_l2_l2(Sleef_longdouble2 x, Sleef_longdouble2 y) { - // |x| >= |y| - - Sleef_longdouble2 r; - -#ifndef NDEBUG - if (!(checkfp(x.x) || checkfp(y.x) || xfabsl(x.x) >= xfabsl(y.x))) { - fprintf(stderr, "[dlsub_l2_l2_l2 : %Lg %Lg]\n", x.x, y.x); - fflush(stderr); - } -#endif - - r.x = x.x - y.x; - r.y = x.x - r.x - y.x + x.y - y.y; - - return r; -} - -static INLINE CONST Sleef_longdouble2 dldiv_l2_l2_l2(Sleef_longdouble2 n, Sleef_longdouble2 d) { - long double t = 1.0 / d.x; - long double dh = upperl(d.x), dl = d.x - dh; - long double th = upperl(t ), tl = t - th; - long double nhh = upperl(n.x), nhl = n.x - nhh; - - Sleef_longdouble2 q; - - q.x = n.x * t; - - long double u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl + - q.x * (1 - dh * th - dh * tl - dl * th - dl * tl); - - q.y = t * (n.y - q.x * d.y) + u; - - return q; -} - -static INLINE CONST Sleef_longdouble2 dlmul_l2_l_l(long double x, long double y) { - long double xh = upperl(x), xl = x - xh; - long double yh = upperl(y), yl = y - yh; - Sleef_longdouble2 r; - - r.x = x * y; - r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl; - - return r; -} - -static INLINE CONST Sleef_longdouble2 dlmul_l2_l2_l(Sleef_longdouble2 x, long double y) { - long double xh = upperl(x.x), xl = x.x - xh; - long double yh = upperl(y ), yl = y - yh; - Sleef_longdouble2 r; - - r.x = x.x * y; - r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y; - - return r; -} - -static INLINE CONST Sleef_longdouble2 dlmul_l2_l2_l2(Sleef_longdouble2 x, Sleef_longdouble2 y) { - long double xh = upperl(x.x), xl = x.x - xh; - long double yh = upperl(y.x), yl = y.x - yh; - Sleef_longdouble2 r; - - r.x = x.x * y.x; - r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x; - - return r; -} - -static INLINE CONST Sleef_longdouble2 dlsqu_l2_l2(Sleef_longdouble2 x) { - long double xh = upperl(x.x), xl = x.x - xh; - Sleef_longdouble2 r; - - r.x = x.x * x.x; - r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y); - - return r; -} - -static INLINE CONST Sleef_longdouble2 dlrec_l2_l(long double d) { - long double t = 1.0 / d; - long double dh = upperl(d), dl = d - dh; - long double th = upperl(t), tl = t - th; - Sleef_longdouble2 q; - - q.x = t; - q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl); - - return q; -} - -static INLINE CONST Sleef_longdouble2 dlrec_l2_l2(Sleef_longdouble2 d) { - long double t = 1.0 / d.x; - long double dh = upperl(d.x), dl = d.x - dh; - long double th = upperl(t ), tl = t - th; - Sleef_longdouble2 q; - - q.x = t; - q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl - d.y * t); - - return q; -} - -/* -static INLINE CONST Sleef_longdouble2 dlsqrt_l2_l2(Sleef_longdouble2 d) { - long double t = sqrt(d.x + d.y); - return dlscale_l2_l2_l(dlmul_l2_l2_l2(dladd2_l2_l2_l2(d, dlmul_l2_l_l(t, t)), dlrec_l2_l(t)), 0.5); -} -*/ - -// - -EXPORT CONST Sleef_longdouble2 xsincospil_u05(long double d) { - long double u, s, t; - Sleef_longdouble2 r, x, s2; - - u = d * 4; - int64_t q = xceill(u) & ~(int64_t)1; - - s = u - (long double)q; - t = s; - s = s * s; - s2 = dlmul_l2_l_l(t, t); - - // - - u = 4.59265607313529833157632e-17L; - u = mlal(u, s, -2.04096140520547829627419e-14L); - u = mlal(u, s, 6.94845264320316515640316e-12L); - u = mlal(u, s, -1.75724767308629210422023e-09L); - u = mlal(u, s, 3.13361689037693212744991e-07L); - u = mlal(u, s, -3.65762041821772284521155e-05L); - u = mlal(u, s, 0.00249039457019272015784594L); - x = dladd2_l2_l_l2(u * s, dl(-0.0807455121882807817044873L, -2.40179063154839769223037e-21L)); - x = dladd2_l2_l2_l2(dlmul_l2_l2_l2(s2, x), dl(0.785398163397448309628202L, -1.25420305812534448752181e-20L)); - - x = dlmul_l2_l2_l(x, t); - r.x = x.x + x.y; - - // - - u = -2.00423964577657539380734e-18L; - u = mlal(u, s, 1.00185574457758689324113e-15L); - u = mlal(u, s, -3.89807283423502620989528e-13L); - u = mlal(u, s, 1.15011591257563133685341e-10L); - u = mlal(u, s, -2.461136950493305818105e-08L); - u = mlal(u, s, 3.59086044859150791782134e-06L); - u = mlal(u, s, -0.00032599188692739001335938L); - x = dladd2_l2_l_l2(u * s, dl(0.0158543442438155008529635L, -6.97556143018517384674258e-22L)); - x = dladd2_l2_l2_l2(dlmul_l2_l2_l2(s2, x), dl(-0.308425137534042456829379L, -9.19882299434302978226668e-21L)); - - x = dladd2_l2_l2_l(dlmul_l2_l2_l2(x, s2), 1); - r.y = x.x + x.y; - - // - - if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } - if ((q & 4) != 0) { r.x = -r.x; } - if (((q+2) & 4) != 0) { r.y = -r.y; } - - if (xisinfl(d)) { r.x = r.y = SLEEF_NAN; } - if (!xisinfl(d) && xfabsl(d) > TRIGRANGEMAX3) { r.x = r.y = 0; } - - return r; -} - -EXPORT CONST Sleef_longdouble2 xsincospil_u35(long double d) { - long double u, s, t; - Sleef_longdouble2 r; - - u = d * 4; - int64_t q = xceill(u) & ~(int64_t)1; - - s = u - (long double)q; - t = s; - s = s * s; - - // - - u = -0.2023275819380976135024e-13L; - u = mlal(u, s, +0.6948176964255957574946e-11L); - u = mlal(u, s, -0.1757247450021535880723e-8L); - u = mlal(u, s, +0.3133616889379195970541e-6L); - u = mlal(u, s, -0.3657620418215300856408e-4L); - u = mlal(u, s, +0.2490394570192717262476e-2L); - u = mlal(u, s, -0.8074551218828078160284e-1L); - u = mlal(u, s, +0.7853981633974483096282e+0L); - - r.x = u * t; - - // - - u = +0.9933418221428971922705e-15L; - u = mlal(u, s, -0.3897923064055824005357e-12L); - u = mlal(u, s, +0.1150115771521792692066e-9L); - u = mlal(u, s, -0.2461136949725905367314e-7L); - u = mlal(u, s, +0.3590860448589084195081e-5L); - u = mlal(u, s, -0.3259918869273895914840e-3L); - u = mlal(u, s, +0.1585434424381550079706e-1L); - u = mlal(u, s, -0.3084251375340424568294e+0L); - u = mlal(u, s, 1.0L); - - r.y = u; - - // - - if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } - if ((q & 4) != 0) { r.x = -r.x; } - if (((q+2) & 4) != 0) { r.y = -r.y; } - - if (xisinfl(d)) { r.x = r.y = SLEEF_NAN; } - if (!xisinfl(d) && xfabsl(d) > TRIGRANGEMAX3) { r.x = r.y = 0; } - - return r; -} diff --git a/src/libm/sleeflibm_header.h.org.in b/src/libm/sleeflibm_header.h.org.in index a26ef5c2..e0a5f57e 100644 --- a/src/libm/sleeflibm_header.h.org.in +++ b/src/libm/sleeflibm_header.h.org.in @@ -120,19 +120,12 @@ typedef struct { } Sleef_float2; #endif -#ifndef Sleef_longdouble2_DEFINED -#define Sleef_longdouble2_DEFINED -typedef struct { - long double x, y; -} Sleef_longdouble2; -#endif - #if !defined(Sleef_quad_DEFINED) #define Sleef_quad_DEFINED #if defined(__SIZEOF_FLOAT128__) || (defined(__linux__) && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || (defined(__PPC64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 8) typedef __float128 Sleef_quad; #define SLEEF_QUAD_C(x) (x ## Q) -#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__) +#elif defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ == 16 && defined(__aarch64__) typedef long double Sleef_quad; #define SLEEF_QUAD_C(x) (x ## L) #else @@ -309,11 +302,3 @@ IMPORT CONST float Sleef_lgammaf_u10(float); IMPORT CONST float Sleef_tgammaf_u10(float); IMPORT CONST float Sleef_erff_u10(float); IMPORT CONST float Sleef_erfcf_u15(float); - -IMPORT CONST Sleef_longdouble2 Sleef_sincospil_u05(long double); -IMPORT CONST Sleef_longdouble2 Sleef_sincospil_u35(long double); - -#if defined(Sleef_quad2_DEFINED) -IMPORT CONST Sleef_quad2 Sleef_sincospiq_u05(Sleef_quad); -IMPORT CONST Sleef_quad2 Sleef_sincospiq_u35(Sleef_quad); -#endif diff --git a/src/libm/sleefqp.c b/src/libm/sleefqp.c deleted file mode 100644 index cae341cb..00000000 --- a/src/libm/sleefqp.c +++ /dev/null @@ -1,471 +0,0 @@ -// Copyright Naoki Shibata and contributors 2010 - 2021. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -// Always use -ffp-contract=off option to compile SLEEF. - -#include - -#include -#include -#include - -#include "misc.h" - -#ifdef DORENAME -#include "rename.h" -#endif - -#if (defined(_MSC_VER)) -#pragma fp_contract (off) -#endif - -static INLINE CONST Sleef_quad mlaq(Sleef_quad x, Sleef_quad y, Sleef_quad z) { return x * y + z; } -static INLINE CONST int64_t xrintq(Sleef_quad x) { return x < 0 ? (int64_t)(x - 0.5) : (int64_t)(x + 0.5); } -static INLINE CONST int64_t xceilq(Sleef_quad x) { return (int64_t)x + (x < 0 ? 0 : 1); } -static INLINE CONST Sleef_quad xtruncq(Sleef_quad x) { return (Sleef_quad)(int64_t)x; } - -static INLINE CONST int xisnanq(Sleef_quad x) { return x != x; } -static INLINE CONST int xisinfq(Sleef_quad x) { return x == SLEEF_INFINITYq || x == -SLEEF_INFINITYq; } -static INLINE CONST int xisminfq(Sleef_quad x) { return x == -SLEEF_INFINITYq; } -static INLINE CONST int xispinfq(Sleef_quad x) { return x == SLEEF_INFINITYq; } - -static INLINE CONST Sleef_quad xfabsq(Sleef_quad x) { - union { - Sleef_quad q; - uint64_t u[2]; - } cnv; - - cnv.q = x; - cnv.u[1] &= UINT64_C(0x7fffffffffffffff); - return cnv.q; -} - -// - -#ifndef NDEBUG -static int checkfp(Sleef_quad x) { - if (xisinfq(x) || xisnanq(x)) return 1; - return 0; -} -#endif - -static INLINE CONST Sleef_quad upperq(Sleef_quad d) { - union { - Sleef_quad q; - uint64_t u[2]; - } cnv; - - cnv.q = d; - cnv.u[0] &= ~((UINT64_C(1) << (112/2+1)) - 1); - return cnv.q; -} - -static INLINE CONST Sleef_quad2 dq(Sleef_quad h, Sleef_quad l) { - Sleef_quad2 ret; - ret.x = h; ret.y = l; - return ret; -} - -static INLINE CONST Sleef_quad2 dqnormalize_q2_q2(Sleef_quad2 t) { - Sleef_quad2 s; - - s.x = t.x + t.y; - s.y = t.x - s.x + t.y; - - return s; -} - -static INLINE CONST Sleef_quad2 dqscale_q2_q2_q(Sleef_quad2 d, Sleef_quad s) { - Sleef_quad2 r; - - r.x = d.x * s; - r.y = d.y * s; - - return r; -} - -static INLINE CONST Sleef_quad2 dqneg_q2_q2(Sleef_quad2 d) { - Sleef_quad2 r; - - r.x = -d.x; - r.y = -d.y; - - return r; -} - -static INLINE CONST Sleef_quad2 dqadd_q2_q_q(Sleef_quad x, Sleef_quad y) { - // |x| >= |y| - - Sleef_quad2 r; - -#ifndef NDEBUG - if (!(checkfp(x) || checkfp(y) || xfabsq(x) >= xfabsq(y))) { - fprintf(stderr, "[dqadd_q2_q_q : %g, %g]\n", (double)x, (double)y); - fflush(stderr); - } -#endif - - r.x = x + y; - r.y = x - r.x + y; - - return r; -} - -static INLINE CONST Sleef_quad2 dqadd2_q2_q_q(Sleef_quad x, Sleef_quad y) { - Sleef_quad2 r; - - r.x = x + y; - Sleef_quad v = r.x - x; - r.y = (x - (r.x - v)) + (y - v); - - return r; -} - -static INLINE CONST Sleef_quad2 dqadd_q2_q2_q(Sleef_quad2 x, Sleef_quad y) { - // |x| >= |y| - - Sleef_quad2 r; - -#ifndef NDEBUG - if (!(checkfp(x.x) || checkfp(y) || xfabsq(x.x) >= xfabsq(y))) { - fprintf(stderr, "[dqadd_q2_q2_q : %g %g]\n", (double)x.x, (double)y); - fflush(stderr); - } -#endif - - r.x = x.x + y; - r.y = x.x - r.x + y + x.y; - - return r; -} - -static INLINE CONST Sleef_quad2 dqadd2_q2_q2_q(Sleef_quad2 x, Sleef_quad y) { - // |x| >= |y| - - Sleef_quad2 r; - - r.x = x.x + y; - Sleef_quad v = r.x - x.x; - r.y = (x.x - (r.x - v)) + (y - v); - r.y += x.y; - - return r; -} - -static INLINE CONST Sleef_quad2 dqadd_q2_q_q2(Sleef_quad x, Sleef_quad2 y) { - // |x| >= |y| - - Sleef_quad2 r; - -#ifndef NDEBUG - if (!(checkfp(x) || checkfp(y.x) || xfabsq(x) >= xfabsq(y.x))) { - fprintf(stderr, "[dqadd_q2_q_q2 : %g %g]\n", (double)x, (double)y.x); - fflush(stderr); - } -#endif - - r.x = x + y.x; - r.y = x - r.x + y.x + y.y; - - return r; -} - -static INLINE CONST Sleef_quad2 dqadd2_q2_q_q2(Sleef_quad x, Sleef_quad2 y) { - Sleef_quad2 r; - - r.x = x + y.x; - Sleef_quad v = r.x - x; - r.y = (x - (r.x - v)) + (y.x - v) + y.y; - - return r; -} - -static INLINE CONST Sleef_quad2 dqadd_q2_q2_q2(Sleef_quad2 x, Sleef_quad2 y) { - // |x| >= |y| - - Sleef_quad2 r; - -#ifndef NDEBUG - if (!(checkfp(x.x) || checkfp(y.x) || xfabsq(x.x) >= xfabsq(y.x))) { - fprintf(stderr, "[dqadd_q2_q2_q2 : %g %g]\n", (double)x.x, (double)y.x); - fflush(stderr); - } -#endif - - r.x = x.x + y.x; - r.y = x.x - r.x + y.x + x.y + y.y; - - return r; -} - -static INLINE CONST Sleef_quad2 dqadd2_q2_q2_q2(Sleef_quad2 x, Sleef_quad2 y) { - Sleef_quad2 r; - - r.x = x.x + y.x; - Sleef_quad v = r.x - x.x; - r.y = (x.x - (r.x - v)) + (y.x - v); - r.y += x.y + y.y; - - return r; -} - -static INLINE CONST Sleef_quad2 dqsub_q2_q2_q2(Sleef_quad2 x, Sleef_quad2 y) { - // |x| >= |y| - - Sleef_quad2 r; - -#ifndef NDEBUG - if (!(checkfp(x.x) || checkfp(y.x) || xfabsq(x.x) >= xfabsq(y.x))) { - fprintf(stderr, "[dqsub_q2_q2_q2 : %g %g]\n", (double)x.x, (double)y.x); - fflush(stderr); - } -#endif - - r.x = x.x - y.x; - r.y = x.x - r.x - y.x + x.y - y.y; - - return r; -} - -static INLINE CONST Sleef_quad2 dqdiv_q2_q2_q2(Sleef_quad2 n, Sleef_quad2 d) { - Sleef_quad t = 1.0 / d.x; - Sleef_quad dh = upperq(d.x), dl = d.x - dh; - Sleef_quad th = upperq(t ), tl = t - th; - Sleef_quad nhh = upperq(n.x), nhl = n.x - nhh; - - Sleef_quad2 q; - - q.x = n.x * t; - - Sleef_quad u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl + - q.x * (1 - dh * th - dh * tl - dl * th - dl * tl); - - q.y = t * (n.y - q.x * d.y) + u; - - return q; -} - -static INLINE CONST Sleef_quad2 dqmul_q2_q_q(Sleef_quad x, Sleef_quad y) { - Sleef_quad xh = upperq(x), xl = x - xh; - Sleef_quad yh = upperq(y), yl = y - yh; - Sleef_quad2 r; - - r.x = x * y; - r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl; - - return r; -} - -static INLINE CONST Sleef_quad2 dqmul_q2_q2_q(Sleef_quad2 x, Sleef_quad y) { - Sleef_quad xh = upperq(x.x), xl = x.x - xh; - Sleef_quad yh = upperq(y ), yl = y - yh; - Sleef_quad2 r; - - r.x = x.x * y; - r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y; - - return r; -} - -static INLINE CONST Sleef_quad2 dqmul_q2_q2_q2(Sleef_quad2 x, Sleef_quad2 y) { - Sleef_quad xh = upperq(x.x), xl = x.x - xh; - Sleef_quad yh = upperq(y.x), yl = y.x - yh; - Sleef_quad2 r; - - r.x = x.x * y.x; - r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x; - - return r; -} - -static INLINE CONST Sleef_quad2 dqsqu_q2_q2(Sleef_quad2 x) { - Sleef_quad xh = upperq(x.x), xl = x.x - xh; - Sleef_quad2 r; - - r.x = x.x * x.x; - r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y); - - return r; -} - -static INLINE CONST Sleef_quad2 dqrec_q2_q(Sleef_quad d) { - Sleef_quad t = 1.0 / d; - Sleef_quad dh = upperq(d), dl = d - dh; - Sleef_quad th = upperq(t), tl = t - th; - Sleef_quad2 q; - - q.x = t; - q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl); - - return q; -} - -static INLINE CONST Sleef_quad2 dqrec_q2_q2(Sleef_quad2 d) { - Sleef_quad t = 1.0 / d.x; - Sleef_quad dh = upperq(d.x), dl = d.x - dh; - Sleef_quad th = upperq(t ), tl = t - th; - Sleef_quad2 q; - - q.x = t; - q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl - d.y * t); - - return q; -} - -/* -static INLINE CONST Sleef_quad2 dqsqrt_q2_q2(Sleef_quad2 d) { - Sleef_quad t = sqrt(d.x + d.y); - return dqscale_q2_q2_q(dqmul_q2_q2_q2(dqadd2_q2_q2_q2(d, dqmul_q2_q_q(t, t)), dqrec_q2_q(t)), 0.5); -} -*/ - -// - -EXPORT CONST Sleef_quad2 xsincospiq_u05(Sleef_quad d) { - Sleef_quad u, s, t; - Sleef_quad2 r, x, s2; - - u = d * 4; - int64_t q = xceilq(u) & ~(int64_t)1; - - s = u - (Sleef_quad)q; - t = s; - s = s * s; - s2 = dqmul_q2_q_q(t, t); - - // - - u = +0.1528321016188828732764080161368244291e-27Q; - u = mlaq(u, s, -0.1494741498689376415859233754050616110e-24Q); - u = mlaq(u, s, +0.1226149947504428931621181953791777769e-21Q); - u = mlaq(u, s, -0.8348589834426964519785265770009675533e-19Q); - u = mlaq(u, s, +0.4628704628834415551415078707261146069e-16Q); - u = mlaq(u, s, -0.2041026339664143925641158896030605061e-13Q); - u = mlaq(u, s, +0.6948453273886629408492386065037620114e-11Q); - u = mlaq(u, s, -0.1757247673443401045145682042627557066e-8Q); - u = mlaq(u, s, +0.3133616890378121520950407496603902388e-6Q); - u = mlaq(u, s, -0.3657620418217725078660518698299784909e-4Q); - u = mlaq(u, s, +0.2490394570192720160015798421577395304e-2Q); - x = dqadd2_q2_q_q2(u * s, dq(-0.08074551218828078170696957048724322192457Q, 5.959584458773288360696286320980429277618e-36)); - x = dqadd2_q2_q2_q2(dqmul_q2_q2_q2(s2, x), dq(0.7853981633974483096156608458198756993698Q, 2.167745574452451779709844565881105067311e-35Q)); - - x = dqmul_q2_q2_q(x, t); - r.x = x.x + x.y; - - // - - u = -0.4616472554003168470361503708527464705e-29Q; - u = mlaq(u, s, +0.4891528531228245577148587028696897180e-26Q); - u = mlaq(u, s, -0.4377345071482935585011339656701961637e-23Q); - u = mlaq(u, s, +0.3278483561449753435303463083506802784e-20Q); - u = mlaq(u, s, -0.2019653396886554861865456720993185772e-17Q); - u = mlaq(u, s, +0.1001886461636271957275884859852184250e-14Q); - u = mlaq(u, s, -0.3898073171259675439843028673969857173e-12Q); - u = mlaq(u, s, +0.1150115912797405152263176921581706121e-9Q); - u = mlaq(u, s, -0.2461136950494199754009084018126527316e-7Q); - u = mlaq(u, s, +0.3590860448591510079069203991167071234e-5Q); - u = mlaq(u, s, -0.3259918869273900136414318317506198622e-3Q); - x = dqadd2_q2_q_q2(u * s, dq(0.01585434424381550085228521039855226376329Q, 6.529088663284413499535484912972485728198e-38Q)); - x = dqadd2_q2_q2_q2(dqmul_q2_q2_q2(s2, x), dq(-0.308425137534042456838577843746129712906Q, -1.006808646313642786855469666154064243572e-35Q)); - - x = dqadd2_q2_q2_q(dqmul_q2_q2_q2(x, s2), 1); - r.y = x.x + x.y; - - // - - if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } - if ((q & 4) != 0) { r.x = -r.x; } - if (((q+2) & 4) != 0) { r.y = -r.y; } - - if (xisinfq(d)) { r.x = r.y = SLEEF_NANq; } - if (!xisinfq(d) && xfabsq(d) > TRIGRANGEMAX3) { r.x = r.y = 0; } - - return r; -} - -EXPORT CONST Sleef_quad2 xsincospiq_u35(Sleef_quad d) { - Sleef_quad u, s, t; - Sleef_quad2 r; - - u = d * 4; - int64_t q = xceilq(u) & ~(int64_t)1; - - s = u - (Sleef_quad)q; - t = s; - s = s * s; - - // - - u = -0.1485963032785725729464918728185622156e-24Q; - u = mlaq(u, s, +0.1226127943866088943202201676879490635e-21Q); - u = mlaq(u, s, -0.8348589518463078609690110857435995326e-19Q); - u = mlaq(u, s, +0.4628704628547538824855302470312741438e-16Q); - u = mlaq(u, s, -0.2041026339663972432248777826778586936e-13Q); - u = mlaq(u, s, +0.6948453273886628726907826757576187848e-11Q); - u = mlaq(u, s, -0.1757247673443401044967978719804318982e-8Q); - u = mlaq(u, s, +0.3133616890378121520950114757196589206e-6Q); - u = mlaq(u, s, -0.3657620418217725078660518414453815240e-4Q); - u = mlaq(u, s, +0.2490394570192720160015798421435124000e-2Q); - u = mlaq(u, s, -0.8074551218828078170696957048724041729e-1Q); - u = mlaq(u, s, +0.7853981633974483096156608458198756994e+0Q); - - r.x = u * t; - - // - - u = +0.4862670988511544771355006256522366302e-26Q; - u = mlaq(u, s, -0.4377265452147065611484052550741141029e-23Q); - u = mlaq(u, s, +0.3278483433857326331665386021267750285e-20Q); - u = mlaq(u, s, -0.2019653396755055912482006994709659430e-17Q); - u = mlaq(u, s, +0.1001886461636180795663169552615123249e-14Q); - u = mlaq(u, s, -0.3898073171259675007871885150022866077e-12Q); - u = mlaq(u, s, +0.1150115912797405152123832255915284811e-9Q); - u = mlaq(u, s, -0.2461136950494199754008784937314856168e-7Q); - u = mlaq(u, s, +0.3590860448591510079069203583263258862e-5Q); - u = mlaq(u, s, -0.3259918869273900136414318317180623832e-3Q); - u = mlaq(u, s, +0.1585434424381550085228521039855096075e-1Q); - u = mlaq(u, s, -0.3084251375340424568385778437461297129e+0Q); - u = mlaq(u, s, 1.0Q); - - r.y = u; - - // - - if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } - if ((q & 4) != 0) { r.x = -r.x; } - if (((q+2) & 4) != 0) { r.y = -r.y; } - - if (xisinfq(d)) { r.x = r.y = SLEEF_NANq; } - if (!xisinfq(d) && xfabsq(d) > TRIGRANGEMAX3) { r.x = r.y = 0; } - - return r; -} - -// - -#ifdef ENABLE_MAIN -#include -#include -int main(int argc, char **argv) { - Sleef_quad a = -8.3998726984803832684266802333309369056312711821029e-09Q; - Sleef_quad2 q = xsincospiq_u05(a); - printf(" "); printf128(q.x); printf("\n"); - - /* - printf128(0.1Q); printf("\n"); - Sleef_quad2 q2 = dqmul_q2_q_q(0.1Q, 0.1Q); - printf128(q2.x); printf("\n"); - printf128(q2.y); printf("\n"); - */ - /* - printf("%s\n", toBCq(0.1Q)); - printf("%s\n", toBCq(upperq(0.1Q))); - printf("%s\n", toBCq(0.1Q-upperq(0.1Q))); - Sleef_quad2 q2 = dqmul_q2_q_q(0.1Q, 0.1Q); - printf("%s + ", toBCq(q2.x)); - printf("%s\n", toBCq(q2.y)); - */ -} -#endif diff --git a/src/libm/sleefsp.c b/src/libm/sleefsp.c index 9821bdc6..e88a613e 100644 --- a/src/libm/sleefsp.c +++ b/src/libm/sleefsp.c @@ -35,21 +35,15 @@ extern const float Sleef_rempitabsp[]; #include "estrin.h" static INLINE CONST int32_t floatToRawIntBits(float d) { - union { - float f; - int32_t i; - } tmp; - tmp.f = d; - return tmp.i; + int32_t ret; + memcpy(&ret, &d, sizeof(ret)); + return ret; } static INLINE CONST float intBitsToFloat(int32_t i) { - union { - float f; - int32_t i; - } tmp; - tmp.i = i; - return tmp.f; + float ret; + memcpy(&ret, &i, sizeof(ret)); + return ret; } static INLINE CONST float fabsfk(float x) { @@ -1919,56 +1913,56 @@ EXPORT CONST float xldexpf(float x, int exp) { } EXPORT CONST float xnextafterf(float x, float y) { - union { - float f; - int32_t i; - } cx; + float cxf; + int32_t cxi; - cx.f = x == 0 ? mulsignf(0, y) : x; - int c = (cx.i < 0) == (y < x); - if (c) cx.i = -(cx.i ^ (1 << 31)); + cxf = x == 0 ? mulsignf(0, y) : x; + memcpy(&cxi, &cxf, sizeof(cxi)); + int c = (cxi < 0) == (y < x); + if (c) cxi = -(cxi ^ (1 << 31)); - if (x != y) cx.i--; + if (x != y) cxi--; - if (c) cx.i = -(cx.i ^ (1 << 31)); + if (c) cxi = -(cxi ^ (1 << 31)); - if (cx.f == 0 && x != 0) cx.f = mulsignf(0, x); - if (x == 0 && y == 0) cx.f = y; - if (xisnanf(x) || xisnanf(y)) cx.f = SLEEF_NANf; + memcpy(&cxf, &cxi, sizeof(cxf)); + if (cxf == 0 && x != 0) cxf = mulsignf(0, x); + if (x == 0 && y == 0) cxf = y; + if (xisnanf(x) || xisnanf(y)) cxf = SLEEF_NANf; - return cx.f; + return cxf; } EXPORT CONST float xfrfrexpf(float x) { - union { - float f; - int32_t u; - } cx; + float cxf; + int32_t cxu; if (fabsfk(x) < FLT_MIN) x *= (1 << 30); - cx.f = x; - cx.u &= ~0x7f800000U; - cx.u |= 0x3f000000U; + cxf = x; + memcpy(&cxu, &cxf, sizeof(cxu)); + + cxu &= ~0x7f800000U; + cxu |= 0x3f000000U; - if (xisinff(x)) cx.f = mulsignf(SLEEF_INFINITYf, x); - if (x == 0) cx.f = x; + memcpy(&cxf, &cxu, sizeof(cxf)); + if (xisinff(x)) cxf = mulsignf(SLEEF_INFINITYf, x); + if (x == 0) cxf = x; - return cx.f; + return cxf; } EXPORT CONST int xexpfrexpf(float x) { - union { - float f; - uint32_t u; - } cx; + float cxf; + uint32_t cxu; int ret = 0; if (fabsfk(x) < FLT_MIN) { x *= (1 << 30); ret = -30; } - cx.f = x; - ret += (int32_t)(((cx.u >> 23) & 0xff)) - 0x7e; + cxf = x; + memcpy(&cxu, &cxf, sizeof(cxu)); + ret += (int32_t)(((cxu >> 23) & 0xff)) - 0x7e; if (x == 0 || xisnanf(x) || xisinff(x)) ret = 0; diff --git a/src/quad-tester/CMakeLists.txt b/src/quad-tester/CMakeLists.txt index aaa692b5..aaf608ae 100644 --- a/src/quad-tester/CMakeLists.txt +++ b/src/quad-tester/CMakeLists.txt @@ -13,7 +13,7 @@ if (NOT LIBRT) set(LIBRT "") endif() -set(CMAKE_C_FLAGS "${ORG_CMAKE_C_FLAGS} ${SLEEF_C_FLAGS}") +set(CMAKE_C_FLAGS "${ORG_CMAKE_C_FLAGS} ${SLEEF_C_FLAGS} ${FLAGS_NOSTRICTALIASING}") if(COMPILER_SUPPORTS_FLOAT128) list(APPEND COMMON_TARGET_DEFINITIONS ENABLEFLOAT128=1) diff --git a/src/quad-tester/qiutcuda.cu b/src/quad-tester/qiutcuda.cu index 0124e050..94c4a296 100644 --- a/src/quad-tester/qiutcuda.cu +++ b/src/quad-tester/qiutcuda.cu @@ -373,7 +373,7 @@ int main(int argc, char **argv) { fflush(stdout); char buf[BUFSIZE]; - fgets(buf, BUFSIZE-1, stdin); + if (fgets(buf, BUFSIZE-1, stdin)) {} int sentinel = 0; while(!feof(stdin) && sentinel < 2) { diff --git a/src/quad-tester/qiutsimd.c b/src/quad-tester/qiutsimd.c index 3d4f7c2a..7c534cd2 100644 --- a/src/quad-tester/qiutsimd.c +++ b/src/quad-tester/qiutsimd.c @@ -619,6 +619,25 @@ int do_test(int argc, char **argv) { } #endif +#if defined(ENABLE_PUREC_SCALAR) +#if defined(SLEEF_QUAD_C) + { + VARGQUAD v0 = xsplatq(SLEEF_QUAD_C(3.141592653589793238462643383279502884)); + VARGQUAD v1 = xsplatq(SLEEF_Q(+0x1921fb54442d1LL, 0x8469898cc51701b8ULL, 1)); + if (xicmpneq(v0, v1)) { + fprintf(stderr, "Testing on SLEEF_QUAD_C failed\n"); + exit(-1); + } + } +#else +#ifndef _MSC_VER +#warning SLEEF_QUAD_C not defined +#else +#pragma message ("SLEEF_QUAD_C not defined") +#endif +#endif +#endif + char buf[BUFSIZE]; fgets(buf, BUFSIZE-1, stdin); int sentinel = 0; diff --git a/src/quad-tester/qiutsimdmain.c b/src/quad-tester/qiutsimdmain.c index c232a69d..ce727b21 100644 --- a/src/quad-tester/qiutsimdmain.c +++ b/src/quad-tester/qiutsimdmain.c @@ -9,19 +9,27 @@ #include #include +#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__MINGW64__) static jmp_buf sigjmp; +#define SETJMP(x) setjmp(x) +#define LONGJMP longjmp +#else +static sigjmp_buf sigjmp; +#define SETJMP(x) sigsetjmp(x, 1) +#define LONGJMP siglongjmp +#endif int do_test(int argc, char **argv); int check_featureQP(); static void sighandler(int signum) { - longjmp(sigjmp, 1); + LONGJMP(sigjmp, 1); } int detectFeatureQP() { signal(SIGILL, sighandler); - if (setjmp(sigjmp) == 0) { + if (SETJMP(sigjmp) == 0) { int r = check_featureQP(); signal(SIGILL, SIG_DFL); return r; diff --git a/src/quad-tester/qtesterutil.c b/src/quad-tester/qtesterutil.c index 6ba9e901..4f921129 100644 --- a/src/quad-tester/qtesterutil.c +++ b/src/quad-tester/qtesterutil.c @@ -299,7 +299,7 @@ Sleef_quad mpfr_get_f128(mpfr_t m, mpfr_rnd_t rnd) { c.f = mpfr_get_float128(m, rnd); return c.q; } -#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__) +#elif defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ == 16 && defined(__aarch64__) void mpfr_set_f128(mpfr_t frx, Sleef_quad q, mpfr_rnd_t rnd) { union { Sleef_quad q; diff --git a/src/quad/sleefquadinline_cuda_header.h.org b/src/quad/sleefquadinline_cuda_header.h.org index 2e2134e3..d14941f0 100644 --- a/src/quad/sleefquadinline_cuda_header.h.org +++ b/src/quad/sleefquadinline_cuda_header.h.org @@ -12,7 +12,7 @@ #if defined(__SIZEOF_FLOAT128__) || (defined(__linux__) && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || (defined(__PPC64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 8) typedef __float128 Sleef_quad; #define SLEEF_QUAD_C(x) (x ## Q) -#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__) +#elif defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ == 16 && defined(__aarch64__) typedef long double Sleef_quad; #define SLEEF_QUAD_C(x) (x ## L) #else @@ -48,6 +48,14 @@ typedef struct { uint64_t x, y; } Sleef_quad; #endif #endif +#ifndef SLEEF_FP_ILOGB0 +#define SLEEF_FP_ILOGB0 ((int)-2147483648) +#endif + +#ifndef SLEEF_FP_ILOGBNAN +#define SLEEF_FP_ILOGBNAN ((int)2147483647) +#endif + __device__ const double Sleef_rempitabqp[] = { 0.15915494308865163475, 114.12758349655632628, 87.820147804392036051, 27.423136899138626177, 14.254027919272630243, 85.935026329207175877, 114.27691102886092267, 37.750191829592949944, diff --git a/src/quad/sleefquadinline_header.h.org b/src/quad/sleefquadinline_header.h.org index 1d038b2b..976dd80e 100644 --- a/src/quad/sleefquadinline_header.h.org +++ b/src/quad/sleefquadinline_header.h.org @@ -25,7 +25,7 @@ #if defined(__SIZEOF_FLOAT128__) || (defined(__linux__) && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || (defined(__PPC64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 8) typedef __float128 Sleef_quad; #define SLEEF_QUAD_C(x) (x ## Q) -#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__) +#elif defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ == 16 && defined(__aarch64__) typedef long double Sleef_quad; #define SLEEF_QUAD_C(x) (x ## L) #else @@ -61,6 +61,14 @@ typedef struct { uint64_t x, y; } Sleef_quad; #endif #endif +#ifndef SLEEF_FP_ILOGB0 +#define SLEEF_FP_ILOGB0 ((int)-2147483648) +#endif + +#ifndef SLEEF_FP_ILOGBNAN +#define SLEEF_FP_ILOGBNAN ((int)2147483647) +#endif + #ifndef __SLEEF_QREMPITAB__ #define __SLEEF_QREMPITAB__ const double Sleef_rempitabqp[] = { diff --git a/src/quad/sleefsimdqp.c b/src/quad/sleefsimdqp.c index 9391e568..57423ada 100644 --- a/src/quad/sleefsimdqp.c +++ b/src/quad/sleefsimdqp.c @@ -3523,10 +3523,7 @@ EXPORT vargquad Sleef_strtoq(const char *str, const char **endptr) { static int snprintquad(char *buf, size_t bufsize, vargquad argvalue, int typespec, int width, int precision, int flags) { if (width > bufsize) width = bufsize; - union { - vquad q; - struct { uint64_t l, h; }; - } c128 = { .q = cast_vq_aq(argvalue) }; + vquad c128 = cast_vq_aq(argvalue); char *ptr = buf; char prefix = 0; @@ -3534,8 +3531,8 @@ static int snprintquad(char *buf, size_t bufsize, vargquad argvalue, int typespe assert(typespec == 'e' || typespec == 'f' || typespec == 'g'); - if ((c128.h & UINT64_C(0x8000000000000000)) != 0) { - c128.h ^= UINT64_C(0x8000000000000000); + if ((c128.y & UINT64_C(0x8000000000000000)) != 0) { + c128.y ^= UINT64_C(0x8000000000000000); prefix = '-'; } else if (flags & FLAG_SIGN) { prefix = '+'; @@ -3543,13 +3540,13 @@ static int snprintquad(char *buf, size_t bufsize, vargquad argvalue, int typespe prefix = ' '; } - tdx value = cast_tdx_vq(c128.q); + tdx value = cast_tdx_vq(c128); - if (isnan_vo_vq(c128.q)) { + if (isnan_vo_vq(c128)) { if (prefix) *ptr++ = prefix; ptr += snprintf(ptr, buf + bufsize - ptr, (flags & FLAG_UPPER) ? "NAN" : "nan"); flags &= ~FLAG_ZERO; - } else if (isinf_vo_vq(c128.q)) { + } else if (isinf_vo_vq(c128)) { if (prefix) *ptr++ = prefix; ptr += snprintf(ptr, buf + bufsize - ptr, (flags & FLAG_UPPER) ? "INF" : "inf"); flags &= ~FLAG_ZERO; @@ -3563,7 +3560,7 @@ static int snprintquad(char *buf, size_t bufsize, vargquad argvalue, int typespe if (typespec == 'f') value = add_tdx_tdx_tdx(value, rounder); int exp = 0, e2 = 0; - if (!iszero_vo_vq(c128.q)) { + if (!iszero_vo_vq(c128)) { exp = ilog10(value); value = mul_tdx_tdx_tdx(value, exp10i(-exp)); } @@ -3685,14 +3682,11 @@ static int snprintquadhex(char *buf, size_t bufsize, vargquad argvalue, int widt if (width > bufsize) width = bufsize; char *bufend = buf + bufsize, *ptr = buf; - union { - vquad q; - struct { uint64_t l, h; }; - } c128 = { .q = cast_vq_aq(argvalue) }; + vquad c128 = cast_vq_aq(argvalue); int mainpos = 0; - if (c128.h >> 63) { + if (c128.y >> 63) { ptr += snprintf(ptr, bufend - ptr, "-"); } else if (flags & FLAG_SIGN) { ptr += snprintf(ptr, bufend - ptr, "+"); @@ -3700,27 +3694,27 @@ static int snprintquadhex(char *buf, size_t bufsize, vargquad argvalue, int widt ptr += snprintf(ptr, bufend - ptr, " "); } - if (isnan_vo_vq(c128.q)) { + if (isnan_vo_vq(c128)) { ptr += snprintf(ptr, bufend - ptr, (flags & FLAG_UPPER) ? "NAN" : "nan"); flags &= ~FLAG_ZERO; - } else if (isinf_vo_vq(c128.q)) { + } else if (isinf_vo_vq(c128)) { ptr += snprintf(ptr, bufend - ptr, (flags & FLAG_UPPER) ? "INF" : "inf"); flags &= ~FLAG_ZERO; } else { - int iszero = iszero_vo_vq(c128.q); + int iszero = iszero_vo_vq(c128); if (precision >= 0 && precision < 28) { int s = (28 - precision) * 4 - 1; if (s < 64) { - uint64_t u = c128.l + (((uint64_t)1) << s); - if (u < c128.l) c128.h++; - c128.l = u; + uint64_t u = c128.x + (((uint64_t)1) << s); + if (u < c128.x) c128.y++; + c128.x = u; } else { - c128.h += ((uint64_t)1) << (s - 64); + c128.y += ((uint64_t)1) << (s - 64); } } - int exp = (c128.h >> 48) & 0x7fff; - uint64_t h = c128.h << 16, l = c128.l; + int exp = (c128.y >> 48) & 0x7fff; + uint64_t h = c128.y << 16, l = c128.x; ptr += snprintf(ptr, bufend - ptr, (flags & FLAG_UPPER) ? "0X" :"0x"); mainpos = ptr - buf;