Skip to content

Commit

Permalink
[SVE] Workaround for ICE with GCC-10 & SVE (#391)
Browse files Browse the repository at this point in the history
With this patch, the `-fno-tree-vrp` compiler option is added when the SVE code is compiled with GCC.
  • Loading branch information
shibatch committed Jan 12, 2021
1 parent e7b4784 commit ea29e62
Show file tree
Hide file tree
Showing 14 changed files with 65 additions and 42 deletions.
16 changes: 8 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,14 @@ command_arguments(MKMASKED_PARAMS_GNUABI_SVE_sp sve s -4)

#

set(COSTOVERRIDE_AVX512F 10.0)
set(COSTOVERRIDE_AVX512FNOFMA 10.0)
set(COSTOVERRIDE_AVX2 2.0)
set(COSTOVERRIDE_AVX 2.0)
set(COSTOVERRIDE_NEON32 2.0)
set(COSTOVERRIDE_NEON32VFPV4 2.0)
set(COSTOVERRIDE_SVE 10.0)
set(COSTOVERRIDE_SVENOFMA 10.0)
set(COSTOVERRIDE_AVX512F 10)
set(COSTOVERRIDE_AVX512FNOFMA 10)
set(COSTOVERRIDE_AVX2 2)
set(COSTOVERRIDE_AVX 2)
set(COSTOVERRIDE_NEON32 2)
set(COSTOVERRIDE_NEON32VFPV4 2)
set(COSTOVERRIDE_SVE 10)
set(COSTOVERRIDE_SVENOFMA 10)

#

Expand Down
4 changes: 4 additions & 0 deletions Configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,10 @@ else()
set(DFT_C_FLAGS "${FLAGS_WALL} ${FLAGS_NOSTRICTALIASING} ${FLAGS_FASTMATH} ${FLAGS_OTHERS}")
endif()

if(CMAKE_C_COMPILER_ID MATCHES "GNU")
set(FLAGS_ENABLE_SVE "${FLAGS_ENABLE_SVE};-fno-tree-vrp")
endif()

if (CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$" AND CMAKE_C_COMPILER_ID MATCHES "GNU")
set(SLEEF_C_FLAGS "${SLEEF_C_FLAGS} -msse2 -mfpmath=sse")
set(DFT_C_FLAGS "${DFT_C_FLAGS} -msse2 -mfpmath=sse -m128bit-long-double")
Expand Down
2 changes: 1 addition & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ pipeline {
rm -rf build
mkdir build
cd build
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DENFORCE_TESTER3=TRUE -DFORCE_AAVPCS=On -DENABLE_GNUABI=On -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DDISABLE_SVE=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENABLE_CUDA=TRUE -DENFORCE_CUDA=TRUE ..
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DENFORCE_TESTER3=TRUE -DFORCE_AAVPCS=On -DENABLE_GNUABI=On -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENABLE_CUDA=TRUE -DENFORCE_CUDA=TRUE -DENFORCE_SVE=TRUE -DEMULATOR=qemu-aarch64 ..
ninja
export OMP_WAIT_POLICY=passive
export CTEST_OUTPUT_ON_FAILURE=TRUE
Expand Down
6 changes: 6 additions & 0 deletions src/arch/helperpurec_scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,17 @@

#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) || defined(__zarch__) || CONFIG == 3
#ifndef FP_FAST_FMA
//@#ifndef FP_FAST_FMA
#define FP_FAST_FMA
//@#define FP_FAST_FMA
#endif
//@#endif
#ifndef FP_FAST_FMAF
//@#ifndef FP_FAST_FMAF
#define FP_FAST_FMAF
//@#define FP_FAST_FMAF
#endif
//@#endif
#endif

#if (!defined(FP_FAST_FMA) || !defined(FP_FAST_FMAF)) && !defined(SLEEF_GENHEADER)
Expand Down Expand Up @@ -119,6 +123,8 @@ typedef struct {

#if defined(ENABLEFLOAT128) && CONFIG != 3
typedef __float128 vargquad;
#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__) && CONFIG != 3
typedef long double vargquad;
#else
typedef vquad vargquad;
#endif
Expand Down
4 changes: 4 additions & 0 deletions src/common/misc.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,10 @@ typedef struct {
#define Sleef_quad_DEFINED
#if defined(ENABLEFLOAT128)
typedef __float128 Sleef_quad;
#define SLEEF_QUAD_C(x) (x ## Q)
#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__)
typedef long double Sleef_quad;
#define SLEEF_QUAD_C(x) (x ## L)
#else
typedef struct { uint64_t x, y; } Sleef_quad;
#endif
Expand Down
4 changes: 2 additions & 2 deletions src/libm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,7 @@ if(BUILD_INLINE_HEADERS)
> ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h # > /build/include/sleefinline_sse2.h

MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimddp.c ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdsp.c ${HEADER_${SIMD}}
DEPENDS ${HEADER_${SIMD}} addSuffix
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimddp.c ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdsp.c ${HEADER_${SIMD}} addSuffix
VERBATIM
)

Expand Down Expand Up @@ -630,7 +630,7 @@ if(BUILD_INLINE_HEADERS)
> ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h # > /build/include/sleefinline_cuda.h

MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimddp.c ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdsp.c ${HEADER_${SIMD}}
DEPENDS ${HEADER_${SIMD}} addSuffix
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimddp.c ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdsp.c ${HEADER_${SIMD}} addSuffix
VERBATIM
)

Expand Down
6 changes: 3 additions & 3 deletions src/libm/sleeflibm_header.h.org.in
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,9 @@ typedef struct {
#if defined(__SIZEOF_FLOAT128__) || (defined(__linux__) && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || (defined(__PPC64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 8)
typedef __float128 Sleef_quad;
#define SLEEF_QUAD_C(x) (x ## Q)
//#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__)
//typedef long double Sleef_quad;
//#define SLEEF_QUAD_C(x) (x ## L)
#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__)
typedef long double Sleef_quad;
#define SLEEF_QUAD_C(x) (x ## L)
#else
typedef struct { uint64_t x, y; } Sleef_quad;
#endif
Expand Down
8 changes: 5 additions & 3 deletions src/quad-tester/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ function(add_test_iut IUT C)
set_tests_properties(${IUT} PROPERTIES COST ${C})
else()
add_test(NAME ${IUT}
COMMAND ${QTESTER} ${EMULATOR} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${IUT}
COMMAND ${QTESTER} "--qemu" ${EMULATOR} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${IUT}
WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
set_tests_properties(${IUT} PROPERTIES COST ${C})
endif()
Expand All @@ -97,7 +97,8 @@ macro(test_extension SIMD)
add_dependencies(${TARGET_IUT${SIMD}} sleefquad ${TARGET_LIBSLEEF})
set_target_properties(${TARGET_IUT${SIMD}} PROPERTIES C_STANDARD 99)
if (DEFINED COSTOVERRIDE_${SIMD})
add_test_iut(${TARGET_IUT${SIMD}} ${COSTOVERRIDE_${SIMD}})
math(EXPR C "${COSTOVERRIDE_${SIMD}} + 1")
add_test_iut(${TARGET_IUT${SIMD}} ${C})
else()
add_test_iut(${TARGET_IUT${SIMD}} 0.5)
endif()
Expand Down Expand Up @@ -125,7 +126,8 @@ macro(test_extension SIMD)
add_dependencies(${IUTINAME} ${TARGET_QINLINE_HEADERS})
set_target_properties(${IUTINAME} PROPERTIES C_STANDARD 99)
if (DEFINED COSTOVERRIDE_${SIMD})
add_test_iut(${IUTINAME} ${COSTOVERRIDE_${SIMD}})
math(EXPR C "${COSTOVERRIDE_${SIMD}} + 1")
add_test_iut(${IUTINAME} ${C})
else()
add_test_iut(${IUTINAME} 0.5)
endif()
Expand Down
25 changes: 16 additions & 9 deletions src/quad-tester/qtester.c
Original file line number Diff line number Diff line change
Expand Up @@ -1499,7 +1499,7 @@ void do_test(int options) {
}

int main(int argc, char **argv) {
char *argv2[argc+2], *commandSde = NULL;
char *argv2[argc+2], *commandSde = NULL, *commandQEmu = NULL;
int i, a2s, options;

// BUGFIX: this flush is to prevent incorrect syncing with the
Expand All @@ -1511,6 +1511,9 @@ int main(int argc, char **argv) {
if (a2s+1 < argc && strcmp(argv[a2s], "--sde") == 0) {
commandSde = argv[a2s+1];
a2s++;
} else if (a2s+1 < argc && strcmp(argv[a2s], "--qemu") == 0) {
commandQEmu = argv[a2s+1];
a2s++;
} else {
break;
}
Expand All @@ -1532,14 +1535,20 @@ int main(int argc, char **argv) {
if (readln(ctop[0], str, 255) < 1 ||
sscanf(str, "%d", &options) != 1 ||
(options & 1) == 0) {
if (commandSde != NULL) {
if (commandSde != NULL || commandQEmu != NULL) {
close(ctop[0]);
close(ptoc[1]);

argv2[0] = commandSde;
argv2[1] = "--";
for(i=a2s;i<argc;i++) argv2[i-a2s+2] = argv[i];
argv2[argc-a2s+2] = NULL;
if (commandSde) {
argv2[0] = commandSde;
argv2[1] = "--";
for(i=a2s;i<argc;i++) argv2[i-a2s+2] = argv[i];
argv2[argc-a2s+2] = NULL;
} else {
argv2[0] = commandQEmu;
for(i=a2s;i<argc;i++) argv2[i-a2s+1] = argv[i];
argv2[argc-a2s+1] = NULL;
}

startChild(argv2[0], argv2);

Expand All @@ -1550,7 +1559,7 @@ int main(int argc, char **argv) {
return 0;
}

fprintf(stderr, "*** Using SDE\n");
fprintf(stderr, "*** Using emulator\n");
} else {
int status;
waitpid(pid, &status, 0);
Expand All @@ -1565,8 +1574,6 @@ int main(int argc, char **argv) {
}
}

fprintf(stderr, "\n\n*** qtester : now testing %s\n", argv2[0]);

fpctop = fdopen(ctop[0], "r");

do_test(options);
Expand Down
4 changes: 2 additions & 2 deletions src/quad/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ if(BUILD_INLINE_HEADERS)
> ${PROJECT_BINARY_DIR}/include/sleefquadinline_${SIMDLC}.h # > /build/include/sleefquadinline_sse2.h

MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdqp.c ${HEADER_${SIMD}}
DEPENDS ${HEADER_${SIMD}} addSuffix
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdqp.c ${HEADER_${SIMD}} addSuffix
VERBATIM
)

Expand Down Expand Up @@ -307,7 +307,7 @@ if(BUILD_INLINE_HEADERS)
> ${PROJECT_BINARY_DIR}/include/sleefquadinline_${SIMDLC}.h # > /build/include/sleefquadinline_cuda.h

MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdqp.c ${HEADER_${SIMD}}
DEPENDS ${HEADER_${SIMD}} addSuffix
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdqp.c ${HEADER_${SIMD}} addSuffix
VERBATIM
)

Expand Down
6 changes: 3 additions & 3 deletions src/quad/sleefquadinline_cuda_header.h.org
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
#if defined(__SIZEOF_FLOAT128__) || (defined(__linux__) && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || (defined(__PPC64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 8)
typedef __float128 Sleef_quad;
#define SLEEF_QUAD_C(x) (x ## Q)
//#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__)
//typedef long double Sleef_quad;
//#define SLEEF_QUAD_C(x) (x ## L)
#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__)
typedef long double Sleef_quad;
#define SLEEF_QUAD_C(x) (x ## L)
#else
typedef struct { uint64_t x, y; } Sleef_quad;
#endif
Expand Down
6 changes: 3 additions & 3 deletions src/quad/sleefquadinline_header.h.org
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@
#if defined(__SIZEOF_FLOAT128__) || (defined(__linux__) && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || (defined(__PPC64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 8)
typedef __float128 Sleef_quad;
#define SLEEF_QUAD_C(x) (x ## Q)
//#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__)
//typedef long double Sleef_quad;
//#define SLEEF_QUAD_C(x) (x ## L)
#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__)
typedef long double Sleef_quad;
#define SLEEF_QUAD_C(x) (x ## L)
#else
typedef struct { uint64_t x, y; } Sleef_quad;
#endif
Expand Down
14 changes: 7 additions & 7 deletions src/quad/sleefsimdqp.c
Original file line number Diff line number Diff line change
Expand Up @@ -261,15 +261,15 @@ static INLINE CONST VECTOR_CC vdouble2 twoprod_vd2_vd_vd(vdouble x, vdouble y) {
}

static INLINE CONST VECTOR_CC vdouble3 scale_vd3_vd3_vd(vdouble3 d, vdouble s) {
return (vdouble3) { vmul_vd_vd_vd(vd3getx_vd_vd3(d), s), vmul_vd_vd_vd(vd3gety_vd_vd3(d), s), vmul_vd_vd_vd(vd3getz_vd_vd3(d), s) };
return cast_vd3_vd_vd_vd(vmul_vd_vd_vd(vd3getx_vd_vd3(d), s), vmul_vd_vd_vd(vd3gety_vd_vd3(d), s), vmul_vd_vd_vd(vd3getz_vd_vd3(d), s));
}

static INLINE CONST VECTOR_CC vdouble3 scale_vd3_vd3_d(vdouble3 d, double s) { return scale_vd3_vd3_vd(d, vcast_vd_d(s)); }

static INLINE CONST VECTOR_CC vdouble3 quickrenormalize_vd3_vd3(vdouble3 td) {
vdouble2 u = quicktwosum_vd2_vd_vd(vd3getx_vd_vd3(td), vd3gety_vd_vd3(td));
vdouble2 v = quicktwosum_vd2_vd_vd(vd2gety_vd_vd2(u), vd3getz_vd_vd3(td));
return (vdouble3) { vd2getx_vd_vd2(u), vd2getx_vd_vd2(v), vd2gety_vd_vd2(v) };
return cast_vd3_vd_vd_vd(vd2getx_vd_vd2(u), vd2getx_vd_vd2(v), vd2gety_vd_vd2(v));
}

static INLINE CONST VECTOR_CC vdouble3 normalize_vd3_vd3(vdouble3 td) {
Expand Down Expand Up @@ -405,31 +405,31 @@ static INLINE CONST VECTOR_CC vdouble3 mul_vd3_vd2_vd2(vdouble2 x, vdouble2 y) {
}

static INLINE CONST VECTOR_CC vdouble3 div2_vd3_vd3_vd3(vdouble3 n, vdouble3 q) {
vdouble2 d = ddrec_vd2_vd2((vdouble2) {vd3getx_vd_vd3(q), vd3gety_vd_vd3(q)});
vdouble2 d = ddrec_vd2_vd2(vcast_vd2_vd_vd(vd3getx_vd_vd3(q), vd3gety_vd_vd3(q)));
return mul2_vd3_vd3_vd3(n, add_vd3_vd2_vd3(d, mul_vd3_vd2_vd3(ddscale_vd2_vd2_d(d, -1),
add_vd3_vd_vd3(vcast_vd_d(-1), mul_vd3_vd2_vd3(d, q)))));
}

static INLINE CONST VECTOR_CC vdouble3 div_vd3_vd3_vd3(vdouble3 n, vdouble3 q) {
vdouble2 d = ddrec_vd2_vd2((vdouble2) {vd3getx_vd_vd3(q), vd3gety_vd_vd3(q)});
vdouble2 d = ddrec_vd2_vd2(vcast_vd2_vd_vd(vd3getx_vd_vd3(q), vd3gety_vd_vd3(q)));
return mul_vd3_vd3_vd3(n, add_vd3_vd2_vd3(d, mul_vd3_vd2_vd3(ddscale_vd2_vd2_d(d, -1),
add_vd3_vd_vd3(vcast_vd_d(-1), mul_vd3_vd2_vd3(d, q)))));
}

static INLINE CONST VECTOR_CC vdouble3 rec_vd3_vd3(vdouble3 q) {
vdouble2 d = ddrec_vd2_vd2((vdouble2) {vd3getx_vd_vd3(q), vd3gety_vd_vd3(q)});
vdouble2 d = ddrec_vd2_vd2(vcast_vd2_vd_vd(vd3getx_vd_vd3(q), vd3gety_vd_vd3(q)));
return add2_vd3_vd2_vd3(d, mul_vd3_vd2_vd3(ddscale_vd2_vd2_d(d, -1),
add_vd3_vd_vd3(vcast_vd_d(-1), mul_vd3_vd2_vd3(d, q))));
}

static INLINE CONST VECTOR_CC vdouble3 rec_vd3_vd2(vdouble2 q) {
vdouble2 d = ddrec_vd2_vd2((vdouble2) {vd2getx_vd_vd2(q), vd2gety_vd_vd2(q)});
vdouble2 d = ddrec_vd2_vd2(vcast_vd2_vd_vd(vd2getx_vd_vd2(q), vd2gety_vd_vd2(q)));
return add2_vd3_vd2_vd3(d, mul_vd3_vd2_vd3(ddscale_vd2_vd2_d(d, -1),
add_vd3_vd_vd3(vcast_vd_d(-1), mul_vd3_vd2_vd2(d, q))));
}

static INLINE CONST VECTOR_CC vdouble3 sqrt_vd3_vd3(vdouble3 d) {
vdouble2 t = ddsqrt_vd2_vd2((vdouble2) {vd3getx_vd_vd3(d), vd3gety_vd_vd3(d)});
vdouble2 t = ddsqrt_vd2_vd2(vcast_vd2_vd_vd(vd3getx_vd_vd3(d), vd3gety_vd_vd3(d)));
vdouble3 r = mul2_vd3_vd3_vd3(add2_vd3_vd3_vd3(d, mul_vd3_vd2_vd2(t, t)), rec_vd3_vd2(t));
r = sel_vd3_vo_vd3_vd3(veq_vo_vd_vd(vd3getx_vd_vd3(d), vcast_vd_d(0)), cast_vd3_d_d_d(0, 0, 0), scale_vd3_vd3_d(r, 0.5));
return r;
Expand Down
2 changes: 1 addition & 1 deletion travis/before_script.arm64-gcc-sve.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ cd sleef.build
export PATH=/opt/local/bin:$PATH
export LD_LIBRARY_PATH=/opt/local/lib:$LD_LIBRARY_PATH
export CC=gcc-10
cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DEMULATOR=qemu-aarch64 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=FALSE -DBUILD_DFT=TRUE -DENFORCE_SVE=TRUE ..
cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DEMULATOR=qemu-aarch64 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_SVE=TRUE ..

0 comments on commit ea29e62

Please sign in to comment.