New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BUG: optimizing compilers can reorder call to npy_get_floatstatus #11036
Changes from all commits
a51f86b
f21ad36
6eefa6d
5a835fb
b91becf
305ca24
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
*/ | ||
#include "npy_math_common.h" | ||
#include "npy_math_private.h" | ||
#include "numpy/utils.h" | ||
|
||
#ifndef HAVE_COPYSIGN | ||
double npy_copysign(double x, double y) | ||
|
@@ -557,6 +558,15 @@ npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y) | |
} | ||
#endif | ||
|
||
/*
 * Clear the floating point status flags.
 *
 * Thin wrapper for callers that have no object of their own to hand to
 * npy_clear_floatstatus_barrier(): the address of a local dummy byte is
 * passed as the barrier argument and the barrier variant's result is
 * returned unchanged.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype and matches the other npy_*_floatstatus(void) signatures.
 */
int npy_clear_floatstatus(void)
{
    char dummy = 0;

    return npy_clear_floatstatus_barrier(&dummy);
}
/*
 * Read the floating point status flags without clearing them.
 *
 * Thin wrapper for callers that have no object of their own to hand to
 * npy_get_floatstatus_barrier(): the address of a local dummy byte is
 * passed as the barrier argument and the barrier variant's result is
 * returned unchanged.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype and matches the other npy_*_floatstatus(void) signatures.
 */
int npy_get_floatstatus(void)
{
    char dummy = 0;

    return npy_get_floatstatus_barrier(&dummy);
}
|
||
/* | ||
* Functions to set the floating point status word. | ||
* keep in sync with NO_FLOATING_POINT_SUPPORT in ufuncobject.h | ||
|
@@ -574,18 +584,24 @@ npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y) | |
defined(__NetBSD__) | ||
#include <ieeefp.h> | ||
|
||
int npy_get_floatstatus(void) | ||
int npy_get_floatstatus_barrier(char * param) | ||
{ | ||
int fpstatus = fpgetsticky(); | ||
/* | ||
* By using a volatile, the compiler cannot reorder this call | ||
*/ | ||
if (param != NULL) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Why do this check, yet pass [inline code lost in extraction]?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The correct thing to do is call [inline code lost in extraction]. When I fix the documentation from the comment above, I will expand on why the [inline code lost in extraction].
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I think @eric-wieser was asking why the check was needed at all. |
||
volatile char NPY_UNUSED(c) = *(char*)param; | ||
} | ||
return ((FP_X_DZ & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | | ||
((FP_X_OFL & fpstatus) ? NPY_FPE_OVERFLOW : 0) | | ||
((FP_X_UFL & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | | ||
((FP_X_INV & fpstatus) ? NPY_FPE_INVALID : 0); | ||
} | ||
|
||
int npy_clear_floatstatus(void) | ||
int npy_clear_floatstatus_barrier(char * param) | ||
{ | ||
int fpstatus = npy_get_floatstatus(); | ||
int fpstatus = npy_get_floatstatus_barrier(param); | ||
fpsetsticky(0); | ||
|
||
return fpstatus; | ||
|
@@ -617,21 +633,27 @@ void npy_set_floatstatus_invalid(void) | |
(defined(__FreeBSD__) && (__FreeBSD_version >= 502114)) | ||
# include <fenv.h> | ||
|
||
int npy_get_floatstatus(void) | ||
int npy_get_floatstatus_barrier(char* param) | ||
{ | ||
int fpstatus = fetestexcept(FE_DIVBYZERO | FE_OVERFLOW | | ||
FE_UNDERFLOW | FE_INVALID); | ||
/* | ||
* By using a volatile, the compiler cannot reorder this call | ||
*/ | ||
if (param != NULL) { | ||
volatile char NPY_UNUSED(c) = *(char*)param; | ||
} | ||
|
||
return ((FE_DIVBYZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | | ||
((FE_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) | | ||
((FE_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | | ||
((FE_INVALID & fpstatus) ? NPY_FPE_INVALID : 0); | ||
} | ||
|
||
int npy_clear_floatstatus(void) | ||
int npy_clear_floatstatus_barrier(char * param) | ||
{ | ||
/* testing float status is 50-100 times faster than clearing on x86 */ | ||
int fpstatus = npy_get_floatstatus(); | ||
int fpstatus = npy_get_floatstatus_barrier(param); | ||
if (fpstatus != 0) { | ||
feclearexcept(FE_DIVBYZERO | FE_OVERFLOW | | ||
FE_UNDERFLOW | FE_INVALID); | ||
|
@@ -665,18 +687,24 @@ void npy_set_floatstatus_invalid(void) | |
#include <float.h> | ||
#include <fpxcp.h> | ||
|
||
int npy_get_floatstatus(void) | ||
int npy_get_floatstatus_barrier(char *param) | ||
{ | ||
int fpstatus = fp_read_flag(); | ||
/* | ||
* By using a volatile, the compiler cannot reorder this call | ||
*/ | ||
if (param != NULL) { | ||
volatile char NPY_UNUSED(c) = *(char*)param; | ||
} | ||
return ((FP_DIV_BY_ZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | | ||
((FP_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) | | ||
((FP_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | | ||
((FP_INVALID & fpstatus) ? NPY_FPE_INVALID : 0); | ||
} | ||
|
||
int npy_clear_floatstatus(void) | ||
int npy_clear_floatstatus_barrier(char * param) | ||
{ | ||
int fpstatus = npy_get_floatstatus(); | ||
int fpstatus = npy_get_floatstatus_barrier(param); | ||
fp_swap_flag(0); | ||
|
||
return fpstatus; | ||
|
@@ -710,8 +738,11 @@ void npy_set_floatstatus_invalid(void) | |
#include <float.h> | ||
|
||
|
||
int npy_get_floatstatus(void) | ||
int npy_get_floatstatus_barrier(char *param) | ||
{ | ||
/* | ||
* By using a volatile, the compiler cannot reorder this call | ||
*/ | ||
#if defined(_WIN64) | ||
int fpstatus = _statusfp(); | ||
#else | ||
|
@@ -720,15 +751,18 @@ int npy_get_floatstatus(void) | |
_statusfp2(&fpstatus, &fpstatus2); | ||
fpstatus |= fpstatus2; | ||
#endif | ||
if (param != NULL) { | ||
volatile char NPY_UNUSED(c) = *(char*)param; | ||
} | ||
return ((SW_ZERODIVIDE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | | ||
((SW_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) | | ||
((SW_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | | ||
((SW_INVALID & fpstatus) ? NPY_FPE_INVALID : 0); | ||
} | ||
|
||
int npy_clear_floatstatus(void) | ||
int npy_clear_floatstatus_barrier(char *param) | ||
{ | ||
int fpstatus = npy_get_floatstatus(); | ||
int fpstatus = npy_get_floatstatus_barrier(param); | ||
_clearfp(); | ||
|
||
return fpstatus; | ||
|
@@ -739,18 +773,24 @@ int npy_clear_floatstatus(void) | |
|
||
#include <machine/fpu.h> | ||
|
||
int npy_get_floatstatus(void) | ||
int npy_get_floatstatus_barrier(char *param) | ||
{ | ||
unsigned long fpstatus = ieee_get_fp_control(); | ||
/* | ||
* By using a volatile, the compiler cannot reorder this call | ||
*/ | ||
if (param != NULL) { | ||
volatile char NPY_UNUSED(c) = *(char*)param; | ||
} | ||
return ((IEEE_STATUS_DZE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | | ||
((IEEE_STATUS_OVF & fpstatus) ? NPY_FPE_OVERFLOW : 0) | | ||
((IEEE_STATUS_UNF & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | | ||
((IEEE_STATUS_INV & fpstatus) ? NPY_FPE_INVALID : 0); | ||
} | ||
|
||
int npy_clear_floatstatus(void) | ||
int npy_clear_floatstatus_barrier(char *param) | ||
{ | ||
long fpstatus = npy_get_floatstatus(); | ||
int fpstatus = npy_get_floatstatus_barrier(param); | ||
/* clear status bits as well as disable exception mode if on */ | ||
ieee_set_fp_control(0); | ||
|
||
|
@@ -759,13 +799,14 @@ int npy_clear_floatstatus(void) | |
|
||
#else | ||
|
||
int npy_get_floatstatus(void) | ||
int npy_get_floatstatus_barrier(char NPY_UNUSED(*param)) | ||
{ | ||
return 0; | ||
} | ||
|
||
int npy_clear_floatstatus(void) | ||
int npy_clear_floatstatus_barrier(char *param) | ||
{ | ||
int fpstatus = npy_get_floatstatus_barrier(param); | ||
return 0; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be `npy_clear_floatstatus_barrier`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it should. This has been merged, so I will fix it somewhere else.