From f283fc124f1fe644aa7ae2e0caf6dc5888b0c83f Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Fri, 24 Oct 2025 17:21:22 +0000 Subject: [PATCH 1/4] [libc][fenv] Refactor x86 fenv implementations to make it work for various fenv_t. --- libc/src/__support/CPP/bit.h | 36 +- libc/src/__support/FPUtil/FEnvImpl.h | 8 +- libc/src/__support/FPUtil/x86_64/FEnvImpl.h | 759 +++++------------- .../FPUtil/x86_64/fenv_mxcsr_utils.h | 129 +++ .../__support/FPUtil/x86_64/fenv_x86_common.h | 253 ++++++ .../__support/FPUtil/x86_64/fenv_x87_only.h | 137 ++++ .../__support/FPUtil/x86_64/fenv_x87_utils.h | 194 +++++ .../__support/macros/properties/compiler.h | 5 + libc/test/UnitTest/FEnvSafeTest.cpp | 64 +- 9 files changed, 972 insertions(+), 613 deletions(-) create mode 100644 libc/src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h create mode 100644 libc/src/__support/FPUtil/x86_64/fenv_x86_common.h create mode 100644 libc/src/__support/FPUtil/x86_64/fenv_x87_only.h create mode 100644 libc/src/__support/FPUtil/x86_64/fenv_x87_utils.h diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h index f602a7447ec10..5b244aa2a4b03 100644 --- a/libc/src/__support/CPP/bit.h +++ b/libc/src/__support/CPP/bit.h @@ -26,6 +26,16 @@ namespace cpp { #define LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE #endif +template +LIBC_INLINE static void inline_copy(const char *from, char *to) { +#if __has_builtin(__builtin_memcpy_inline) + __builtin_memcpy_inline(to, from, N); +#else + for (unsigned i = 0; i < N; ++i) + to[i] = from[i]; +#endif // __has_builtin(__builtin_memcpy_inline) +} + // This implementation of bit_cast requires trivially-constructible To, to avoid // UB in the implementation. 
template @@ -43,16 +53,30 @@ bit_cast(const From &from) { To to{}; char *dst = reinterpret_cast(&to); const char *src = reinterpret_cast(&from); -#if __has_builtin(__builtin_memcpy_inline) - __builtin_memcpy_inline(dst, src, sizeof(To)); -#else - for (unsigned i = 0; i < sizeof(To); ++i) - dst[i] = src[i]; -#endif // __has_builtin(__builtin_memcpy_inline) + inline_copy(src, dst); return to; #endif // __has_builtin(__builtin_bit_cast) } +// The following simple bit copy from a smaller type to maybe-larger type. +template +LIBC_INLINE constexpr cpp::enable_if_t< + (sizeof(To) >= sizeof(From)) && + cpp::is_trivially_constructible::value && + cpp::is_trivially_copyable::value && + cpp::is_trivially_copyable::value, + void> +bit_copy(const From &from, To &to) { + MSAN_UNPOISON(&from, sizeof(From)); + if constexpr (sizeof(To) == sizeof(From)) { + to = bit_cast(from); + } else { + char *dst = reinterpret_cast(&to); + const char *src = reinterpret_cast(&from); + inline_copy(src, dst); + } +} + template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, bool> diff --git a/libc/src/__support/FPUtil/FEnvImpl.h b/libc/src/__support/FPUtil/FEnvImpl.h index ef3f60a5b3d7f..3ef2df5f0a352 100644 --- a/libc/src/__support/FPUtil/FEnvImpl.h +++ b/libc/src/__support/FPUtil/FEnvImpl.h @@ -31,8 +31,7 @@ // the dummy implementations below. Once a proper x86_64 darwin fenv is set up, // the apple condition here should be removed. // TODO: fully support fenv for MSVC. 
-#elif defined(LIBC_TARGET_ARCH_IS_X86) && !defined(__APPLE__) && \ - !defined(LIBC_COMPILER_IS_MSVC) +#elif defined(LIBC_TARGET_ARCH_IS_X86) && !defined(__APPLE__) #include "x86_64/FEnvImpl.h" #elif defined(LIBC_TARGET_ARCH_IS_ARM) && defined(__ARM_FP) && \ !defined(LIBC_COMPILER_IS_MSVC) @@ -110,12 +109,7 @@ raise_except_if_required([[maybe_unused]] int excepts) { } else { #ifndef LIBC_MATH_HAS_NO_EXCEPT if (math_errhandling & MATH_ERREXCEPT) -#ifdef LIBC_TARGET_ARCH_IS_X86_64 - return raise_except(excepts); -#else // !LIBC_TARGET_ARCH_IS_X86 return raise_except(excepts); -#endif // LIBC_TARGET_ARCH_IS_X86 - #endif // LIBC_MATH_HAS_NO_EXCEPT return 0; } diff --git a/libc/src/__support/FPUtil/x86_64/FEnvImpl.h b/libc/src/__support/FPUtil/x86_64/FEnvImpl.h index 5da509796d849..4dfd784fa9734 100644 --- a/libc/src/__support/FPUtil/x86_64/FEnvImpl.h +++ b/libc/src/__support/FPUtil/x86_64/FEnvImpl.h @@ -9,642 +9,241 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENVIMPL_H #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENVIMPL_H +#include "hdr/fenv_macros.h" +#include "hdr/stdint_proxy.h" +#include "hdr/types/fenv_t.h" +#include "src/__support/CPP/bit.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE #include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" #include "src/__support/macros/properties/architectures.h" +#include "src/__support/macros/properties/compiler.h" +#include "src/__support/macros/properties/types.h" #if !defined(LIBC_TARGET_ARCH_IS_X86) #error "Invalid include" #endif -#include "hdr/stdint_proxy.h" -#include "hdr/types/fenv_t.h" -#include "src/__support/macros/sanitizer.h" +#ifndef __SSE__ +// When SSE is not available, we will only touch x87 floating point environment. 
+#include "src/__support/FPUtil/x86_64/fenv_x87_only.h" +#else // __SSE__ -namespace LIBC_NAMESPACE_DECL { -namespace fputil { +#ifndef LIBC_COMPILER_IS_MSVC +#include "src/__support/FPUtil/x86_64/fenv_x87_utils.h" +#endif // !LIBC_COMPILER_IS_MSVC -namespace internal { - -// Normally, one should be able to define FE_* macros to the exact rounding mode -// encodings. However, since we want LLVM libc to be compiled against headers -// from other libcs, we cannot assume that FE_* macros are always defined in -// such a manner. So, we will define enums corresponding to the x86_64 bit -// encodings. The implementations can map from FE_* to the corresponding enum -// values. - -// The rounding control values in the x87 control register and the MXCSR -// register have the same 2-bit enoding but have different bit positions. -// See below for the bit positions. -struct RoundingControlValue { - static constexpr uint16_t TO_NEAREST = 0x0; - static constexpr uint16_t DOWNWARD = 0x1; - static constexpr uint16_t UPWARD = 0x2; - static constexpr uint16_t TOWARD_ZERO = 0x3; -}; - -static constexpr uint16_t X87_ROUNDING_CONTROL_BIT_POSITION = 10; -static constexpr uint16_t MXCSR_ROUNDING_CONTROL_BIT_POSITION = 13; - -// The exception flags in the x87 status register and the MXCSR have the same -// encoding as well as the same bit positions. -struct ExceptionFlags { - static constexpr uint16_t INVALID_F = 0x1; - // Some libcs define __FE_DENORM corresponding to the denormal input - // exception and include it in FE_ALL_EXCEPTS. We define and use it to - // support compiling against headers provided by such libcs. - static constexpr uint16_t DENORMAL_F = 0x2; - static constexpr uint16_t DIV_BY_ZERO_F = 0x4; - static constexpr uint16_t OVERFLOW_F = 0x8; - static constexpr uint16_t UNDERFLOW_F = 0x10; - static constexpr uint16_t INEXACT_F = 0x20; -}; - -// The exception control bits occupy six bits, one bit for each exception. 
-// In the x87 control word, they occupy the first 6 bits. In the MXCSR -// register, they occupy bits 7 to 12. -static constexpr uint16_t X87_EXCEPTION_CONTROL_BIT_POSITION = 0; -static constexpr uint16_t X87_EXCEPTION_CONTROL_BIT_POSITION_HIGH = 24; -static constexpr uint16_t MXCSR_EXCEPTION_CONTOL_BIT_POISTION = 7; - -// Exception flags are individual bits in the corresponding registers. -// So, we just OR the bit values to get the full set of exceptions. -LIBC_INLINE uint16_t get_status_value_for_except(int excepts) { - // We will make use of the fact that exception control bits are single - // bit flags in the control registers. - return ((excepts & FE_INVALID) ? ExceptionFlags::INVALID_F : 0) | -#ifdef __FE_DENORM - ((excepts & __FE_DENORM) ? ExceptionFlags::DENORMAL_F : 0) | -#endif // __FE_DENORM - ((excepts & FE_DIVBYZERO) ? ExceptionFlags::DIV_BY_ZERO_F : 0) | - ((excepts & FE_OVERFLOW) ? ExceptionFlags::OVERFLOW_F : 0) | - ((excepts & FE_UNDERFLOW) ? ExceptionFlags::UNDERFLOW_F : 0) | - ((excepts & FE_INEXACT) ? ExceptionFlags::INEXACT_F : 0); -} +#include "src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h" -LIBC_INLINE int exception_status_to_macro(uint16_t status) { - return ((status & ExceptionFlags::INVALID_F) ? FE_INVALID : 0) | -#ifdef __FE_DENORM - ((status & ExceptionFlags::DENORMAL_F) ? __FE_DENORM : 0) | -#endif // __FE_DENORM - ((status & ExceptionFlags::DIV_BY_ZERO_F) ? FE_DIVBYZERO : 0) | - ((status & ExceptionFlags::OVERFLOW_F) ? FE_OVERFLOW : 0) | - ((status & ExceptionFlags::UNDERFLOW_F) ? FE_UNDERFLOW : 0) | - ((status & ExceptionFlags::INEXACT_F) ? FE_INEXACT : 0); -} +namespace LIBC_NAMESPACE_DECL { +namespace fputil { -struct X87StateDescriptor { - uint16_t control_word; - uint16_t unused1; - uint16_t status_word; - uint16_t unused2; - // TODO: Elaborate the remaining 20 bytes as required. 
- uint32_t _[5]; -}; - -LIBC_INLINE uint16_t get_x87_control_word() { - uint16_t w; - __asm__ __volatile__("fnstcw %0" : "=m"(w)::); - MSAN_UNPOISON(&w, sizeof(w)); - return w; -} +LIBC_INLINE static int clear_except(int excepts) { + uint16_t x86_excepts = internal::get_status_value_from_except(excepts); + sse::clear_except(x86_excepts); -LIBC_INLINE void write_x87_control_word(uint16_t w) { - __asm__ __volatile__("fldcw %0" : : "m"(w) :); -} +#ifdef LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80 + x87::clear_except(x86_excepts); +#endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80 -LIBC_INLINE uint16_t get_x87_status_word() { - uint16_t w; - __asm__ __volatile__("fnstsw %0" : "=m"(w)::); - MSAN_UNPOISON(&w, sizeof(w)); - return w; -} - -LIBC_INLINE void clear_x87_exceptions() { - __asm__ __volatile__("fnclex" : : :); -} - -LIBC_INLINE uint32_t get_mxcsr() { - uint32_t w; - __asm__ __volatile__("stmxcsr %0" : "=m"(w)::); - MSAN_UNPOISON(&w, sizeof(w)); - return w; + return 0; } -LIBC_INLINE void write_mxcsr(uint32_t w) { - __asm__ __volatile__("ldmxcsr %0" : : "m"(w) :); -} +LIBC_INLINE static int test_except(int excepts) { + uint16_t x86_excepts = internal::get_status_value_from_except(excepts); + uint16_t tested_excepts = sse::test_except(x86_excepts); -LIBC_INLINE void get_x87_state_descriptor(X87StateDescriptor &s) { - __asm__ __volatile__("fnstenv %0" : "=m"(s)); - MSAN_UNPOISON(&s, sizeof(s)); -} +#ifdef LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80 + tested_excepts |= x87::test_except(x86_excepts); +#endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80 -LIBC_INLINE void write_x87_state_descriptor(const X87StateDescriptor &s) { - __asm__ __volatile__("fldenv %0" : : "m"(s) :); + return internal::get_macro_from_exception_status(tested_excepts); } -LIBC_INLINE void fwait() { __asm__ __volatile__("fwait"); } - -} // namespace internal - -LIBC_INLINE int enable_except(int excepts) { - // In the x87 control word and in MXCSR, an exception is blocked - // if the corresponding bit is set. 
That is the reason for all the - // bit-flip operations below as we need to turn the bits to zero - // to enable them. - - uint16_t bit_mask = internal::get_status_value_for_except(excepts); +LIBC_INLINE static int get_except() { + uint16_t excepts = sse::get_except(); - uint16_t x87_cw = internal::get_x87_control_word(); - uint16_t old_excepts = ~x87_cw & 0x3F; // Save previously enabled exceptions. - x87_cw &= ~bit_mask; - internal::write_x87_control_word(x87_cw); +#ifdef LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80 + excepts |= x87::get_except(); +#endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80 - // Enabling SSE exceptions via MXCSR is a nice thing to do but - // might not be of much use practically as SSE exceptions and the x87 - // exceptions are independent of each other. - uint32_t mxcsr = internal::get_mxcsr(); - mxcsr &= ~(bit_mask << internal::MXCSR_EXCEPTION_CONTOL_BIT_POISTION); - internal::write_mxcsr(mxcsr); - - // Since the x87 exceptions and SSE exceptions are independent of each, - // it doesn't make much sence to report both in the return value. Most - // often, the standard floating point functions deal with FPU operations - // so we will retrun only the old x87 exceptions. - return internal::exception_status_to_macro(old_excepts); + return internal::get_macro_from_exception_status(excepts); } -LIBC_INLINE int disable_except(int excepts) { - // In the x87 control word and in MXCSR, an exception is blocked - // if the corresponding bit is set. - - uint16_t bit_mask = internal::get_status_value_for_except(excepts); - - uint16_t x87_cw = internal::get_x87_control_word(); - uint16_t old_excepts = ~x87_cw & 0x3F; // Save previously enabled exceptions. - x87_cw |= bit_mask; - internal::write_x87_control_word(x87_cw); - - // Just like in enable_except, it is not clear if disabling SSE exceptions - // is required. But, we will still do it only as a "nice thing to do". 
- uint32_t mxcsr = internal::get_mxcsr(); - mxcsr |= (bit_mask << internal::MXCSR_EXCEPTION_CONTOL_BIT_POISTION); - internal::write_mxcsr(mxcsr); +LIBC_INLINE static int set_except(int excepts) { + uint16_t x86_excepts = internal::get_status_value_from_except(excepts); + sse::set_except(x86_excepts); - return internal::exception_status_to_macro(old_excepts); -} - -LIBC_INLINE int get_except() { - uint16_t mxcsr = static_cast(internal::get_mxcsr()); - uint16_t enabled_excepts = ~(mxcsr >> 7) & 0x3F; - return internal::exception_status_to_macro(enabled_excepts); -} +#ifdef LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80 + x87::set_except(x86_excepts); +#endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80 -LIBC_INLINE int clear_except(int excepts) { - internal::X87StateDescriptor state; - internal::get_x87_state_descriptor(state); - state.status_word &= - static_cast(~internal::get_status_value_for_except(excepts)); - internal::write_x87_state_descriptor(state); - - uint32_t mxcsr = internal::get_mxcsr(); - mxcsr &= ~internal::get_status_value_for_except(excepts); - internal::write_mxcsr(mxcsr); return 0; } -LIBC_INLINE int test_except(int excepts) { - uint16_t status_word = internal::get_x87_status_word(); - uint32_t mxcsr = internal::get_mxcsr(); - // Check both x87 status word and MXCSR. - uint16_t status_value = internal::get_status_value_for_except(excepts); - return internal::exception_status_to_macro( - static_cast(status_value & (status_word | mxcsr))); -} - -// Sets the exception flags but does not trigger the exception handler. -LIBC_INLINE int set_except(int excepts) { - uint16_t status_value = internal::get_status_value_for_except(excepts); - internal::X87StateDescriptor state; - internal::get_x87_state_descriptor(state); - state.status_word |= status_value; - internal::write_x87_state_descriptor(state); - - uint32_t mxcsr = internal::get_mxcsr(); - mxcsr |= status_value; - internal::write_mxcsr(mxcsr); - +// We will only OR sse exception flags. 
Even though this might make x87 and +// sse exception flags not in sync, the results will be synchronized when +// reading with get_except or test_except. +LIBC_INLINE static int raise_except(int excepts) { + uint16_t x86_excepts = internal::get_status_value_from_except(excepts); + sse::raise_except(x86_excepts); return 0; } -template LIBC_INLINE int raise_except(int excepts) { - uint16_t status_value = internal::get_status_value_for_except(excepts); - - // We set the status flag for exception one at a time and call the - // fwait instruction to actually get the processor to raise the - // exception by calling the exception handler. This scheme is per - // the description in "8.6 X87 FPU EXCEPTION SYNCHRONIZATION" - // of the "Intel 64 and IA-32 Architectures Software Developer's - // Manual, Vol 1". - - // FPU status word is read for each exception separately as the - // exception handler can potentially write to it (typically to clear - // the corresponding exception flag). By reading it separately, we - // ensure that the writes by the exception handler are maintained - // when raising the next exception. 
- - auto raise_helper = [](uint16_t singleExceptFlag) { - if constexpr (!SKIP_X87_FPU) { - internal::X87StateDescriptor state; - internal::get_x87_state_descriptor(state); - state.status_word |= singleExceptFlag; - internal::write_x87_state_descriptor(state); - } - - uint32_t mxcsr = 0; - mxcsr = internal::get_mxcsr(); - mxcsr |= singleExceptFlag; - internal::write_mxcsr(mxcsr); - internal::fwait(); - }; - - if (status_value & internal::ExceptionFlags::INVALID_F) - raise_helper(internal::ExceptionFlags::INVALID_F); - if (status_value & internal::ExceptionFlags::DIV_BY_ZERO_F) - raise_helper(internal::ExceptionFlags::DIV_BY_ZERO_F); - if (status_value & internal::ExceptionFlags::OVERFLOW_F) - raise_helper(internal::ExceptionFlags::OVERFLOW_F); - if (status_value & internal::ExceptionFlags::UNDERFLOW_F) - raise_helper(internal::ExceptionFlags::UNDERFLOW_F); - if (status_value & internal::ExceptionFlags::INEXACT_F) - raise_helper(internal::ExceptionFlags::INEXACT_F); -#ifdef __FE_DENORM - if (status_value & internal::ExceptionFlags::DENORMAL_F) { - raise_helper(internal::ExceptionFlags::DENORMAL_F); - } -#endif // __FE_DENORM +LIBC_INLINE static int enable_except(int excepts) { + uint16_t x86_excepts = internal::get_status_value_from_except(excepts); + uint16_t old_excepts = sse::enable_except(x86_excepts); - // There is no special synchronization scheme available to - // raise SEE exceptions. So, we will ignore that for now. - // Just plain writing to the MXCSR register does not guarantee - // the exception handler will be called. 
+#ifdef LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80 + old_excepts |= x87::enable_except(x86_excepts); +#endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80 - return 0; + return internal::get_macro_from_exception_status(old_excepts); } -LIBC_INLINE int get_round() { - uint16_t bit_value = - (internal::get_mxcsr() >> internal::MXCSR_ROUNDING_CONTROL_BIT_POSITION) & - 0x3; - switch (bit_value) { - case internal::RoundingControlValue::TO_NEAREST: - return FE_TONEAREST; - case internal::RoundingControlValue::DOWNWARD: - return FE_DOWNWARD; - case internal::RoundingControlValue::UPWARD: - return FE_UPWARD; - case internal::RoundingControlValue::TOWARD_ZERO: - return FE_TOWARDZERO; - default: - return -1; // Error value. - } -} - -LIBC_INLINE int set_round(int mode) { - uint16_t bit_value; - switch (mode) { - case FE_TONEAREST: - bit_value = internal::RoundingControlValue::TO_NEAREST; - break; - case FE_DOWNWARD: - bit_value = internal::RoundingControlValue::DOWNWARD; - break; - case FE_UPWARD: - bit_value = internal::RoundingControlValue::UPWARD; - break; - case FE_TOWARDZERO: - bit_value = internal::RoundingControlValue::TOWARD_ZERO; - break; - default: - return 1; // To indicate failure - } +LIBC_INLINE static int disable_except(int excepts) { + uint16_t x86_excepts = internal::get_status_value_from_except(excepts); + uint16_t old_excepts = sse::disable_except(x86_excepts); - uint16_t x87_value = static_cast( - bit_value << internal::X87_ROUNDING_CONTROL_BIT_POSITION); - uint16_t x87_control = internal::get_x87_control_word(); - x87_control = static_cast( - (x87_control & - ~(uint16_t(0x3) << internal::X87_ROUNDING_CONTROL_BIT_POSITION)) | - x87_value); - internal::write_x87_control_word(x87_control); - - uint32_t mxcsr_value = bit_value - << internal::MXCSR_ROUNDING_CONTROL_BIT_POSITION; - uint32_t mxcsr_control = internal::get_mxcsr(); - mxcsr_control = (mxcsr_control & - ~(0x3 << internal::MXCSR_ROUNDING_CONTROL_BIT_POSITION)) | - mxcsr_value; - 
internal::write_mxcsr(mxcsr_control); +#ifdef LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80 + old_excepts |= x87::disable_except(x86_excepts); +#endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80 - return 0; + return internal::get_macro_from_exception_status(old_excepts); } -namespace internal { - -#if defined(_WIN32) -// MSVC fenv.h defines a very simple representation of the floating point state -// which just consists of control and status words of the x87 unit. -struct FPState { - uint32_t control_word; - uint32_t status_word; -}; -#elif defined(__APPLE__) -struct FPState { - uint16_t control_word; - uint16_t status_word; - uint32_t mxcsr; - uint8_t reserved[8]; -}; -#else -struct FPState { - X87StateDescriptor x87_status; - uint32_t mxcsr; -}; -#endif // _WIN32 - -} // namespace internal - -static_assert( - sizeof(fenv_t) == sizeof(internal::FPState), - "Internal floating point state does not match the public fenv_t type."); - -#ifdef _WIN32 - -// The exception flags in the Windows FEnv struct and the MXCSR have almost -// reversed bit positions. -struct WinExceptionFlags { - static constexpr uint32_t INEXACT_WIN = 0x01; - static constexpr uint32_t UNDERFLOW_WIN = 0x02; - static constexpr uint32_t OVERFLOW_WIN = 0x04; - static constexpr uint32_t DIV_BY_ZERO_WIN = 0x08; - static constexpr uint32_t INVALID_WIN = 0x10; - static constexpr uint32_t DENORMAL_WIN = 0x20; - - // The Windows FEnv struct has a second copy of all of these bits in the high - // byte of the 32 bit control word. These are used as the source of truth when - // calling fesetenv. 
- static constexpr uint32_t HIGH_OFFSET = 24; - - static constexpr uint32_t HIGH_INEXACT = INEXACT_WIN << HIGH_OFFSET; - static constexpr uint32_t HIGH_UNDERFLOW = UNDERFLOW_WIN << HIGH_OFFSET; - static constexpr uint32_t HIGH_OVERFLOW = OVERFLOW_WIN << HIGH_OFFSET; - static constexpr uint32_t HIGH_DIV_BY_ZERO = DIV_BY_ZERO_WIN << HIGH_OFFSET; - static constexpr uint32_t HIGH_INVALID = INVALID_WIN << HIGH_OFFSET; - static constexpr uint32_t HIGH_DENORMAL = DENORMAL_WIN << HIGH_OFFSET; -}; - -/* - fenv_t control word format: - - Windows (at least for x64) uses a 4 byte control fenv control word stored in - a 32 bit integer. The first byte contains just the rounding mode and the - exception masks, while the last two bytes contain that same information as - well as the flush-to-zero and denormals-are-zero flags. The flags are - represented with a truth table: - - 00 - No flags set - 01 - Flush-to-zero and Denormals-are-zero set - 11 - Flush-to-zero set - 10 - Denormals-are-zero set - - U represents unused. - - +-----Rounding Mode-----+ - | | - ++ ++ - || || - RRMMMMMM UUUUUUUU UUUUFFRR UUMMMMMM - | | || | | - +----+ flags---++ +----+ - | | - +------Exception Masks-----+ - - - fenv_t status word format: - - The status word is a lot simpler for this conversion, since only the - exception flags are used in the MXCSR. - - +----+---Exception Flags---+----+ - | | | | - UUEEEEEE UUUUUUUU UUUUUUUU UUEEEEEE - - - - MXCSR Format: - - The MXCSR format is the same information, just organized differently. Since - the fenv_t struct for windows doesn't include the mxcsr bits, they must be - generated from the control word bits. 
- - Exception Masks---+ +---Exception Flags - | | - Flush-to-zero---+ +----+ +----+ - | | | | | - FRRMMMMMMDEEEEEE - || | - ++ +---Denormals-are-zero - | - +---Rounding Mode - - - The mask and flag order is as follows: - - fenv_t mxcsr - - denormal inexact - invalid underflow - div by 0 overflow - overflow div by 0 - underflow denormal - inexact invalid - - This is almost reverse, except for denormal and invalid which are in the - same order in both. - */ - -LIBC_INLINE int get_env(fenv_t *envp) { - internal::FPState *state = reinterpret_cast(envp); - - uint32_t status_word = 0; - uint32_t control_word = 0; - - uint32_t mxcsr = internal::get_mxcsr(); - - // Set exception flags in the status word - status_word |= (mxcsr & (internal::ExceptionFlags::INVALID_F | - internal::ExceptionFlags::DENORMAL_F)) - << 4; - status_word |= (mxcsr & internal::ExceptionFlags::DIV_BY_ZERO_F) << 1; - status_word |= (mxcsr & internal::ExceptionFlags::OVERFLOW_F) >> 1; - status_word |= (mxcsr & internal::ExceptionFlags::UNDERFLOW_F) >> 3; - status_word |= (mxcsr & internal::ExceptionFlags::INEXACT_F) >> 5; - status_word |= status_word << WinExceptionFlags::HIGH_OFFSET; - - // Set exception masks in bits 0-5 and 24-29 - control_word |= (mxcsr & ((internal::ExceptionFlags::INVALID_F | - internal::ExceptionFlags::DENORMAL_F) - << 7)) >> - 3; - control_word |= (mxcsr & (internal::ExceptionFlags::DIV_BY_ZERO_F << 7)) >> 6; - control_word |= (mxcsr & (internal::ExceptionFlags::OVERFLOW_F << 7)) >> 8; - control_word |= (mxcsr & (internal::ExceptionFlags::UNDERFLOW_F << 7)) >> 10; - control_word |= (mxcsr & (internal::ExceptionFlags::INEXACT_F << 7)) >> 12; - control_word |= control_word << WinExceptionFlags::HIGH_OFFSET; - - // Set rounding in bits 8-9 and 30-31 - control_word |= (mxcsr & 0x6000) >> 5; - control_word |= (mxcsr & 0x6000) << 17; +LIBC_INLINE static int get_round() { + uint16_t rounding_mode = sse::get_round(); + return internal::get_macro_from_rounding_control(rounding_mode); +} 
- // Set flush-to-zero in bit 10 - control_word |= (mxcsr & 0x8000) >> 5; +LIBC_INLINE static int set_round(int rounding_mode) { + uint16_t rounding = internal::get_rounding_control_from_macro(rounding_mode); + if (LIBC_UNLIKELY(rounding == internal::RoundingControl::ERROR)) + return -1; + sse::set_round(rounding); - // Set denormals-are-zero xor flush-to-zero in bit 11 - control_word |= (((mxcsr & 0x8000) >> 9) ^ (mxcsr & 0x0040)) << 5; +#ifdef LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80 + x87::set_round(rounding); +#endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80 - state->control_word = control_word; - state->status_word = status_word; return 0; } -LIBC_INLINE int set_env(const fenv_t *envp) { - const internal::FPState *state = - reinterpret_cast(envp); - - uint32_t mxcsr = 0; - - // Set exception flags from the status word - mxcsr |= static_cast( - (state->status_word & - (WinExceptionFlags::HIGH_DENORMAL | WinExceptionFlags::HIGH_INVALID)) >> - 28); - mxcsr |= static_cast( - (state->status_word & WinExceptionFlags::HIGH_DIV_BY_ZERO) >> 25); - mxcsr |= static_cast( - (state->status_word & WinExceptionFlags::HIGH_OVERFLOW) >> 23); - mxcsr |= static_cast( - (state->status_word & WinExceptionFlags::HIGH_UNDERFLOW) >> 21); - mxcsr |= static_cast( - (state->status_word & WinExceptionFlags::HIGH_INEXACT) >> 19); - - // Set denormals-are-zero from bit 10 xor bit 11 - mxcsr |= static_cast( - (((state->control_word & 0x800) >> 1) ^ (state->control_word & 0x400)) >> - 4); - - // Set exception masks from bits 24-29 - mxcsr |= static_cast( - (state->control_word & - (WinExceptionFlags::HIGH_DENORMAL | WinExceptionFlags::HIGH_INVALID)) >> - 21); - mxcsr |= static_cast( - (state->control_word & WinExceptionFlags::HIGH_DIV_BY_ZERO) >> 18); - mxcsr |= static_cast( - (state->control_word & WinExceptionFlags::HIGH_OVERFLOW) >> 16); - mxcsr |= static_cast( - (state->control_word & WinExceptionFlags::HIGH_UNDERFLOW) >> 14); - mxcsr |= static_cast( - (state->control_word & 
WinExceptionFlags::HIGH_INEXACT) >> 12); - - // Set rounding from bits 30-31 - mxcsr |= static_cast((state->control_word & 0xc0000000) >> 17); - - // Set flush-to-zero from bit 10 - mxcsr |= static_cast((state->control_word & 0x400) << 5); - - internal::write_mxcsr(mxcsr); - return 0; -} -#else -LIBC_INLINE int get_env(fenv_t *envp) { - internal::FPState *state = reinterpret_cast(envp); -#ifdef __APPLE__ - internal::X87StateDescriptor x87_status; - internal::get_x87_state_descriptor(x87_status); - state->control_word = x87_status.control_word; - state->status_word = x87_status.status_word; -#else - internal::get_x87_state_descriptor(state->x87_status); -#endif // __APPLE__ - state->mxcsr = internal::get_mxcsr(); +LIBC_INLINE static int get_env(fenv_t *env) { +#ifndef LIBC_COMPILER_IS_MSVC + if constexpr (sizeof(fenv_t) >= sizeof(internal::X87StateDescriptor)) { + // The fenv_t is expected to have x87 floating point environment. + internal::X87StateDescriptor x87_state; + x87::get_x87_state_descriptor(x87_state); + uint32_t mxcsr = static_cast(sse::get_mxcsr()); + if constexpr (sizeof(fenv_t) == sizeof(internal::X87StateDescriptor)) { + // The fenv_t is expected to have only x87 floating point environment, so + // we merge sse data to x87 state. + internal::mxcsr_to_x87_state(static_cast(mxcsr), x87_state); + // Copy the state data; + cpp::bit_copy(x87_state, *env); + } else { + // We expect to have at least extra 32-bit for mxcsr register in the + // fenv_t. 
+ static_assert( + sizeof(sizeof(fenv_t) >= + sizeof(internal::X87StateDescriptor) + sizeof(uint32_t))); + const char *x87_state_ptr = reinterpret_cast(&x87_state); + const char *mxcsr_ptr = reinterpret_cast(&mxcsr); + char *fenv_ptr = reinterpret_cast(env); + cpp::inline_copy(x87_state_ptr, fenv_ptr); + cpp::inline_copy(mxcsr_ptr, fenv_ptr + sizeof(x87_state)); + } + } else +#endif // LIBC_COMPILER_IS_MSVC + if constexpr (sizeof(fenv_t) == 2 * sizeof(uint32_t)) { + // fenv_t has 2 * uint32_t to store mxcsr with control + status + // separately. We will just duplicate mxcsr on those two fields. + uint32_t mxcsr = static_cast(sse::get_mxcsr()); + const char *mxcsr_ptr = reinterpret_cast(&mxcsr); + char *fenv_ptr = reinterpret_cast(env); + cpp::inline_copy(mxcsr_ptr, fenv_ptr); + cpp::inline_copy(mxcsr_ptr, fenv_ptr + sizeof(mxcsr)); + } else { + // Just copy mxcsr over to fenv_t. + // Make sure fenv_t is big enough. + static_assert(sizeof(fenv_t) >= sizeof(uint32_t)); + uint32_t mxcsr = static_cast(sse::get_mxcsr()); + const char *mxcsr_ptr = reinterpret_cast(&mxcsr); + char *fenv_ptr = reinterpret_cast(env); + cpp::inline_copy(mxcsr_ptr, fenv_ptr); + } return 0; } -LIBC_INLINE int set_env(const fenv_t *envp) { - // envp contains everything including pieces like the current - // top of FPU stack. We cannot arbitrarily change them. So, we first - // read the current status and update only those pieces which are - // not disruptive. - internal::X87StateDescriptor x87_status; - internal::get_x87_state_descriptor(x87_status); - - if (envp == FE_DFL_ENV) { - // Reset the exception flags in the status word. - x87_status.status_word &= ~uint16_t(0x3F); - // Reset other non-sensitive parts of the status word. - for (int i = 0; i < 5; i++) - x87_status._[i] = 0; - // In the control word, we do the following: - // 1. Mask all exceptions - // 2. Set rounding mode to round-to-nearest - // 3. Set the internal precision to double extended precision. 
- x87_status.control_word |= uint16_t(0x3F); // Mask all exceptions. - x87_status.control_word &= ~(uint16_t(0x3) << 10); // Round to nearest. - x87_status.control_word |= (uint16_t(0x3) << 8); // Extended precision. - internal::write_x87_state_descriptor(x87_status); - - // We take the exact same approach MXCSR register as well. - // MXCSR has two additional fields, "flush-to-zero" and - // "denormals-are-zero". We reset those bits. Also, MXCSR does not - // have a field which controls the precision of internal operations. - uint32_t mxcsr = internal::get_mxcsr(); - mxcsr &= ~uint16_t(0x3F); // Clear exception flags. - mxcsr &= ~(uint16_t(0x1) << 6); // Reset denormals-are-zero - mxcsr |= (uint16_t(0x3F) << 7); // Mask exceptions - mxcsr &= ~(uint16_t(0x3) << 13); // Round to nearest. - mxcsr &= ~(uint16_t(0x1) << 15); // Reset flush-to-zero - internal::write_mxcsr(mxcsr); - +LIBC_INLINE int set_env(const fenv_t *env) { + if (env == FE_DFL_ENV) { +#ifndef LIBC_COMPILER_IS_MSVC + x87::initialize_x87_state(); +#endif // LIBC_COMPILER_IS_MSVC + // Initial state of mxcsr: + // Round-to-nearest, all exceptions are masked, all exception flags are + // cleared. + sse::write_mxcsr(0x1f80); return 0; } - const internal::FPState *fpstate = - reinterpret_cast(envp); - - // Copy the exception status flags from envp. - x87_status.status_word &= ~uint16_t(0x3F); -#ifdef __APPLE__ - x87_status.status_word |= (fpstate->status_word & 0x3F); - // We can set the x87 control word as is as there no sensitive bits. - x87_status.control_word = fpstate->control_word; -#else - x87_status.status_word |= (fpstate->x87_status.status_word & 0x3F); - // Copy other non-sensitive parts of the status word. - for (int i = 0; i < 5; i++) - x87_status._[i] = fpstate->x87_status._[i]; - // We can set the x87 control word as is as there no sensitive bits. 
- x87_status.control_word = fpstate->x87_status.control_word; -#endif // __APPLE__ - internal::write_x87_state_descriptor(x87_status); - - // We can write the MXCSR state as is as there are no sensitive bits. - internal::write_mxcsr(fpstate->mxcsr); +#ifndef LIBC_COMPILER_IS_MSVC + if constexpr (sizeof(fenv_t) > sizeof(internal::X87StateDescriptor)) { + // We expect to have at least extra 32-bit for mxcsr register in the fenv_t. + static_assert( + sizeof(sizeof(fenv_t) >= + sizeof(internal::X87StateDescriptor) + sizeof(uint32_t))); + internal::X87StateDescriptor x87_state; + uint32_t mxcsr = 0; + + char *x87_state_ptr = reinterpret_cast(&x87_state); + char *mxcsr_ptr = reinterpret_cast(&mxcsr); + const char *fenv_ptr = reinterpret_cast(env); + + cpp::inline_copy(fenv_ptr, x87_state_ptr); + cpp::inline_copy(fenv_ptr + sizeof(x87_state), mxcsr_ptr); + + x87::write_x87_state_descriptor(x87_state); + sse::write_mxcsr(mxcsr); + } else if constexpr (sizeof(fenv_t) == sizeof(internal::X87StateDescriptor)) { + const internal::X87StateDescriptor *x87_state_ptr = + reinterpret_cast(env); + uint32_t mxcsr = internal::x87_state_to_mxcsr(*x87_state_ptr); + + x87::write_x87_state_descriptor(*x87_state_ptr); + sse::write_mxcsr(mxcsr); + } else +#endif // LIBC_COMPILER_IS_MSVC + if constexpr (sizeof(fenv_t) == 2 * sizeof(uint32_t)) { + // fenv_t has 2 * uint32_t to store mxcsr with control + status + // separately. We will just merge mxcsr on those two fields. + uint32_t mxcsr = 0, mxcsr_hi = 0; + char *mxcsr_ptr = reinterpret_cast(&mxcsr); + char *mxcsr_hi_ptr = reinterpret_cast(&mxcsr_hi); + const char *fenv_ptr = reinterpret_cast(env); + cpp::inline_copy(fenv_ptr, mxcsr_ptr); + cpp::inline_copy(fenv_ptr + sizeof(mxcsr), + mxcsr_hi_ptr); + sse::write_mxcsr(mxcsr | mxcsr_hi); + } else { + // Just copy mxcsr over to fenv_t. + // Make sure fenv_t is big enough. 
+ static_assert(sizeof(fenv_t) >= sizeof(uint32_t)); + uint32_t mxcsr = 0; + char *mxcsr_ptr = reinterpret_cast(&mxcsr); + const char *fenv_ptr = reinterpret_cast(env); + cpp::inline_copy(fenv_ptr, mxcsr_ptr); + sse::write_mxcsr(mxcsr); + } + return 0; } -#endif } // namespace fputil } // namespace LIBC_NAMESPACE_DECL +#endif // __SSE__ + #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENVIMPL_H diff --git a/libc/src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h b/libc/src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h new file mode 100644 index 0000000000000..022a274798437 --- /dev/null +++ b/libc/src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h @@ -0,0 +1,129 @@ +//===-- sse2 floating point env manipulation utilities ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENV_MXCSR_UTILS_H +#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENV_MXCSR_UTILS_H + +#include "hdr/stdint_proxy.h" +#include "hdr/types/fenv_t.h" +#include "src/__support/FPUtil/x86_64/fenv_x86_common.h" +#include "src/__support/macros/attributes.h" // LIBC_INLINE +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/architectures.h" +#include "src/__support/macros/properties/compiler.h" +#include "src/__support/macros/sanitizer.h" + +#include + +namespace LIBC_NAMESPACE_DECL { +namespace fputil { + +namespace sse { + +using internal::ExceptionFlags; +using internal::RoundingControl; + +// SSE FPU environment from Intel 64 and IA-32 Architectures Software Developer +// Manuals - Chapter 10 +// https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html +// +// The SSE floating point environment will be save/load with LDMXCSR/STMXCSR +// 
instructions, which will return the following 4-byte structure in 32-bit +// mode (see section 10.2.3, figure 10-3 in the manual linked above). + +// SSE MXCSR register (32-bit) structure: (section 10.2.3 in the manual) +// - Bit 0: Invalid Exception +// - Bit 1: Denormal Exception +// - Bit 2: Division-by-zero Exception +// - Bit 3: Overflow Exception +// - Bit 4: Underflow Exception +// - Bit 5: Inexact Exception +// - Bit 6: Denormal Are Zeros (DAZ) +// - Bit 7: Invalid Exception Mask +// - Bit 8: Denormal Exception Mask +// - Bit 9: Division-by-zero Exception Mask +// - Bit 10: Overflow Exception Mask +// - Bit 11: Underflow Exception Mask +// - Bit 12: Inexact Exception Mask +// - Bit 13-14: Rounding Control +// - Bit 15: Flush Denormal To Zero (FTZ) +// - Bit 16-31: Reserved, will raise general-protection exception if set to +// non-zero. + +LIBC_INLINE static uint32_t get_mxcsr() { return _mm_getcsr(); } + +LIBC_INLINE static void write_mxcsr(uint32_t w) { _mm_setcsr(w); } + +LIBC_INLINE static void clear_except(uint16_t excepts) { + uint32_t mxcsr = _MM_GET_EXCEPTION_STATE(); + mxcsr &= ~static_cast(excepts); + _MM_SET_EXCEPTION_STATE(mxcsr); +} + +LIBC_INLINE static uint16_t test_except(uint16_t excepts) { + uint32_t mxcsr = get_mxcsr(); + return static_cast(excepts & mxcsr); +} + +LIBC_INLINE static uint16_t get_except() { + uint32_t mxcsr = get_mxcsr(); + return static_cast((mxcsr >> ExceptionFlags::MXCSR_EXCEPTION_MASK_BIT_POSITION) & ExceptionFlags::ALL_F); +} + +LIBC_INLINE static void set_except(uint16_t excepts) { + _MM_SET_EXCEPTION_STATE(excepts); +} + +LIBC_INLINE static void raise_except(uint16_t excepts) { + uint32_t mxcsr = _MM_GET_EXCEPTION_STATE(); + mxcsr |= excepts; + _MM_SET_EXCEPTION_STATE(mxcsr); +} + +LIBC_INLINE static uint16_t enable_except(uint16_t excepts) { + uint32_t mxcsr = get_mxcsr(); + uint16_t old_excepts = + (mxcsr >> ExceptionFlags::MXCSR_EXCEPTION_MASK_BIT_POSITION) & + ExceptionFlags::ALL_F; + mxcsr &= 
~(static_cast(excepts) + << ExceptionFlags::MXCSR_EXCEPTION_MASK_BIT_POSITION); + write_mxcsr(mxcsr); + return old_excepts; +} + +LIBC_INLINE static uint16_t disable_except(uint16_t excepts) { + uint32_t mxcsr = get_mxcsr(); + uint16_t old_excepts = + (mxcsr >> ExceptionFlags::MXCSR_EXCEPTION_MASK_BIT_POSITION) & + ExceptionFlags::ALL_F; + mxcsr |= (static_cast(excepts) + << ExceptionFlags::MXCSR_EXCEPTION_MASK_BIT_POSITION); + write_mxcsr(mxcsr); + return old_excepts; +} + +LIBC_INLINE static uint16_t get_round() { + uint32_t mxcsr = get_mxcsr(); + return static_cast(mxcsr >> RoundingControl::MXCSR_BIT_POSITION) & + RoundingControl::ROUNDING_MASK; +} + +LIBC_INLINE static void set_round(uint16_t rounding_mode) { + uint32_t mxcsr = get_mxcsr(); + rounding_mode <<= RoundingControl::MXCSR_BIT_POSITION; + // Clear rounding bits. + mxcsr &= (~RoundingControl::MXCSR_ROUNDING_MASK); + write_mxcsr(mxcsr | rounding_mode); +} + +} // namespace sse + +} // namespace fputil +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENV_MXCSR_UTILS_H diff --git a/libc/src/__support/FPUtil/x86_64/fenv_x86_common.h b/libc/src/__support/FPUtil/x86_64/fenv_x86_common.h new file mode 100644 index 0000000000000..887f07a7cbe0e --- /dev/null +++ b/libc/src/__support/FPUtil/x86_64/fenv_x86_common.h @@ -0,0 +1,253 @@ +//===-- x87 floating point env manipulation functions -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENV_X86_COMMON_H +#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENV_X86_COMMON_H + +#include "hdr/stdint_proxy.h" +#include "hdr/types/fenv_t.h" +#include "src/__support/macros/attributes.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/architectures.h" +#include "src/__support/macros/properties/compiler.h" +#include "src/__support/macros/properties/cpu_features.h" + +namespace LIBC_NAMESPACE_DECL { +namespace fputil { + +namespace internal { + +// Default order of floating point exception flags in x87 and mxcsr registers: +// - Bit 0: Invalid Operations +// - Bit 1: Denormal +// - Bit 2: Divide-by-zero +// - Bit 3: Overflow +// - Bit 4: Underflow +// - Bit 5: Inexact +struct ExceptionFlags { + static constexpr uint16_t INVALID_F = 0x1; + // Some libcs define __FE_DENORM corresponding to the denormal input + // exception and include it in FE_ALL_EXCEPTS. We define and use it to + // support compiling against headers provided by such libcs.
+ static constexpr uint16_t DENORMAL_F = 0x2; + static constexpr uint16_t DIV_BY_ZERO_F = 0x4; + static constexpr uint16_t OVERFLOW_F = 0x8; + static constexpr uint16_t UNDERFLOW_F = 0x10; + static constexpr uint16_t INEXACT_F = 0x20; + static constexpr uint16_t ALL_F = + static_cast(INVALID_F | DENORMAL_F | DIV_BY_ZERO_F | + OVERFLOW_F | UNDERFLOW_F | INEXACT_F); + static constexpr unsigned MXCSR_EXCEPTION_MASK_BIT_POSITION = 7; +}; + +LIBC_INLINE static constexpr bool fenv_exceptions_match_x86() { + return (FE_INVALID == ExceptionFlags::INVALID_F) && +#ifdef __FE_DENORM + (__FE_DENORM == ExceptionFlags::DENORMAL_F) && +#endif // __FE_DENORM + (FE_DIVBYZERO == ExceptionFlags::DIV_BY_ZERO_F) && + (FE_OVERFLOW == ExceptionFlags::OVERFLOW_F) && + (FE_UNDERFLOW == ExceptionFlags::UNDERFLOW_F) && + (FE_INEXACT == ExceptionFlags::INEXACT_F); +} + +// The rounding control values in the x87 control register and the MXCSR +// register have the same 2-bit encoding but have different bit positions. +// See below for the bit positions. +struct RoundingControl { + static constexpr uint16_t TO_NEAREST = 0x0; + static constexpr uint16_t DOWNWARD = 0x1; + static constexpr uint16_t UPWARD = 0x2; + static constexpr uint16_t TOWARD_ZERO = 0x3; + static constexpr uint16_t ROUNDING_MASK = 0x3; + static constexpr unsigned X87_BIT_POSITION = 10; + static constexpr unsigned MXCSR_BIT_POSITION = 13; + static constexpr uint16_t X87_ROUNDING_MASK = ROUNDING_MASK + << X87_BIT_POSITION; + static constexpr uint16_t MXCSR_ROUNDING_MASK = ROUNDING_MASK + << MXCSR_BIT_POSITION; + static constexpr uint16_t ERROR = 0xFFFF; +}; + +static constexpr uint16_t MXCSR_ROUNDING_CONTROL_BIT_POSITION = 13; + +// Exception flags are individual bits in the corresponding registers. +// So, we just OR the bit values to get the full set of exceptions.
+LIBC_INLINE static uint16_t get_status_value_from_except(int excepts) { + if constexpr (fenv_exceptions_match_x86()) { + return static_cast(excepts & ExceptionFlags::ALL_F); + } else { + // We will make use of the fact that exception control bits are single + // bit flags in the control registers. + return ((excepts & FE_INVALID) ? ExceptionFlags::INVALID_F : 0) | +#ifdef __FE_DENORM + ((excepts & __FE_DENORM) ? ExceptionFlags::DENORMAL_F : 0) | +#endif // __FE_DENORM + ((excepts & FE_DIVBYZERO) ? ExceptionFlags::DIV_BY_ZERO_F : 0) | + ((excepts & FE_OVERFLOW) ? ExceptionFlags::OVERFLOW_F : 0) | + ((excepts & FE_UNDERFLOW) ? ExceptionFlags::UNDERFLOW_F : 0) | + ((excepts & FE_INEXACT) ? ExceptionFlags::INEXACT_F : 0); + } +} + +LIBC_INLINE static int get_macro_from_exception_status(uint16_t status) { + if constexpr (fenv_exceptions_match_x86()) { + return status & ExceptionFlags::ALL_F; + } else { + return ((status & ExceptionFlags::INVALID_F) ? FE_INVALID : 0) | +#ifdef __FE_DENORM + ((status & ExceptionFlags::DENORMAL_F) ? __FE_DENORM : 0) | +#endif // __FE_DENORM + ((status & ExceptionFlags::DIV_BY_ZERO_F) ? FE_DIVBYZERO : 0) | + ((status & ExceptionFlags::OVERFLOW_F) ? FE_OVERFLOW : 0) | + ((status & ExceptionFlags::UNDERFLOW_F) ? FE_UNDERFLOW : 0) | + ((status & ExceptionFlags::INEXACT_F) ? 
FE_INEXACT : 0); + } +} + +LIBC_INLINE static uint16_t get_rounding_control_from_macro(int rounding) { + switch (rounding) { + case FE_TONEAREST: + return RoundingControl::TO_NEAREST; + case FE_DOWNWARD: + return RoundingControl::DOWNWARD; + case FE_UPWARD: + return RoundingControl::UPWARD; + case FE_TOWARDZERO: + return RoundingControl::TOWARD_ZERO; + default: + return RoundingControl::ERROR; + } +} + +LIBC_INLINE static int get_macro_from_rounding_control(uint16_t rounding) { + switch (rounding) { + case RoundingControl::TO_NEAREST: + return FE_TONEAREST; + case RoundingControl::DOWNWARD: + return FE_DOWNWARD; + case RoundingControl::UPWARD: + return FE_UPWARD; + case RoundingControl::TOWARD_ZERO: + return FE_TOWARDZERO; + default: + return -1; + } +} + +// x87 FPU environment from Intel 64 and IA-32 Architectures Software Developer +// Manuals - Chapter 8 +// https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html +// +// The x87 floating point environment will be save/load with FNSTENV/FLDENV +// instructions, which will return the following 28-byte structure in 32-bit +// mode (see section 8.1.10, figures 8-9 and 8-10 in the manual linked above), +// in which we only use the control and status words. 
+ +// x87 control word (16-bit) structure: (section 8.1.5 in the manual) +// - Bit 0: Invalid Exception Mask +// - Bit 1: Denormal Exception Mask +// - Bit 2: Division-by-zero Exception Mask +// - Bit 3: Overflow Exception Mask +// - Bit 4: Underflow Exception Mask +// - Bit 5: Inexact Exception Mask +// - Bit 6-7: Reserved +// - Bit 8-9: Precision Control +// 00 - Single Precision +// 01 - Reserved +// 10 - Double Precision +// 11 - Double Extended Precision (default) +// - Bit 10-11: Rounding Control +// 00 - Round to nearest, tie to even +// 01 - Round down (toward -inf) +// 10 - Round up (toward +inf) +// 11 - Round toward zero (truncate) +// - Bit 13-15: Reserved + +// x87 status word (16-bit) structure: (section 8.1.3 in the manual) +// - Bit 0: Invalid Exception +// - Bit 1: Denormal Exception +// - Bit 2: Division-by-zero Exception +// - Bit 3: Overflow Exception +// - Bit 4: Underflow Exception +// - Bit 5: Inexact Exception +// - Bit 6: Stack Fault +// - Bit 7 Exception Summary Status +// - Bit 8-10: Condition Code +// - Bit 11-13: Top-of-stack Pointer +// - Bit 14: Condition Code +// - Bit 15: FPU Busy Flag +struct X87StateDescriptor { + uint16_t control_word; + uint16_t unused1; + uint16_t status_word; + uint16_t unused2; + uint32_t _[5]; +}; + +// Putting x87 state descriptor to mxcsr. 
+// SSE MXCSR register (32-bit) structure: (section 10.2.3 in the manual) +// - Bit 0: Invalid Exception +// - Bit 1: Denormal Exception +// - Bit 2: Division-by-zero Exception +// - Bit 3: Overflow Exception +// - Bit 4: Underflow Exception +// - Bit 5: Inexact Exception +// - Bit 6: Denormal Are Zeros (DAZ) +// - Bit 7: Invalid Exception Mask +// - Bit 8: Denormal Exception Mask +// - Bit 9: Division-by-zero Exception Mask +// - Bit 10: Overflow Exception Mask +// - Bit 11: Underflow Exception Mask +// - Bit 12: Inexact Exception Mask +// - Bit 13-14: Rounding Control +// - Bit 15: Flush Denormal To Zero (FTZ) +// - Bit 16-31: Reserved, will raise general-protection exception if set to +// non-zero. +// For all of the following exception functions, we assume the excepts are +// normalized according to x86 and mxcsr exceptions defined in +// fenv_x86_common.h: ExceptionFlags. +LIBC_INLINE static uint16_t x87_state_to_mxcsr(const X87StateDescriptor &s) { + uint16_t mxcsr = 0; + // Copy 6 exception flags from status word. + mxcsr = s.status_word & ExceptionFlags::ALL_F; + // Copy 6 exception masks from control word. + mxcsr |= (s.control_word & ExceptionFlags::ALL_F) + << ExceptionFlags::MXCSR_EXCEPTION_MASK_BIT_POSITION; + // Copy 2-bit rounding control. + mxcsr |= (s.control_word & RoundingControl::X87_ROUNDING_MASK) + << (RoundingControl::MXCSR_BIT_POSITION - + RoundingControl::X87_BIT_POSITION); + return mxcsr; +} + +LIBC_INLINE static void mxcsr_to_x87_state(uint16_t mxcsr, + X87StateDescriptor &s) { + // Clear exception mask and rounding control. + s.control_word &= + ~(ExceptionFlags::ALL_F | RoundingControl::X87_ROUNDING_MASK); + // Copy 6 exception masks. + s.control_word |= + (mxcsr >> ExceptionFlags::MXCSR_EXCEPTION_MASK_BIT_POSITION) & + ExceptionFlags::ALL_F; + // Copy rounding control. 
+ s.control_word |= + (mxcsr & RoundingControl::MXCSR_ROUNDING_MASK) >> + (RoundingControl::MXCSR_BIT_POSITION - RoundingControl::X87_BIT_POSITION); + // Clear exception flags + s.status_word &= ~ExceptionFlags::ALL_F; + // Copy 6 exception status flags. + s.status_word |= mxcsr & ExceptionFlags::ALL_F; +} + +} // namespace internal + +} // namespace fputil +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENV_X86_COMMON_H diff --git a/libc/src/__support/FPUtil/x86_64/fenv_x87_only.h b/libc/src/__support/FPUtil/x86_64/fenv_x87_only.h new file mode 100644 index 0000000000000..f78e4ecfaf619 --- /dev/null +++ b/libc/src/__support/FPUtil/x86_64/fenv_x87_only.h @@ -0,0 +1,137 @@ +//===-- x87 floating point env manipulation functions -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENV_X87_ONLY_H +#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENV_X87_ONLY_H + +#include "hdr/stdint_proxy.h" +#include "hdr/types/fenv_t.h" +#include "src/__support/CPP/bit.h" +#include "src/__support/FPUtil/x86_64/fenv_x87_utils.h" +#include "src/__support/macros/attributes.h" // LIBC_INLINE +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" +#include "src/__support/macros/properties/architectures.h" +#include "src/__support/macros/properties/compiler.h" +#include "src/__support/macros/sanitizer.h" + +namespace LIBC_NAMESPACE_DECL { +namespace fputil { + +using internal::ExceptionFlags; +using internal::RoundingControl; + +// Implementing fenv.h functions when only x87 are available. 
+ +LIBC_INLINE static int clear_except(int excepts) { + uint16_t x86_excepts = internal::get_status_value_from_except(excepts); + x87::clear_except(x86_excepts); + return 0; +} + +LIBC_INLINE static int test_except(int excepts) { + uint16_t x86_excepts = internal::get_status_value_from_except(excepts); + uint16_t tested_excepts = x87::test_except(x86_excepts); + return internal::get_macro_from_exception_status(tested_excepts); +} + +LIBC_INLINE static int get_except() { + uint16_t excepts = x87::get_except(); + return internal::get_macro_from_exception_status(excepts); +} + +LIBC_INLINE static int set_except(int excepts) { + uint16_t x86_excepts = internal::get_status_value_from_except(excepts); + x87::set_except(x86_excepts); + return 0; +} + +LIBC_INLINE static int raise_except(int excepts) { + uint16_t x86_excepts = internal::get_status_value_from_except(excepts); + x87::raise_except(x86_excepts); + return 0; +} + +LIBC_INLINE static int enable_except(int excepts) { + uint16_t x86_excepts = internal::get_status_value_from_except(excepts); + uint16_t old_excepts = x87::enable_except(x86_excepts); + return internal::get_macro_from_exception_status(old_excepts); +} + +LIBC_INLINE static int disable_except(int excepts) { + uint16_t x86_excepts = internal::get_status_value_from_except(excepts); + uint16_t old_excepts = x87::disable_except(x86_excepts); + return internal::get_macro_from_exception_status(old_excepts); +} + +LIBC_INLINE static int get_round() { + uint16_t rounding_mode = x87::get_round(); + return internal::get_macro_from_rounding_control(rounding_mode); +} + +LIBC_INLINE static int set_round(int rounding_mode) { + uint16_t rounding = internal::get_rounding_control_from_macro(rounding_mode); + if (LIBC_UNLIKELY(rounding == internal::RoundingControl::ERROR)) + return -1; + x87::set_round(rounding); + return 0; +} + +LIBC_INLINE static void get_env(fenv_t *env) { + internal::X87StateDescriptor x87_state; + x87::get_x87_state_descriptor(x87_state); + if 
constexpr (sizeof(fenv_t) >= sizeof(internal::X87StateDescriptor)) { + // When fenv_t is 28 bytes or more, we assume that the structure is simply + // store the entire x87 fenv state descriptor (28 bytes) at the beginning of + // the struct. + cpp::bit_copy(x87_state, *env); + } else { + // When fenv_t is less than 28 bytes, we will assume that it is following + // mxcsr structure, so we simply put x87 state descriptor to the first + // 16-bit following mxcsr. + uint16_t mxcsr = internal::x87_state_to_mxcsr(x87_state); + const char *mxcsr_ptr = reinterpret_cast(&mxcsr); + char *env_ptr = reinterpret_cast(env); + cpp::inline_copy(mxcsr_ptr, env_ptr); + } +} + +LIBC_INLINE static int set_env(const fenv_t *env) { + if (env == FE_DFL_ENV) { + x87::initialize_x87_state(); + return 0; + } + + internal::X87StateDescriptor x87_state; + const char *fenv_ptr = reinterpret_cast(env); + if constexpr (sizeof(fenv_t) >= sizeof(internal::X87StateDescriptor)) { + // When fenv_t is 28 bytes or more, we assume that the structure is simply + // store the entire x87 fenv state descriptor (28 bytes) at the beginning of + // the struct. + char *x87_state_ptr = reinterpret_cast(&x87_state); + cpp::inline_copy(fenv_ptr, x87_state_ptr); + } else { + // When fenv_t is less than 28 bytes, we will assume that it is following + // mxcsr structure, so we simply put x87 state descriptor to the first + // 16-bit following mxcsr. + uint16_t mxcsr = 0; + static_assert(sizeof(fenv_t) >= sizeof(mxcsr)); + cpp::inline_copy(fenv_ptr, reinterpret_cast(&mxcsr)); + // We then load the current x87 state descriptor, then replace all + // relevant bits with mxcsr data before writing them back. 
+ x87::get_x87_state_descriptor(x87_state); + internal::mxcsr_to_x87_state(mxcsr, x87_state); + } + x87::write_x87_state_descriptor(x87_state); + return 0; +} + +} // namespace fputil +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENV_X87_ONLY_H diff --git a/libc/src/__support/FPUtil/x86_64/fenv_x87_utils.h b/libc/src/__support/FPUtil/x86_64/fenv_x87_utils.h new file mode 100644 index 0000000000000..f00aeebd084fd --- /dev/null +++ b/libc/src/__support/FPUtil/x86_64/fenv_x87_utils.h @@ -0,0 +1,194 @@ +//===-- x87 floating point env manipulation utilities -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENV_X87_UTILS_H +#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENV_X87_UTILS_H + +#include "hdr/stdint_proxy.h" +#include "hdr/types/fenv_t.h" +#include "src/__support/FPUtil/x86_64/fenv_x86_common.h" +#include "src/__support/macros/attributes.h" // LIBC_INLINE +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/architectures.h" +#include "src/__support/macros/properties/compiler.h" +#include "src/__support/macros/sanitizer.h" + +namespace LIBC_NAMESPACE_DECL { +namespace fputil { + +namespace x87 { + +using internal::ExceptionFlags; +using internal::RoundingControl; +using internal::X87StateDescriptor; + +LIBC_INLINE static uint16_t get_x87_control_word() { + uint16_t w; + +#ifdef LIBC_COMPILER_IS_MSVC + __asm fstcw w; +#else // !LIBC_COMPILER_IS_MSVC + asm volatile("fnstcw %0" : "=m"(w)::); + MSAN_UNPOISON(&w, sizeof(w)); +#endif // LIBC_COMPILER_IS_MSVC + + return w; +} + +LIBC_INLINE static void write_x87_control_word(uint16_t w) { +#ifdef LIBC_COMPILER_IS_MSVC + __asm fldcw 
w; +#else // !LIBC_COMPILER_IS_MSVC + asm volatile("fldcw %0" : : "m"(w) :); +#endif // LIBC_COMPILER_IS_MSVC +} + +LIBC_INLINE static uint16_t get_x87_status_word() { + uint16_t w; + +#ifdef LIBC_COMPILER_IS_MSVC + __asm fnstsw w; +#else // !LIBC_COMPILER_IS_MSVC + asm volatile("fnstsw %0" : "=m"(w)::); + MSAN_UNPOISON(&w, sizeof(w)); +#endif // LIBC_COMPILER_IS_MSVC + + return w; +} + +LIBC_INLINE static void clear_x87_exceptions() { +#ifdef LIBC_COMPILER_IS_MSVC + __asm fnclex; +#else // !LIBC_COMPILER_IS_MSVC + asm volatile("fnclex" : : :); +#endif // LIBC_COMPILER_IS_MSVC +} + +LIBC_INLINE static void get_x87_state_descriptor(X87StateDescriptor &s) { +#ifdef LIBC_COMPILER_IS_MSVC + __asm fnstenv s; +#else // !LIBC_COMPILER_IS_MSVC + asm volatile("fnstenv %0" : "=m"(s)); + MSAN_UNPOISON(&s, sizeof(s)); +#endif // LIBC_COMPILER_IS_MSVC +} + +LIBC_INLINE static void +write_x87_state_descriptor(const X87StateDescriptor &s) { +#ifdef LIBC_COMPILER_IS_MSVC + __asm fldenv s; +#else // !LIBC_COMPILER_IS_MSVC + asm volatile("fldenv %0" : : "m"(s) :); +#endif // LIBC_COMPILER_IS_MSVC +} + +LIBC_INLINE static void initialize_x87_state() { +#ifdef LIBC_COMPILER_IS_MSVC + __asm fninit; +#else // !LIBC_COMPILER_IS_MSVC + asm volatile("fninit" : : :); +#endif // LIBC_COMPILER_IS_MSVC +} + +LIBC_INLINE static void clear_except(uint16_t excepts) { + if (excepts == ExceptionFlags::ALL_F) { + clear_x87_exceptions(); + return; + } + + X87StateDescriptor x87_descriptor; + get_x87_state_descriptor(x87_descriptor); + x87_descriptor.status_word &= static_cast(~excepts); + write_x87_state_descriptor(x87_descriptor); +} + +LIBC_INLINE static uint16_t test_except(uint16_t excepts) { + uint16_t x87_status = get_x87_status_word(); + return static_cast(x87_status & excepts); +} + +LIBC_INLINE static uint16_t get_except() { + uint16_t x87_status = get_x87_control_word(); + return static_cast(x87_status & ExceptionFlags::ALL_F); +} + +LIBC_INLINE static void set_except(uint16_t excepts) { + 
X87StateDescriptor x87_descriptor; + get_x87_state_descriptor(x87_descriptor); + uint16_t current_excepts = + static_cast(x87_descriptor.status_word & ExceptionFlags::ALL_F); + // Do nothing if excepts are unchanged. + if (current_excepts == excepts) + return; + // Clear excepts. + x87_descriptor.status_word &= static_cast(~ExceptionFlags::ALL_F); + // Set excepts. + x87_descriptor.status_word |= excepts; + write_x87_state_descriptor(x87_descriptor); +} + +LIBC_INLINE static void raise_except(uint16_t excepts) { + X87StateDescriptor x87_descriptor; + get_x87_state_descriptor(x87_descriptor); + uint16_t current_excepts = + static_cast(x87_descriptor.status_word & ExceptionFlags::ALL_F); + // Do nothing if excepts are unchanged. + if ((current_excepts | excepts) == current_excepts) + return; + // Update excepts. + x87_descriptor.status_word |= excepts; + write_x87_state_descriptor(x87_descriptor); +} + +LIBC_INLINE static uint16_t enable_except(uint16_t excepts) { + uint16_t x87_control = get_x87_control_word(); + uint16_t old_excepts = + static_cast(~x87_control & ExceptionFlags::ALL_F); + // Only update if excepts are not enabled. + if ((excepts | old_excepts) != old_excepts) { + x87_control &= static_cast(~excepts); + write_x87_control_word(x87_control); + } + return old_excepts; +} + +LIBC_INLINE static uint16_t disable_except(uint16_t excepts) { + uint16_t x87_control = get_x87_control_word(); + uint16_t old_excepts = + static_cast(~x87_control & ExceptionFlags::ALL_F); + // Only update excepts if some of the excepts are enabled. 
+ if ((x87_control | excepts) != x87_control) { + x87_control |= excepts; + write_x87_control_word(x87_control); + } + return old_excepts; +} + +LIBC_INLINE static uint16_t get_round() { + uint16_t x87_control = get_x87_control_word(); + return static_cast( + (x87_control >> RoundingControl::X87_BIT_POSITION) & + RoundingControl::ROUNDING_MASK); +} + +LIBC_INLINE static void set_round(uint16_t rounding_mode) { + uint16_t x87_control = get_x87_control_word(); + rounding_mode <<= RoundingControl::X87_BIT_POSITION; + uint16_t x87_control_new = + (x87_control & (~RoundingControl::X87_ROUNDING_MASK)) | rounding_mode; + // Only update if rounding mode changes. + if (x87_control_new != x87_control) + write_x87_control_word(x87_control_new); +} + +} // namespace x87 + +} // namespace fputil +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENV_X87_UTILS_H diff --git a/libc/src/__support/macros/properties/compiler.h b/libc/src/__support/macros/properties/compiler.h index 6947bc7aa9010..d51bdfcf5760c 100644 --- a/libc/src/__support/macros/properties/compiler.h +++ b/libc/src/__support/macros/properties/compiler.h @@ -38,6 +38,11 @@ #define LIBC_COMPILER_IS_MSVC // https://learn.microsoft.com/en-us/cpp/preprocessor/predefined-macros #define LIBC_COMPILER_MSVC_VER (_MSC_VER) +#ifdef _M_X64 +#define LIBC_COMPILER_IS_MSVC_X64 +#else +#define LIBC_COMPILER_IS_MSVC_X86 +#endif #endif #endif // LLVM_LIBC_SRC___SUPPORT_MACROS_PROPERTIES_COMPILER_H diff --git a/libc/test/UnitTest/FEnvSafeTest.cpp b/libc/test/UnitTest/FEnvSafeTest.cpp index 4393f9d5e5c3b..7ef28efbf64a9 100644 --- a/libc/test/UnitTest/FEnvSafeTest.cpp +++ b/libc/test/UnitTest/FEnvSafeTest.cpp @@ -51,26 +51,50 @@ void FEnvSafeTest::expect_fenv_eq(const fenv_t &before_fenv, EXPECT_EQ(before_state.ControlWord, after_state.ControlWord); EXPECT_EQ(before_state.StatusWord, after_state.StatusWord); -#elif defined(LIBC_TARGET_ARCH_IS_X86) && !defined(__APPLE__) && \ -
!defined(LIBC_COMPILER_IS_MSVC) - using LIBC_NAMESPACE::fputil::internal::FPState; - const FPState &before_state = reinterpret_cast(before_fenv); - const FPState &after_state = reinterpret_cast(after_fenv); - -#if defined(_WIN32) - EXPECT_EQ(before_state.control_word, after_state.control_word); - EXPECT_EQ(before_state.status_word, after_state.status_word); -#elif defined(__APPLE__) - EXPECT_EQ(before_state.control_word, after_state.control_word); - EXPECT_EQ(before_state.status_word, after_state.status_word); - EXPECT_EQ(before_state.mxcsr, after_state.mxcsr); -#else - EXPECT_EQ(before_state.x87_status.control_word, - after_state.x87_status.control_word); - EXPECT_EQ(before_state.x87_status.status_word, - after_state.x87_status.status_word); - EXPECT_EQ(before_state.mxcsr, after_state.mxcsr); -#endif +#elif defined(LIBC_TARGET_ARCH_IS_X86) + using LIBC_NAMESPACE::cpp::inline_copy; + using LIBC_NAMESPACE::fputil::internal::X87StateDescriptor; + if constexpr (sizeof(fenv_t) >= + sizeof(X87StateDescriptor) + sizeof(uint32_t)) { + const char *before_fenv_ptr = reinterpret_cast(&before_fenv); + const char *after_fenv_ptr = reinterpret_cast(&after_fenv); + X87StateDescriptor before_x87_state, after_x87_state; + uint32_t before_mxcsr, after_mxcsr; + inline_copy( + before_fenv_ptr, reinterpret_cast(&before_x87_state)); + inline_copy( + after_fenv_ptr, reinterpret_cast(&after_x87_state)); + inline_copy(before_fenv_ptr + sizeof(X87StateDescriptor), + reinterpret_cast(&before_mxcsr)); + inline_copy(after_fenv_ptr + sizeof(X87StateDescriptor), + reinterpret_cast(&after_mxcsr)); + + EXPECT_EQ(before_x87_state.control_word, after_x87_state.control_word); + EXPECT_EQ(before_x87_state.status_word, after_x87_state.status_word); + EXPECT_EQ(before_mxcsr, after_mxcsr); + + } else if constexpr (sizeof(fenv_t) == sizeof(X87StateDescriptor)) { + const X87StateDescriptor &before_state = + reinterpret_cast(before_fenv); + const X87StateDescriptor &after_state = + 
reinterpret_cast(after_fenv); + EXPECT_EQ(before_state.control_word, after_state.control_word); + EXPECT_EQ(before_state.status_word, after_state.status_word); + + } else if constexpr (sizeof(fenv_t) == sizeof(uint64_t)) { + const uint64_t &before_mxcsr = + reinterpret_cast(before_fenv); + const uint64_t &after_mxcsr = + reinterpret_cast(after_fenv); + EXPECT_EQ(before_mxcsr, after_mxcsr); + + } else if constexpr (sizeof(fenv_t) == sizeof(uint32_t)) { + const uint32_t &before_mxcsr = + reinterpret_cast(before_fenv); + const uint32_t &after_mxcsr = + reinterpret_cast(after_fenv); + EXPECT_EQ(before_mxcsr, after_mxcsr); + } #elif defined(LIBC_TARGET_ARCH_IS_ARM) && defined(__ARM_FP) using LIBC_NAMESPACE::fputil::FEnv; From 26f66c875f5a46c513a863badec476045f6eadc3 Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Fri, 24 Oct 2025 18:05:34 +0000 Subject: [PATCH 2/4] Fix get_except and include FE_DENORM in FE_ALL_EXCEPT. --- libc/include/llvm-libc-macros/fenv-macros.h | 5 +++-- libc/src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h | 2 +- libc/src/__support/FPUtil/x86_64/fenv_x86_common.h | 6 ++++++ libc/src/__support/FPUtil/x86_64/fenv_x87_utils.h | 4 ++-- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/libc/include/llvm-libc-macros/fenv-macros.h b/libc/include/llvm-libc-macros/fenv-macros.h index 1826723f93490..bb5409f091088 100644 --- a/libc/include/llvm-libc-macros/fenv-macros.h +++ b/libc/include/llvm-libc-macros/fenv-macros.h @@ -14,9 +14,10 @@ #define FE_INVALID 0x4 #define FE_OVERFLOW 0x8 #define FE_UNDERFLOW 0x10 -#define __FE_DENORM 0x20 +#define FE_DENORM 0x20 #define FE_ALL_EXCEPT \ - (FE_DIVBYZERO | FE_INEXACT | FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW) + (FE_DIVBYZERO | FE_INEXACT | FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW | \ + FE_DENORM) #define FE_DOWNWARD 0x400 #define FE_TONEAREST 0 diff --git a/libc/src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h b/libc/src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h index 022a274798437..f7a132595bbbb 100644 --- 
a/libc/src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h +++ b/libc/src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h @@ -71,7 +71,7 @@ LIBC_INLINE static uint16_t test_except(uint16_t excepts) { } LIBC_INLINE static uint16_t get_except() { - uint32_t mxcsr = get_mxcsr(); + uint32_t mxcsr = ~get_mxcsr(); return static_cast((mxcsr >> ExceptionFlags::MXCSR_EXCEPTION_MASK_BIT_POSITION) & ExceptionFlags::ALL_F); } diff --git a/libc/src/__support/FPUtil/x86_64/fenv_x86_common.h b/libc/src/__support/FPUtil/x86_64/fenv_x86_common.h index 887f07a7cbe0e..4a8cc3087b7c2 100644 --- a/libc/src/__support/FPUtil/x86_64/fenv_x86_common.h +++ b/libc/src/__support/FPUtil/x86_64/fenv_x86_common.h @@ -49,6 +49,8 @@ LIBC_INLINE static constexpr bool fenv_exceptions_match_x86() { return (FE_INVALID == ExceptionFlags::INVALID_F) && #ifdef __FE_DENORM (__FE_DENORM == ExceptionFlags::DENORMAL_F) && +#elif defined(FE_DENORM) + (FE_DENORM == ExceptionFlags::DENORMAL_F) && #endif // __FE_DENORM (FE_DIVBYZERO == ExceptionFlags::DIV_BY_ZERO_F) && (FE_OVERFLOW == ExceptionFlags::OVERFLOW_F) && @@ -87,6 +89,8 @@ LIBC_INLINE static uint16_t get_status_value_from_except(int excepts) { return ((excepts & FE_INVALID) ? ExceptionFlags::INVALID_F : 0) | #ifdef __FE_DENORM ((excepts & __FE_DENORM) ? ExceptionFlags::DENORMAL_F : 0) | +#elif defined(FE_DENORM) + ((excepts & FE_DENORM) ? ExceptionFlags::DENORMAL_F : 0) | #endif // __FE_DENORM ((excepts & FE_DIVBYZERO) ? ExceptionFlags::DIV_BY_ZERO_F : 0) | ((excepts & FE_OVERFLOW) ? ExceptionFlags::OVERFLOW_F : 0) | @@ -102,6 +106,8 @@ LIBC_INLINE static int get_macro_from_exception_status(uint16_t status) { return ((status & ExceptionFlags::INVALID_F) ? FE_INVALID : 0) | #ifdef __FE_DENORM ((status & ExceptionFlags::DENORMAL_F) ? __FE_DENORM : 0) | +#elif defined(FE_DENORM) + ((status & ExceptionFlags::DENORMAL_F) ? FE_DENORM : 0) | #endif // __FE_DENORM ((status & ExceptionFlags::DIV_BY_ZERO_F) ? FE_DIVBYZERO : 0) | ((status & ExceptionFlags::OVERFLOW_F) ? 
FE_OVERFLOW : 0) | diff --git a/libc/src/__support/FPUtil/x86_64/fenv_x87_utils.h b/libc/src/__support/FPUtil/x86_64/fenv_x87_utils.h index f00aeebd084fd..a38e4c9988084 100644 --- a/libc/src/__support/FPUtil/x86_64/fenv_x87_utils.h +++ b/libc/src/__support/FPUtil/x86_64/fenv_x87_utils.h @@ -113,8 +113,8 @@ LIBC_INLINE static uint16_t test_except(uint16_t excepts) { } LIBC_INLINE static uint16_t get_except() { - uint16_t x87_status = get_x87_control_word(); - return static_cast(x87_status & ExceptionFlags::ALL_F); + uint16_t x87_control = get_x87_control_word(); + return static_cast((~x87_control) & ExceptionFlags::ALL_F); } LIBC_INLINE static void set_except(uint16_t excepts) { From 4419ff28de9305c6978f4b76531bd198f5d6e70a Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Fri, 24 Oct 2025 18:55:02 +0000 Subject: [PATCH 3/4] Fix formatting. --- libc/src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libc/src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h b/libc/src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h index f7a132595bbbb..3a718219661d6 100644 --- a/libc/src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h +++ b/libc/src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h @@ -72,7 +72,9 @@ LIBC_INLINE static uint16_t test_except(uint16_t excepts) { LIBC_INLINE static uint16_t get_except() { uint32_t mxcsr = ~get_mxcsr(); - return static_cast((mxcsr >> ExceptionFlags::MXCSR_EXCEPTION_MASK_BIT_POSITION) & ExceptionFlags::ALL_F); + return static_cast( + (mxcsr >> ExceptionFlags::MXCSR_EXCEPTION_MASK_BIT_POSITION) & + ExceptionFlags::ALL_F); } LIBC_INLINE static void set_except(uint16_t excepts) { From 909e064a672a171a87dcb73adfea05ce9e9da24c Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Fri, 24 Oct 2025 19:03:19 +0000 Subject: [PATCH 4/4] Fix for windows. 
--- libc/src/__support/FPUtil/x86_64/FEnvImpl.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libc/src/__support/FPUtil/x86_64/FEnvImpl.h b/libc/src/__support/FPUtil/x86_64/FEnvImpl.h index 4dfd784fa9734..443c17a9769f7 100644 --- a/libc/src/__support/FPUtil/x86_64/FEnvImpl.h +++ b/libc/src/__support/FPUtil/x86_64/FEnvImpl.h @@ -142,7 +142,9 @@ LIBC_INLINE static int get_env(fenv_t *env) { // we merge sse data to x87 state. internal::mxcsr_to_x87_state(static_cast(mxcsr), x87_state); // Copy the state data; - cpp::bit_copy(x87_state, *env); + const char *x87_state_ptr = reinterpret_cast(&x87_state); + char *fenv_ptr = reinterpret_cast(env); + cpp::inline_copy(x87_state_ptr, fenv_ptr); } else { // We expect to have at least extra 32-bit for mxcsr register in the // fenv_t.