27 changes: 14 additions & 13 deletions libc/src/__support/FPUtil/aarch64/FEnvImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_AARCH64_FENVIMPL_H

#include "src/__support/architectures.h"
#include "src/__support/common.h"

#if !defined(LLVM_LIBC_ARCH_AARCH64) || defined(__APPLE__)
#error "Invalid include"
Expand Down Expand Up @@ -50,15 +51,15 @@ struct FEnv {
static constexpr uint32_t ExceptionStatusFlagsBitPosition = 0;
static constexpr uint32_t ExceptionControlFlagsBitPosition = 8;

static inline uint32_t getStatusValueForExcept(int excepts) {
// Converts a mask of <fenv.h> FE_* exception macros into the equivalent mask
// built from this struct's exception flag values (INVALID, DIVBYZERO,
// OVERFLOW, UNDERFLOW, INEXACT). Bits of `excepts` that match none of the
// five macros are ignored.
//
// This has to be a static member: the free functions in this header call it
// as FEnv::getStatusValueForExcept(...) without any FEnv object.
LIBC_INLINE static uint32_t getStatusValueForExcept(int excepts) {
  return (excepts & FE_INVALID ? INVALID : 0) |
         (excepts & FE_DIVBYZERO ? DIVBYZERO : 0) |
         (excepts & FE_OVERFLOW ? OVERFLOW : 0) |
         (excepts & FE_UNDERFLOW ? UNDERFLOW : 0) |
         (excepts & FE_INEXACT ? INEXACT : 0);
}

static inline int exceptionStatusToMacro(uint32_t status) {
LIBC_INLINE int exceptionStatusToMacro(uint32_t status) {
return (status & INVALID ? FE_INVALID : 0) |
(status & DIVBYZERO ? FE_DIVBYZERO : 0) |
(status & OVERFLOW ? FE_OVERFLOW : 0) |
Expand All @@ -75,7 +76,7 @@ struct FEnv {
// Stores `fpsr` into the "fpsr" system register through the __arm_wsr
// intrinsic.
static void writeStatusWord(uint32_t fpsr) { __arm_wsr("fpsr", fpsr); }
};

static inline int enable_except(int excepts) {
LIBC_INLINE int enable_except(int excepts) {
uint32_t newExcepts = FEnv::getStatusValueForExcept(excepts);
uint32_t controlWord = FEnv::getControlWord();
int oldExcepts =
Expand All @@ -85,7 +86,7 @@ static inline int enable_except(int excepts) {
return FEnv::exceptionStatusToMacro(oldExcepts);
}

static inline int disable_except(int excepts) {
LIBC_INLINE int disable_except(int excepts) {
uint32_t disabledExcepts = FEnv::getStatusValueForExcept(excepts);
uint32_t controlWord = FEnv::getControlWord();
int oldExcepts =
Expand All @@ -95,37 +96,37 @@ static inline int disable_except(int excepts) {
return FEnv::exceptionStatusToMacro(oldExcepts);
}

static inline int get_except() {
// Reports which floating-point exceptions are currently enabled in the
// control word, as a mask of FE_* macros.
LIBC_INLINE int get_except() {
  const uint32_t control = FEnv::getControlWord();
  // The enable bits start at ExceptionControlFlagsBitPosition; move them down
  // to bit 0 and keep only the low five bits.
  return FEnv::exceptionStatusToMacro(
      (control >> FEnv::ExceptionControlFlagsBitPosition) & 0x1F);
}

static inline int clear_except(int excepts) {
// Clears the status flags for every exception named in `excepts` (a mask of
// FE_* macros) and leaves the other status bits as they were. Always
// returns 0.
LIBC_INLINE int clear_except(int excepts) {
  // Position the requested flags where they live inside the status word.
  const uint32_t clear_mask = FEnv::getStatusValueForExcept(excepts)
                              << FEnv::ExceptionStatusFlagsBitPosition;
  FEnv::writeStatusWord(FEnv::getStatusWord() & ~clear_mask);
  return 0;
}

static inline int test_except(int excepts) {
// Returns, as a mask of FE_* macros, the exceptions from `excepts` whose
// flags are currently set in the status word.
LIBC_INLINE int test_except(int excepts) {
  const uint32_t requested = FEnv::getStatusValueForExcept(excepts);
  const uint32_t raised =
      FEnv::getStatusWord() >> FEnv::ExceptionStatusFlagsBitPosition;
  return FEnv::exceptionStatusToMacro(raised & requested);
}

static inline int set_except(int excepts) {
// Sets the status flags for every exception named in `excepts` (a mask of
// FE_* macros) by rewriting the status word directly. Always returns 0.
LIBC_INLINE int set_except(int excepts) {
  // Position the requested flags where they live inside the status word.
  const uint32_t set_mask = FEnv::getStatusValueForExcept(excepts)
                            << FEnv::ExceptionStatusFlagsBitPosition;
  FEnv::writeStatusWord(FEnv::getStatusWord() | set_mask);
  return 0;
}

static inline int raise_except(int excepts) {
LIBC_INLINE int raise_except(int excepts) {
float zero = 0.0f;
float one = 1.0f;
float largeValue = float(FPBits<float>(FPBits<float>::MAX_NORMAL));
Expand Down Expand Up @@ -185,7 +186,7 @@ static inline int raise_except(int excepts) {
return result;
}

static inline int get_round() {
LIBC_INLINE int get_round() {
uint32_t roundingMode =
(FEnv::getControlWord() >> FEnv::RoundingControlBitPosition) & 0x3;
switch (roundingMode) {
Expand All @@ -202,7 +203,7 @@ static inline int get_round() {
}
}

static inline int set_round(int mode) {
LIBC_INLINE int set_round(int mode) {
uint16_t bitValue;
switch (mode) {
case FE_TONEAREST:
Expand All @@ -229,14 +230,14 @@ static inline int set_round(int mode) {
return 0;
}

static inline int get_env(fenv_t *envp) {
// Captures the current control and status words into `*envp`, which is
// accessed through the FEnv::FPState layout. Always returns 0.
LIBC_INLINE int get_env(fenv_t *envp) {
  auto *saved = reinterpret_cast<FEnv::FPState *>(envp);
  saved->ControlWord = FEnv::getControlWord();
  saved->StatusWord = FEnv::getStatusWord();
  return 0;
}

static inline int set_env(const fenv_t *envp) {
LIBC_INLINE int set_env(const fenv_t *envp) {
if (envp == FE_DFL_ENV) {
// Default status and control words bits are all zeros so we just
// write zeros.
Expand Down
31 changes: 16 additions & 15 deletions libc/src/__support/FPUtil/aarch64/fenv_darwin_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_AARCH64_FENV_DARWIN_IMPL_H

#include "src/__support/architectures.h"
#include "src/__support/common.h"

#if !defined(LLVM_LIBC_ARCH_AARCH64) || !defined(__APPLE__)
#error "Invalid include"
Expand Down Expand Up @@ -60,7 +61,7 @@ struct FEnv {
// __fpcr_flush_to_zero bit in the FPCR register. This control bit is
// located in a different place from FE_FLUSHTOZERO status bit relative to
// the other exceptions.
static inline uint32_t exception_value_from_status(int status) {
LIBC_INLINE uint32_t exception_value_from_status(int status) {
return (status & FE_INVALID ? EX_INVALID : 0) |
(status & FE_DIVBYZERO ? EX_DIVBYZERO : 0) |
(status & FE_OVERFLOW ? EX_OVERFLOW : 0) |
Expand All @@ -69,7 +70,7 @@ struct FEnv {
(status & FE_FLUSHTOZERO ? EX_FLUSHTOZERO : 0);
}

static inline uint32_t exception_value_from_control(int control) {
LIBC_INLINE uint32_t exception_value_from_control(int control) {
return (control & __fpcr_trap_invalid ? EX_INVALID : 0) |
(control & __fpcr_trap_divbyzero ? EX_DIVBYZERO : 0) |
(control & __fpcr_trap_overflow ? EX_OVERFLOW : 0) |
Expand All @@ -78,7 +79,7 @@ struct FEnv {
(control & __fpcr_flush_to_zero ? EX_FLUSHTOZERO : 0);
}

static inline int exception_value_to_status(uint32_t excepts) {
LIBC_INLINE int exception_value_to_status(uint32_t excepts) {
return (excepts & EX_INVALID ? FE_INVALID : 0) |
(excepts & EX_DIVBYZERO ? FE_DIVBYZERO : 0) |
(excepts & EX_OVERFLOW ? FE_OVERFLOW : 0) |
Expand All @@ -87,7 +88,7 @@ struct FEnv {
(excepts & EX_FLUSHTOZERO ? FE_FLUSHTOZERO : 0);
}

static inline int exception_value_to_control(uint32_t excepts) {
LIBC_INLINE int exception_value_to_control(uint32_t excepts) {
return (excepts & EX_INVALID ? __fpcr_trap_invalid : 0) |
(excepts & EX_DIVBYZERO ? __fpcr_trap_divbyzero : 0) |
(excepts & EX_OVERFLOW ? __fpcr_trap_overflow : 0) |
Expand All @@ -105,7 +106,7 @@ struct FEnv {
// Stores `fpsr` into the "fpsr" system register through the __arm_wsr
// intrinsic.
static void set_status_word(uint32_t fpsr) { __arm_wsr("fpsr", fpsr); }
};

static inline int enable_except(int excepts) {
LIBC_INLINE int enable_except(int excepts) {
uint32_t new_excepts = FEnv::exception_value_from_status(excepts);
uint32_t control_word = FEnv::get_control_word();
uint32_t old_excepts = FEnv::exception_value_from_control(control_word);
Expand All @@ -116,7 +117,7 @@ static inline int enable_except(int excepts) {
return FEnv::exception_value_to_status(old_excepts);
}

static inline int disable_except(int excepts) {
LIBC_INLINE int disable_except(int excepts) {
uint32_t disabled_excepts = FEnv::exception_value_from_status(excepts);
uint32_t control_word = FEnv::get_control_word();
uint32_t old_excepts = FEnv::exception_value_from_control(control_word);
Expand All @@ -125,35 +126,35 @@ static inline int disable_except(int excepts) {
return FEnv::exception_value_to_status(old_excepts);
}

static inline int get_except() {
// Reports which exceptions are currently enabled in the control word, as a
// mask of FE_* status macros.
LIBC_INLINE int get_except() {
  const uint32_t control = FEnv::get_control_word();
  // Control bits -> internal EX_* encoding -> FE_* status macros.
  const uint32_t enabled = FEnv::exception_value_from_control(control);
  return FEnv::exception_value_to_status(enabled);
}

static inline int clear_except(int excepts) {
// Clears the status-word bits corresponding to `excepts` (a mask of FE_*
// macros) and leaves the other bits as they were. Always returns 0.
LIBC_INLINE int clear_except(int excepts) {
  // Round-trip the request through the internal EX_* encoding so that only
  // the exception bits that encoding knows about take part in the mask.
  const uint32_t ex_value = FEnv::exception_value_from_status(excepts);
  const uint32_t current = FEnv::get_status_word();
  FEnv::set_status_word(current & ~FEnv::exception_value_to_status(ex_value));
  return 0;
}

static inline int test_except(int excepts) {
// Returns the status-word bits that are both currently set and requested in
// `excepts` (a mask of FE_* macros).
LIBC_INLINE int test_except(int excepts) {
  const uint32_t current = FEnv::get_status_word();
  // Round-trip the request through the internal EX_* encoding so that only
  // the exception bits that encoding knows about are tested.
  const uint32_t requested = FEnv::exception_value_to_status(
      FEnv::exception_value_from_status(excepts));
  return current & requested;
}

static inline int set_except(int excepts) {
// Sets the status-word bits corresponding to `excepts` (a mask of FE_*
// macros) by rewriting the status word directly. Always returns 0.
LIBC_INLINE int set_except(int excepts) {
  // Round-trip the request through the internal EX_* encoding so that only
  // the exception bits that encoding knows about get set.
  const uint32_t ex_value = FEnv::exception_value_from_status(excepts);
  const uint32_t current = FEnv::get_status_word();
  FEnv::set_status_word(current | FEnv::exception_value_to_status(ex_value));
  return 0;
}

static inline int raise_except(int excepts) {
LIBC_INLINE int raise_except(int excepts) {
float zero = 0.0f;
float one = 1.0f;
float large_value = float(FPBits<float>(FPBits<float>::MAX_NORMAL));
Expand Down Expand Up @@ -212,7 +213,7 @@ static inline int raise_except(int excepts) {
return result;
}

static inline int get_round() {
LIBC_INLINE int get_round() {
uint32_t rounding_mode =
(FEnv::get_control_word() >> FEnv::ROUNDING_CONTROL_BIT_POSITION) & 0x3;
switch (rounding_mode) {
Expand All @@ -229,7 +230,7 @@ static inline int get_round() {
}
}

static inline int set_round(int mode) {
LIBC_INLINE int set_round(int mode) {
uint16_t bit_value;
switch (mode) {
case FE_TONEAREST:
Expand All @@ -256,14 +257,14 @@ static inline int set_round(int mode) {
return 0;
}

static inline int get_env(fenv_t *envp) {
// Captures the current control and status words into `*envp`, which is
// accessed through the FEnv::FPState layout. Always returns 0.
LIBC_INLINE int get_env(fenv_t *envp) {
  auto *saved = reinterpret_cast<FEnv::FPState *>(envp);
  saved->ControlWord = FEnv::get_control_word();
  saved->StatusWord = FEnv::get_status_word();
  return 0;
}

static inline int set_env(const fenv_t *envp) {
LIBC_INLINE int set_env(const fenv_t *envp) {
if (envp == FE_DFL_ENV) {
// Default status and control words bits are all zeros so we just
// write zeros.
Expand Down
5 changes: 3 additions & 2 deletions libc/src/__support/FPUtil/aarch64/nearest_integer.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_AARCH64_NEAREST_INTEGER_H

#include "src/__support/architectures.h"
#include "src/__support/common.h"

#if !defined(LLVM_LIBC_ARCH_AARCH64)
#error "Invalid include"
Expand All @@ -18,13 +19,13 @@
namespace __llvm_libc {
namespace fputil {

static inline float nearest_integer(float x) {
// Rounds `x` to an integral value with the AArch64 `frintn` instruction,
// operating on single-precision (`%s`) registers, and returns the result.
// NOTE(review): frintn presumably rounds to nearest with ties to even —
// confirm against the Arm ISA reference.
LIBC_INLINE float nearest_integer(float x) {
float result;
__asm__ __volatile__("frintn %s0, %s1\n\t" : "=w"(result) : "w"(x));
return result;
}

static inline double nearest_integer(double x) {
LIBC_INLINE double nearest_integer(double x) {
double result;
__asm__ __volatile__("frintn %d0, %d1\n\t" : "=w"(result) : "w"(x));
return result;
Expand Down
4 changes: 2 additions & 2 deletions libc/src/__support/FPUtil/generic/FMA.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace __llvm_libc {
namespace fputil {
namespace generic {

template <typename T> static inline T fma(T x, T y, T z);
template <typename T> LIBC_INLINE T fma(T x, T y, T z);

// TODO(lntue): Implement fmaf that is correctly rounded to all rounding modes.
// The implementation below is only correct for the default rounding mode,
Expand Down Expand Up @@ -78,7 +78,7 @@ namespace internal {

// Extract the sticky bits and shift the `mantissa` to the right by
// `shift_length`.
static inline bool shift_mantissa(int shift_length, UInt128 &mant) {
LIBC_INLINE bool shift_mantissa(int shift_length, UInt128 &mant) {
if (shift_length >= 128) {
mant = 0;
return true; // prod_mant is non-zero.
Expand Down
14 changes: 7 additions & 7 deletions libc/src/__support/FPUtil/generic/FMod.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ template <typename T> struct FModExceptionalInputHandler {
static_assert(cpp::is_floating_point_v<T>,
"FModCStandardWrapper instantiated with invalid type.");

static bool PreCheck(T x, T y, T &out) {
LIBC_INLINE static bool PreCheck(T x, T y, T &out) {
using FPB = fputil::FPBits<T>;
const T quiet_NaN = FPB::build_quiet_nan(0);
FPB sx(x), sy(y);
Expand Down Expand Up @@ -168,8 +168,8 @@ template <typename T> class FModDivisionSimpleHelper {
using intU_t = typename FPBits<T>::UIntType;

public:
inline constexpr static intU_t execute(int exp_diff, int sides_zeroes_count,
intU_t m_x, intU_t m_y) {
LIBC_INLINE constexpr static intU_t
execute(int exp_diff, int sides_zeroes_count, intU_t m_x, intU_t m_y) {
while (exp_diff > sides_zeroes_count) {
exp_diff -= sides_zeroes_count;
m_x <<= sides_zeroes_count;
Expand All @@ -187,8 +187,8 @@ template <typename T> class FModDivisionInvMultHelper {
using intU_t = typename FPB::UIntType;

public:
inline constexpr static intU_t execute(int exp_diff, int sides_zeroes_count,
intU_t m_x, intU_t m_y) {
LIBC_INLINE constexpr static intU_t
execute(int exp_diff, int sides_zeroes_count, intU_t m_x, intU_t m_y) {
if (exp_diff > sides_zeroes_count) {
intU_t inv_hy = (cpp::numeric_limits<intU_t>::max() / m_y);
while (exp_diff > sides_zeroes_count) {
Expand Down Expand Up @@ -223,7 +223,7 @@ class FMod {
using FPB = FPBits<T>;
using intU_t = typename FPB::UIntType;

inline static constexpr FPB eval_internal(FPB sx, FPB sy) {
LIBC_INLINE static constexpr FPB eval_internal(FPB sx, FPB sy) {

if (likely(sx.uintval() <= sy.uintval())) {
if (sx.uintval() < sy.uintval())
Expand Down Expand Up @@ -300,7 +300,7 @@ class FMod {
}

public:
static inline T eval(T x, T y) {
LIBC_INLINE static T eval(T x, T y) {
if (T out; Wrapper::PreCheck(x, y, out))
return out;
FPB sx(x), sy(y);
Expand Down
6 changes: 3 additions & 3 deletions libc/src/__support/FPUtil/generic/sqrt.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ template <> struct SpecialLongDouble<long double> {
#endif // SPECIAL_X86_LONG_DOUBLE

template <typename T>
static inline void normalize(int &exponent,
typename FPBits<T>::UIntType &mantissa) {
LIBC_INLINE void normalize(int &exponent,
typename FPBits<T>::UIntType &mantissa) {
const int shift = unsafe_clz(mantissa) -
(8 * sizeof(mantissa) - 1 - MantissaWidth<T>::VALUE);
exponent -= shift;
Expand Down Expand Up @@ -64,7 +64,7 @@ inline void normalize<long double>(int &exponent, UInt128 &mantissa) {
// Correctly rounded IEEE 754 SQRT for all rounding modes.
// Shift-and-add algorithm.
template <typename T>
static inline cpp::enable_if_t<cpp::is_floating_point_v<T>, T> sqrt(T x) {
LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<T>, T> sqrt(T x) {

if constexpr (internal::SpecialLongDouble<T>::VALUE) {
// Special 80-bit long double.
Expand Down
4 changes: 2 additions & 2 deletions libc/src/__support/FPUtil/generic/sqrt_80_bit_long_double.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ inline void normalize(int &exponent, UInt128 &mantissa) {

// if constexpr statement in sqrt.h still requires x86::sqrt to be declared
// even when it's not used.
static inline long double sqrt(long double x);
LIBC_INLINE long double sqrt(long double x);

// Correctly rounded SQRT for all rounding modes.
// Shift-and-add algorithm.
#if defined(SPECIAL_X86_LONG_DOUBLE)
static inline long double sqrt(long double x) {
LIBC_INLINE long double sqrt(long double x) {
using UIntType = typename FPBits<long double>::UIntType;
constexpr UIntType ONE = UIntType(1)
<< int(MantissaWidth<long double>::VALUE);
Expand Down
3 changes: 2 additions & 1 deletion libc/src/__support/FPUtil/multiply_add.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_MULTIPLY_ADD_H

#include "src/__support/architectures.h"
#include "src/__support/common.h"

namespace __llvm_libc {
namespace fputil {
Expand All @@ -18,7 +19,7 @@ namespace fputil {
// multiply_add(x, y, z) = x*y + z
// which uses FMA instructions to speed up if available.

template <typename T> static inline T multiply_add(T x, T y, T z) {
// Generic version: evaluates x * y + z with the ordinary multiply and add
// operators of T, written as a single expression.
// NOTE(review): whether this becomes one fused multiply-add presumably
// depends on the compiler's floating-point contraction settings — confirm
// before splitting the expression into separate statements.
template <typename T> LIBC_INLINE T multiply_add(T x, T y, T z) {
return x * y + z;
}

Expand Down
4 changes: 2 additions & 2 deletions libc/src/__support/FPUtil/nearest_integer.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace fputil {
// Notice that for AARCH64 and x86-64 with SSE4.2 support, we will use their
// corresponding rounding instruction instead. And in those cases, the results
// are rounded to the nearest integer, tie-to-even.
static inline float nearest_integer(float x) {
LIBC_INLINE float nearest_integer(float x) {
if (x < 0x1p24f && x > -0x1p24f) {
float r = x < 0 ? (x - 0x1.0p23f) + 0x1.0p23f : (x + 0x1.0p23f) - 0x1.0p23f;
float diff = x - r;
Expand All @@ -42,7 +42,7 @@ static inline float nearest_integer(float x) {
return x;
}

static inline double nearest_integer(double x) {
LIBC_INLINE double nearest_integer(double x) {
if (x < 0x1p53 && x > -0x1p53) {
double r = x < 0 ? (x - 0x1.0p52) + 0x1.0p52 : (x + 0x1.0p52) - 0x1.0p52;
double diff = x - r;
Expand Down
48 changes: 24 additions & 24 deletions libc/src/__support/FPUtil/x86_64/FEnvImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ static constexpr uint16_t MXCSR_EXCEPTION_CONTOL_BIT_POISTION = 7;

// Exception flags are individual bits in the corresponding registers.
// So, we just OR the bit values to get the full set of exceptions.
static inline uint16_t get_status_value_for_except(int excepts) {
LIBC_INLINE uint16_t get_status_value_for_except(int excepts) {
// We will make use of the fact that exception control bits are single
// bit flags in the control registers.
return (excepts & FE_INVALID ? ExceptionFlags::INVALID_F : 0) |
Expand All @@ -81,7 +81,7 @@ static inline uint16_t get_status_value_for_except(int excepts) {
(excepts & FE_INEXACT ? ExceptionFlags::INEXACT_F : 0);
}

static inline int exception_status_to_macro(uint16_t status) {
LIBC_INLINE int exception_status_to_macro(uint16_t status) {
return (status & ExceptionFlags::INVALID_F ? FE_INVALID : 0) |
#ifdef __FE_DENORM
(status & ExceptionFlags::DENORMAL_F ? __FE_DENORM : 0) |
Expand All @@ -101,53 +101,53 @@ struct X87StateDescriptor {
uint32_t _[5];
};

static inline uint16_t get_x87_control_word() {
// Reads the x87 control word with the `fnstcw` instruction and returns it.
LIBC_INLINE uint16_t get_x87_control_word() {
uint16_t w;
__asm__ __volatile__("fnstcw %0" : "=m"(w)::);
// `w` is only ever written by the asm statement above; presumably this tells
// the memory sanitizer that it is initialized.
SANITIZER_MEMORY_INITIALIZED(&w, sizeof(w));
return w;
}

static inline void write_x87_control_word(uint16_t w) {
// Loads `w` into the x87 control word with the `fldcw` instruction.
LIBC_INLINE void write_x87_control_word(uint16_t w) {
__asm__ __volatile__("fldcw %0" : : "m"(w) :);
}

static inline uint16_t get_x87_status_word() {
// Reads the x87 status word with the `fnstsw` instruction and returns it.
LIBC_INLINE uint16_t get_x87_status_word() {
uint16_t w;
__asm__ __volatile__("fnstsw %0" : "=m"(w)::);
// `w` is only ever written by the asm statement above; presumably this tells
// the memory sanitizer that it is initialized.
SANITIZER_MEMORY_INITIALIZED(&w, sizeof(w));
return w;
}

static inline void clear_x87_exceptions() {
// Clears the x87 exception state by executing the `fnclex` instruction.
LIBC_INLINE void clear_x87_exceptions() {
__asm__ __volatile__("fnclex" : : :);
}

static inline uint32_t get_mxcsr() {
// Reads the MXCSR register with the `stmxcsr` instruction and returns it.
LIBC_INLINE uint32_t get_mxcsr() {
uint32_t w;
__asm__ __volatile__("stmxcsr %0" : "=m"(w)::);
// `w` is only ever written by the asm statement above; presumably this tells
// the memory sanitizer that it is initialized.
SANITIZER_MEMORY_INITIALIZED(&w, sizeof(w));
return w;
}

static inline void write_mxcsr(uint32_t w) {
// Loads `w` into the MXCSR register with the `ldmxcsr` instruction.
LIBC_INLINE void write_mxcsr(uint32_t w) {
__asm__ __volatile__("ldmxcsr %0" : : "m"(w) :);
}

static inline void get_x87_state_descriptor(X87StateDescriptor &s) {
// Stores the current x87 environment into `s` with the `fnstenv` instruction.
LIBC_INLINE void get_x87_state_descriptor(X87StateDescriptor &s) {
__asm__ __volatile__("fnstenv %0" : "=m"(s));
// `s` is only ever written by the asm statement above; presumably this tells
// the memory sanitizer that it is initialized.
SANITIZER_MEMORY_INITIALIZED(&s, sizeof(s));
}

static inline void write_x87_state_descriptor(const X87StateDescriptor &s) {
// Loads the x87 environment from `s` with the `fldenv` instruction.
LIBC_INLINE void write_x87_state_descriptor(const X87StateDescriptor &s) {
__asm__ __volatile__("fldenv %0" : : "m"(s) :);
}

static inline void fwait() { __asm__ __volatile__("fwait"); }
// Executes the x87 `fwait` instruction.
LIBC_INLINE void fwait() { __asm__ __volatile__("fwait"); }

} // namespace internal

static inline int enable_except(int excepts) {
LIBC_INLINE int enable_except(int excepts) {
// In the x87 control word and in MXCSR, an exception is blocked
// if the corresponding bit is set. That is the reason for all the
// bit-flip operations below as we need to turn the bits to zero
Expand All @@ -174,7 +174,7 @@ static inline int enable_except(int excepts) {
return internal::exception_status_to_macro(old_excepts);
}

static inline int disable_except(int excepts) {
LIBC_INLINE int disable_except(int excepts) {
// In the x87 control word and in MXCSR, an exception is blocked
// if the corresponding bit is set.

Expand All @@ -194,13 +194,13 @@ static inline int disable_except(int excepts) {
return internal::exception_status_to_macro(old_excepts);
}

static inline int get_except() {
// Reports which floating-point exceptions are currently enabled, as a mask
// of FE_* macros, based on the MXCSR register.
LIBC_INLINE int get_except() {
  // The six bits starting at bit 7 of MXCSR are inverted here: a set bit
  // blocks the exception, so a clear bit means it is enabled.
  const uint32_t mask_bits = internal::get_mxcsr() >> 7;
  const uint16_t enabled = static_cast<uint16_t>(~mask_bits & 0x3F);
  return internal::exception_status_to_macro(enabled);
}

static inline int clear_except(int excepts) {
LIBC_INLINE int clear_except(int excepts) {
internal::X87StateDescriptor state;
internal::get_x87_state_descriptor(state);
state.status_word &=
Expand All @@ -213,15 +213,15 @@ static inline int clear_except(int excepts) {
return 0;
}

static inline int test_except(int excepts) {
// Returns, as a mask of FE_* macros, the exceptions from `excepts` whose
// status flags are currently raised in either the x87 status word or MXCSR.
LIBC_INLINE int test_except(int excepts) {
  uint16_t status_value = internal::get_status_value_for_except(excepts);
  // Check both x87 status word and MXCSR. Reading MXCSR alone would miss
  // flags raised by x87 operations; the same `status_value` bits are applied
  // to both registers elsewhere in this file.
  const uint32_t raised =
      internal::get_x87_status_word() | internal::get_mxcsr();
  return internal::exception_status_to_macro(
      static_cast<uint16_t>(status_value & raised));
}

// Sets the exception flags but does not trigger the exception handler.
static inline int set_except(int excepts) {
LIBC_INLINE int set_except(int excepts) {
uint16_t status_value = internal::get_status_value_for_except(excepts);
internal::X87StateDescriptor state;
internal::get_x87_state_descriptor(state);
Expand All @@ -235,7 +235,7 @@ static inline int set_except(int excepts) {
return 0;
}

static inline int raise_except(int excepts) {
LIBC_INLINE int raise_except(int excepts) {
uint16_t status_value = internal::get_status_value_for_except(excepts);

// We set the status flag for exception one at a time and call the
Expand Down Expand Up @@ -287,7 +287,7 @@ static inline int raise_except(int excepts) {
return 0;
}

static inline int get_round() {
LIBC_INLINE int get_round() {
uint16_t bit_value =
(internal::get_mxcsr() >> internal::MXCSR_ROUNDING_CONTROL_BIT_POSITION) &
0x3;
Expand All @@ -305,7 +305,7 @@ static inline int get_round() {
}
}

static inline int set_round(int mode) {
LIBC_INLINE int set_round(int mode) {
uint16_t bit_value;
switch (mode) {
case FE_TONEAREST:
Expand Down Expand Up @@ -461,7 +461,7 @@ struct WinExceptionFlags {
same order in both.
*/

static inline int get_env(fenv_t *envp) {
LIBC_INLINE int get_env(fenv_t *envp) {
internal::FPState *state = reinterpret_cast<internal::FPState *>(envp);

uint32_t status_word = 0;
Expand Down Expand Up @@ -505,7 +505,7 @@ static inline int get_env(fenv_t *envp) {
return 0;
}

static inline int set_env(const fenv_t *envp) {
LIBC_INLINE int set_env(const fenv_t *envp) {
const internal::FPState *state =
reinterpret_cast<const internal::FPState *>(envp);

Expand Down Expand Up @@ -554,14 +554,14 @@ static inline int set_env(const fenv_t *envp) {
return 0;
}
#else
static inline int get_env(fenv_t *envp) {
// Captures the x87 state descriptor and the MXCSR register into `*envp`,
// which is accessed through the internal::FPState layout. Always returns 0.
LIBC_INLINE int get_env(fenv_t *envp) {
  auto *saved = reinterpret_cast<internal::FPState *>(envp);
  internal::get_x87_state_descriptor(saved->x87_status);
  saved->mxcsr = internal::get_mxcsr();
  return 0;
}

static inline int set_env(const fenv_t *envp) {
LIBC_INLINE int set_env(const fenv_t *envp) {
// envp contains everything including pieces like the current
// top of FPU stack. We cannot arbitrarily change them. So, we first
// read the current status and update only those pieces which are
Expand Down
6 changes: 3 additions & 3 deletions libc/src/__support/FPUtil/x86_64/FMA.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_X86_64_FMA_H

#include "src/__support/architectures.h"
#include "src/__support/common.h"

#if !defined(LLVM_LIBC_ARCH_X86_64)
#error "Invalid include"
Expand All @@ -26,7 +27,7 @@ namespace __llvm_libc {
namespace fputil {

template <typename T>
static inline cpp::enable_if_t<cpp::is_same_v<T, float>, T> fma(T x, T y, T z) {
LIBC_INLINE cpp::enable_if_t<cpp::is_same_v<T, float>, T> fma(T x, T y, T z) {
float result;
__m128 xmm = _mm_load_ss(&x); // NOLINT
__m128 ymm = _mm_load_ss(&y); // NOLINT
Expand All @@ -37,8 +38,7 @@ static inline cpp::enable_if_t<cpp::is_same_v<T, float>, T> fma(T x, T y, T z) {
}

template <typename T>
static inline cpp::enable_if_t<cpp::is_same_v<T, double>, T> fma(T x, T y,
T z) {
LIBC_INLINE cpp::enable_if_t<cpp::is_same_v<T, double>, T> fma(T x, T y, T z) {
double result;
__m128d xmm = _mm_load_sd(&x); // NOLINT
__m128d ymm = _mm_load_sd(&y); // NOLINT
Expand Down
2 changes: 1 addition & 1 deletion libc/src/__support/FPUtil/x86_64/NextAfterLongDouble.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
namespace __llvm_libc {
namespace fputil {

static inline long double nextafter(long double from, long double to) {
LIBC_INLINE long double nextafter(long double from, long double to) {
using FPBits = FPBits<long double>;
FPBits from_bits(from);
if (from_bits.is_nan())
Expand Down
5 changes: 3 additions & 2 deletions libc/src/__support/FPUtil/x86_64/nearest_integer.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_X86_64_NEAREST_INTEGER_H

#include "src/__support/architectures.h"
#include "src/__support/common.h"

#if !defined(LLVM_LIBC_ARCH_X86_64)
#error "Invalid include"
Expand All @@ -24,14 +25,14 @@
namespace __llvm_libc {
namespace fputil {

static inline float nearest_integer(float x) {
// Rounds `x` to an integral value with the _mm_round_ss intrinsic and returns
// the lowest lane of the result.
// NOTE(review): _MM_ROUND_NEAREST looks like it belongs to the MXCSR
// rounding-mode macro family rather than the _MM_FROUND_* family that
// _mm_round_ss expects (e.g. _MM_FROUND_TO_NEAREST_INT); presumably both are
// zero so the result is unaffected — confirm.
LIBC_INLINE float nearest_integer(float x) {
__m128 xmm = _mm_set_ss(x); // NOLINT
__m128 ymm =
_mm_round_ss(xmm, xmm, _MM_ROUND_NEAREST | _MM_FROUND_NO_EXC); // NOLINT
return ymm[0];
}

static inline double nearest_integer(double x) {
LIBC_INLINE double nearest_integer(double x) {
__m128d xmm = _mm_set_sd(x); // NOLINT
__m128d ymm =
_mm_round_sd(xmm, xmm, _MM_ROUND_NEAREST | _MM_FROUND_NO_EXC); // NOLINT
Expand Down
2 changes: 1 addition & 1 deletion libc/src/__support/OSUtil/linux/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

namespace __llvm_libc {

static inline void write_to_stderr(const char *msg) {
// Writes the string `msg` to file descriptor 2 with a raw SYS_write system
// call. The length is taken from internal::string_length(msg); the result of
// the system call is not inspected.
LIBC_INLINE void write_to_stderr(const char *msg) {
  const auto msg_len = internal::string_length(msg);
  __llvm_libc::syscall_impl(SYS_write, 2 /* stderr */, msg, msg_len);
}
Expand Down
4 changes: 3 additions & 1 deletion libc/src/__support/OSUtil/linux/quick_exit.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@

#include "syscall.h" // For internal syscall function.

#include "src/__support/common.h"

#include <sys/syscall.h> // For syscall numbers.

namespace __llvm_libc {

static inline void quick_exit(int status) {
LIBC_INLINE void quick_exit(int status) {
for (;;) {
__llvm_libc::syscall_impl(SYS_exit_group, status);
__llvm_libc::syscall_impl(SYS_exit, status);
Expand Down
15 changes: 8 additions & 7 deletions libc/src/__support/builtin_wrappers.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "named_pair.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/common.h"
#include "src/__support/compiler_features.h"

namespace __llvm_libc {
Expand All @@ -22,14 +23,14 @@ namespace __llvm_libc {
// compiler match for us.
namespace __internal {

template <typename T> static inline int correct_zero(T val, int bits) {
// Patches up a bit-count result for a zero input: when `val` is zero the
// answer is the full width of T in bits, otherwise the caller-supplied
// `bits` is passed through unchanged.
template <typename T> LIBC_INLINE int correct_zero(T val, int bits) {
  const bool is_zero = (val == T(0));
  if (!is_zero)
    return bits;
  return sizeof(T(0)) * 8;
}

template <typename T> static inline int clz(T val);
template <typename T> LIBC_INLINE int clz(T val);
// clz specialization for unsigned int: forwards to __builtin_clz.
template <> inline int clz<unsigned int>(unsigned int val) {
return __builtin_clz(val);
}
Expand All @@ -40,7 +41,7 @@ template <> inline int clz<unsigned long long int>(unsigned long long int val) {
return __builtin_clzll(val);
}

template <typename T> static inline int ctz(T val);
template <typename T> LIBC_INLINE int ctz(T val);
// ctz specialization for unsigned int: forwards to __builtin_ctz.
template <> inline int ctz<unsigned int>(unsigned int val) {
return __builtin_ctz(val);
}
Expand All @@ -52,19 +53,19 @@ template <> inline int ctz<unsigned long long int>(unsigned long long int val) {
}
} // namespace __internal

template <typename T> static inline int safe_ctz(T val) {
// Trailing-zero count of `val`. The raw __internal::ctz result is passed
// through correct_zero, so a zero input yields the bit width of T.
template <typename T> LIBC_INLINE int safe_ctz(T val) {
  const int raw_count = __internal::ctz(val);
  return __internal::correct_zero(val, raw_count);
}

template <typename T> static inline int unsafe_ctz(T val) {
// Trailing-zero count of `val`, taken straight from __internal::ctz with no
// zero-input correction applied.
template <typename T> LIBC_INLINE int unsafe_ctz(T val) {
  const int raw_count = __internal::ctz(val);
  return raw_count;
}

template <typename T> static inline int safe_clz(T val) {
// Leading-zero count of `val`. The raw __internal::clz result is passed
// through correct_zero, so a zero input yields the bit width of T.
template <typename T> LIBC_INLINE int safe_clz(T val) {
  const int raw_count = __internal::clz(val);
  return __internal::correct_zero(val, raw_count);
}

template <typename T> static inline int unsafe_clz(T val) {
// Leading-zero count of `val`, taken straight from __internal::clz with no
// zero-input correction applied.
template <typename T> LIBC_INLINE int unsafe_clz(T val) {
  const int raw_count = __internal::clz(val);
  return raw_count;
}

Expand Down
4 changes: 4 additions & 0 deletions libc/src/__support/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
#define LLVM_LIBC_FUNCTION_ATTR
#endif

// Specifier placed on inline function definitions. It expands to plain
// `inline` unless LIBC_INLINE has already been defined before this point.
#ifndef LIBC_INLINE
#define LIBC_INLINE inline
#endif

// We use OpenMP to declare these functions on the device.
#define STR(X) #X
#define LLVM_LIBC_DECLARE_DEVICE(name) \
Expand Down
4 changes: 3 additions & 1 deletion libc/src/__support/detailed_powers_of_ten.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#ifndef LIBC_SRC_SUPPORT_DETAILED_POWERS_OF_TEN_H
#define LIBC_SRC_SUPPORT_DETAILED_POWERS_OF_TEN_H

#include "src/__support/common.h"

#include <stdint.h>

namespace __llvm_libc {
Expand All @@ -27,7 +29,7 @@ constexpr int32_t DETAILED_POWERS_OF_TEN_MIN_EXP_10 = -348;
constexpr int32_t DETAILED_POWERS_OF_TEN_MAX_EXP_10 = 347;

// This rescales the base 10 exponent by a factor of log(10)/log(2).
static inline int64_t exp10_to_exp2(int64_t exp10) {
// Fixed-point conversion of a base-10 exponent to a base-2 exponent:
// multiplies by 217706 and shifts right by 16, where 217706 / 2^16 is
// approximately log(10)/log(2).
LIBC_INLINE int64_t exp10_to_exp2(int64_t exp10) {
  constexpr int64_t LOG2_OF_10_SCALED = 217706;
  constexpr int SCALE_SHIFT = 16;
  return (LOG2_OF_10_SCALED * exp10) >> SCALE_SHIFT;
}

Expand Down
9 changes: 5 additions & 4 deletions libc/src/__support/float_to_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "src/__support/CPP/type_traits.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/UInt.h"
#include "src/__support/common.h"
#include "src/__support/ryu_constants.h"

// This implementation is based on the Ryu Printf algorithm by Ulf Adams:
Expand Down Expand Up @@ -177,7 +178,7 @@ inline cpp::UInt<MID_INT_SIZE> get_table_negative(int exponent, size_t i,
return num;
}

static inline uint32_t fast_uint_mod_1e9(const cpp::UInt<MID_INT_SIZE> &val) {
LIBC_INLINE uint32_t fast_uint_mod_1e9(const cpp::UInt<MID_INT_SIZE> &val) {
// The formula for mult_const is:
// 1 + floor((2^(bits in target integer size + log_2(divider))) / divider)
// Where divider is 10^9 and target integer size is 128.
Expand All @@ -189,9 +190,9 @@ static inline uint32_t fast_uint_mod_1e9(const cpp::UInt<MID_INT_SIZE> &val) {
return static_cast<uint32_t>(val) - (1000000000 * shifted);
}

static inline uint32_t mul_shift_mod_1e9(const MantissaInt mantissa,
const cpp::UInt<MID_INT_SIZE> &large,
const int32_t shift_amount) {
LIBC_INLINE uint32_t mul_shift_mod_1e9(const MantissaInt mantissa,
const cpp::UInt<MID_INT_SIZE> &large,
const int32_t shift_amount) {
constexpr size_t MANT_INT_SIZE = sizeof(MantissaInt) * 8;
cpp::UInt<MID_INT_SIZE + MANT_INT_SIZE> val(large);
// TODO: Find a better way to force __uint128_t to be UInt<128>
Expand Down
39 changes: 22 additions & 17 deletions libc/src/__support/str_to_float.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/UInt128.h"
#include "src/__support/builtin_wrappers.h"
#include "src/__support/common.h"
#include "src/__support/ctype_utils.h"
#include "src/__support/detailed_powers_of_ten.h"
#include "src/__support/high_precision_decimal.h"
Expand All @@ -22,7 +23,7 @@
namespace __llvm_libc {
namespace internal {

template <class T> uint32_t inline leading_zeroes(T inputNumber) {
template <class T> LIBC_INLINE uint32_t leading_zeroes(T inputNumber) {
constexpr uint32_t BITS_IN_T = sizeof(T) * 8;
if (inputNumber == 0) {
return BITS_IN_T;
Expand Down Expand Up @@ -51,23 +52,27 @@ template <class T> uint32_t inline leading_zeroes(T inputNumber) {
return BITS_IN_T - cur_guess;
}

template <> uint32_t inline leading_zeroes<uint32_t>(uint32_t inputNumber) {
template <>
LIBC_INLINE uint32_t leading_zeroes<uint32_t>(uint32_t inputNumber) {
  // 32-bit specialization: the count comes straight from safe_clz instead of
  // the generic implementation.
  const auto zero_count = safe_clz(inputNumber);
  return zero_count;
}

template <> uint32_t inline leading_zeroes<uint64_t>(uint64_t inputNumber) {
template <>
LIBC_INLINE uint32_t leading_zeroes<uint64_t>(uint64_t inputNumber) {
  // 64-bit specialization: the count comes straight from safe_clz instead of
  // the generic implementation.
  const auto zero_count = safe_clz(inputNumber);
  return zero_count;
}

static inline uint64_t low64(const UInt128 &num) {
LIBC_INLINE uint64_t low64(const UInt128 &num) {
  // Mask away everything above bit 63, then narrow the result to 64 bits.
  const auto low_bits = num & 0xffffffffffffffff;
  return static_cast<uint64_t>(low_bits);
}

static inline uint64_t high64(const UInt128 &num) {
LIBC_INLINE uint64_t high64(const UInt128 &num) {
  // Move the upper half down into the low 64 bits before narrowing.
  const auto high_bits = num >> 64;
  return static_cast<uint64_t>(high_bits);
}

template <class T> inline void set_implicit_bit(fputil::FPBits<T> &) { return; }
template <class T> LIBC_INLINE void set_implicit_bit(fputil::FPBits<T> &) {
  // Primary template: the argument is unnamed and unused, so there is nothing
  // to do here and the body is intentionally empty.
}

#if defined(SPECIAL_X86_LONG_DOUBLE)
template <>
Expand All @@ -85,7 +90,7 @@ inline void set_implicit_bit<long double>(fputil::FPBits<long double> &result) {
// (https://github.com/golang/go/blob/release-branch.go1.16/src/strconv/eisel_lemire.go#L25)
// for some optimizations as well as handling 32 bit floats.
template <class T>
static inline bool
LIBC_INLINE bool
eisel_lemire(typename fputil::FPBits<T>::UIntType mantissa, int32_t exp10,
typename fputil::FPBits<T>::UIntType *outputMantissa,
uint32_t *outputExp2) {
Expand Down Expand Up @@ -306,7 +311,7 @@ constexpr int32_t NUM_POWERS_OF_TWO =
// on the Simple Decimal Conversion algorithm by Nigel Tao, described at this
// link: https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html
template <class T>
static inline void
LIBC_INLINE void
simple_decimal_conversion(const char *__restrict numStart,
typename fputil::FPBits<T>::UIntType *outputMantissa,
uint32_t *outputExp2) {
Expand Down Expand Up @@ -504,7 +509,7 @@ template <> class ClingerConsts<long double> {
// exponents, but handles them quickly. This is an implementation of Clinger's
// Fast Path, as described above.
template <class T>
static inline bool
LIBC_INLINE bool
clinger_fast_path(typename fputil::FPBits<T>::UIntType mantissa, int32_t exp10,
typename fputil::FPBits<T>::UIntType *outputMantissa,
uint32_t *outputExp2) {
Expand Down Expand Up @@ -587,7 +592,7 @@ template <> constexpr int32_t get_lower_bound<double>() {
// accuracy. The resulting mantissa and exponent are placed in outputMantissa
// and outputExp2.
template <class T>
static inline void
LIBC_INLINE void
decimal_exp_to_float(typename fputil::FPBits<T>::UIntType mantissa,
int32_t exp10, const char *__restrict numStart,
bool truncated,
Expand Down Expand Up @@ -649,7 +654,7 @@ decimal_exp_to_float(typename fputil::FPBits<T>::UIntType mantissa,
// form, this is mostly just shifting and rounding. This is used for hexadecimal
// numbers since a base 16 exponent multiplied by 4 is the base 2 exponent.
template <class T>
static inline void
LIBC_INLINE void
binary_exp_to_float(typename fputil::FPBits<T>::UIntType mantissa, int32_t exp2,
bool truncated,
typename fputil::FPBits<T>::UIntType *outputMantissa,
Expand Down Expand Up @@ -736,8 +741,8 @@ binary_exp_to_float(typename fputil::FPBits<T>::UIntType mantissa, int32_t exp2,

// checks if the next 4 characters of the string pointer are the start of a
// hexadecimal floating point number. Does not advance the string pointer.
static inline bool is_float_hex_start(const char *__restrict src,
const char decimalPoint) {
LIBC_INLINE bool is_float_hex_start(const char *__restrict src,
const char decimalPoint) {
if (!(*src == '0' && (*(src + 1) | 32) == 'x')) {
return false;
}
Expand All @@ -755,7 +760,7 @@ static inline bool is_float_hex_start(const char *__restrict src,
// If the return value is false, then it is assumed that there is no number
// here.
template <class T>
static inline bool
LIBC_INLINE bool
decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT,
char **__restrict strEnd,
typename fputil::FPBits<T>::UIntType *outputMantissa,
Expand Down Expand Up @@ -849,7 +854,7 @@ decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT,
// If the return value is false, then it is assumed that there is no number
// here.
template <class T>
static inline bool hexadecimal_string_to_float(
LIBC_INLINE bool hexadecimal_string_to_float(
const char *__restrict src, const char DECIMAL_POINT,
char **__restrict strEnd,
typename fputil::FPBits<T>::UIntType *outputMantissa,
Expand Down Expand Up @@ -940,8 +945,8 @@ static inline bool hexadecimal_string_to_float(
// Takes a pointer to a string and a pointer to a string pointer. This function
// is used as the backend for all of the string to float functions.
template <class T>
static inline T strtofloatingpoint(const char *__restrict src,
char **__restrict strEnd) {
LIBC_INLINE T strtofloatingpoint(const char *__restrict src,
char **__restrict strEnd) {
using BitsType = typename fputil::FPBits<T>::UIntType;
fputil::FPBits<T> result = fputil::FPBits<T>();
const char *original_src = src;
Expand Down
13 changes: 7 additions & 6 deletions libc/src/__support/str_to_integer.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define LIBC_SRC_SUPPORT_STR_TO_INTEGER_H

#include "src/__support/CPP/limits.h"
#include "src/__support/common.h"
#include "src/__support/ctype_utils.h"
#include "src/__support/str_to_num_result.h"
#include <errno.h>
Expand All @@ -20,14 +21,14 @@ namespace internal {

// Returns a pointer to the first character in src that is not a whitespace
// character (as determined by isspace())
static inline const char *first_non_whitespace(const char *__restrict src) {
LIBC_INLINE const char *first_non_whitespace(const char *__restrict src) {
  // Step past every leading character that internal::isspace accepts; the
  // loop body is empty because the advance happens in the for-increment.
  for (; internal::isspace(*src); ++src) {
  }
  return src;
}

static inline int b36_char_to_int(char input) {
LIBC_INLINE int b36_char_to_int(char input) {
if (isdigit(input))
return input - '0';
if (isalpha(input))
Expand All @@ -37,15 +38,15 @@ static inline int b36_char_to_int(char input) {

// checks if the next 3 characters of the string pointer are the start of a
// hexadecimal number. Does not advance the string pointer.
static inline bool is_hex_start(const char *__restrict src) {
LIBC_INLINE bool is_hex_start(const char *__restrict src) {
  // The prefix must begin with a literal '0'. Later characters are only read
  // once the earlier ones have matched.
  if (*src != '0')
    return false;
  // OR-ing with 32 sets the ASCII case bit, so both 'x' and 'X' compare equal
  // to 'x'.
  if ((*(src + 1) | 32) != 'x')
    return false;
  // The character after the prefix must be alphanumeric and map to a digit
  // value below 16.
  const char first_digit = *(src + 2);
  return isalnum(first_digit) && b36_char_to_int(first_digit) < 16;
}

// Takes the address of the string pointer and parses the base from the start of
// it. This function will advance |src| to the first valid digit in the inferred
// base.
static inline int infer_base(const char *__restrict *__restrict src) {
LIBC_INLINE int infer_base(const char *__restrict *__restrict src) {
// A hexadecimal number is defined as "the prefix 0x or 0X followed by a
// sequence of the decimal digits and the letters a (or A) through f (or F)
// with values 10 through 15 respectively." (C standard 6.4.4.1)
Expand All @@ -67,8 +68,8 @@ static inline int infer_base(const char *__restrict *__restrict src) {
// Takes a pointer to a string and the base to convert to. This function is used
// as the backend for all of the string to int functions.
template <class T>
static inline StrToNumResult<T> strtointeger(const char *__restrict src,
int base) {
LIBC_INLINE StrToNumResult<T> strtointeger(const char *__restrict src,
int base) {
unsigned long long result = 0;
bool is_number = false;
const char *original_src = src;
Expand Down
3 changes: 2 additions & 1 deletion libc/src/__support/threads/linux/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@ add_object_library(
libc.config.linux.app_h
libc.include.sys_syscall
libc.src.__support.CPP.atomic
libc.src.__support.error_or
libc.src.__support.CPP.stringstream
libc.src.__support.CPP.string_view
libc.src.__support.common
libc.src.__support.error_or
libc.src.__support.threads.thread_common
COMPILE_OPTIONS
-O3
Expand Down
5 changes: 3 additions & 2 deletions libc/src/__support/threads/linux/thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "src/__support/CPP/string_view.h"
#include "src/__support/CPP/stringstream.h"
#include "src/__support/OSUtil/syscall.h" // For syscall functions.
#include "src/__support/common.h"
#include "src/__support/error_or.h"
#include "src/__support/threads/linux/futex_word.h" // For FutexWordType

Expand Down Expand Up @@ -54,7 +55,7 @@ static constexpr unsigned CLONE_SYSCALL_FLAGS =
// wake the joining thread.
| CLONE_SETTLS; // Setup the thread pointer of the new thread.

static inline ErrorOr<void *> alloc_stack(size_t size) {
LIBC_INLINE ErrorOr<void *> alloc_stack(size_t size) {
long mmap_result =
__llvm_libc::syscall_impl(MMAP_SYSCALL_NUMBER,
0, // No special address
Expand All @@ -69,7 +70,7 @@ static inline ErrorOr<void *> alloc_stack(size_t size) {
return reinterpret_cast<void *>(mmap_result);
}

static inline void free_stack(void *stack, size_t size) {
LIBC_INLINE void free_stack(void *stack, size_t size) {
  // Issue SYS_munmap for `size` bytes starting at `stack`. The syscall's
  // return value is deliberately discarded.
  (void)__llvm_libc::syscall_impl(SYS_munmap, stack, size);
}

Expand Down
23 changes: 12 additions & 11 deletions libc/src/string/memory_utils/bcmp_implementations.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

namespace __llvm_libc {

[[maybe_unused]] static inline BcmpReturnType
[[maybe_unused]] LIBC_INLINE BcmpReturnType
inline_bcmp_embedded_tiny(CPtr p1, CPtr p2, size_t count) {
LLVM_LIBC_LOOP_NOUNROLL
for (size_t offset = 0; offset < count; ++offset)
Expand All @@ -30,7 +30,7 @@ inline_bcmp_embedded_tiny(CPtr p1, CPtr p2, size_t count) {
}

#if defined(LLVM_LIBC_ARCH_X86) || defined(LLVM_LIBC_ARCH_AARCH64)
[[maybe_unused]] static inline BcmpReturnType
[[maybe_unused]] LIBC_INLINE BcmpReturnType
inline_bcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
if (count < 256)
return generic::Bcmp<16>::loop_and_tail(p1, p2, count);
Expand All @@ -42,7 +42,7 @@ inline_bcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
#endif // defined(LLVM_LIBC_ARCH_X86) || defined(LLVM_LIBC_ARCH_AARCH64)

#if defined(LLVM_LIBC_ARCH_X86)
[[maybe_unused]] static inline BcmpReturnType
[[maybe_unused]] LIBC_INLINE BcmpReturnType
inline_bcmp_x86_sse2_gt16(CPtr p1, CPtr p2, size_t count) {
if (count <= 32)
return x86::sse2::Bcmp<16>::head_tail(p1, p2, count);
Expand All @@ -54,7 +54,7 @@ inline_bcmp_x86_sse2_gt16(CPtr p1, CPtr p2, size_t count) {
return x86::sse2::Bcmp<64>::loop_and_tail(p1, p2, count);
}

[[maybe_unused]] static inline BcmpReturnType
[[maybe_unused]] LIBC_INLINE BcmpReturnType
inline_bcmp_x86_avx2_gt16(CPtr p1, CPtr p2, size_t count) {
if (count <= 32)
return x86::sse2::Bcmp<16>::head_tail(p1, p2, count);
Expand All @@ -70,7 +70,7 @@ inline_bcmp_x86_avx2_gt16(CPtr p1, CPtr p2, size_t count) {
return x86::avx2::Bcmp<64>::loop_and_tail(p1, p2, count);
}

[[maybe_unused]] static inline BcmpReturnType
[[maybe_unused]] LIBC_INLINE BcmpReturnType
inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
if (count <= 32)
return x86::sse2::Bcmp<16>::head_tail(p1, p2, count);
Expand All @@ -86,8 +86,8 @@ inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
return x86::avx512bw::Bcmp<64>::loop_and_tail(p1, p2, count);
}

[[maybe_unused]] static inline BcmpReturnType inline_bcmp_x86(CPtr p1, CPtr p2,
size_t count) {
[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_x86(CPtr p1, CPtr p2,
size_t count) {
if (count == 0)
return BcmpReturnType::ZERO();
if (count == 1)
Expand All @@ -112,8 +112,9 @@ inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
#endif // defined(LLVM_LIBC_ARCH_X86)

#if defined(LLVM_LIBC_ARCH_AARCH64)
[[maybe_unused]] static inline BcmpReturnType
inline_bcmp_aarch64(CPtr p1, CPtr p2, size_t count) {
[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1,
CPtr p2,
size_t count) {
if (likely(count <= 32)) {
if (unlikely(count >= 16)) {
return aarch64::Bcmp<16>::head_tail(p1, p2, count);
Expand Down Expand Up @@ -159,7 +160,7 @@ inline_bcmp_aarch64(CPtr p1, CPtr p2, size_t count) {
}
#endif // defined(LLVM_LIBC_ARCH_AARCH64)

static inline BcmpReturnType inline_bcmp(CPtr p1, CPtr p2, size_t count) {
LIBC_INLINE BcmpReturnType inline_bcmp(CPtr p1, CPtr p2, size_t count) {
#if defined(LLVM_LIBC_ARCH_X86)
return inline_bcmp_x86(p1, p2, count);
#elif defined(LLVM_LIBC_ARCH_AARCH64)
Expand All @@ -173,7 +174,7 @@ static inline BcmpReturnType inline_bcmp(CPtr p1, CPtr p2, size_t count) {
#endif
}

static inline int inline_bcmp(const void *p1, const void *p2, size_t count) {
LIBC_INLINE int inline_bcmp(const void *p1, const void *p2, size_t count) {
  // Reinterpret the untyped pointers as CPtr, forward to the CPtr overload,
  // and convert its BcmpReturnType result to int.
  const CPtr lhs = reinterpret_cast<CPtr>(p1);
  const CPtr rhs = reinterpret_cast<CPtr>(p2);
  return static_cast<int>(inline_bcmp(lhs, rhs, count));
}
Expand Down
16 changes: 8 additions & 8 deletions libc/src/string/memory_utils/memcmp_implementations.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#include <stddef.h> // size_t

namespace __llvm_libc {
[[maybe_unused]] static inline MemcmpReturnType
[[maybe_unused]] LIBC_INLINE MemcmpReturnType
inline_memcmp_embedded_tiny(CPtr p1, CPtr p2, size_t count) {
LLVM_LIBC_LOOP_NOUNROLL
for (size_t offset = 0; offset < count; ++offset)
Expand All @@ -30,7 +30,7 @@ inline_memcmp_embedded_tiny(CPtr p1, CPtr p2, size_t count) {
}

#if defined(LLVM_LIBC_ARCH_X86) || defined(LLVM_LIBC_ARCH_AARCH64)
[[maybe_unused]] static inline MemcmpReturnType
[[maybe_unused]] LIBC_INLINE MemcmpReturnType
inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
if (unlikely(count >= 384)) {
if (auto value = generic::Memcmp<16>::block(p1, p2))
Expand All @@ -42,7 +42,7 @@ inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
#endif // defined(LLVM_LIBC_ARCH_X86) || defined(LLVM_LIBC_ARCH_AARCH64)

#if defined(LLVM_LIBC_ARCH_X86)
[[maybe_unused]] static inline MemcmpReturnType
[[maybe_unused]] LIBC_INLINE MemcmpReturnType
inline_memcmp_x86_sse2_gt16(CPtr p1, CPtr p2, size_t count) {
if (unlikely(count >= 384)) {
if (auto value = x86::sse2::Memcmp<16>::block(p1, p2))
Expand All @@ -52,7 +52,7 @@ inline_memcmp_x86_sse2_gt16(CPtr p1, CPtr p2, size_t count) {
return x86::sse2::Memcmp<16>::loop_and_tail(p1, p2, count);
}

[[maybe_unused]] static inline MemcmpReturnType
[[maybe_unused]] LIBC_INLINE MemcmpReturnType
inline_memcmp_x86_avx2_gt16(CPtr p1, CPtr p2, size_t count) {
if (count <= 32)
return x86::sse2::Memcmp<16>::head_tail(p1, p2, count);
Expand All @@ -68,7 +68,7 @@ inline_memcmp_x86_avx2_gt16(CPtr p1, CPtr p2, size_t count) {
return x86::avx2::Memcmp<32>::loop_and_tail(p1, p2, count);
}

[[maybe_unused]] static inline MemcmpReturnType
[[maybe_unused]] LIBC_INLINE MemcmpReturnType
inline_memcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
if (count <= 32)
return x86::sse2::Memcmp<16>::head_tail(p1, p2, count);
Expand All @@ -87,7 +87,7 @@ inline_memcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
#endif // defined(LLVM_LIBC_ARCH_X86)

#if defined(LLVM_LIBC_ARCH_AARCH64)
[[maybe_unused]] static inline MemcmpReturnType
[[maybe_unused]] LIBC_INLINE MemcmpReturnType
inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
if (unlikely(count >= 128)) { // [128, ∞]
if (auto value = generic::Memcmp<16>::block(p1, p2))
Expand All @@ -108,7 +108,7 @@ inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
}
#endif // defined(LLVM_LIBC_ARCH_AARCH64)

static inline MemcmpReturnType inline_memcmp(CPtr p1, CPtr p2, size_t count) {
LIBC_INLINE MemcmpReturnType inline_memcmp(CPtr p1, CPtr p2, size_t count) {
#if defined(LLVM_LIBC_ARCH_X86) || defined(LLVM_LIBC_ARCH_AARCH64)
if (count == 0)
return MemcmpReturnType::ZERO();
Expand Down Expand Up @@ -146,7 +146,7 @@ static inline MemcmpReturnType inline_memcmp(CPtr p1, CPtr p2, size_t count) {
#endif
}

static inline int inline_memcmp(const void *p1, const void *p2, size_t count) {
LIBC_INLINE int inline_memcmp(const void *p1, const void *p2, size_t count) {
  // Reinterpret the untyped pointers as CPtr, forward to the CPtr overload,
  // and convert its MemcmpReturnType result to int.
  const CPtr lhs = reinterpret_cast<CPtr>(p1);
  const CPtr rhs = reinterpret_cast<CPtr>(p2);
  return static_cast<int>(inline_memcmp(lhs, rhs, count));
}
Expand Down
16 changes: 8 additions & 8 deletions libc/src/string/memory_utils/memcpy_implementations.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

namespace __llvm_libc {

[[maybe_unused]] static inline void
[[maybe_unused]] LIBC_INLINE void
inline_memcpy_embedded_tiny(Ptr __restrict dst, CPtr __restrict src,
size_t count) {
LLVM_LIBC_LOOP_NOUNROLL
Expand All @@ -30,7 +30,7 @@ inline_memcpy_embedded_tiny(Ptr __restrict dst, CPtr __restrict src,
}

#if defined(LLVM_LIBC_ARCH_X86)
[[maybe_unused]] static inline void
[[maybe_unused]] LIBC_INLINE void
inline_memcpy_x86(Ptr __restrict dst, CPtr __restrict src, size_t count) {
if (count == 0)
return;
Expand Down Expand Up @@ -60,7 +60,7 @@ inline_memcpy_x86(Ptr __restrict dst, CPtr __restrict src, size_t count) {
return builtin::Memcpy<kBlockSize>::loop_and_tail(dst, src, count);
}

[[maybe_unused]] static inline void
[[maybe_unused]] LIBC_INLINE void
inline_memcpy_x86_maybe_interpose_repmovsb(Ptr __restrict dst,
CPtr __restrict src, size_t count) {
// Whether to use rep;movsb exclusively, not at all, or only above a certain
Expand Down Expand Up @@ -89,7 +89,7 @@ inline_memcpy_x86_maybe_interpose_repmovsb(Ptr __restrict dst,
#endif // defined(LLVM_LIBC_ARCH_X86)

#if defined(LLVM_LIBC_ARCH_AARCH64)
[[maybe_unused]] static inline void
[[maybe_unused]] LIBC_INLINE void
inline_memcpy_aarch64(Ptr __restrict dst, CPtr __restrict src, size_t count) {
if (count == 0)
return;
Expand Down Expand Up @@ -117,8 +117,8 @@ inline_memcpy_aarch64(Ptr __restrict dst, CPtr __restrict src, size_t count) {
}
#endif // defined(LLVM_LIBC_ARCH_AARCH64)

static inline void inline_memcpy(Ptr __restrict dst, CPtr __restrict src,
size_t count) {
LIBC_INLINE void inline_memcpy(Ptr __restrict dst, CPtr __restrict src,
size_t count) {
using namespace __llvm_libc::builtin;
#if defined(LLVM_LIBC_ARCH_X86)
return inline_memcpy_x86_maybe_interpose_repmovsb(dst, src, count);
Expand All @@ -133,8 +133,8 @@ static inline void inline_memcpy(Ptr __restrict dst, CPtr __restrict src,
#endif
}

static inline void inline_memcpy(void *__restrict dst,
const void *__restrict src, size_t count) {
LIBC_INLINE void inline_memcpy(void *__restrict dst, const void *__restrict src,
                               size_t count) {
  // Convenience overload for untyped pointers: reinterpret them as Ptr/CPtr
  // and hand off to the typed overload.
  Ptr typed_dst = reinterpret_cast<Ptr>(dst);
  CPtr typed_src = reinterpret_cast<CPtr>(src);
  inline_memcpy(typed_dst, typed_src, count);
}

Expand Down
10 changes: 5 additions & 5 deletions libc/src/string/memory_utils/memmove_implementations.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

namespace __llvm_libc {

[[maybe_unused]] static inline void
[[maybe_unused]] LIBC_INLINE void
inline_memmove_embedded_tiny(Ptr dst, CPtr src, size_t count) {
if ((count == 0) || (dst == src))
return;
Expand All @@ -34,8 +34,8 @@ inline_memmove_embedded_tiny(Ptr dst, CPtr src, size_t count) {
}

template <size_t MaxSize>
[[maybe_unused]] static inline void inline_memmove_generic(Ptr dst, CPtr src,
size_t count) {
[[maybe_unused]] LIBC_INLINE void inline_memmove_generic(Ptr dst, CPtr src,
size_t count) {
if (count == 0)
return;
if (count == 1)
Expand Down Expand Up @@ -65,7 +65,7 @@ template <size_t MaxSize>
}
}

static inline void inline_memmove(Ptr dst, CPtr src, size_t count) {
LIBC_INLINE void inline_memmove(Ptr dst, CPtr src, size_t count) {
#if defined(LLVM_LIBC_ARCH_X86) || defined(LLVM_LIBC_ARCH_AARCH64)
#if defined(LLVM_LIBC_ARCH_X86)
static constexpr size_t kMaxSize = x86::kAvx512F ? 64
Expand Down Expand Up @@ -110,7 +110,7 @@ static inline void inline_memmove(Ptr dst, CPtr src, size_t count) {
#endif
}

static inline void inline_memmove(void *dst, const void *src, size_t count) {
LIBC_INLINE void inline_memmove(void *dst, const void *src, size_t count) {
  // Convenience overload for untyped pointers: reinterpret them as Ptr/CPtr
  // and hand off to the typed overload.
  Ptr typed_dst = reinterpret_cast<Ptr>(dst);
  CPtr typed_src = reinterpret_cast<CPtr>(src);
  inline_memmove(typed_dst, typed_src, count);
}
Expand Down
15 changes: 8 additions & 7 deletions libc/src/string/memory_utils/op_aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ namespace neon {
template <size_t Size> struct BzeroCacheLine {
static constexpr size_t SIZE = Size;

static inline void block(Ptr dst, uint8_t) {
LIBC_INLINE void block(Ptr dst, uint8_t) {
static_assert(Size == 64);
#if __SIZEOF_POINTER__ == 4
asm("dc zva, %w[dst]" : : [dst] "r"(dst) : "memory");
Expand All @@ -42,7 +42,7 @@ template <size_t Size> struct BzeroCacheLine {
#endif
}

static inline void loop_and_tail(Ptr dst, uint8_t value, size_t count) {
LIBC_INLINE void loop_and_tail(Ptr dst, uint8_t value, size_t count) {
static_assert(Size > 1, "a loop of size 1 does not need tail");
size_t offset = 0;
do {
Expand Down Expand Up @@ -73,11 +73,11 @@ template <size_t Size> struct Bcmp {
static constexpr size_t SIZE = Size;
static constexpr size_t BlockSize = 32;

static const unsigned char *as_u8(CPtr ptr) {
LIBC_INLINE static const unsigned char *as_u8(CPtr ptr) {
  // Reinterpret the CPtr as a pointer to const unsigned char.
  using U8Ptr = const unsigned char *;
  return reinterpret_cast<U8Ptr>(ptr);
}

static inline BcmpReturnType block(CPtr p1, CPtr p2) {
LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) {
if constexpr (Size == 16) {
auto _p1 = as_u8(p1);
auto _p2 = as_u8(p2);
Expand Down Expand Up @@ -113,11 +113,11 @@ template <size_t Size> struct Bcmp {
return BcmpReturnType::ZERO();
}

static inline BcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
LIBC_INLINE static BcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
  // Compare the final SIZE bytes of each buffer: advance to the end of the
  // `count`-byte range first, then step back by one block.
  CPtr p1_end = p1 + count;
  CPtr p2_end = p2 + count;
  return block(p1_end - SIZE, p2_end - SIZE);
}

static inline BcmpReturnType head_tail(CPtr p1, CPtr p2, size_t count) {
LIBC_INLINE static BcmpReturnType head_tail(CPtr p1, CPtr p2, size_t count) {
if constexpr (Size == 16) {
auto _p1 = as_u8(p1);
auto _p2 = as_u8(p2);
Expand Down Expand Up @@ -159,7 +159,8 @@ template <size_t Size> struct Bcmp {
return BcmpReturnType::ZERO();
}

static inline BcmpReturnType loop_and_tail(CPtr p1, CPtr p2, size_t count) {
LIBC_INLINE static BcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
size_t count) {
static_assert(Size > 1, "a loop of size 1 does not need tail");
size_t offset = 0;
do {
Expand Down