diff --git a/libc/config/baremetal/api.td b/libc/config/baremetal/api.td index a132d8308122f..008eb45386f24 100644 --- a/libc/config/baremetal/api.td +++ b/libc/config/baremetal/api.td @@ -2,6 +2,7 @@ include "config/public_api.td" include "spec/stdc.td" include "spec/stdc_ext.td" +include "spec/llvm_libc_ext.td" def AssertMacro : MacroDef<"assert"> { let Defn = [{ diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt index c9887b6e855a6..99796ad5edf5d 100644 --- a/libc/config/baremetal/arm/entrypoints.txt +++ b/libc/config/baremetal/arm/entrypoints.txt @@ -306,6 +306,8 @@ if(LIBC_COMPILER_HAS_FIXED_POINT) libc.src.stdfix.sqrtur # libc.src.stdfix.sqrtulk libc.src.stdfix.sqrtulr + libc.src.stdfix.uhksqrtus + libc.src.stdfix.uksqrtui ) endif() diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt index c9887b6e855a6..99796ad5edf5d 100644 --- a/libc/config/baremetal/riscv/entrypoints.txt +++ b/libc/config/baremetal/riscv/entrypoints.txt @@ -306,6 +306,8 @@ if(LIBC_COMPILER_HAS_FIXED_POINT) libc.src.stdfix.sqrtur # libc.src.stdfix.sqrtulk libc.src.stdfix.sqrtulr + libc.src.stdfix.uhksqrtus + libc.src.stdfix.uksqrtui ) endif() diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index 5a1d7642f1aeb..526fd03f94f6a 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -5,8 +5,8 @@ include "spec/posix.td" include "spec/linux.td" include "spec/gnu_ext.td" include "spec/bsd_ext.td" -include "spec/llvm_libc_ext.td" include "spec/stdc_ext.td" +include "spec/llvm_libc_ext.td" def AssertMacro : MacroDef<"assert"> { let Defn = [{ diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index a6c3041773dfc..705ec10960c4d 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -495,6 +495,8 @@ if(LIBC_COMPILER_HAS_FIXED_POINT) libc.src.stdfix.sqrtur # libc.src.stdfix.sqrtulk libc.src.stdfix.sqrtulr + libc.src.stdfix.uhksqrtus + libc.src.stdfix.uksqrtui ) endif() diff --git a/libc/docs/math/stdfix.rst b/libc/docs/math/stdfix.rst index 79f499e61f121..5e39d5c01d1e5 100644 --- a/libc/docs/math/stdfix.rst +++ b/libc/docs/math/stdfix.rst @@ -4,14 +4,19 @@ StdFix Functions .. include:: ../check.rst -Standards ---------- +Standards and Goals +------------------- - stdfix.h is specified in the `ISO/IEC TR 18037:2008 `_, C extensions to support embedded processors . - Its `specifications `_. +- Our goal is to implement a complete set of math functions for fixed point + types, most of them are currently not included in the ISO/IEC TR + 18037:2008 standard. Our math functions for fixed point types are modeled + after the C99/C23 math functions for floating point types. + --------------- Source location --------------- @@ -53,6 +58,8 @@ Predefined Macros Fixed-point Arithmetics ======================= +The following functions are included in the ISO/IEC TR 18037:2008 standard. + +---------------+------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------+ | Function Name | _Fract (r) | _Accum (k) | | +------------------------------+----------------------------+------------------------------+------------------------------+----------------------------+------------------------------+ @@ -78,8 +85,6 @@ Fixed-point Arithmetics +---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ | round | |check| | |check| | |check| | |check| | |check| | |check| | |check| | |check| | |check| | |check| | |check| | |check| | +---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ -| sqrt | |check| | | |check| | | |check| | | |check| | | |check| | | | | -+---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ ================== ========= Type Generic Macro Available @@ -93,6 +98,9 @@ roundfx Higher math functions ===================== +The following math functions are modeled after C99/C23 math functions for +floating point types, but are not part of the ISO/IEC TR 18037:2008 spec. + +---------------+------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------+ | Function Name | _Fract (r) | _Accum (k) | | +------------------------------+----------------------------+------------------------------+------------------------------+----------------------------+------------------------------+ @@ -108,6 +116,8 @@ Higher math functions +---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ | sin | | | | | | | | | | | | | +---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| sqrt | |check| | | |check| | | |check| | | |check| | | |check| | | | | ++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ | tan | | | | | | | | | | | | | +---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ @@ -115,6 +125,9 @@ Higher math functions Conversion Functions ==================== +The following conversion functions are included in the ISO/IEC TR 18037:2008 +standard. + +---------------+------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------+ | Function Name | _Fract (r) | _Accum (k) | | +------------------------------+----------------------------+------------------------------+------------------------------+----------------------------+------------------------------+ diff --git a/libc/spec/llvm_libc_ext.td b/libc/spec/llvm_libc_ext.td index 2fd0c5f78fb8a..274284ed5705e 100644 --- a/libc/spec/llvm_libc_ext.td +++ b/libc/spec/llvm_libc_ext.td @@ -51,9 +51,29 @@ def LLVMLibcExt : StandardSpec<"llvm_libc_ext"> { ] >; + HeaderSpec StdFix = HeaderSpec< + "stdfix.h", + [], // macros + [], // types + [], // enums + [ // functions + GuardedFunctionSpec<"sqrtuhr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + GuardedFunctionSpec<"sqrtur", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + GuardedFunctionSpec<"sqrtulr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + + GuardedFunctionSpec<"sqrtuhk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + GuardedFunctionSpec<"sqrtuk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + GuardedFunctionSpec<"sqrtulk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + + GuardedFunctionSpec<"uhksqrtus", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + GuardedFunctionSpec<"uksqrtui", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, + ] + >; + let Headers = [ - Strings, - Sched, Assert, + Sched, + Stdfix, + Strings, ]; } diff --git a/libc/spec/stdc_ext.td b/libc/spec/stdc_ext.td index be1e6d4ba2fcd..6620142146c47 100644 --- a/libc/spec/stdc_ext.td +++ b/libc/spec/stdc_ext.td @@ -47,14 +47,6 @@ def StdcExt : StandardSpec<"stdc_ext"> { GuardedFunctionSpec<"rounduhk", RetValSpec, [ArgSpec, ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, GuardedFunctionSpec<"rounduk", RetValSpec, [ArgSpec, ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, GuardedFunctionSpec<"roundulk", RetValSpec, [ArgSpec, ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - - GuardedFunctionSpec<"sqrtuhr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"sqrtur", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"sqrtulr", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - - GuardedFunctionSpec<"sqrtuhk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"sqrtuk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, - GuardedFunctionSpec<"sqrtulk", RetValSpec, [ArgSpec], "LIBC_COMPILER_HAS_FIXED_POINT">, ] >; diff --git a/libc/src/__support/fixed_point/CMakeLists.txt b/libc/src/__support/fixed_point/CMakeLists.txt index 0ed118f240884..3b744081765e4 100644 --- a/libc/src/__support/fixed_point/CMakeLists.txt +++ b/libc/src/__support/fixed_point/CMakeLists.txt @@ -32,5 +32,6 @@ add_header_library( libc.src.__support.macros.attributes libc.src.__support.macros.optimization libc.src.__support.CPP.bit + libc.src.__support.CPP.limits libc.src.__support.CPP.type_traits ) diff --git a/libc/src/__support/fixed_point/fx_rep.h b/libc/src/__support/fixed_point/fx_rep.h index f8593a93684cb..042cd2b20714c 100644 --- a/libc/src/__support/fixed_point/fx_rep.h +++ b/libc/src/__support/fixed_point/fx_rep.h @@ -48,6 +48,8 @@ template <> struct FXRep { LIBC_INLINE static constexpr Type MAX() { return SFRACT_MIN; } LIBC_INLINE static constexpr Type ZERO() { return 0.0HR; } LIBC_INLINE static constexpr Type EPS() { return SFRACT_EPSILON; } + LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5HR; } + LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25HR; } using StorageType = typename internal::Storage::Type; using CompType = cpp::make_signed_t; @@ -66,6 +68,8 @@ template <> struct FXRep { LIBC_INLINE static constexpr Type MAX() { return USFRACT_MIN; } LIBC_INLINE static constexpr Type ZERO() { return 0.0UHR; } LIBC_INLINE static constexpr Type EPS() { return USFRACT_EPSILON; } + LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UHR; } + LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25UHR; } using StorageType = typename internal::Storage::Type; using CompType = cpp::make_unsigned_t; @@ -84,6 +88,8 @@ template <> struct FXRep { LIBC_INLINE static constexpr Type MAX() { return FRACT_MIN; } LIBC_INLINE static constexpr Type ZERO() { return 0.0R; } LIBC_INLINE static constexpr Type EPS() { return FRACT_EPSILON; } + LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5R; } + LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25R; } using StorageType = typename internal::Storage::Type; using CompType = cpp::make_signed_t; @@ -102,6 +108,8 @@ template <> struct FXRep { LIBC_INLINE static constexpr Type MAX() { return UFRACT_MIN; } LIBC_INLINE static constexpr Type ZERO() { return 0.0UR; } LIBC_INLINE static constexpr Type EPS() { return UFRACT_EPSILON; } + LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UR; } + LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25UR; } using StorageType = typename internal::Storage::Type; using CompType = cpp::make_unsigned_t; @@ -120,6 +128,8 @@ template <> struct FXRep { LIBC_INLINE static constexpr Type MAX() { return LFRACT_MIN; } LIBC_INLINE static constexpr Type ZERO() { return 0.0LR; } LIBC_INLINE static constexpr Type EPS() { return LFRACT_EPSILON; } + LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5LR; } + LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25LR; } using StorageType = typename internal::Storage::Type; using CompType = cpp::make_signed_t; @@ -138,6 +148,8 @@ template <> struct FXRep { LIBC_INLINE static constexpr Type MAX() { return ULFRACT_MIN; } LIBC_INLINE static constexpr Type ZERO() { return 0.0ULR; } LIBC_INLINE static constexpr Type EPS() { return ULFRACT_EPSILON; } + LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5ULR; } + LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25ULR; } using StorageType = typename internal::Storage::Type; using CompType = cpp::make_unsigned_t; @@ -156,6 +168,8 @@ template <> struct FXRep { LIBC_INLINE static constexpr Type MAX() { return SACCUM_MIN; } LIBC_INLINE static constexpr Type ZERO() { return 0.0HK; } LIBC_INLINE static constexpr Type EPS() { return SACCUM_EPSILON; } + LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5HK; } + LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25HK; } using StorageType = typename internal::Storage::Type; using CompType = cpp::make_signed_t; @@ -174,6 +188,8 @@ template <> struct FXRep { LIBC_INLINE static constexpr Type MAX() { return USACCUM_MIN; } LIBC_INLINE static constexpr Type ZERO() { return 0.0UHK; } LIBC_INLINE static constexpr Type EPS() { return USACCUM_EPSILON; } + LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UHK; } + LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25UHK; } using StorageType = typename internal::Storage::Type; using CompType = cpp::make_unsigned_t; @@ -192,6 +208,8 @@ template <> struct FXRep { LIBC_INLINE static constexpr Type MAX() { return ACCUM_MIN; } LIBC_INLINE static constexpr Type ZERO() { return 0.0K; } LIBC_INLINE static constexpr Type EPS() { return ACCUM_EPSILON; } + LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5K; } + LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25K; } using StorageType = typename internal::Storage::Type; using CompType = cpp::make_signed_t; @@ -210,6 +228,8 @@ template <> struct FXRep { LIBC_INLINE static constexpr Type MAX() { return UACCUM_MIN; } LIBC_INLINE static constexpr Type ZERO() { return 0.0UK; } LIBC_INLINE static constexpr Type EPS() { return UACCUM_EPSILON; } + LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UK; } + LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25UK; } using StorageType = typename internal::Storage::Type; using CompType = cpp::make_unsigned_t; @@ -228,6 +248,8 @@ template <> struct FXRep { LIBC_INLINE static constexpr Type MAX() { return LACCUM_MIN; } LIBC_INLINE static constexpr Type ZERO() { return 0.0LK; } LIBC_INLINE static constexpr Type EPS() { return LACCUM_EPSILON; } + LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5LK; } + LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25LK; } using StorageType = typename internal::Storage::Type; using CompType = cpp::make_signed_t; @@ -246,6 +268,8 @@ template <> struct FXRep { LIBC_INLINE static constexpr Type MAX() { return ULACCUM_MIN; } LIBC_INLINE static constexpr Type ZERO() { return 0.0ULK; } LIBC_INLINE static constexpr Type EPS() { return ULACCUM_EPSILON; } + LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5ULK; } + LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25ULK; } using StorageType = typename internal::Storage::Type; using CompType = cpp::make_unsigned_t; diff --git a/libc/src/__support/fixed_point/sqrt.h b/libc/src/__support/fixed_point/sqrt.h index d8df294b18a1a..4ec016ceab002 100644 --- a/libc/src/__support/fixed_point/sqrt.h +++ b/libc/src/__support/fixed_point/sqrt.h @@ -11,6 +11,7 @@ #include "include/llvm-libc-macros/stdfix-macros.h" #include "src/__support/CPP/bit.h" +#include "src/__support/CPP/limits.h" // CHAR_BIT #include "src/__support/CPP/type_traits.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY @@ -28,16 +29,73 @@ template struct SqrtConfig; template <> struct SqrtConfig { using Type = unsigned short fract; static constexpr int EXTRA_STEPS = 0; + + // Linear approximation for the initial values, with errors bounded by: + // max(1.5 * 2^-11, eps) + // Generated with Sollya: + // > for i from 4 to 15 do { + // P = fpminimax(sqrt(x), 1, [|8, 8|], [i * 2^-4, (i + 1)*2^-4], + // fixed, absolute); + // print("{", coeff(P, 1), "uhr,", coeff(P, 0), "uhr},"); + // }; + static constexpr Type FIRST_APPROX[12][2] = { + {0x1.e8p-1uhr, 0x1.0cp-2uhr}, {0x1.bap-1uhr, 0x1.28p-2uhr}, + {0x1.94p-1uhr, 0x1.44p-2uhr}, {0x1.74p-1uhr, 0x1.6p-2uhr}, + {0x1.6p-1uhr, 0x1.74p-2uhr}, {0x1.4ep-1uhr, 0x1.88p-2uhr}, + {0x1.3ep-1uhr, 0x1.9cp-2uhr}, {0x1.32p-1uhr, 0x1.acp-2uhr}, + {0x1.22p-1uhr, 0x1.c4p-2uhr}, {0x1.18p-1uhr, 0x1.d4p-2uhr}, + {0x1.08p-1uhr, 0x1.fp-2uhr}, {0x1.04p-1uhr, 0x1.f8p-2uhr}, + }; }; template <> struct SqrtConfig { using Type = unsigned fract; static constexpr int EXTRA_STEPS = 1; + + // Linear approximation for the initial values, with errors bounded by: + // max(1.5 * 2^-11, eps) + // Generated with Sollya: + // > for i from 4 to 15 do { + // P = fpminimax(sqrt(x), 1, [|16, 16|], [i * 2^-4, (i + 1)*2^-4], + // fixed, absolute); + // print("{", coeff(P, 1), "ur,", coeff(P, 0), "ur},"); + // }; + static constexpr Type FIRST_APPROX[12][2] = { + {0x1.e378p-1ur, 0x1.0ebp-2ur}, {0x1.b512p-1ur, 0x1.2b94p-2ur}, + {0x1.91fp-1ur, 0x1.45dcp-2ur}, {0x1.7622p-1ur, 0x1.5e24p-2ur}, + {0x1.5f5ap-1ur, 0x1.74e4p-2ur}, {0x1.4c58p-1ur, 0x1.8a4p-2ur}, + {0x1.3c1ep-1ur, 0x1.9e84p-2ur}, {0x1.2e0cp-1ur, 0x1.b1d8p-2ur}, + {0x1.21aap-1ur, 0x1.c468p-2ur}, {0x1.16bap-1ur, 0x1.d62cp-2ur}, + {0x1.0cfp-1ur, 0x1.e74cp-2ur}, {0x1.0418p-1ur, 0x1.f7ep-2ur}, + }; }; template <> struct SqrtConfig { using Type = unsigned long fract; static constexpr int EXTRA_STEPS = 2; + + // Linear approximation for the initial values, with errors bounded by: + // max(1.5 * 2^-11, eps) + // Generated with Sollya: + // > for i from 4 to 15 do { + // P = fpminimax(sqrt(x), 1, [|32, 32|], [i * 2^-4, (i + 1)*2^-4], + // fixed, absolute); + // print("{", coeff(P, 1), "ulr,", coeff(P, 0), "ulr},"); + // }; + static constexpr Type FIRST_APPROX[12][2] = { + {0x1.e3779b98p-1ulr, 0x1.0eaff788p-2ulr}, + {0x1.b5167872p-1ulr, 0x1.2b908ad4p-2ulr}, + {0x1.91f195cap-1ulr, 0x1.45da800cp-2ulr}, + {0x1.761ebcb4p-1ulr, 0x1.5e27004cp-2ulr}, + {0x1.5f619986p-1ulr, 0x1.74db933cp-2ulr}, + {0x1.4c583adep-1ulr, 0x1.8a3fbfccp-2ulr}, + {0x1.3c1a591cp-1ulr, 0x1.9e88373cp-2ulr}, + {0x1.2e08545ap-1ulr, 0x1.b1dd2534p-2ulr}, + {0x1.21b05c0ap-1ulr, 0x1.c45e023p-2ulr}, + {0x1.16becd02p-1ulr, 0x1.d624031p-2ulr}, + {0x1.0cf49fep-1ulr, 0x1.e743b844p-2ulr}, + {0x1.04214e9cp-1ulr, 0x1.f7ce2c3cp-2ulr}, + }; }; template <> @@ -46,46 +104,38 @@ struct SqrtConfig : SqrtConfig {}; template <> struct SqrtConfig : SqrtConfig {}; -// TODO: unsigned long accum type is 64-bit, and will need 64-bit fract type. -// Probably we will use DyadicFloat<64> for intermediate computations instead. - -// Linear approximation for the initial values, with errors bounded by: -// max(1.5 * 2^-11, eps) -// Generated with Sollya: -// > for i from 4 to 15 do { -// P = fpminimax(sqrt(x), 1, [|8, 8|], [i * 2^-4, (i + 1)*2^-4], -// fixed, absolute); -// print("{", coeff(P, 1), "uhr,", coeff(P, 0), "uhr},"); -// }; -static constexpr unsigned short fract SQRT_FIRST_APPROX[12][2] = { - {0x1.e8p-1uhr, 0x1.0cp-2uhr}, {0x1.bap-1uhr, 0x1.28p-2uhr}, - {0x1.94p-1uhr, 0x1.44p-2uhr}, {0x1.74p-1uhr, 0x1.6p-2uhr}, - {0x1.6p-1uhr, 0x1.74p-2uhr}, {0x1.4ep-1uhr, 0x1.88p-2uhr}, - {0x1.3ep-1uhr, 0x1.9cp-2uhr}, {0x1.32p-1uhr, 0x1.acp-2uhr}, - {0x1.22p-1uhr, 0x1.c4p-2uhr}, {0x1.18p-1uhr, 0x1.d4p-2uhr}, - {0x1.08p-1uhr, 0x1.fp-2uhr}, {0x1.04p-1uhr, 0x1.f8p-2uhr}, +// Integer square root +template <> struct SqrtConfig { + using OutType = unsigned short accum; + using FracType = unsigned fract; + // For fast-but-less-accurate version + using FastFracType = unsigned short fract; + using HalfType = unsigned char; }; -} // namespace internal +template <> struct SqrtConfig { + using OutType = unsigned accum; + using FracType = unsigned long fract; + // For fast-but-less-accurate version + using FastFracType = unsigned fract; + using HalfType = unsigned short; +}; -template -LIBC_INLINE constexpr cpp::enable_if_t, T> sqrt(T x) { - using BitType = typename FXRep::StorageType; - BitType x_bit = cpp::bit_cast(x); +// TODO: unsigned long accum type is 64-bit, and will need 64-bit fract type. +// Probably we will use DyadicFloat<64> for intermediate computations instead. - if (LIBC_UNLIKELY(x_bit == 0)) - return FXRep::ZERO(); +} // namespace internal - int leading_zeros = cpp::countl_zero(x_bit); - constexpr int STORAGE_LENGTH = sizeof(BitType) * CHAR_BIT; - constexpr int EXP_ADJUSTMENT = STORAGE_LENGTH - FXRep::FRACTION_LEN - 1; - // x_exp is the real exponent of the leading bit of x. - int x_exp = EXP_ADJUSTMENT - leading_zeros; - int shift = EXP_ADJUSTMENT - 1 - (x_exp & (~1)); - // Normalize. - x_bit <<= shift; - using FracType = typename internal::SqrtConfig::Type; - FracType x_frac = cpp::bit_cast(x_bit); +// Core computation for sqrt with normalized inputs (0.25 <= x < 1). +template +LIBC_INLINE constexpr typename Config::Type +sqrt_core(typename Config::Type x_frac) { + using FracType = typename Config::Type; + using FXRep = FXRep; + using StorageType = typename FXRep::StorageType; + // Exact case: + if (x_frac == FXRep::ONE_FOURTH()) + return FXRep::ONE_HALF(); // Use use Newton method to approximate sqrt(a): // x_{n + 1} = 1/2 (x_n + a / x_n) @@ -96,9 +146,10 @@ LIBC_INLINE constexpr cpp::enable_if_t, T> sqrt(T x) { // are between 0b0100 and 0b1111. Hence the lookup table only needs 12 // entries, and we can get the index by subtracting the leading 4 bits of // x_frac by 4 = 0b0100. - int index = (x_bit >> (STORAGE_LENGTH - 4)) - 4; - FracType a = static_cast(internal::SQRT_FIRST_APPROX[index][0]); - FracType b = static_cast(internal::SQRT_FIRST_APPROX[index][1]); + StorageType x_bit = cpp::bit_cast(x_frac); + int index = (static_cast(x_bit >> (FXRep::TOTAL_LEN - 4))) - 4; + FracType a = Config::FIRST_APPROX[index][0]; + FracType b = Config::FIRST_APPROX[index][1]; // Initial approximation step. // Estimated error bounds: | r - sqrt(x_frac) | < max(1.5 * 2^-11, eps). @@ -112,9 +163,34 @@ LIBC_INLINE constexpr cpp::enable_if_t, T> sqrt(T x) { // Blanchard, J. D. and Chamberland, M., "Newton's Method Without Division", // The American Mathematical Monthly (2023). // https://chamberland.math.grinnell.edu/papers/newton.pdf - for (int i = 0; i < internal::SqrtConfig::EXTRA_STEPS; ++i) + for (int i = 0; i < Config::EXTRA_STEPS; ++i) r = (r >> 1) + (x_frac >> 1) / r; + return r; +} + +template +LIBC_INLINE constexpr cpp::enable_if_t, T> sqrt(T x) { + using BitType = typename FXRep::StorageType; + BitType x_bit = cpp::bit_cast(x); + + if (LIBC_UNLIKELY(x_bit == 0)) + return FXRep::ZERO(); + + int leading_zeros = cpp::countl_zero(x_bit); + constexpr int STORAGE_LENGTH = sizeof(BitType) * CHAR_BIT; + constexpr int EXP_ADJUSTMENT = STORAGE_LENGTH - FXRep::FRACTION_LEN - 1; + // x_exp is the real exponent of the leading bit of x. + int x_exp = EXP_ADJUSTMENT - leading_zeros; + int shift = EXP_ADJUSTMENT - 1 - (x_exp & (~1)); + // Normalize. + x_bit <<= shift; + using FracType = typename internal::SqrtConfig::Type; + FracType x_frac = cpp::bit_cast(x_bit); + + // Compute sqrt(x_frac) using Newton-method. + FracType r = sqrt_core>(x_frac); + // Re-scaling r >>= EXP_ADJUSTMENT - (x_exp >> 1); @@ -122,6 +198,59 @@ LIBC_INLINE constexpr cpp::enable_if_t, T> sqrt(T x) { return cpp::bit_cast(r); } +// Integer square root - Accurate version: +// Absolute errors < 2^(-fraction length). +template +LIBC_INLINE constexpr typename internal::SqrtConfig::OutType isqrt(T x) { + using OutType = typename internal::SqrtConfig::OutType; + using FracType = typename internal::SqrtConfig::FracType; + + if (x == 0) + return FXRep::ZERO(); + + // Normalize the leading bits to the first two bits. + // Shift and then Bit cast x to x_frac gives us: + // x = 2^(FRACTION_LEN + 1 - shift) * x_frac; + int leading_zeros = cpp::countl_zero(x); + int shift = ((leading_zeros >> 1) << 1); + x <<= shift; + // Convert to frac type and compute square root. + FracType x_frac = cpp::bit_cast(x); + FracType r = sqrt_core>(x_frac); + // To rescale back to the OutType (Accum) + r >>= (shift >> 1); + + return cpp::bit_cast(r); +} + +// Integer square root - Fast but less accurate version: +// Relative errors < 2^(-fraction length). +template +LIBC_INLINE constexpr typename internal::SqrtConfig::OutType +isqrt_fast(T x) { + using OutType = typename internal::SqrtConfig::OutType; + using FracType = typename internal::SqrtConfig::FastFracType; + using StorageType = typename FXRep::StorageType; + + if (x == 0) + return FXRep::ZERO(); + + // Normalize the leading bits to the first two bits. + // Shift and then Bit cast x to x_frac gives us: + // x = 2^(FRACTION_LEN + 1 - shift) * x_frac; + int leading_zeros = cpp::countl_zero(x); + int shift = (leading_zeros & (~1)); + x <<= shift; + // Convert to frac type and compute square root. + FracType x_frac = cpp::bit_cast( + static_cast(x >> FXRep::FRACTION_LEN)); + OutType r = + static_cast(sqrt_core>(x_frac)); + // To rescale back to the OutType (Accum) + r <<= (FXRep::INTEGRAL_LEN - (shift >> 1)); + return cpp::bit_cast(r); +} + } // namespace LIBC_NAMESPACE::fixed_point #endif // LIBC_COMPILER_HAS_FIXED_POINT diff --git a/libc/src/stdfix/CMakeLists.txt b/libc/src/stdfix/CMakeLists.txt index cb2134fe33cf9..3a1cb66b7abca 100644 --- a/libc/src/stdfix/CMakeLists.txt +++ b/libc/src/stdfix/CMakeLists.txt @@ -11,7 +11,6 @@ foreach(suffix IN ITEMS hr r lr hk k lk) abs${suffix}.cpp COMPILE_OPTIONS -O3 - -ffixed-point DEPENDS libc.src.__support.fixed_point.fx_bits ) @@ -26,7 +25,6 @@ foreach(suffix IN ITEMS uhr ur ulr uhk uk) sqrt${suffix}.cpp COMPILE_OPTIONS -O3 - -ffixed-point DEPENDS libc.src.__support.fixed_point.sqrt ) @@ -41,8 +39,31 @@ foreach(suffix IN ITEMS hr r lr hk k lk uhr ur ulr uhk uk ulk) round${suffix}.cpp COMPILE_OPTIONS -O3 - -ffixed-point DEPENDS libc.src.__support.fixed_point.fx_bits ) endforeach() + +add_entrypoint_object( + uhksqrtus + HDRS + uhksqrtus.h + SRCS + uhksqrtus.cpp + COMPILE_OPTIONS + -O3 + DEPENDS + libc.src.__support.fixed_point.sqrt +) + +add_entrypoint_object( + uksqrtui + HDRS + uksqrtui.h + SRCS + uksqrtui.cpp + COMPILE_OPTIONS + -O3 + DEPENDS + libc.src.__support.fixed_point.sqrt +) diff --git a/libc/src/stdfix/uhksqrtus.cpp b/libc/src/stdfix/uhksqrtus.cpp new file mode 100644 index 0000000000000..335750ae902b2 --- /dev/null +++ b/libc/src/stdfix/uhksqrtus.cpp @@ -0,0 +1,23 @@ +//===-- Implementation of uhksqrtus function ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "uhksqrtus.h" +#include "src/__support/common.h" +#include "src/__support/fixed_point/sqrt.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned short accum, uhksqrtus, (unsigned short x)) { +#ifdef LIBC_FAST_MATH + return fixed_point::isqrt_fast(x); +#else + return fixed_point::isqrt(x); +#endif +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdfix/uhksqrtus.h b/libc/src/stdfix/uhksqrtus.h new file mode 100644 index 0000000000000..c24846a800303 --- /dev/null +++ b/libc/src/stdfix/uhksqrtus.h @@ -0,0 +1,20 @@ +//===-- Implementation header for uhksqrtus ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDFIX_UHKSQRTUS_H +#define LLVM_LIBC_SRC_STDFIX_UHKSQRTUS_H + +#include "include/llvm-libc-macros/stdfix-macros.h" + +namespace LIBC_NAMESPACE { + +unsigned short accum uhksqrtus(unsigned short x); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDFIX_UHKSQRTUS_H diff --git a/libc/src/stdfix/uksqrtui.cpp b/libc/src/stdfix/uksqrtui.cpp new file mode 100644 index 0000000000000..ee1ae1335027a --- /dev/null +++ b/libc/src/stdfix/uksqrtui.cpp @@ -0,0 +1,23 @@ +//===-- Implementation of uksqrtui function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "uksqrtui.h" +#include "src/__support/common.h" +#include "src/__support/fixed_point/sqrt.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned accum, uksqrtui, (unsigned int x)) { +#ifdef LIBC_FAST_MATH + return fixed_point::isqrt_fast(x); +#else + return fixed_point::isqrt(x); +#endif +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdfix/uksqrtui.h b/libc/src/stdfix/uksqrtui.h new file mode 100644 index 0000000000000..cd4ff41ea100a --- /dev/null +++ b/libc/src/stdfix/uksqrtui.h @@ -0,0 +1,20 @@ +//===-- Implementation header for uksqrtui ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDFIX_UKSQRTUI_H +#define LLVM_LIBC_SRC_STDFIX_UKSQRTUI_H + +#include "include/llvm-libc-macros/stdfix-macros.h" + +namespace LIBC_NAMESPACE { + +unsigned accum uksqrtui(unsigned int x); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDFIX_UKSQRTUI_H diff --git a/libc/test/src/stdfix/CMakeLists.txt b/libc/test/src/stdfix/CMakeLists.txt index 4140b5b29f3b3..d3e122884eb40 100644 --- a/libc/test/src/stdfix/CMakeLists.txt +++ b/libc/test/src/stdfix/CMakeLists.txt @@ -15,7 +15,6 @@ foreach(suffix IN ITEMS hr r lr hk k lk) abs${suffix}_test.cpp COMPILE_OPTIONS -O3 - -ffixed-point DEPENDS libc.src.stdfix.abs${suffix} libc.src.__support.fixed_point.fx_bits @@ -33,7 +32,6 @@ foreach(suffix IN ITEMS uhr ur ulr uhk uk) sqrt${suffix}_test.cpp COMPILE_OPTIONS -O3 - -ffixed-point DEPENDS libc.src.stdfix.sqrt${suffix} libc.src.__support.CPP.bit @@ -55,9 +53,46 @@ foreach(suffix IN ITEMS hr r lr hk k lk uhr ur ulr uhk uk ulk) round${suffix}_test.cpp COMPILE_OPTIONS -O3 - -ffixed-point DEPENDS libc.src.stdfix.round${suffix} libc.src.__support.fixed_point.fx_bits ) endforeach() + +add_libc_test( + uhksqrtus_test + SUITE + libc-stdfix-tests + HDRS + ISqrtTest.h + SRCS + uhksqrtus_test.cpp + COMPILE_OPTIONS + -O3 + DEPENDS + libc.src.stdfix.uhksqrtus + libc.src.__support.CPP.bit + libc.src.__support.fixed_point.fx_rep + libc.src.__support.fixed_point.sqrt + libc.src.__support.FPUtil.basic_operations + libc.src.__support.FPUtil.sqrt +) + +add_libc_test( + uksqrtui_test + SUITE + libc-stdfix-tests + HDRS + ISqrtTest.h + SRCS + uksqrtui_test.cpp + COMPILE_OPTIONS + -O3 + DEPENDS + libc.src.stdfix.uksqrtui + libc.src.__support.CPP.bit + libc.src.__support.fixed_point.fx_rep + libc.src.__support.fixed_point.sqrt + libc.src.__support.FPUtil.basic_operations + libc.src.__support.FPUtil.sqrt +) diff --git a/libc/test/src/stdfix/ISqrtTest.h b/libc/test/src/stdfix/ISqrtTest.h new file mode 100644 index 0000000000000..405162b706a9f --- /dev/null +++ b/libc/test/src/stdfix/ISqrtTest.h @@ -0,0 +1,63 @@ +//===-- Utility class to test integer sqrt ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/sqrt.h" +#include "src/__support/fixed_point/fx_rep.h" +#include "src/__support/fixed_point/sqrt.h" + +template class ISqrtTest : public LIBC_NAMESPACE::testing::Test { + + using OutType = + typename LIBC_NAMESPACE::fixed_point::internal::SqrtConfig::OutType; + using FXRep = LIBC_NAMESPACE::fixed_point::FXRep; + static constexpr OutType zero = FXRep::ZERO(); + static constexpr OutType one = static_cast(1); + static constexpr OutType eps = FXRep::EPS(); + +public: + typedef OutType (*SqrtFunc)(T); + + void testSpecialNumbers(SqrtFunc func) { + EXPECT_EQ(zero, func(T(0))); + + EXPECT_EQ(one, func(T(1))); + EXPECT_EQ(static_cast(2.0), func(T(4))); + EXPECT_EQ(static_cast(4.0), func(T(16))); + EXPECT_EQ(static_cast(16.0), func(T(256))); + + constexpr int COUNT = 255; + constexpr double ERR = 3.0 * static_cast(eps); + double x_d = 0.0; + T x = 0; + for (int i = 0; i < COUNT; ++i) { + x_d += 1.0; + ++x; + double y_d = static_cast(func(x)); + double result = LIBC_NAMESPACE::fputil::sqrt(x_d); + double errors = LIBC_NAMESPACE::fputil::abs((y_d / result) - 1.0); + if (errors > ERR) { + // Print out the failure input and output. + EXPECT_EQ(x, T(0)); + EXPECT_EQ(func(x), zero); + } + ASSERT_TRUE(errors <= ERR); + } + } +}; + +#define LIST_ISQRT_TESTS(Name, T, func) \ + using LlvmLibcISqrt##Name##Test = ISqrtTest; \ + TEST_F(LlvmLibcISqrt##Name##Test, SpecialNumbers) { \ + testSpecialNumbers(&func); \ + } \ + static_assert(true, "Require semicolon.") diff --git a/libc/test/src/stdfix/uhksqrtus_test.cpp b/libc/test/src/stdfix/uhksqrtus_test.cpp new file mode 100644 index 0000000000000..a33297413980d --- /dev/null +++ b/libc/test/src/stdfix/uhksqrtus_test.cpp @@ -0,0 +1,20 @@ +//===-- Unittests for uhksqrtus -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ISqrtTest.h" + +#include "src/__support/fixed_point/sqrt.h" +#include "src/stdfix/uhksqrtus.h" + +unsigned short accum uhksqrtus_fast(unsigned short x) { + return LIBC_NAMESPACE::fixed_point::isqrt_fast(x); +} + +LIST_ISQRT_TESTS(US, unsigned short, LIBC_NAMESPACE::uhksqrtus); + +LIST_ISQRT_TESTS(USFast, unsigned short, uhksqrtus_fast); diff --git a/libc/test/src/stdfix/uksqrtui_test.cpp b/libc/test/src/stdfix/uksqrtui_test.cpp new file mode 100644 index 0000000000000..0f4c057099daa --- /dev/null +++ b/libc/test/src/stdfix/uksqrtui_test.cpp @@ -0,0 +1,20 @@ +//===-- Unittests for uksqrtui --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ISqrtTest.h" + +#include "src/__support/fixed_point/sqrt.h" +#include "src/stdfix/uksqrtui.h" + +unsigned accum uksqrtui_fast(unsigned int x) { + return LIBC_NAMESPACE::fixed_point::isqrt_fast(x); +} + +LIST_ISQRT_TESTS(UI, unsigned int, LIBC_NAMESPACE::uksqrtui); + +LIST_ISQRT_TESTS(UIFast, unsigned int, uksqrtui_fast);