| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,164 @@ | ||
| //===-- Half-precision log10(x) function ----------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "src/math/log10f16.h" | ||
| #include "expxf16.h" | ||
| #include "hdr/errno_macros.h" | ||
| #include "hdr/fenv_macros.h" | ||
| #include "src/__support/FPUtil/FEnvImpl.h" | ||
| #include "src/__support/FPUtil/FPBits.h" | ||
| #include "src/__support/FPUtil/PolyEval.h" | ||
| #include "src/__support/FPUtil/cast.h" | ||
| #include "src/__support/FPUtil/except_value_utils.h" | ||
| #include "src/__support/FPUtil/multiply_add.h" | ||
| #include "src/__support/common.h" | ||
| #include "src/__support/macros/config.h" | ||
| #include "src/__support/macros/optimization.h" | ||
| #include "src/__support/macros/properties/cpu_features.h" | ||
|
|
||
| namespace LIBC_NAMESPACE_DECL { | ||
|
|
||
| // Number of rows in LOG10F16_EXCEPTS. It must equal the number of rows that | |
| // survive preprocessing below: 11 when LIBC_TARGET_CPU_HAS_FMA is defined, and | |
| // 6 more (17 in total) when it is not. | |
| #ifdef LIBC_TARGET_CPU_HAS_FMA | |
| static constexpr size_t N_LOG10F16_EXCEPTS = 11; | |
| #else | |
| static constexpr size_t N_LOG10F16_EXCEPTS = 17; | |
| #endif | |
|
|
||
| // Inputs whose results log10f16 returns from this table (via lookup on the | |
| // raw bits of x) instead of computing them. | |
| static constexpr fputil::ExceptValues<float16, N_LOG10F16_EXCEPTS> | |
| LOG10F16_EXCEPTS = {{ | |
| // (input, RZ output, RU offset, RD offset, RN offset) | |
| // x = 0x1.e3cp-3, log10f16(x) = -0x1.40cp-1 (RZ) | |
| {0x338fU, 0xb903U, 0U, 1U, 0U}, | |
| // x = 0x1.fep-3, log10f16(x) = -0x1.35p-1 (RZ) | |
| {0x33f8U, 0xb8d4U, 0U, 1U, 1U}, | |
| #ifndef LIBC_TARGET_CPU_HAS_FMA | |
| // x = 0x1.394p-1, log10f16(x) = -0x1.b4cp-3 (RZ) | |
| {0x38e5U, 0xb2d3U, 0U, 1U, 1U}, | |
| #endif | |
| // x = 0x1.ea8p-1, log10f16(x) = -0x1.31p-6 (RZ) | |
| {0x3baaU, 0xa4c4U, 0U, 1U, 1U}, | |
| // x = 0x1.ebp-1, log10f16(x) = -0x1.29cp-6 (RZ) | |
| {0x3bacU, 0xa4a7U, 0U, 1U, 1U}, | |
| // x = 0x1.f3p-1, log10f16(x) = -0x1.6dcp-7 (RZ) | |
| {0x3bccU, 0xa1b7U, 0U, 1U, 1U}, | |
| #ifndef LIBC_TARGET_CPU_HAS_FMA | |
| // x = 0x1.f38p-1, log10f16(x) = -0x1.5f8p-7 (RZ) | |
| {0x3bceU, 0xa17eU, 0U, 1U, 1U}, | |
| // x = 0x1.fd8p-1, log10f16(x) = -0x1.168p-9 (RZ) | |
| {0x3bf6U, 0x985aU, 0U, 1U, 1U}, | |
| // x = 0x1.ff8p-1, log10f16(x) = -0x1.bccp-12 (RZ) | |
| {0x3bfeU, 0x8ef3U, 0U, 1U, 1U}, | |
| // x = 0x1.374p+0, log10f16(x) = 0x1.5b8p-4 (RZ) | |
| {0x3cddU, 0x2d6eU, 1U, 0U, 1U}, | |
| // x = 0x1.3ecp+1, log10f16(x) = 0x1.958p-2 (RZ) | |
| {0x40fbU, 0x3656U, 1U, 0U, 1U}, | |
| #endif | |
| // x = 0x1.4p+3, log10f16(x) = 0x1p+0 (RZ) | |
| {0x4900U, 0x3c00U, 0U, 0U, 0U}, | |
| // x = 0x1.9p+6, log10f16(x) = 0x1p+1 (RZ) | |
| {0x5640U, 0x4000U, 0U, 0U, 0U}, | |
| // x = 0x1.f84p+6, log10f16(x) = 0x1.0ccp+1 (RZ) | |
| {0x57e1U, 0x4033U, 1U, 0U, 0U}, | |
| // x = 0x1.f4p+9, log10f16(x) = 0x1.8p+1 (RZ) | |
| {0x63d0U, 0x4200U, 0U, 0U, 0U}, | |
| // x = 0x1.388p+13, log10f16(x) = 0x1p+2 (RZ) | |
| {0x70e2U, 0x4400U, 0U, 0U, 0U}, | |
| // x = 0x1.674p+13, log10f16(x) = 0x1.03cp+2 (RZ) | |
| {0x719dU, 0x440fU, 1U, 0U, 0U}, | |
| }}; | |
|
|
||
| // Computes log10(x) for a half-precision x. Special inputs and the entries of | |
| // LOG10F16_EXCEPTS are answered directly; everything else goes through a | |
| // table-plus-polynomial evaluation carried out in single precision. | |
| LLVM_LIBC_FUNCTION(float16, log10f16, (float16 x)) { | |
|   using FPBits = fputil::FPBits<float16>; | |
|   FPBits x_bits(x); | |
|   uint16_t x_u = x_bits.uintval(); | |
|   // Special inputs: +0, 1 (0x3c00), and every bit pattern at or above +inf | |
|   // (0x7c00), i.e. +inf, NaNs, and all values with the sign bit set. | |
|   const bool is_special = x_u == 0U || x_u == 0x3c00U || x_u >= 0x7c00U; | |
|   if (LIBC_UNLIKELY(is_special)) { | |
|     if (x_bits.is_nan()) { | |
|       // log10(NaN) = NaN: a quiet NaN is passed through unchanged, while a | |
|       // signaling NaN raises FE_INVALID and yields a quiet NaN. | |
|       if (!x_bits.is_signaling_nan()) | |
|         return x; | |
|       fputil::raise_except_if_required(FE_INVALID); | |
|       return FPBits::quiet_nan().get_val(); | |
|     } | |
|     // log10(+/-0) = -inf | |
|     if (x_u == 0U || x_u == 0x8000U) { | |
|       fputil::raise_except_if_required(FE_DIVBYZERO); | |
|       return FPBits::inf(Sign::NEG).get_val(); | |
|     } | |
|     // log10(1) = +0 | |
|     if (x_u == 0x3c00U) | |
|       return FPBits::zero().get_val(); | |
|     // log10(+inf) = +inf | |
|     if (x_u == 0x7c00U) | |
|       return FPBits::inf().get_val(); | |
|     // Only x < 0 (including -inf) is left: domain error. | |
|     fputil::set_errno_if_required(EDOM); | |
|     fputil::raise_except_if_required(FE_INVALID); | |
|     return FPBits::quiet_nan().get_val(); | |
|   } | |
|   // Inputs listed in the exception table are answered from the table. | |
|   auto except_result = LOG10F16_EXCEPTS.lookup(x_u); | |
|   if (LIBC_UNLIKELY(except_result.has_value())) | |
|     return except_result.value(); | |
|   // Range reduction: | |
|   //   x = 2^m * 1.mant, | |
|   //   log10(x) = m * log10(2) + log10(1.mant). | |
|   // Let f be the highest 6 bits of 1.mant including the hidden bit, and d be | |
|   // the difference (1.mant - f), i.e., the remaining 5 mantissa bits. Then: | |
|   //   log10(1.mant) = log10(f) + log10(1.mant / f) | |
|   //                 = log10(f) + log10(1 + d/f) | |
|   // where d/f is sufficiently small. log10(f) and 1/f are read from the | |
|   // lookup tables LOG10F_F and ONE_OVER_F_F respectively. | |
|   // | |
|   // A subnormal x is first scaled by 2^FRACTION_LEN so that it becomes normal; | |
|   // the same amount is then taken back out of the exponent m. | |
|   int exponent_adjust = 0; | |
|   if ((x_u & FPBits::EXP_MASK) == 0U) { | |
|     // Can't pass an integer to fputil::cast directly. | |
|     constexpr float NORMALIZE_EXP = 1U << FPBits::FRACTION_LEN; | |
|     x_bits = FPBits(x_bits.get_val() * fputil::cast<float16>(NORMALIZE_EXP)); | |
|     x_u = x_bits.uintval(); | |
|     exponent_adjust = FPBits::FRACTION_LEN; | |
|   } | |
|   // Table index: the leading 10 - 5 = 5 bits of the mantissa. | |
|   const int f = x_bits.get_mantissa() >> 5; | |
|   // Unbiased exponent, corrected for the subnormal scaling above. | |
|   const int m = static_cast<int>(x_u >> FPBits::FRACTION_LEN) - | |
|                 FPBits::EXP_BIAS - exponent_adjust; | |
|   // Force the exponent so the bits hold 1.mant instead of 2^m * 1.mant. | |
|   x_bits.set_biased_exponent(FPBits::EXP_BIAS); | |
|   const float mant_f = x_bits.get_val(); | |
|   // v = 1.mant * 1/f - 1 = d/f | |
|   const float v = fputil::multiply_add(mant_f, ONE_OVER_F_F[f], -1.0f); | |
|   // Degree-3 minimax polynomial generated by Sollya with the following | |
|   // commands: | |
|   //   > display = hexadecimal; | |
|   //   > P = fpminimax(log10(1 + x)/x, 2, [|SG...|], [-2^-5, 2^-5]); | |
|   //   > x * P; | |
|   const float log10p1_v = | |
|       v * fputil::polyeval(v, 0x1.bcb7bp-2f, -0x1.bce168p-3f, 0x1.28acb8p-3f); | |
|   // log10(1.mant) = log10(f) + log10(1 + d/f) | |
|   const float log10_mantissa = LOG10F_F[f] + log10p1_v; | |
|   // log10(x) = m * log10(2) + log10(1.mant), rounded to half precision. | |
|   return fputil::cast<float16>( | |
|       fputil::multiply_add(static_cast<float>(m), LOG10F_2, log10_mantissa)); | |
| } | |
|
|
||
| } // namespace LIBC_NAMESPACE_DECL |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,149 @@ | ||
| //===-- Half-precision log2(x) function -----------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "src/math/log2f16.h" | ||
| #include "expxf16.h" | ||
| #include "hdr/errno_macros.h" | ||
| #include "hdr/fenv_macros.h" | ||
| #include "src/__support/FPUtil/FEnvImpl.h" | ||
| #include "src/__support/FPUtil/FPBits.h" | ||
| #include "src/__support/FPUtil/PolyEval.h" | ||
| #include "src/__support/FPUtil/cast.h" | ||
| #include "src/__support/FPUtil/except_value_utils.h" | ||
| #include "src/__support/FPUtil/multiply_add.h" | ||
| #include "src/__support/common.h" | ||
| #include "src/__support/macros/config.h" | ||
| #include "src/__support/macros/optimization.h" | ||
| #include "src/__support/macros/properties/cpu_features.h" | ||
|
|
||
| namespace LIBC_NAMESPACE_DECL { | ||
|
|
||
| // Number of rows in LOG2F16_EXCEPTS. It must equal the number of rows that | |
| // survive preprocessing below: 2 when LIBC_TARGET_CPU_HAS_FMA is defined, and | |
| // 7 more (9 in total) when it is not. | |
| #ifdef LIBC_TARGET_CPU_HAS_FMA | |
| static constexpr size_t N_LOG2F16_EXCEPTS = 2; | |
| #else | |
| static constexpr size_t N_LOG2F16_EXCEPTS = 9; | |
| #endif | |
|
|
||
| // Inputs whose results log2f16 returns from this table (via lookup on the raw | |
| // bits of x) instead of computing them. | |
| static constexpr fputil::ExceptValues<float16, N_LOG2F16_EXCEPTS> | |
| LOG2F16_EXCEPTS = {{ | |
| // (input, RZ output, RU offset, RD offset, RN offset) | |
| #ifndef LIBC_TARGET_CPU_HAS_FMA | |
| // x = 0x1.224p-1, log2f16(x) = -0x1.a34p-1 (RZ) | |
| {0x3889U, 0xba8dU, 0U, 1U, 0U}, | |
| // x = 0x1.e34p-1, log2f16(x) = -0x1.558p-4 (RZ) | |
| {0x3b8dU, 0xad56U, 0U, 1U, 0U}, | |
| #endif | |
| // x = 0x1.e8cp-1, log2f16(x) = -0x1.128p-4 (RZ) | |
| {0x3ba3U, 0xac4aU, 0U, 1U, 0U}, | |
| #ifndef LIBC_TARGET_CPU_HAS_FMA | |
| // x = 0x1.f98p-1, log2f16(x) = -0x1.2ep-6 (RZ) | |
| {0x3be6U, 0xa4b8U, 0U, 1U, 0U}, | |
| // x = 0x1.facp-1, log2f16(x) = -0x1.e7p-7 (RZ) | |
| {0x3bebU, 0xa39cU, 0U, 1U, 1U}, | |
| #endif | |
| // x = 0x1.fb4p-1, log2f16(x) = -0x1.b88p-7 (RZ) | |
| {0x3bedU, 0xa2e2U, 0U, 1U, 1U}, | |
| #ifndef LIBC_TARGET_CPU_HAS_FMA | |
| // x = 0x1.fecp-1, log2f16(x) = -0x1.cep-9 (RZ) | |
| {0x3bfbU, 0x9b38U, 0U, 1U, 1U}, | |
| // x = 0x1.ffcp-1, log2f16(x) = -0x1.714p-11 (RZ) | |
| {0x3bffU, 0x91c5U, 0U, 1U, 1U}, | |
| // x = 0x1.224p+0, log2f16(x) = 0x1.72cp-3 (RZ) | |
| {0x3c89U, 0x31cbU, 1U, 0U, 1U}, | |
| #endif | |
| }}; | |
|
|
||
| // Computes log2(x) for a half-precision x. Special inputs and the entries of | |
| // LOG2F16_EXCEPTS are answered directly; everything else goes through a | |
| // table-plus-polynomial evaluation carried out in single precision. | |
| LLVM_LIBC_FUNCTION(float16, log2f16, (float16 x)) { | |
|   using FPBits = fputil::FPBits<float16>; | |
|   FPBits x_bits(x); | |
|   uint16_t x_u = x_bits.uintval(); | |
|   // Special inputs: +0, 1 (0x3c00), and every bit pattern at or above +inf | |
|   // (0x7c00), i.e. +inf, NaNs, and all values with the sign bit set. | |
|   const bool is_special = x_u == 0U || x_u == 0x3c00U || x_u >= 0x7c00U; | |
|   if (LIBC_UNLIKELY(is_special)) { | |
|     if (x_bits.is_nan()) { | |
|       // log2(NaN) = NaN: a quiet NaN is passed through unchanged, while a | |
|       // signaling NaN raises FE_INVALID and yields a quiet NaN. | |
|       if (!x_bits.is_signaling_nan()) | |
|         return x; | |
|       fputil::raise_except_if_required(FE_INVALID); | |
|       return FPBits::quiet_nan().get_val(); | |
|     } | |
|     // log2(+/-0) = -inf | |
|     if (x_u == 0U || x_u == 0x8000U) { | |
|       fputil::raise_except_if_required(FE_DIVBYZERO); | |
|       return FPBits::inf(Sign::NEG).get_val(); | |
|     } | |
|     // log2(1) = +0 | |
|     if (x_u == 0x3c00U) | |
|       return FPBits::zero().get_val(); | |
|     // log2(+inf) = +inf | |
|     if (x_u == 0x7c00U) | |
|       return FPBits::inf().get_val(); | |
|     // Only x < 0 (including -inf) is left: domain error. | |
|     fputil::set_errno_if_required(EDOM); | |
|     fputil::raise_except_if_required(FE_INVALID); | |
|     return FPBits::quiet_nan().get_val(); | |
|   } | |
|   // Inputs listed in the exception table are answered from the table. | |
|   auto except_result = LOG2F16_EXCEPTS.lookup(x_u); | |
|   if (LIBC_UNLIKELY(except_result.has_value())) | |
|     return except_result.value(); | |
|   // Range reduction: | |
|   //   x = 2^m * 1.mant, | |
|   //   log2(x) = m + log2(1.mant). | |
|   // Let f be the highest 6 bits of 1.mant including the hidden bit, and d be | |
|   // the difference (1.mant - f), i.e., the remaining 5 mantissa bits. Then: | |
|   //   log2(1.mant) = log2(f) + log2(1.mant / f) | |
|   //                = log2(f) + log2(1 + d/f) | |
|   // where d/f is sufficiently small. log2(f) and 1/f are read from the lookup | |
|   // tables LOG2F_F and ONE_OVER_F_F respectively. | |
|   // | |
|   // A subnormal x is first scaled by 2^FRACTION_LEN so that it becomes normal; | |
|   // the same amount is then taken back out of the exponent m. | |
|   int exponent_adjust = 0; | |
|   if ((x_u & FPBits::EXP_MASK) == 0U) { | |
|     // Can't pass an integer to fputil::cast directly. | |
|     constexpr float NORMALIZE_EXP = 1U << FPBits::FRACTION_LEN; | |
|     x_bits = FPBits(x_bits.get_val() * fputil::cast<float16>(NORMALIZE_EXP)); | |
|     x_u = x_bits.uintval(); | |
|     exponent_adjust = FPBits::FRACTION_LEN; | |
|   } | |
|   // Table index: the leading 10 - 5 = 5 bits of the mantissa. | |
|   const int f = x_bits.get_mantissa() >> 5; | |
|   // Unbiased exponent, corrected for the subnormal scaling above. | |
|   const int m = static_cast<int>(x_u >> FPBits::FRACTION_LEN) - | |
|                 FPBits::EXP_BIAS - exponent_adjust; | |
|   // Force the exponent so the bits hold 1.mant instead of 2^m * 1.mant. | |
|   x_bits.set_biased_exponent(FPBits::EXP_BIAS); | |
|   const float mant_f = x_bits.get_val(); | |
|   // v = 1.mant * 1/f - 1 = d/f | |
|   const float v = fputil::multiply_add(mant_f, ONE_OVER_F_F[f], -1.0f); | |
|   // Degree-3 minimax polynomial generated by Sollya with the following | |
|   // commands: | |
|   //   > display = hexadecimal; | |
|   //   > P = fpminimax(log2(1 + x)/x, 2, [|SG...|], [-2^-5, 2^-5]); | |
|   //   > x * P; | |
|   const float log2p1_v = | |
|       v * fputil::polyeval(v, 0x1.715476p+0f, -0x1.71771ap-1f, 0x1.ecb38ep-2f); | |
|   // log2(1.mant) = log2(f) + log2(1 + d/f) | |
|   const float log2_mantissa = LOG2F_F[f] + log2p1_v; | |
|   // log2(x) = m + log2(1.mant), rounded to half precision. | |
|   return fputil::cast<float16>(static_cast<float>(m) + log2_mantissa); | |
| } | |
|
|
||
| } // namespace LIBC_NAMESPACE_DECL |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| //===-- Implementation header for log10f16 ----------------------*- C++ -*-===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_LIBC_SRC_MATH_LOG10F16_H | ||
| #define LLVM_LIBC_SRC_MATH_LOG10F16_H | ||
|
|
||
| #include "src/__support/macros/config.h" | ||
| #include "src/__support/macros/properties/types.h" | ||
|
|
||
| namespace LIBC_NAMESPACE_DECL { | ||
|
|
||
| // Half-precision base-10 logarithm of x. | |
| float16 log10f16(float16 x); | |
|
|
||
| } // namespace LIBC_NAMESPACE_DECL | ||
|
|
||
| #endif // LLVM_LIBC_SRC_MATH_LOG10F16_H |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| //===-- Implementation header for log2f16 -----------------------*- C++ -*-===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_LIBC_SRC_MATH_LOG2F16_H | ||
| #define LLVM_LIBC_SRC_MATH_LOG2F16_H | ||
|
|
||
| #include "src/__support/macros/config.h" | ||
| #include "src/__support/macros/properties/types.h" | ||
|
|
||
| namespace LIBC_NAMESPACE_DECL { | ||
|
|
||
| // Half-precision base-2 logarithm of x. | |
| float16 log2f16(float16 x); | |
|
|
||
| } // namespace LIBC_NAMESPACE_DECL | ||
|
|
||
| #endif // LLVM_LIBC_SRC_MATH_LOG2F16_H |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| //===-- Exhaustive test for log10f16 --------------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "src/math/log10f16.h" | ||
| #include "test/UnitTest/FPMatcher.h" | ||
| #include "test/UnitTest/Test.h" | ||
| #include "utils/MPFRWrapper/MPFRUtils.h" | ||
|
|
||
| using LlvmLibcLog10f16Test = LIBC_NAMESPACE::testing::FPTest<float16>; | ||
|
|
||
| namespace mpfr = LIBC_NAMESPACE::testing::mpfr; | ||
|
|
||
| // Bit patterns covering the range [0, Inf]. | |
| static constexpr uint16_t POS_START = 0x0000U; | |
| static constexpr uint16_t POS_STOP = 0x7c00U; | |
| // Bit patterns covering the range [-Inf, 0]. | |
| static constexpr uint16_t NEG_START = 0x8000U; | |
| static constexpr uint16_t NEG_STOP = 0xfc00U; | |
| // Checks log10f16 against MPFR for every bit pattern in | |
| // [POS_START, POS_STOP], under all rounding modes, with a 0.5 ULP tolerance. | |
| TEST_F(LlvmLibcLog10f16Test, PositiveRange) { | |
|   uint16_t bits = POS_START; | |
|   while (bits <= POS_STOP) { | |
|     float16 x = FPBits(bits).get_val(); | |
|     EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log10, x, | |
|                                    LIBC_NAMESPACE::log10f16(x), 0.5); | |
|     ++bits; | |
|   } | |
| } | |
| // Same check for every bit pattern in [NEG_START, NEG_STOP]. | |
| TEST_F(LlvmLibcLog10f16Test, NegativeRange) { | |
|   uint16_t bits = NEG_START; | |
|   while (bits <= NEG_STOP) { | |
|     float16 x = FPBits(bits).get_val(); | |
|     EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log10, x, | |
|                                    LIBC_NAMESPACE::log10f16(x), 0.5); | |
|     ++bits; | |
|   } | |
| } | |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| //===-- Exhaustive test for log2f16 ---------------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "src/math/log2f16.h" | ||
| #include "test/UnitTest/FPMatcher.h" | ||
| #include "test/UnitTest/Test.h" | ||
| #include "utils/MPFRWrapper/MPFRUtils.h" | ||
|
|
||
| using LlvmLibcLog2f16Test = LIBC_NAMESPACE::testing::FPTest<float16>; | ||
|
|
||
| namespace mpfr = LIBC_NAMESPACE::testing::mpfr; | ||
|
|
||
| // Bit patterns covering the range [0, Inf]. | |
| static constexpr uint16_t POS_START = 0x0000U; | |
| static constexpr uint16_t POS_STOP = 0x7c00U; | |
| // Bit patterns covering the range [-Inf, 0]. | |
| static constexpr uint16_t NEG_START = 0x8000U; | |
| static constexpr uint16_t NEG_STOP = 0xfc00U; | |
| // Checks log2f16 against MPFR for every bit pattern in | |
| // [POS_START, POS_STOP], under all rounding modes, with a 0.5 ULP tolerance. | |
| TEST_F(LlvmLibcLog2f16Test, PositiveRange) { | |
|   uint16_t bits = POS_START; | |
|   while (bits <= POS_STOP) { | |
|     float16 x = FPBits(bits).get_val(); | |
|     EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log2, x, | |
|                                    LIBC_NAMESPACE::log2f16(x), 0.5); | |
|     ++bits; | |
|   } | |
| } | |
| // Same check for every bit pattern in [NEG_START, NEG_STOP]. | |
| TEST_F(LlvmLibcLog2f16Test, NegativeRange) { | |
|   uint16_t bits = NEG_START; | |
|   while (bits <= NEG_STOP) { | |
|     float16 x = FPBits(bits).get_val(); | |
|     EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log2, x, | |
|                                    LIBC_NAMESPACE::log2f16(x), 0.5); | |
|     ++bits; | |
|   } | |
| } | |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| //===-- Unittests for log10f16 --------------------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "hdr/fenv_macros.h" | ||
| #include "src/__support/FPUtil/cast.h" | ||
| #include "src/errno/libc_errno.h" | ||
| #include "src/math/log10f16.h" | ||
| #include "test/UnitTest/FPMatcher.h" | ||
| #include "test/UnitTest/Test.h" | ||
|
|
||
| using LlvmLibcLog10f16Test = LIBC_NAMESPACE::testing::FPTest<float16>; | ||
|
|
||
| // Checks the results, errno values and floating-point exceptions that | |
| // log10f16 produces for NaNs, infinities, zeros, 1 and -1. | |
| TEST_F(LlvmLibcLog10f16Test, SpecialNumbers) { | |
|   LIBC_NAMESPACE::libc_errno = 0; | |
|   const float16 pos_one = LIBC_NAMESPACE::fputil::cast<float16>(1.0); | |
|   const float16 neg_one = LIBC_NAMESPACE::fputil::cast<float16>(-1.0); | |
|   // NaN inputs: a quiet NaN is returned as is; a signaling NaN raises | |
|   // FE_INVALID. Neither sets errno. | |
|   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::log10f16(aNaN)); | |
|   EXPECT_MATH_ERRNO(0); | |
|   EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::log10f16(sNaN), FE_INVALID); | |
|   EXPECT_MATH_ERRNO(0); | |
|   // Infinities: +inf maps to +inf; -inf is a domain error. | |
|   EXPECT_FP_EQ_ALL_ROUNDING(inf, LIBC_NAMESPACE::log10f16(inf)); | |
|   EXPECT_MATH_ERRNO(0); | |
|   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::log10f16(neg_inf)); | |
|   EXPECT_MATH_ERRNO(EDOM); | |
|   // Zeros of either sign give -inf and raise FE_DIVBYZERO without errno. | |
|   EXPECT_FP_EQ_WITH_EXCEPTION_ALL_ROUNDING( | |
|       neg_inf, LIBC_NAMESPACE::log10f16(zero), FE_DIVBYZERO); | |
|   EXPECT_MATH_ERRNO(0); | |
|   EXPECT_FP_EQ_WITH_EXCEPTION_ALL_ROUNDING( | |
|       neg_inf, LIBC_NAMESPACE::log10f16(neg_zero), FE_DIVBYZERO); | |
|   EXPECT_MATH_ERRNO(0); | |
|   // log10(1) is zero; log10(-1) is a domain error. | |
|   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::log10f16(pos_one)); | |
|   EXPECT_MATH_ERRNO(0); | |
|   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::log10f16(neg_one)); | |
|   EXPECT_MATH_ERRNO(EDOM); | |
| } | |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| //===-- Unittests for log2f16 ---------------------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "hdr/fenv_macros.h" | ||
| #include "src/__support/FPUtil/cast.h" | ||
| #include "src/errno/libc_errno.h" | ||
| #include "src/math/log2f16.h" | ||
| #include "test/UnitTest/FPMatcher.h" | ||
| #include "test/UnitTest/Test.h" | ||
|
|
||
| using LlvmLibcLog2f16Test = LIBC_NAMESPACE::testing::FPTest<float16>; | ||
|
|
||
| // Checks the results, errno values and floating-point exceptions that | |
| // log2f16 produces for NaNs, infinities, zeros, 1 and -1. | |
| TEST_F(LlvmLibcLog2f16Test, SpecialNumbers) { | |
|   LIBC_NAMESPACE::libc_errno = 0; | |
|   const float16 pos_one = LIBC_NAMESPACE::fputil::cast<float16>(1.0); | |
|   const float16 neg_one = LIBC_NAMESPACE::fputil::cast<float16>(-1.0); | |
|   // NaN inputs: a quiet NaN is returned as is; a signaling NaN raises | |
|   // FE_INVALID. Neither sets errno. | |
|   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::log2f16(aNaN)); | |
|   EXPECT_MATH_ERRNO(0); | |
|   EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::log2f16(sNaN), FE_INVALID); | |
|   EXPECT_MATH_ERRNO(0); | |
|   // Infinities: +inf maps to +inf; -inf is a domain error. | |
|   EXPECT_FP_EQ_ALL_ROUNDING(inf, LIBC_NAMESPACE::log2f16(inf)); | |
|   EXPECT_MATH_ERRNO(0); | |
|   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::log2f16(neg_inf)); | |
|   EXPECT_MATH_ERRNO(EDOM); | |
|   // Zeros of either sign give -inf and raise FE_DIVBYZERO without errno. | |
|   EXPECT_FP_EQ_WITH_EXCEPTION_ALL_ROUNDING( | |
|       neg_inf, LIBC_NAMESPACE::log2f16(zero), FE_DIVBYZERO); | |
|   EXPECT_MATH_ERRNO(0); | |
|   EXPECT_FP_EQ_WITH_EXCEPTION_ALL_ROUNDING( | |
|       neg_inf, LIBC_NAMESPACE::log2f16(neg_zero), FE_DIVBYZERO); | |
|   EXPECT_MATH_ERRNO(0); | |
|   // log2(1) is zero; log2(-1) is a domain error. | |
|   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::log2f16(pos_one)); | |
|   EXPECT_MATH_ERRNO(0); | |
|   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::log2f16(neg_one)); | |
|   EXPECT_MATH_ERRNO(EDOM); | |
| } | |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,126 @@ | ||
| //===- Scheduler.h ----------------------------------------------*- C++ -*-===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // This is the bottom-up list scheduler used by the vectorizer. It is used for | ||
| // checking the legality of vectorization and for scheduling instructions in | ||
| // such a way that makes vectorization possible, if legal. | ||
| // | ||
| // The legality check is performed by `trySchedule(Instrs)`, which will try to | ||
| // schedule the IR until all instructions in `Instrs` can be scheduled together | ||
| // back-to-back. If this fails then it is illegal to vectorize `Instrs`. | ||
| // | ||
| // Internally the scheduler uses the vectorizer-specific DependencyGraph class. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SCHEDULER_H | ||
| #define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SCHEDULER_H | ||
|
|
||
| #include "llvm/SandboxIR/Instruction.h" | ||
| #include "llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h" | ||
| #include <queue> | ||
|
|
||
| namespace llvm::sandboxir { | ||
|
|
||
| /// Comparator that orders DAG nodes by the program order of their | |
| /// instructions. Used as the ordering of the ready list's priority queue, | |
| /// where it makes the node whose instruction comes last the top element. | |
| class PriorityCmp { | |
| public: | |
|   /// \Returns true if the instruction of \p N1 comes before the instruction of | |
|   /// \p N2. Const-qualified so that it can also be invoked through a const | |
|   /// comparator object. | |
|   bool operator()(const DGNode *N1, const DGNode *N2) const { | |
|     // TODO: This should be a hierarchical comparator. | |
|     return N1->getInstruction()->comesBefore(N2->getInstruction()); | |
|   } | |
| }; | |
|
|
||
| /// Container for the nodes that are ready to be scheduled. Used by the | |
| /// scheduler, which inserts nodes as they become ready and pops them in the | |
| /// order defined by PriorityCmp. | |
| class ReadyListContainer { | |
|   /// The comparator instance handed to the priority queue below. | |
|   PriorityCmp Cmp; | |
|   /// Control/Other dependencies are not modeled by the DAG to save memory. | |
|   /// These have to be modeled in the ready list for correctness. | |
|   /// This means that the list will hold back nodes that need to meet such | |
|   /// unmodeled dependencies. | |
|   std::priority_queue<DGNode *, std::vector<DGNode *>, PriorityCmp> List; | |
| public: | |
|   ReadyListContainer() : List(Cmp) {} | |
|   /// \Returns true if no node is currently in the list. | |
|   bool empty() const { return List.empty(); } | |
|   /// Adds \p N to the list. | |
|   void insert(DGNode *N) { List.push(N); } | |
|   /// Removes the top-priority node from the list and \returns it. | |
|   DGNode *pop() { | |
|     DGNode *TopN = List.top(); | |
|     List.pop(); | |
|     return TopN; | |
|   } | |
| #ifndef NDEBUG | |
|   void dump(raw_ostream &OS) const; | |
|   LLVM_DUMP_METHOD void dump() const; | |
| #endif // NDEBUG | |
| }; | |
|
|
||
| /// A SchedBundle is the group of nodes that have to be scheduled | |
| /// back-to-back, in a single scheduling cycle. | |
| class SchedBundle { | |
| public: | |
|   using ContainerTy = SmallVector<DGNode *, 4>; | |
|   using iterator = ContainerTy::iterator; | |
|   using const_iterator = ContainerTy::const_iterator; | |
| private: | |
|   /// The nodes of the bundle, in the order they were handed to the | |
|   /// constructor. | |
|   ContainerTy Nodes; | |
| public: | |
|   SchedBundle() = default; | |
|   SchedBundle(ContainerTy &&Nodes) : Nodes(std::move(Nodes)) {} | |
|   // Iteration over the bundle's nodes. | |
|   iterator begin() { return Nodes.begin(); } | |
|   const_iterator begin() const { return Nodes.begin(); } | |
|   iterator end() { return Nodes.end(); } | |
|   const_iterator end() const { return Nodes.end(); } | |
|   /// \Returns the bundle node that comes before the others in program order. | |
|   DGNode *getTop() const; | |
|   /// \Returns the bundle node that comes after the others in program order. | |
|   DGNode *getBot() const; | |
|   /// Move all bundle instructions to \p Where back-to-back. | |
|   void cluster(BasicBlock::iterator Where); | |
| #ifndef NDEBUG | |
|   void dump(raw_ostream &OS) const; | |
|   LLVM_DUMP_METHOD void dump() const; | |
| #endif | |
| }; | |
|
|
||
| /// The bottom-up list scheduler. | |
| class Scheduler { | |
|   /// Nodes that are ready to be scheduled. | |
|   ReadyListContainer ReadyList; | |
|   /// The dependency graph the scheduler operates on. | |
|   DependencyGraph DAG; | |
|   /// The position at which the next bundle gets placed. It is set by | |
|   /// trySchedule() and updated each time a bundle is scheduled. | |
|   std::optional<BasicBlock::iterator> ScheduleTopItOpt; | |
|   /// Owns every bundle created by createBundle(). | |
|   SmallVector<std::unique_ptr<SchedBundle>> Bndls; | |
|   /// Not copyable. | |
|   Scheduler(const Scheduler &) = delete; | |
|   Scheduler &operator=(const Scheduler &) = delete; | |
|   /// \Returns a scheduling bundle containing \p Instrs. | |
|   SchedBundle *createBundle(ArrayRef<Instruction *> Instrs); | |
|   /// Schedules all nodes in \p Bndl, marks them as scheduled, updates the | |
|   /// UnscheduledSuccs counter of all dependency predecessors, and adds any of | |
|   /// them that become ready to the ready list. | |
|   void scheduleAndUpdateReadyList(SchedBundle &Bndl); | |
|   /// Schedule nodes until we can schedule \p Instrs back-to-back. | |
|   bool tryScheduleUntil(ArrayRef<Instruction *> Instrs); | |
| public: | |
|   Scheduler(AAResults &AA) : DAG(AA) {} | |
|   ~Scheduler() {} | |
|   /// Tries to schedule the IR until all of \p Instrs can be scheduled together | |
|   /// back-to-back. \Returns true on success. | |
|   bool trySchedule(ArrayRef<Instruction *> Instrs); | |
| #ifndef NDEBUG | |
|   void dump(raw_ostream &OS) const; | |
|   LLVM_DUMP_METHOD void dump() const; | |
| #endif | |
| }; | |
|
|
||
| } // namespace llvm::sandboxir | ||
|
|
||
| #endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SCHEDULER_H |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,169 @@ | ||
| //===- Scheduler.cpp ------------------------------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h" | ||
|
|
||
| namespace llvm::sandboxir { | ||
|
|
||
| // TODO: Check if we can cache top/bottom to reduce compile-time. | |
| // Linear scan for the node whose instruction comes before those of all the | |
| // other nodes in the bundle. | |
| DGNode *SchedBundle::getTop() const { | |
|   DGNode *Earliest = Nodes.front(); | |
|   for (auto It = std::next(Nodes.begin()), ItE = Nodes.end(); It != ItE; ++It) { | |
|     DGNode *Candidate = *It; | |
|     if (Candidate->getInstruction()->comesBefore(Earliest->getInstruction())) | |
|       Earliest = Candidate; | |
|   } | |
|   return Earliest; | |
| } | |
|
|
||
| // Linear scan for the node whose instruction comes after those of all the | |
| // other nodes in the bundle. | |
| DGNode *SchedBundle::getBot() const { | |
|   DGNode *Latest = Nodes.front(); | |
|   for (auto It = std::next(Nodes.begin()), ItE = Nodes.end(); It != ItE; ++It) { | |
|     DGNode *Candidate = *It; | |
|     if (Latest->getInstruction()->comesBefore(Candidate->getInstruction())) | |
|       Latest = Candidate; | |
|   } | |
|   return Latest; | |
| } | |
|
|
||
| // Moves the instruction of every bundle node, one after the other, to just | |
| // before `Where`. | |
| void SchedBundle::cluster(BasicBlock::iterator Where) { | |
|   for (DGNode *Node : Nodes) { | |
|     Instruction *Instr = Node->getInstruction(); | |
|     // When the instruction is itself the insertion point, step the insertion | |
|     // point past it first. This tries to maintain the bundle order. | |
|     if (Where == Instr->getIterator()) | |
|       ++Where; | |
|     auto *ParentBB = Where.getNodeParent(); | |
|     Instr->moveBefore(*ParentBB, Where); | |
|   } | |
| } | |
|
|
||
| #ifndef NDEBUG | |
| // Prints every node of the bundle to `OS`, in container order. | |
| void SchedBundle::dump(raw_ostream &OS) const { | |
|   for (auto It = Nodes.begin(), ItE = Nodes.end(); It != ItE; ++It) | |
|     OS << **It; | |
| } | |
| // Prints the bundle, followed by a newline, to the debug stream. | |
| void SchedBundle::dump() const { | |
|   raw_ostream &DbgOS = dbgs(); | |
|   dump(DbgOS); | |
|   dbgs() << "\n"; | |
| } | |
| #endif // NDEBUG | |
|
|
||
| #ifndef NDEBUG | |
| // Prints the ready nodes to `OS`, one per line, in pop order. This works on a | |
| // copy of the queue, so the ready list itself is left untouched. | |
| void ReadyListContainer::dump(raw_ostream &OS) const { | |
|   for (auto Remaining = List; !Remaining.empty(); Remaining.pop()) | |
|     OS << *Remaining.top() << "\n"; | |
| } | |
| // Prints the ready list, followed by a newline, to the debug stream. | |
| void ReadyListContainer::dump() const { | |
|   raw_ostream &DbgOS = dbgs(); | |
|   dump(DbgOS); | |
|   dbgs() << "\n"; | |
| } | |
| #endif // NDEBUG | |
|
|
||
| // Places `Bndl` at the current top of the schedule, marks its nodes as | |
| // scheduled and feeds newly ready predecessors into the ready list. | |
| void Scheduler::scheduleAndUpdateReadyList(SchedBundle &Bndl) { | |
|   assert(ScheduleTopItOpt && "Should have been set by now!"); | |
|   // Move all instructions in `Bndl` to the current top of the schedule. | |
|   Bndl.cluster(*ScheduleTopItOpt); | |
|   // The top instruction of the bundle we just placed becomes the new top of | |
|   // the schedule. | |
|   ScheduleTopItOpt = Bndl.getTop()->getInstruction()->getIterator(); | |
|   // Mark each node as "scheduled" and decrement the UnscheduledSuccs counter | |
|   // of all its dependency predecessors. | |
|   for (DGNode *SchedN : Bndl) { | |
|     SchedN->setScheduled(true); | |
|     for (auto *PredN : SchedN->preds(DAG)) { | |
|       // TODO: preds() should not return nullptr. | |
|       if (PredN != nullptr) { | |
|         PredN->decrUnscheduledSuccs(); | |
|         // A predecessor that became ready can now be picked up. | |
|         if (PredN->ready()) | |
|           ReadyList.insert(PredN); | |
|       } | |
|     } | |
|   } | |
| } | |
|
|
||
| // Builds a bundle out of the DAG nodes of `Instrs`, stores it in `Bndls` | |
| // (which owns it) and returns a non-owning pointer to it. | |
| SchedBundle *Scheduler::createBundle(ArrayRef<Instruction *> Instrs) { | |
|   SchedBundle::ContainerTy BundleNodes; | |
|   BundleNodes.reserve(Instrs.size()); | |
|   for (Instruction *Instr : Instrs) { | |
|     DGNode *Node = DAG.getNode(Instr); | |
|     BundleNodes.push_back(Node); | |
|   } | |
|   Bndls.push_back(std::make_unique<SchedBundle>(std::move(BundleNodes))); | |
|   return Bndls.back().get(); | |
| } | |
|
|
||
| // Pops and schedules ready nodes until all nodes of `Instrs` are ready, at | |
| // which point they get scheduled together as one bundle and true is returned. | |
| // Returns false if the ready list runs empty before that happens. | |
| bool Scheduler::tryScheduleUntil(ArrayRef<Instruction *> Instrs) { | |
|   // A set gives fast membership checks for the instructions of `Instrs`. | |
|   DenseSet<Instruction *> InstrsToDefer(Instrs.begin(), Instrs.end()); | |
|   // The nodes of `Instrs` that became ready so far. These are held back | |
|   // instead of being scheduled right away. | |
|   SmallVector<DGNode *, 8> DeferredNodes; | |
|   while (!ReadyList.empty()) { | |
|     DGNode *ReadyN = ReadyList.pop(); | |
|     Instruction *ReadyI = ReadyN->getInstruction(); | |
|     if (!InstrsToDefer.contains(ReadyI)) { | |
|       // Not one of `Instrs`: wrap it in its own scheduling bundle and | |
|       // schedule it right away. | |
|       scheduleAndUpdateReadyList(*createBundle({ReadyI})); | |
|       continue; | |
|     } | |
|     // One of `Instrs`: hold it back until it can be scheduled along with the | |
|     // rest of `Instrs`, at the same time in a single scheduling bundle. | |
|     DeferredNodes.push_back(ReadyN); | |
|     if (DeferredNodes.size() != Instrs.size()) | |
|       continue; | |
|     // All of `Instrs` are ready now. | |
|     scheduleAndUpdateReadyList(*createBundle(Instrs)); | |
|     return true; | |
|   } | |
|   // The ready list ran empty before all of `Instrs` became ready. | |
|   assert(DeferredNodes.size() != Instrs.size() && | |
|          "We should have succesfully scheduled and early-returned!"); | |
|   return false; | |
| } | |
|
|
||
| bool Scheduler::trySchedule(ArrayRef<Instruction *> Instrs) { | ||
| assert(all_of(drop_begin(Instrs), | ||
| [Instrs](Instruction *I) { | ||
| return I->getParent() == (*Instrs.begin())->getParent(); | ||
| }) && | ||
| "Instrs not in the same BB!"); | ||
| // Extend the DAG to include Instrs. | ||
| Interval<Instruction> Extension = DAG.extend(Instrs); | ||
| // TODO: Set the window of the DAG that we are interested in. | ||
| // We start scheduling at the bottom instr of Instrs. | ||
| auto getBottomI = [](ArrayRef<Instruction *> Instrs) -> Instruction * { | ||
| return *min_element(Instrs, | ||
| [](auto *I1, auto *I2) { return I1->comesBefore(I2); }); | ||
| }; | ||
| ScheduleTopItOpt = std::next(getBottomI(Instrs)->getIterator()); | ||
| // Add nodes to ready list. | ||
| for (auto &I : Extension) { | ||
| auto *N = DAG.getNode(&I); | ||
| if (N->ready()) | ||
| ReadyList.insert(N); | ||
| } | ||
| // Try schedule all nodes until we can schedule Instrs back-to-back. | ||
| return tryScheduleUntil(Instrs); | ||
| } | ||
|
|
||
| #ifndef NDEBUG | ||
| void Scheduler::dump(raw_ostream &OS) const { | ||
| OS << "ReadyList:\n"; | ||
| ReadyList.dump(OS); | ||
| } | ||
| void Scheduler::dump() const { dump(dbgs()); } | ||
| #endif // NDEBUG | ||
|
|
||
| } // namespace llvm::sandboxir |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s | ||
|
|
||
; The input below contains two scalar chains with the same shape, one on
; %n + 1 and one on %n + 2: add -> zext nneg -> mul nuw nsw -> llvm.abs ->
; trunc, whose results are summed. The expected output is a single chain of
; <2 x i32> / <2 x i64> operations followed by two extractelements and the
; final scalar add. Note that the expected vector zext carries no nneg flag.
| define i32 @test(i32 %n) { | ||
| ; CHECK-LABEL: define i32 @test( | ||
| ; CHECK-SAME: i32 [[N:%.*]]) { | ||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[N]], i32 0 | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 1, i32 2> | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> | ||
| ; CHECK-NEXT: [[TMP7:%.*]] = mul nuw nsw <2 x i64> [[TMP3]], <i64 273837369, i64 273837369> | ||
| ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP7]], i1 true) | ||
| ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP8]] to <2 x i32> | ||
| ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0 | ||
| ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1 | ||
| ; CHECK-NEXT: [[RES1:%.*]] = add i32 [[TMP5]], [[TMP6]] | ||
| ; CHECK-NEXT: ret i32 [[RES1]] | ||
| ; | ||
| entry: | ||
; First lane: chain on %n + 1.
| %n1 = add i32 %n, 1 | ||
| %zn1 = zext nneg i32 %n1 to i64 | ||
| %m1 = mul nuw nsw i64 %zn1, 273837369 | ||
| %a1 = call i64 @llvm.abs.i64(i64 %m1, i1 true) | ||
| %t1 = trunc i64 %a1 to i32 | ||
; Second lane: the same chain on %n + 2.
| %n2 = add i32 %n, 2 | ||
| %zn2 = zext nneg i32 %n2 to i64 | ||
| %m2 = mul nuw nsw i64 %zn2, 273837369 | ||
| %a2 = call i64 @llvm.abs.i64(i64 %m2, i1 true) | ||
| %t2 = trunc i64 %a2 to i32 
| %res1 = add i32 %t1, %t2 | ||
| ret i32 %res1 | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,204 @@ | ||
| //===- SchedulerTest.cpp --------------------------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h" | ||
| #include "llvm/ADT/SmallVector.h" | ||
| #include "llvm/Analysis/AliasAnalysis.h" | ||
| #include "llvm/Analysis/AssumptionCache.h" | ||
| #include "llvm/Analysis/BasicAliasAnalysis.h" | ||
| #include "llvm/Analysis/TargetLibraryInfo.h" | ||
| #include "llvm/AsmParser/Parser.h" | ||
| #include "llvm/IR/Dominators.h" | ||
| #include "llvm/SandboxIR/Context.h" | ||
| #include "llvm/SandboxIR/Function.h" | ||
| #include "llvm/SandboxIR/Instruction.h" | ||
| #include "llvm/Support/SourceMgr.h" | ||
| #include "gmock/gmock-matchers.h" | ||
| #include "gtest/gtest.h" | ||
|
|
||
| using namespace llvm; | ||
|
|
||
| struct SchedulerTest : public testing::Test { | ||
| LLVMContext C; | ||
| std::unique_ptr<Module> M; | ||
| std::unique_ptr<AssumptionCache> AC; | ||
| std::unique_ptr<DominatorTree> DT; | ||
| std::unique_ptr<BasicAAResult> BAA; | ||
| std::unique_ptr<AAResults> AA; | ||
|
|
||
| void parseIR(LLVMContext &C, const char *IR) { | ||
| SMDiagnostic Err; | ||
| M = parseAssemblyString(IR, Err, C); | ||
| if (!M) | ||
| Err.print("SchedulerTest", errs()); | ||
| } | ||
|
|
||
| AAResults &getAA(llvm::Function &LLVMF) { | ||
| TargetLibraryInfoImpl TLII; | ||
| TargetLibraryInfo TLI(TLII); | ||
| AA = std::make_unique<AAResults>(TLI); | ||
| AC = std::make_unique<AssumptionCache>(LLVMF); | ||
| DT = std::make_unique<DominatorTree>(LLVMF); | ||
| BAA = std::make_unique<BasicAAResult>(M->getDataLayout(), LLVMF, TLI, *AC, | ||
| DT.get()); | ||
| AA->addAAResult(*BAA); | ||
| return *AA; | ||
| } | ||
| }; | ||
|
|
||
| TEST_F(SchedulerTest, SchedBundle) { | ||
| parseIR(C, R"IR( | ||
| define void @foo(ptr %ptr, i8 %v0, i8 %v1) { | ||
| store i8 %v0, ptr %ptr | ||
| %other = add i8 %v0, %v1 | ||
| store i8 %v1, ptr %ptr | ||
| ret void | ||
| } | ||
| )IR"); | ||
| llvm::Function *LLVMF = &*M->getFunction("foo"); | ||
| sandboxir::Context Ctx(C); | ||
| auto *F = Ctx.createFunction(LLVMF); | ||
| auto *BB = &*F->begin(); | ||
| auto It = BB->begin(); | ||
| auto *S0 = cast<sandboxir::StoreInst>(&*It++); | ||
| auto *Other = &*It++; | ||
| auto *S1 = cast<sandboxir::StoreInst>(&*It++); | ||
| auto *Ret = cast<sandboxir::ReturnInst>(&*It++); | ||
|
|
||
| sandboxir::DependencyGraph DAG(getAA(*LLVMF)); | ||
| DAG.extend({&*BB->begin(), BB->getTerminator()}); | ||
| auto *SN0 = DAG.getNode(S0); | ||
| auto *SN1 = DAG.getNode(S1); | ||
| sandboxir::SchedBundle Bndl({SN0, SN1}); | ||
|
|
||
| // Check getTop(). | ||
| EXPECT_EQ(Bndl.getTop(), SN0); | ||
| // Check getBot(). | ||
| EXPECT_EQ(Bndl.getBot(), SN1); | ||
| // Check cluster(). | ||
| Bndl.cluster(S1->getIterator()); | ||
| { | ||
| auto It = BB->begin(); | ||
| EXPECT_EQ(&*It++, Other); | ||
| EXPECT_EQ(&*It++, S0); | ||
| EXPECT_EQ(&*It++, S1); | ||
| EXPECT_EQ(&*It++, Ret); | ||
| S0->moveBefore(Other); | ||
| } | ||
|
|
||
| Bndl.cluster(S0->getIterator()); | ||
| { | ||
| auto It = BB->begin(); | ||
| EXPECT_EQ(&*It++, S0); | ||
| EXPECT_EQ(&*It++, S1); | ||
| EXPECT_EQ(&*It++, Other); | ||
| EXPECT_EQ(&*It++, Ret); | ||
| S1->moveAfter(Other); | ||
| } | ||
|
|
||
| Bndl.cluster(Other->getIterator()); | ||
| { | ||
| auto It = BB->begin(); | ||
| EXPECT_EQ(&*It++, S0); | ||
| EXPECT_EQ(&*It++, S1); | ||
| EXPECT_EQ(&*It++, Other); | ||
| EXPECT_EQ(&*It++, Ret); | ||
| S1->moveAfter(Other); | ||
| } | ||
|
|
||
| Bndl.cluster(Ret->getIterator()); | ||
| { | ||
| auto It = BB->begin(); | ||
| EXPECT_EQ(&*It++, Other); | ||
| EXPECT_EQ(&*It++, S0); | ||
| EXPECT_EQ(&*It++, S1); | ||
| EXPECT_EQ(&*It++, Ret); | ||
| Other->moveBefore(S1); | ||
| } | ||
|
|
||
| Bndl.cluster(BB->end()); | ||
| { | ||
| auto It = BB->begin(); | ||
| EXPECT_EQ(&*It++, Other); | ||
| EXPECT_EQ(&*It++, Ret); | ||
| EXPECT_EQ(&*It++, S0); | ||
| EXPECT_EQ(&*It++, S1); | ||
| Ret->moveAfter(S1); | ||
| Other->moveAfter(S0); | ||
| } | ||
| // Check iterators. | ||
| EXPECT_THAT(Bndl, testing::ElementsAre(SN0, SN1)); | ||
| EXPECT_THAT((const sandboxir::SchedBundle &)Bndl, | ||
| testing::ElementsAre(SN0, SN1)); | ||
| } | ||
|
|
||
| TEST_F(SchedulerTest, Basic) { | ||
| parseIR(C, R"IR( | ||
| define void @foo(ptr %ptr, i8 %v0, i8 %v1) { | ||
| store i8 %v0, ptr %ptr | ||
| store i8 %v1, ptr %ptr | ||
| ret void | ||
| } | ||
| )IR"); | ||
| llvm::Function *LLVMF = &*M->getFunction("foo"); | ||
| sandboxir::Context Ctx(C); | ||
| auto *F = Ctx.createFunction(LLVMF); | ||
| auto *BB = &*F->begin(); | ||
| auto It = BB->begin(); | ||
| auto *S0 = cast<sandboxir::StoreInst>(&*It++); | ||
| auto *S1 = cast<sandboxir::StoreInst>(&*It++); | ||
| auto *Ret = cast<sandboxir::ReturnInst>(&*It++); | ||
|
|
||
| { | ||
| // Schedule all instructions in sequence. | ||
| sandboxir::Scheduler Sched(getAA(*LLVMF)); | ||
| EXPECT_TRUE(Sched.trySchedule({Ret})); | ||
| EXPECT_TRUE(Sched.trySchedule({S1})); | ||
| EXPECT_TRUE(Sched.trySchedule({S0})); | ||
| } | ||
| { | ||
| // Skip instructions. | ||
| sandboxir::Scheduler Sched(getAA(*LLVMF)); | ||
| EXPECT_TRUE(Sched.trySchedule({Ret})); | ||
| EXPECT_TRUE(Sched.trySchedule({S0})); | ||
| } | ||
| { | ||
| // Try invalid scheduling | ||
| sandboxir::Scheduler Sched(getAA(*LLVMF)); | ||
| EXPECT_TRUE(Sched.trySchedule({Ret})); | ||
| EXPECT_TRUE(Sched.trySchedule({S0})); | ||
| EXPECT_FALSE(Sched.trySchedule({S1})); | ||
| } | ||
| } | ||
|
|
||
| TEST_F(SchedulerTest, Bundles) { | ||
| parseIR(C, R"IR( | ||
| define void @foo(ptr noalias %ptr0, ptr noalias %ptr1) { | ||
| %ld0 = load i8, ptr %ptr0 | ||
| %ld1 = load i8, ptr %ptr1 | ||
| store i8 %ld0, ptr %ptr0 | ||
| store i8 %ld1, ptr %ptr1 | ||
| ret void | ||
| } | ||
| )IR"); | ||
| llvm::Function *LLVMF = &*M->getFunction("foo"); | ||
| sandboxir::Context Ctx(C); | ||
| auto *F = Ctx.createFunction(LLVMF); | ||
| auto *BB = &*F->begin(); | ||
| auto It = BB->begin(); | ||
| auto *L0 = cast<sandboxir::LoadInst>(&*It++); | ||
| auto *L1 = cast<sandboxir::LoadInst>(&*It++); | ||
| auto *S0 = cast<sandboxir::StoreInst>(&*It++); | ||
| auto *S1 = cast<sandboxir::StoreInst>(&*It++); | ||
| auto *Ret = cast<sandboxir::ReturnInst>(&*It++); | ||
|
|
||
| sandboxir::Scheduler Sched(getAA(*LLVMF)); | ||
| EXPECT_TRUE(Sched.trySchedule({Ret})); | ||
| EXPECT_TRUE(Sched.trySchedule({S0, S1})); | ||
| EXPECT_TRUE(Sched.trySchedule({L0, L1})); | ||
| } |