Skip to content

Commit

Permalink
Drop the ZeroBehavior parameter from countLeadingZeros and the like (…
Browse files Browse the repository at this point in the history
…NFC)

This patch drops the ZeroBehavior parameter from bit counting
functions like countLeadingZeros.  ZeroBehavior specifies the behavior
when the input to count{Leading,Trailing}Zeros is zero and when the
input to count{Leading,Trailing}Ones is all ones.

ZeroBehavior was first introduced on May 24, 2013 in commit
eb91eac.  While that patch did not
state the intention, I would guess ZeroBehavior was for performance
reasons.  The x86 machines around that time required a conditional
branch to implement countLeadingZeros<uint32_t> that returns 32 on
zero:

        test    edi, edi
        je      .LBB0_2
        bsr     eax, edi
        xor     eax, 31
        ret
.LBB0_2:
        mov     eax, 32
        ret

That is, we can remove the conditional branch if we don't care about
the behavior on zero.

IIUC, Intel's Haswell architecture, launched on June 4, 2013,
introduced several bit manipulation instructions, including lzcnt and
tzcnt, which eliminated the need for the conditional branch.

I think it's time to retire ZeroBehavior as its utility is very
limited.  If you care about compilation speed, you should build LLVM
with an appropriate -march= to take advantage of lzcnt and tzcnt.
Even if not, modern host compilers should be able to optimize away
quite a few conditional branches because the input is often known to
be nonzero from dominating conditional branches.

Differential Revision: https://reviews.llvm.org/D141798
  • Loading branch information
kazutakahirata committed Jan 19, 2023
1 parent 26f83b4 commit 83d56fb
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 52 deletions.
69 changes: 28 additions & 41 deletions llvm/include/llvm/Support/MathExtras.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,7 @@ enum ZeroBehavior {
/// The returned value is undefined.
ZB_Undefined,
/// The returned value is numeric_limits<T>::max()
ZB_Max,
/// The returned value is numeric_limits<T>::digits
ZB_Width
ZB_Max
};

/// Mathematical constants.
Expand Down Expand Up @@ -84,7 +82,7 @@ constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A

namespace detail {
template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
static unsigned count(T Val, ZeroBehavior) {
static unsigned count(T Val) {
if (!Val)
return std::numeric_limits<T>::digits;
if (Val & 0x1)
Expand All @@ -108,8 +106,8 @@ template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {

#if defined(__GNUC__) || defined(_MSC_VER)
template <typename T> struct TrailingZerosCounter<T, 4> {
static unsigned count(T Val, ZeroBehavior ZB) {
if (ZB != ZB_Undefined && Val == 0)
static unsigned count(T Val) {
if (Val == 0)
return 32;

#if __has_builtin(__builtin_ctz) || defined(__GNUC__)
Expand All @@ -124,8 +122,8 @@ template <typename T> struct TrailingZerosCounter<T, 4> {

#if !defined(_MSC_VER) || defined(_M_X64)
template <typename T> struct TrailingZerosCounter<T, 8> {
static unsigned count(T Val, ZeroBehavior ZB) {
if (ZB != ZB_Undefined && Val == 0)
static unsigned count(T Val) {
if (Val == 0)
return 64;

#if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
Expand All @@ -146,18 +144,16 @@ template <typename T> struct TrailingZerosCounter<T, 8> {
///
/// Only unsigned integral types are allowed.
///
/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
/// valid arguments.
template <typename T>
unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
/// Returns std::numeric_limits<T>::digits on an input of 0.
template <typename T> unsigned countTrailingZeros(T Val) {
static_assert(std::is_unsigned_v<T>,
"Only unsigned integral types are allowed.");
return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val);
}

namespace detail {
template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
static unsigned count(T Val, ZeroBehavior) {
static unsigned count(T Val) {
if (!Val)
return std::numeric_limits<T>::digits;

Expand All @@ -176,8 +172,8 @@ template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {

#if defined(__GNUC__) || defined(_MSC_VER)
template <typename T> struct LeadingZerosCounter<T, 4> {
static unsigned count(T Val, ZeroBehavior ZB) {
if (ZB != ZB_Undefined && Val == 0)
static unsigned count(T Val) {
if (Val == 0)
return 32;

#if __has_builtin(__builtin_clz) || defined(__GNUC__)
Expand All @@ -192,8 +188,8 @@ template <typename T> struct LeadingZerosCounter<T, 4> {

#if !defined(_MSC_VER) || defined(_M_X64)
template <typename T> struct LeadingZerosCounter<T, 8> {
static unsigned count(T Val, ZeroBehavior ZB) {
if (ZB != ZB_Undefined && Val == 0)
static unsigned count(T Val) {
if (Val == 0)
return 64;

#if __has_builtin(__builtin_clzll) || defined(__GNUC__)
Expand All @@ -214,27 +210,24 @@ template <typename T> struct LeadingZerosCounter<T, 8> {
///
/// Only unsigned integral types are allowed.
///
/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
/// valid arguments.
template <typename T>
unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
/// Returns std::numeric_limits<T>::digits on an input of 0.
template <typename T> unsigned countLeadingZeros(T Val) {
static_assert(std::is_unsigned_v<T>,
"Only unsigned integral types are allowed.");
return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val);
}

/// Get the index of the first set bit starting from the least
/// significant bit.
///
/// Only unsigned integral types are allowed.
///
/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
/// valid arguments.
/// \param ZB the behavior on an input of 0.
template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
if (ZB == ZB_Max && Val == 0)
return std::numeric_limits<T>::max();

return countTrailingZeros(Val, ZB_Undefined);
return countTrailingZeros(Val);
}

/// Create a bitmask with the N right-most bits set to 1, and all other
Expand Down Expand Up @@ -269,16 +262,14 @@ template <typename T> T maskLeadingZeros(unsigned N) {
///
/// Only unsigned integral types are allowed.
///
/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
/// valid arguments.
/// \param ZB the behavior on an input of 0.
template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
if (ZB == ZB_Max && Val == 0)
return std::numeric_limits<T>::max();

// Use ^ instead of - because both gcc and llvm can remove the associated ^
// in the __builtin_clz intrinsic on x86.
return countLeadingZeros(Val, ZB_Undefined) ^
(std::numeric_limits<T>::digits - 1);
return countLeadingZeros(Val) ^ (std::numeric_limits<T>::digits - 1);
}

/// Macro compressed bit reversal table for 256 bits.
Expand Down Expand Up @@ -470,13 +461,11 @@ constexpr inline bool isPowerOf2_64(uint64_t Value) {
/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
/// Only unsigned integral types are allowed.
///
/// \param ZB the behavior on an input of all ones. Only ZB_Width and
/// ZB_Undefined are valid arguments.
template <typename T>
unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
/// Returns std::numeric_limits<T>::digits on an input of all ones.
template <typename T> unsigned countLeadingOnes(T Value) {
static_assert(std::is_unsigned_v<T>,
"Only unsigned integral types are allowed.");
return countLeadingZeros<T>(~Value, ZB);
return countLeadingZeros<T>(~Value);
}

/// Count the number of ones from the least significant bit to the first
Expand All @@ -485,13 +474,11 @@ unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
/// Ex. countTrailingOnes(0x00FF00FF) == 8.
/// Only unsigned integral types are allowed.
///
/// \param ZB the behavior on an input of all ones. Only ZB_Width and
/// ZB_Undefined are valid arguments.
template <typename T>
unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
/// Returns std::numeric_limits<T>::digits on an input of all ones.
template <typename T> unsigned countTrailingOnes(T Value) {
static_assert(std::is_unsigned_v<T>,
"Only unsigned integral types are allowed.");
return countTrailingZeros<T>(~Value, ZB);
return countTrailingZeros<T>(~Value);
}

/// Count the number of set bits in a value.
Expand Down Expand Up @@ -622,7 +609,7 @@ constexpr inline uint64_t NextPowerOf2(uint64_t A) {
/// Essentially, it is a floor operation across the domain of powers of two.
inline uint64_t PowerOf2Floor(uint64_t A) {
if (!A) return 0;
return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
return 1ull << (63 - countLeadingZeros(A));
}

/// Returns the power of two which is greater than or equal to the given value.
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -701,8 +701,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(
if ((PsInputBits & 0x7F) == 0 ||
((PsInputBits & 0xF) == 0 &&
(PsInputBits >> 11 & 1)))
Info->markPSInputEnabled(
countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
Info->markPSInputEnabled(countTrailingZeros(Info->getPSInputAddr()));
}
}

Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2451,8 +2451,7 @@ SDValue SITargetLowering::LowerFormalArguments(
unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
if ((PsInputBits & 0x7F) == 0 ||
((PsInputBits & 0xF) == 0 && (PsInputBits >> 11 & 1)))
Info->markPSInputEnabled(
countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
Info->markPSInputEnabled(countTrailingZeros(Info->getPSInputAddr()));
}
} else if (IsKernel) {
assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
// or ADDIW. If there are trailing zeros, try generating a sign extended
// constant with no trailing zeros and use a final SLLI to restore them.
if ((Val & 0xfff) != 0 && (Val & 1) == 0 && Res.size() >= 2) {
unsigned TrailingZeros = countTrailingZeros((uint64_t)Val, ZB_Undefined);
unsigned TrailingZeros = countTrailingZeros((uint64_t)Val);
int64_t ShiftedVal = Val >> TrailingZeros;
// If we can use C.LI+C.SLLI instead of LUI+ADDI(W) prefer that since
// it's more compressible. But only if LUI+ADDI(W) isn't fusable.
Expand All @@ -202,7 +202,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
if (Val > 0 && Res.size() > 2) {
assert(ActiveFeatures[RISCV::Feature64Bit] &&
"Expected RV32 to only need 2 instructions");
unsigned LeadingZeros = countLeadingZeros((uint64_t)Val, ZB_Undefined);
unsigned LeadingZeros = countLeadingZeros((uint64_t)Val);
uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
// Fill in the bits that will be shifted out with 1s. An example where this
// helps is trailing one masks with 32 or more ones. This will generate
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/IPO/LowerTypeTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ BitSetInfo BitSetBuilder::build() {

BSI.AlignLog2 = 0;
if (Mask != 0)
BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined);
BSI.AlignLog2 = countTrailingZeros(Mask);

// Build the compressed bitset while normalizing the offsets against the
// computed alignment.
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,8 +259,7 @@ wholeprogramdevirt::findLowestOffset(ArrayRef<VirtualCallTarget> Targets,
if (I < B.size())
BitsUsed |= B[I];
if (BitsUsed != 0xff)
return (MinByte + I) * 8 +
countTrailingZeros(uint8_t(~BitsUsed), ZB_Undefined);
return (MinByte + I) * 8 + countTrailingZeros(uint8_t(~BitsUsed));
}
} else {
// Find a free (Size/8) byte region in each member of Used.
Expand Down
3 changes: 1 addition & 2 deletions mlir/lib/Bytecode/Reader/BytecodeReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,7 @@ class EncodingReader {
// here because we only care about the first byte, and so that we actually
// get ctz intrinsic calls when possible (the `uint8_t` overload uses a loop
// implementation).
uint32_t numBytes =
llvm::countTrailingZeros<uint32_t>(result, llvm::ZB_Undefined);
uint32_t numBytes = llvm::countTrailingZeros<uint32_t>(result);
assert(numBytes > 0 && numBytes <= 7 &&
"unexpected number of trailing zeros in varint encoding");

Expand Down

0 comments on commit 83d56fb

Please sign in to comment.