Drop the ZeroBehavior parameter from countLeadingZeros and the like (…

…NFC) This patch drops the ZeroBehavior parameter from bit counting functions like countLeadingZeros. ZeroBehavior specifies the behavior when the input to count{Leading,Trailing}Zeros is zero and when the input to count{Leading,Trailing}Ones is all ones. ZeroBehavior was first introduced on May 24, 2013 in commit eb91eac. While that patch did not state the intention, I would guess ZeroBehavior was for performance reasons. The x86 machines around that time required a conditional branch to implement countLeadingZero<uint32_t> that returns the 32 on zero: test edi, edi je .LBB0_2 bsr eax, edi xor eax, 31 .LBB1_2: mov eax, 32 That is, we can remove the conditional branch if we don't care about the behavior on zero. IIUC, Intel's Haswell architecture, launched on June 4, 2013, introduced several bit manipulation instructions, including lzcnt and tzcnt, which eliminated the need for the conditional branch. I think it's time to retire ZeroBehavior as its utility is very limited. If you care about compilation speed, you should build LLVM with an appropriate -march= to take advantage of lzcnt and tzcnt. Even if not, modern host compilers should be able to optimize away quite a few conditional branches because the input is often known to be nonzero from dominating conditional branches. Differential Revision: https://reviews.llvm.org/D141798
llvm · Jan 19, 2023 · 83d56fb · 83d56fb
1 parent 26f83b4
commit 83d56fb
Show file tree

Hide file tree

Showing 7 changed files with 35 additions and 52 deletions.
diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h
@@ -41,9 +41,7 @@ enum ZeroBehavior {
   /// The returned value is undefined.
   ZB_Undefined,
   /// The returned value is numeric_limits<T>::max()
-  ZB_Max,
-  /// The returned value is numeric_limits<T>::digits
-  ZB_Width
+  ZB_Max
 };
 
 /// Mathematical constants.
@@ -84,7 +82,7 @@ constexpr float ef          = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A
 
 namespace detail {
 template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
-  static unsigned count(T Val, ZeroBehavior) {
+  static unsigned count(T Val) {
     if (!Val)
       return std::numeric_limits<T>::digits;
     if (Val & 0x1)
@@ -108,8 +106,8 @@ template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
 
 #if defined(__GNUC__) || defined(_MSC_VER)
 template <typename T> struct TrailingZerosCounter<T, 4> {
-  static unsigned count(T Val, ZeroBehavior ZB) {
-    if (ZB != ZB_Undefined && Val == 0)
+  static unsigned count(T Val) {
+    if (Val == 0)
       return 32;
 
 #if __has_builtin(__builtin_ctz) || defined(__GNUC__)
@@ -124,8 +122,8 @@ template <typename T> struct TrailingZerosCounter<T, 4> {
 
 #if !defined(_MSC_VER) || defined(_M_X64)
 template <typename T> struct TrailingZerosCounter<T, 8> {
-  static unsigned count(T Val, ZeroBehavior ZB) {
-    if (ZB != ZB_Undefined && Val == 0)
+  static unsigned count(T Val) {
+    if (Val == 0)
       return 64;
 
 #if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
@@ -146,18 +144,16 @@ template <typename T> struct TrailingZerosCounter<T, 8> {
 ///
 /// Only unsigned integral types are allowed.
 ///
-/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
-///   valid arguments.
-template <typename T>
-unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
+/// Returns std::numeric_limits<T>::digits on an input of 0.
+template <typename T> unsigned countTrailingZeros(T Val) {
   static_assert(std::is_unsigned_v<T>,
                 "Only unsigned integral types are allowed.");
-  return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
+  return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val);
 }
 
 namespace detail {
 template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
-  static unsigned count(T Val, ZeroBehavior) {
+  static unsigned count(T Val) {
     if (!Val)
       return std::numeric_limits<T>::digits;
 
@@ -176,8 +172,8 @@ template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
 
 #if defined(__GNUC__) || defined(_MSC_VER)
 template <typename T> struct LeadingZerosCounter<T, 4> {
-  static unsigned count(T Val, ZeroBehavior ZB) {
-    if (ZB != ZB_Undefined && Val == 0)
+  static unsigned count(T Val) {
+    if (Val == 0)
       return 32;
 
 #if __has_builtin(__builtin_clz) || defined(__GNUC__)
@@ -192,8 +188,8 @@ template <typename T> struct LeadingZerosCounter<T, 4> {
 
 #if !defined(_MSC_VER) || defined(_M_X64)
 template <typename T> struct LeadingZerosCounter<T, 8> {
-  static unsigned count(T Val, ZeroBehavior ZB) {
-    if (ZB != ZB_Undefined && Val == 0)
+  static unsigned count(T Val) {
+    if (Val == 0)
       return 64;
 
 #if __has_builtin(__builtin_clzll) || defined(__GNUC__)
@@ -214,27 +210,24 @@ template <typename T> struct LeadingZerosCounter<T, 8> {
 ///
 /// Only unsigned integral types are allowed.
 ///
-/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
-///   valid arguments.
-template <typename T>
-unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
+/// Returns std::numeric_limits<T>::digits on an input of 0.
+template <typename T> unsigned countLeadingZeros(T Val) {
   static_assert(std::is_unsigned_v<T>,
                 "Only unsigned integral types are allowed.");
-  return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
+  return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val);
 }
 
 /// Get the index of the first set bit starting from the least
 ///   significant bit.
 ///
 /// Only unsigned integral types are allowed.
 ///
-/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
-///   valid arguments.
+/// \param ZB the behavior on an input of 0.
 template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
   if (ZB == ZB_Max && Val == 0)
     return std::numeric_limits<T>::max();
 
-  return countTrailingZeros(Val, ZB_Undefined);
+  return countTrailingZeros(Val);
 }
 
 /// Create a bitmask with the N right-most bits set to 1, and all other
@@ -269,16 +262,14 @@ template <typename T> T maskLeadingZeros(unsigned N) {
 ///
 /// Only unsigned integral types are allowed.
 ///
-/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
-///   valid arguments.
+/// \param ZB the behavior on an input of 0.
 template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
   if (ZB == ZB_Max && Val == 0)
     return std::numeric_limits<T>::max();
 
   // Use ^ instead of - because both gcc and llvm can remove the associated ^
   // in the __builtin_clz intrinsic on x86.
-  return countLeadingZeros(Val, ZB_Undefined) ^
-         (std::numeric_limits<T>::digits - 1);
+  return countLeadingZeros(Val) ^ (std::numeric_limits<T>::digits - 1);
 }
 
 /// Macro compressed bit reversal table for 256 bits.
@@ -470,13 +461,11 @@ constexpr inline bool isPowerOf2_64(uint64_t Value) {
 /// Ex. countLeadingOnes(0xFF0FFF00) == 8.
 /// Only unsigned integral types are allowed.
 ///
-/// \param ZB the behavior on an input of all ones. Only ZB_Width and
-/// ZB_Undefined are valid arguments.
-template <typename T>
-unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
+/// Returns std::numeric_limits<T>::digits on an input of all ones.
+template <typename T> unsigned countLeadingOnes(T Value) {
   static_assert(std::is_unsigned_v<T>,
                 "Only unsigned integral types are allowed.");
-  return countLeadingZeros<T>(~Value, ZB);
+  return countLeadingZeros<T>(~Value);
 }
 
 /// Count the number of ones from the least significant bit to the first
@@ -485,13 +474,11 @@ unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
 /// Ex. countTrailingOnes(0x00FF00FF) == 8.
 /// Only unsigned integral types are allowed.
 ///
-/// \param ZB the behavior on an input of all ones. Only ZB_Width and
-/// ZB_Undefined are valid arguments.
-template <typename T>
-unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
+/// Returns std::numeric_limits<T>::digits on an input of all ones.
+template <typename T> unsigned countTrailingOnes(T Value) {
   static_assert(std::is_unsigned_v<T>,
                 "Only unsigned integral types are allowed.");
-  return countTrailingZeros<T>(~Value, ZB);
+  return countTrailingZeros<T>(~Value);
 }
 
 /// Count the number of set bits in a value.
@@ -622,7 +609,7 @@ constexpr inline uint64_t NextPowerOf2(uint64_t A) {
 /// Essentially, it is a floor operation across the domain of powers of two.
 inline uint64_t PowerOf2Floor(uint64_t A) {
   if (!A) return 0;
-  return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
+  return 1ull << (63 - countLeadingZeros(A));
 }
 
 /// Returns the power of two which is greater than or equal to the given value.

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -701,8 +701,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(
       if ((PsInputBits & 0x7F) == 0 ||
           ((PsInputBits & 0xF) == 0 &&
            (PsInputBits >> 11 & 1)))
-        Info->markPSInputEnabled(
-          countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
+        Info->markPSInputEnabled(countTrailingZeros(Info->getPSInputAddr()));
     }
   }
 

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2451,8 +2451,7 @@ SDValue SITargetLowering::LowerFormalArguments(
       unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
       if ((PsInputBits & 0x7F) == 0 ||
           ((PsInputBits & 0xF) == 0 && (PsInputBits >> 11 & 1)))
-        Info->markPSInputEnabled(
-            countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
+        Info->markPSInputEnabled(countTrailingZeros(Info->getPSInputAddr()));
     }
   } else if (IsKernel) {
     assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -180,7 +180,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
   // or ADDIW. If there are trailing zeros, try generating a sign extended
   // constant with no trailing zeros and use a final SLLI to restore them.
   if ((Val & 0xfff) != 0 && (Val & 1) == 0 && Res.size() >= 2) {
-    unsigned TrailingZeros = countTrailingZeros((uint64_t)Val, ZB_Undefined);
+    unsigned TrailingZeros = countTrailingZeros((uint64_t)Val);
     int64_t ShiftedVal = Val >> TrailingZeros;
     // If we can use C.LI+C.SLLI instead of LUI+ADDI(W) prefer that since
     // its more compressible. But only if LUI+ADDI(W) isn't fusable.
@@ -202,7 +202,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
   if (Val > 0 && Res.size() > 2) {
     assert(ActiveFeatures[RISCV::Feature64Bit] &&
            "Expected RV32 to only need 2 instructions");
-    unsigned LeadingZeros = countLeadingZeros((uint64_t)Val, ZB_Undefined);
+    unsigned LeadingZeros = countLeadingZeros((uint64_t)Val);
     uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
     // Fill in the bits that will be shifted out with 1s. An example where this
     // helps is trailing one masks with 32 or more ones. This will generate

diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -172,7 +172,7 @@ BitSetInfo BitSetBuilder::build() {
 
   BSI.AlignLog2 = 0;
   if (Mask != 0)
-    BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined);
+    BSI.AlignLog2 = countTrailingZeros(Mask);
 
   // Build the compressed bitset while normalizing the offsets against the
   // computed alignment.

diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -259,8 +259,7 @@ wholeprogramdevirt::findLowestOffset(ArrayRef<VirtualCallTarget> Targets,
         if (I < B.size())
           BitsUsed |= B[I];
       if (BitsUsed != 0xff)
-        return (MinByte + I) * 8 +
-               countTrailingZeros(uint8_t(~BitsUsed), ZB_Undefined);
+        return (MinByte + I) * 8 + countTrailingZeros(uint8_t(~BitsUsed));
     }
   } else {
     // Find a free (Size/8) byte region in each member of Used.

diff --git a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
@@ -281,8 +281,7 @@ class EncodingReader {
     // here because we only care about the first byte, and so that be actually
     // get ctz intrinsic calls when possible (the `uint8_t` overload uses a loop
     // implementation).
-    uint32_t numBytes =
-        llvm::countTrailingZeros<uint32_t>(result, llvm::ZB_Undefined);
+    uint32_t numBytes = llvm::countTrailingZeros<uint32_t>(result);
     assert(numBytes > 0 && numBytes <= 7 &&
            "unexpected number of trailing zeros in varint encoding");