Skip to content

Commit

Permalink
Reland "Refactor BitCount"
Browse files Browse the repository at this point in the history
This is a reland of baecb7d

Directly use NEON instructions on ARM/ARM64 on MSVC.

Original change's description:
> Refactor BitCount
>
> POPCNT intrinsics cannot be used without hardware support, so a CPUID
> check and polyfills are required for some CPUs when using MSVC to
> avoid runtime failure.
>
> Other changes include:
> - Clang: use builtins on all platforms to provide exact intent to the
> compiler;
> - MSVC on ARM: use dedicated intrinsics;
> - x86/x64 fallback is now branchless and works in constant time.
>
> Bug: angleproject:4462
> Change-Id: I00fcabda1c842677d8cb4bfd280d932d0d10c0a5
> Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2102811
> Reviewed-by: Jamie Madill <jmadill@chromium.org>
> Reviewed-by: Geoff Lang <geofflang@chromium.org>
> Commit-Queue: Geoff Lang <geofflang@chromium.org>

Bug: angleproject:4462
Change-Id: Ia58ae00fa4230f77d981d4ba393fa3481806c5b0
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2113570
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Commit-Queue: Jamie Madill <jmadill@chromium.org>
  • Loading branch information
lexaknyazev authored and Commit Bot committed Mar 23, 2020
1 parent 4c7460a commit 31e36a6
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 41 deletions.
11 changes: 0 additions & 11 deletions src/common/mathutil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,4 @@ void convert999E5toRGBFloats(unsigned int input, float *red, float *green, float
inputData->B * pow(2.0f, (int)inputData->E - g_sharedexp_bias - g_sharedexp_mantissabits);
}

// Portable population count: inspects the value one bit at a time, starting
// from the least significant bit, and tallies every bit that is set.
// Returns the number of 1 bits in |bits| (0..32).
int BitCountPolyfill(uint32_t bits)
{
    int setBits = 0;
    for (uint32_t remaining = bits; remaining != 0; remaining >>= 1)
    {
        if ((remaining & 1u) != 0)
        {
            ++setBits;
        }
    }
    return setBits;
}

} // namespace gl
98 changes: 70 additions & 28 deletions src/common/mathutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -981,54 +981,96 @@ inline uint32_t BitfieldReverse(uint32_t value)
}

// Count the 1 bits.
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
# define ANGLE_HAS_BITCOUNT_32
#if defined(_MSC_VER) && !defined(__clang__)
# if defined(_M_IX86) || defined(_M_X64)
namespace priv
{
// True when the CPU reports POPCNT support. The CPUID query runs once, when this constant is
// initialized, and the answer is cached for later BitCount calls.
// https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64#remarks
static const bool kHasPopcnt = [] {
    int cpuInfo[4];
    __cpuid(cpuInfo, 1);
    // The POPCNT support flag is tested with mask 0x800000 (bit 23) on the third returned word.
    const int popcntFlag = cpuInfo[2] & 0x800000;
    return popcntFlag != 0;
}();
}  // namespace priv

// Polyfills for x86/x64 CPUs without POPCNT.
// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// Branchless 32-bit population count built from parallel partial sums.
// Returns the number of 1 bits in |bits| (0..32).
inline int BitCountPolyfill(uint32_t bits)
{
    // Every 2-bit field becomes the count of set bits it originally held.
    const uint32_t pairSums = bits - ((bits >> 1) & 0x55555555);
    // Add neighbouring 2-bit counts into 4-bit fields.
    const uint32_t nibbleSums = (pairSums & 0x33333333) + ((pairSums >> 2) & 0x33333333);
    // Add neighbouring 4-bit counts into bytes; the mask applies to the whole sum.
    const uint32_t byteSums = (nibbleSums + (nibbleSums >> 4)) & 0x0F0F0F0F;
    // The multiply accumulates all four byte counts into the most significant byte.
    const uint32_t total = (byteSums * 0x01010101) >> 24;
    return static_cast<int>(total);
}

// Branchless 64-bit population count built from parallel partial sums.
// Returns the number of 1 bits in |bits| (0..64).
inline int BitCountPolyfill(uint64_t bits)
{
    // Every 2-bit field becomes the count of set bits it originally held.
    const uint64_t pairSums = bits - ((bits >> 1) & 0x5555555555555555ull);
    // Add neighbouring 2-bit counts into 4-bit fields.
    const uint64_t nibbleSums =
        (pairSums & 0x3333333333333333ull) + ((pairSums >> 2) & 0x3333333333333333ull);
    // Add neighbouring 4-bit counts into bytes; the mask applies to the whole sum.
    const uint64_t byteSums = (nibbleSums + (nibbleSums >> 4)) & 0x0F0F0F0F0F0F0F0Full;
    // The multiply accumulates all eight byte counts into the most significant byte.
    const uint64_t total = (byteSums * 0x0101010101010101ull) >> 56;
    return static_cast<int>(total);
}

// Returns the number of 1 bits in a 32-bit value (MSVC on x86/x64).
inline int BitCount(uint32_t bits)
{
// NOTE(review): this unconditional return comes before the kHasPopcnt check, so as written
// nothing below it executes. It looks like the pre-change line kept by the diff view — confirm.
return static_cast<int>(__popcnt(bits));
// Use the POPCNT intrinsic only when the cached CPUID check reported support.
if (priv::kHasPopcnt)
{
return static_cast<int>(__popcnt(bits));
}
// Otherwise fall back to the software implementation.
return BitCountPolyfill(bits);
}
# if defined(_M_X64)
# define ANGLE_HAS_BITCOUNT_64

// Returns the number of 1 bits in a 64-bit value (MSVC on x86/x64).
inline int BitCount(uint64_t bits)
{
// NOTE(review): this unconditional return comes before the kHasPopcnt check, so as written
// nothing below it executes. It looks like the pre-change line kept by the diff view — confirm.
return static_cast<int>(__popcnt64(bits));
// Use the POPCNT intrinsics only when the cached CPUID check reported support.
if (priv::kHasPopcnt)
{
# if defined(_M_X64)
return static_cast<int>(__popcnt64(bits));
# else // x86
// The x86 branch counts the upper and lower 32-bit halves separately and adds them.
return static_cast<int>(__popcnt(static_cast<uint32_t>(bits >> 32)) +
__popcnt(static_cast<uint32_t>(bits)));
# endif // defined(_M_X64)
}
// Otherwise fall back to the software implementation.
return BitCountPolyfill(bits);
}
# endif // defined(_M_X64)
#endif // defined(_M_IX86) || defined(_M_X64)

#if defined(ANGLE_PLATFORM_POSIX)
# define ANGLE_HAS_BITCOUNT_32
# elif defined(_M_ARM) || defined(_M_ARM64)

// MSVC's _CountOneBits* intrinsics are not defined for ARM64; moreover, they do not use dedicated
// NEON instructions.

// Returns the number of 1 bits in a 32-bit value (MSVC on ARM/ARM64) using NEON intrinsics.
inline int BitCount(uint32_t bits)
{
// NOTE(review): this unconditional return comes before the NEON code, so as written nothing
// below it executes. It looks like a pre-change line kept by the diff view — confirm.
return __builtin_popcount(bits);
// Widen the value to 64 bits, treat it as an 8x8 vector and use VCNT on it.
const uint8x8_t vsum = vcnt_u8(vcreate_u8(static_cast<uint64_t>(bits)));

// Pairwise sums: 8x8 -> 16x4 -> 32x2; lane 0 of the final vector is returned.
return static_cast<int>(vget_lane_u32(vpaddl_u16(vpaddl_u8(vsum)), 0));
}

# if defined(ANGLE_IS_64_BIT_CPU)
# define ANGLE_HAS_BITCOUNT_64
inline int BitCount(uint64_t bits)
{
return __builtin_popcountll(bits);
}
# endif // defined(ANGLE_IS_64_BIT_CPU)
#endif // defined(ANGLE_PLATFORM_POSIX)
// cast bits to 8x8 datatype and use VCNT on it
const uint8x8_t vsum = vcnt_u8(vcreate_u8(bits));

int BitCountPolyfill(uint32_t bits);
// pairwise sums: 8x8 -> 16x4 -> 32x2 -> 64x1
return static_cast<int>(vget_lane_u64(vpaddl_u32(vpaddl_u16(vpaddl_u8(vsum))), 0));
}
# endif // defined(_M_IX86) || defined(_M_X64)
#endif // defined(_MSC_VER) && !defined(__clang__)

#if !defined(ANGLE_HAS_BITCOUNT_32)
inline int BitCount(const uint32_t bits)
#if defined(ANGLE_PLATFORM_POSIX) || defined(__clang__)
inline int BitCount(uint32_t bits)
{
return BitCountPolyfill(bits);
return __builtin_popcount(bits);
}
#endif // !defined(ANGLE_HAS_BITCOUNT_32)

#if !defined(ANGLE_HAS_BITCOUNT_64)
inline int BitCount(const uint64_t bits)
inline int BitCount(uint64_t bits)
{
return BitCount(static_cast<uint32_t>(bits >> 32)) + BitCount(static_cast<uint32_t>(bits));
return __builtin_popcountll(bits);
}
#endif // !defined(ANGLE_HAS_BITCOUNT_64)
#undef ANGLE_HAS_BITCOUNT_32
#undef ANGLE_HAS_BITCOUNT_64
#endif // defined(ANGLE_PLATFORM_POSIX) || defined(__clang__)

inline int BitCount(uint8_t bits)
{
Expand Down
5 changes: 3 additions & 2 deletions src/common/mathutil_unittest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,11 +258,12 @@ TEST(MathUtilTest, BitCount)
EXPECT_EQ(32, gl::BitCount(0xFFFFFFFFu));
EXPECT_EQ(10, gl::BitCount(0x17103121u));

#if defined(ANGLE_IS_64_BIT_CPU)
EXPECT_EQ(0, gl::BitCount(static_cast<uint64_t>(0ull)));
EXPECT_EQ(32, gl::BitCount(static_cast<uint64_t>(0xFFFFFFFFull)));
EXPECT_EQ(10, gl::BitCount(static_cast<uint64_t>(0x17103121ull)));
#endif // defined(ANGLE_IS_64_BIT_CPU)

EXPECT_EQ(33, gl::BitCount(static_cast<uint64_t>(0xFFFFFFFF80000000ull)));
EXPECT_EQ(11, gl::BitCount(static_cast<uint64_t>(0x1710312180000000ull)));
}

// Test ScanForward, which scans for the least significant 1 bit from a non-zero integer.
Expand Down

0 comments on commit 31e36a6

Please sign in to comment.