Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use C++20 std utils for counting bits and bit rotates #8024

Merged
merged 4 commits into from
Apr 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion Utilities/Thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2406,7 +2406,7 @@ void thread_ctrl::set_thread_affinity_mask(u64 mask)
SetThreadAffinityMask(_this_thread, mask);
#elif __APPLE__
// Supports only one core
thread_affinity_policy_data_t policy = { static_cast<integer_t>(utils::cnttz64(mask)) };
thread_affinity_policy_data_t policy = { static_cast<integer_t>(std::countr_zero(mask)) };
thread_port_t mach_thread = pthread_mach_thread_np(pthread_self());
thread_policy_set(mach_thread, THREAD_AFFINITY_POLICY, reinterpret_cast<thread_policy_t>(&policy), 1);
#elif defined(__linux__) || defined(__DragonFly__) || defined(__FreeBSD__)
Expand Down
192 changes: 0 additions & 192 deletions Utilities/asm.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,162 +4,9 @@

namespace utils
{
// Count leading zero bits of a 32-bit value; yields 32 when arg == 0.
// Pass nonzero = true only if the caller guarantees arg != 0 (skips the zero guard).
inline u32 cntlz32(u32 arg, bool nonzero = false)
{
#ifdef _MSC_VER
	ulong pos;
	if (_BitScanReverse(&pos, arg) || nonzero)
	{
		// _BitScanReverse returns the index of the highest set bit; XOR with 31 turns it into a leading-zero count.
		return pos ^ 31;
	}
	return 32;
#elif __LZCNT__
	// LZCNT is defined for a zero input, so no branch is needed.
	return _lzcnt_u32(arg);
#else
	// __builtin_clz(0) is undefined, so guard unless the caller vouched for arg.
	if (!arg && !nonzero)
	{
		return 32;
	}
	return __builtin_clz(arg);
#endif
}

// Count leading zero bits of a 64-bit value; yields 64 when arg == 0.
// Pass nonzero = true only if the caller guarantees arg != 0 (skips the zero guard).
inline u64 cntlz64(u64 arg, bool nonzero = false)
{
#ifdef _MSC_VER
	ulong pos;
	if (_BitScanReverse64(&pos, arg) || nonzero)
	{
		// Convert the highest-set-bit index into a leading-zero count.
		return pos ^ 63;
	}
	return 64;
#elif __LZCNT__
	// LZCNT is defined for a zero input, so no branch is needed.
	return _lzcnt_u64(arg);
#else
	// __builtin_clzll(0) is undefined, so guard unless the caller vouched for arg.
	if (!arg && !nonzero)
	{
		return 64;
	}
	return __builtin_clzll(arg);
#endif
}

// Count trailing zero bits of a 32-bit value; yields 32 when arg == 0.
// Pass nonzero = true only if the caller guarantees arg != 0 (skips the zero guard).
inline u32 cnttz32(u32 arg, bool nonzero = false)
{
#ifdef _MSC_VER
	ulong pos;
	if (_BitScanForward(&pos, arg) || nonzero)
	{
		// _BitScanForward returns the index of the lowest set bit, which equals the trailing-zero count.
		return pos;
	}
	return 32;
#elif __BMI__
	// TZCNT is defined for a zero input, so no branch is needed.
	return _tzcnt_u32(arg);
#else
	// __builtin_ctz(0) is undefined, so guard unless the caller vouched for arg.
	if (!arg && !nonzero)
	{
		return 32;
	}
	return __builtin_ctz(arg);
#endif
}

// Count trailing zero bits of a 64-bit value; yields 64 when arg == 0.
// Pass nonzero = true only if the caller guarantees arg != 0 (skips the zero guard).
inline u64 cnttz64(u64 arg, bool nonzero = false)
{
#ifdef _MSC_VER
	ulong pos;
	if (_BitScanForward64(&pos, arg) || nonzero)
	{
		// Index of the lowest set bit equals the trailing-zero count.
		return pos;
	}
	return 64;
#elif __BMI__
	// TZCNT is defined for a zero input, so no branch is needed.
	return _tzcnt_u64(arg);
#else
	// __builtin_ctzll(0) is undefined, so guard unless the caller vouched for arg.
	if (!arg && !nonzero)
	{
		return 64;
	}
	return __builtin_ctzll(arg);
#endif
}

// Population count: number of set bits in a 32-bit value (result fits in u8, max 32).
inline u8 popcnt32(u32 arg)
{
#ifdef _MSC_VER
	// Portable SWAR reduction (no POPCNT instruction assumed):
	// sum bit pairs, then nibbles, then bytes/halfwords into one accumulator.
	u32 v = (arg & 0x55555555) + ((arg >> 1) & 0x55555555);
	v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
	v = (v + (v >> 4)) & 0x0f0f0f0f;
	v = v + (v >> 8);
	return static_cast<u8>(v + (v >> 16));
#else
	return __builtin_popcount(arg);
#endif
}

// Rotate helpers
#if defined(__GNUG__)

// Rotate an 8-bit value left by n bits.
inline u8 rol8(u8 x, u8 n)
{
#if __has_builtin(__builtin_rotateleft8)
// Clang provides a dedicated rotate builtin.
return __builtin_rotateleft8(x, n);
#else
// Fallback (x86 only): emit rolb directly; the "c" constraint places the count in cl as the instruction requires.
u8 result = x;
__asm__("rolb %[n], %[result]" : [result] "+g"(result) : [n] "c"(n));
return result;
#endif
}

// Rotate an 8-bit value right by n bits.
inline u8 ror8(u8 x, u8 n)
{
#if __has_builtin(__builtin_rotateright8)
// Clang provides a dedicated rotate builtin.
return __builtin_rotateright8(x, n);
#else
// Fallback (x86 only): emit rorb directly; the "c" constraint places the count in cl as the instruction requires.
u8 result = x;
__asm__("rorb %[n], %[result]" : [result] "+g"(result) : [n] "c"(n));
return result;
#endif
}

// Rotate a 16-bit value left by n bits.
inline u16 rol16(u16 x, u16 n)
{
#if __has_builtin(__builtin_rotateleft16)
// Clang provides a dedicated rotate builtin.
return __builtin_rotateleft16(x, n);
#else
// Fallback (x86 only): the %b modifier selects the byte sub-register (cl) for the count operand.
u16 result = x;
__asm__("rolw %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n));
return result;
#endif
}

// Rotate a 16-bit value right by n bits.
inline u16 ror16(u16 x, u16 n)
{
#if __has_builtin(__builtin_rotateright16)
// Clang provides a dedicated rotate builtin.
return __builtin_rotateright16(x, n);
#else
// Fallback (x86 only): the %b modifier selects the byte sub-register (cl) for the count operand.
u16 result = x;
__asm__("rorw %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n));
return result;
#endif
}

// Rotate a 32-bit value left by n bits.
inline u32 rol32(u32 x, u32 n)
{
#if __has_builtin(__builtin_rotateleft32)
// Clang provides a dedicated rotate builtin.
return __builtin_rotateleft32(x, n);
#else
// Fallback (x86 only): the %b modifier selects the byte sub-register (cl) for the count operand.
u32 result = x;
__asm__("roll %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n));
return result;
#endif
}

// Rotate a 32-bit value right by n bits.
inline u32 ror32(u32 x, u32 n)
{
#if __has_builtin(__builtin_rotateright32)
// Clang provides a dedicated rotate builtin.
return __builtin_rotateright32(x, n);
#else
// Fallback (x86 only): the %b modifier selects the byte sub-register (cl) for the count operand.
u32 result = x;
__asm__("rorl %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n));
return result;
#endif
}

// Rotate a 64-bit value left by n bits.
inline u64 rol64(u64 x, u64 n)
{
#if __has_builtin(__builtin_rotateleft64)
// Clang provides a dedicated rotate builtin.
return __builtin_rotateleft64(x, n);
#else
// Fallback (x86-64 only): the %b modifier selects the byte sub-register (cl) for the count operand.
u64 result = x;
__asm__("rolq %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n));
return result;
#endif
}

// Rotate a 64-bit value right by n bits.
inline u64 ror64(u64 x, u64 n)
{
#if __has_builtin(__builtin_rotateright64)
// Clang provides a dedicated rotate builtin.
return __builtin_rotateright64(x, n);
#else
// Fallback (x86-64 only): the %b modifier selects the byte sub-register (cl) for the count operand.
u64 result = x;
__asm__("rorq %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n));
return result;
#endif
}

constexpr u64 umulh64(u64 a, u64 b)
{
const __uint128_t x = a;
Expand Down Expand Up @@ -201,45 +48,6 @@ namespace utils
}

#elif defined(_MSC_VER)
// Rotate an 8-bit value left by n bits (MSVC intrinsic wrapper).
inline u8 rol8(u8 x, u8 n)
{
return _rotl8(x, n);
}

// Rotate an 8-bit value right by n bits (MSVC intrinsic wrapper).
inline u8 ror8(u8 x, u8 n)
{
return _rotr8(x, n);
}

// Rotate a 16-bit value left by n bits (MSVC intrinsic wrapper).
// _rotl16 takes an 8-bit shift count; use a named cast instead of a C-style cast.
inline u16 rol16(u16 x, u16 n)
{
	return _rotl16(x, static_cast<u8>(n));
}

// Rotate a 16-bit value right by n bits (MSVC intrinsic wrapper).
// _rotr16 takes an 8-bit shift count; use a named cast instead of a C-style cast.
inline u16 ror16(u16 x, u16 n)
{
	return _rotr16(x, static_cast<u8>(n));
}

// Rotate a 32-bit value left by n bits (MSVC intrinsic wrapper).
// _rotl takes an int shift count; use a named cast instead of a C-style cast.
inline u32 rol32(u32 x, u32 n)
{
	return _rotl(x, static_cast<int>(n));
}

// Rotate a 32-bit value right by n bits (MSVC intrinsic wrapper).
// _rotr takes an int shift count; use a named cast instead of a C-style cast.
inline u32 ror32(u32 x, u32 n)
{
	return _rotr(x, static_cast<int>(n));
}

// Rotate a 64-bit value left by n bits (MSVC intrinsic wrapper).
// _rotl64 takes an int shift count; use a named cast instead of a C-style cast.
inline u64 rol64(u64 x, u64 n)
{
	return _rotl64(x, static_cast<int>(n));
}

// Rotate a 64-bit value right by n bits (MSVC intrinsic wrapper).
// _rotr64 takes an int shift count; use a named cast instead of a C-style cast.
inline u64 ror64(u64 x, u64 n)
{
	return _rotr64(x, static_cast<int>(n));
}

inline u64 umulh64(u64 x, u64 y)
{
Expand Down
4 changes: 2 additions & 2 deletions Utilities/cfmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ std::size_t cfmt_append(Dst& out, const Char* fmt, Src&& src)

const auto write_octal = [&](u64 value, u64 min_num)
{
out.resize(out.size() + std::max<u64>(min_num, 66 / 3 - (utils::cntlz64(value | 1, true) + 2) / 3), '0');
out.resize(out.size() + std::max<u64>(min_num, 66 / 3 - (std::countl_zero<u64>(value | 1) + 2) / 3), '0');

// Write in reversed order
for (auto i = out.rbegin(); value; i++, value /= 8)
Expand All @@ -70,7 +70,7 @@ std::size_t cfmt_append(Dst& out, const Char* fmt, Src&& src)

const auto write_hex = [&](u64 value, bool upper, u64 min_num)
{
out.resize(out.size() + std::max<u64>(min_num, 64 / 4 - utils::cntlz64(value | 1, true) / 4), '0');
out.resize(out.size() + std::max<u64>(min_num, 64 / 4 - std::countl_zero<u64>(value | 1) / 4), '0');

// Write in reversed order
for (auto i = out.rbegin(); value; i++, value /= 16)
Expand Down
7 changes: 5 additions & 2 deletions Utilities/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,12 @@
#include <limits>
#include <array>

#if __has_include(<bit>)
#include <bit>
#ifdef _MSC_VER
#ifndef __cpp_lib_bitops
#define __cpp_lib_bitops
#endif
#endif
#include <bit>

#ifndef __has_builtin
#define __has_builtin(x) 0
Expand Down
4 changes: 2 additions & 2 deletions rpcs3/Emu/CPU/CPUThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ struct cpu_counter
if (ok) [[likely]]
{
// Get actual slot number
array_slot = i * 64 + utils::cnttz64(~bits, false);
array_slot = i * 64 + std::countr_one(bits);
break;
}
}
Expand Down Expand Up @@ -314,7 +314,7 @@ void for_all_cpu(F&& func) noexcept
{
for (u64 bits = ctr->cpu_array_bits[i]; bits; bits &= bits - 1)
{
const u64 index = i * 64 + utils::cnttz64(bits, true);
const u64 index = i * 64 + std::countr_zero(bits);

if (cpu_thread* cpu = ctr->cpu_array[index].load())
{
Expand Down
5 changes: 2 additions & 3 deletions rpcs3/Emu/Cell/Modules/cellSpurs.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#include "stdafx.h"
#include "Emu/IdManager.h"
#include "Emu/Cell/PPUModule.h"
#include "Utilities/asm.h"
#include "Emu/Cell/SPUThread.h"
#include "Emu/Cell/lv2/sys_lwmutex.h"
#include "Emu/Cell/lv2/sys_lwcond.h"
Expand Down Expand Up @@ -2114,7 +2113,7 @@ s32 _spurs::add_workload(vm::ptr<CellSpurs> spurs, vm::ptr<u32> wid, vm::cptr<vo
const u32 wmax = spurs->flags1 & SF1_32_WORKLOADS ? 0x20u : 0x10u; // TODO: check if can be changed
spurs->wklEnabled.atomic_op([spurs, wmax, &wnum](be_t<u32>& value)
{
wnum = utils::cntlz32(~value); // found empty position
wnum = std::countl_one<u32>(value); // found empty position
if (wnum < wmax)
{
value |= (0x80000000 >> wnum); // set workload bit
Expand Down Expand Up @@ -2237,7 +2236,7 @@ s32 _spurs::add_workload(vm::ptr<CellSpurs> spurs, vm::ptr<u32> wid, vm::cptr<vo
else
{
k |= 0x80000000 >> current->uniqueId;
res_wkl = utils::cntlz32(~k);
res_wkl = std::countl_one<u32>(k);
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions rpcs3/Emu/Cell/Modules/cellSync.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "stdafx.h"
#include "Emu/Cell/PPUModule.h"
#include "Utilities/asm.h"
#include "Emu/Cell/lv2/sys_event.h"
#include "Emu/Cell/lv2/sys_process.h"
#include "cellSync.h"
Expand Down Expand Up @@ -1017,7 +1016,7 @@ error_code _cellSyncLFQueueCompletePushPointer(ppu_thread& ppu, vm::ptr<CellSync
{
var9_ = 1 << var9_;
}
s32 var9 = utils::cntlz32(static_cast<u16>(~(var9_ | push3.m_h6))) - 16; // count leading zeros in u16
s32 var9 = std::countl_zero<u32>(static_cast<u16>(~(var9_ | push3.m_h6))) - 16; // count leading zeros in u16

s32 var5 = push3.m_h6 | var9_;
if (var9 & 0x30)
Expand Down Expand Up @@ -1317,7 +1316,8 @@ error_code _cellSyncLFQueueCompletePopPointer(ppu_thread& ppu, vm::ptr<CellSyncL
{
var9_ = 1 << var9_;
}
s32 var9 = utils::cntlz32(static_cast<u16>(~(var9_ | pop3.m_h2))) - 16; // count leading zeros in u16

s32 var9 = std::countl_zero<u32>(static_cast<u16>(~(var9_ | pop3.m_h2))) - 16; // count leading zeros in u16

s32 var5 = pop3.m_h2 | var9_;
if (var9 & 0x30)
Expand Down