Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More simplifications and cleanup in affine_transform_sparse_input.h #4677

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 10 additions & 34 deletions src/nnue/layers/affine_transform_sparse_input.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,43 +34,15 @@
*/

namespace Stockfish::Eval::NNUE::Layers {
#if defined(__GNUC__) // GCC, Clang, ICC

static inline IndexType lsb_(std::uint32_t b) {
assert(b);
return IndexType(__builtin_ctzl(b));
}

#elif defined(_MSC_VER) // MSVC

static inline IndexType lsb_(std::uint32_t b) {
assert(b);
unsigned long idx;
_BitScanForward(&idx, b);
return (IndexType) idx;
}

#else // Compiler is neither GCC nor MSVC compatible

#error "Compiler not supported."

#endif


#if defined(USE_SSSE3)
alignas(CacheLineSize) static inline const std::array<std::array<std::uint16_t, 8>, 256> lookup_indices = [](){
std::array<std::array<std::uint16_t, 8>, 256> v{};
for (int i = 0; i < 256; ++i)
for (unsigned i = 0; i < 256; ++i)
{
int j = i;
int k = 0;
std::uint64_t j = i, k = 0;
while(j)
{
const IndexType lsbIndex = lsb_(std::uint32_t(j));
j &= j - 1;
v[i][k] = lsbIndex;
++k;
}
v[i][k++] = pop_lsb(j);
}
return v;
}();
Expand All @@ -83,7 +55,11 @@ namespace Stockfish::Eval::NNUE::Layers {
#define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512())
#elif defined (USE_AVX2)
using vec_t = __m256i;
#define vec_nnz(a) _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256())))
#if defined(USE_VNNI) && !defined(USE_AVXVNNI)
#define vec_nnz(a) _mm256_cmpgt_epi32_mask(a, _mm256_setzero_si256())
#else
#define vec_nnz(a) _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256())))
#endif
#elif defined (USE_SSSE3)
using vec_t = __m128i;
#define vec_nnz(a) _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128())))
Expand All @@ -97,8 +73,8 @@ namespace Stockfish::Eval::NNUE::Layers {

const auto inputVector = reinterpret_cast<const vec_t*>(input);
IndexType count = 0;
__m128i base = _mm_set1_epi16(0);
__m128i increment = _mm_set1_epi16(8);
__m128i base = _mm_setzero_si128();
const __m128i increment = _mm_set1_epi16(8);
for (IndexType i = 0; i < NumChunks; ++i)
{
// bitmask of nonzero values in this chunk
Expand Down