Skip to content

Commit

Permalink
Update nnue_feature_transformer.h
Browse files Browse the repository at this point in the history
Manually unroll the loops over active indices in
update_accumulator_refresh so that each iteration
processes two active indices; a trailing loop
handles any leftover index.

The compiler may not unroll these loops effectively
on its own, because the number of active indices
is not known at compile time.

STC https://tests.stockfishchess.org/tests/view/65faa8850ec64f0526c4fca9
LLR: 2.93 (-2.94,2.94) <0.00,2.00>
Total: 130464 W: 33882 L: 33431 D: 63151
Ptnml(0-2): 539, 14591, 34501, 15082, 519

closes #5125

No functional change
  • Loading branch information
mstembera authored and Disservin committed Mar 26, 2024
1 parent d99f895 commit 5001d49
Showing 1 changed file with 31 additions and 2 deletions.
33 changes: 31 additions & 2 deletions src/nnue/nnue_feature_transformer.h
Expand Up @@ -619,8 +619,22 @@ class FeatureTransformer {
for (IndexType k = 0; k < NumRegs; ++k)
acc[k] = biasesTile[k];

for (const auto index : active)
int i = 0;
for (; i < int(active.size()) - 1; i += 2)
{
IndexType index0 = active[i];
IndexType index1 = active[i + 1];
const IndexType offset0 = HalfDimensions * index0 + j * TileHeight;
const IndexType offset1 = HalfDimensions * index1 + j * TileHeight;
auto column0 = reinterpret_cast<const vec_t*>(&weights[offset0]);
auto column1 = reinterpret_cast<const vec_t*>(&weights[offset1]);

for (unsigned k = 0; k < NumRegs; ++k)
acc[k] = vec_add_16(acc[k], vec_add_16(column0[k], column1[k]));
}
for (; i < int(active.size()); ++i)
{
IndexType index = active[i];
const IndexType offset = HalfDimensions * index + j * TileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights[offset]);

Expand All @@ -639,8 +653,23 @@ class FeatureTransformer {
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
psqt[k] = vec_zero_psqt();

for (const auto index : active)
int i = 0;
for (; i < int(active.size()) - 1; i += 2)
{
IndexType index0 = active[i];
IndexType index1 = active[i + 1];
const IndexType offset0 = PSQTBuckets * index0 + j * PsqtTileHeight;
const IndexType offset1 = PSQTBuckets * index1 + j * PsqtTileHeight;
auto columnPsqt0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset0]);
auto columnPsqt1 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset1]);

for (std::size_t k = 0; k < NumPsqtRegs; ++k)
psqt[k] =
vec_add_psqt_32(psqt[k], vec_add_psqt_32(columnPsqt0[k], columnPsqt1[k]));
}
for (; i < int(active.size()); ++i)
{
IndexType index = active[i];
const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight;
auto columnPsqt = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offset]);

Expand Down

0 comments on commit 5001d49

Please sign in to comment.