Skip to content

Commit

Permalink
WIP SIMD'ify Node4 insert position search
Browse files Browse the repository at this point in the history
  • Loading branch information
laurynas-biveinis committed Jun 21, 2021
1 parent 4a57c68 commit ca9033c
Showing 1 changed file with 32 additions and 0 deletions.
32 changes: 32 additions & 0 deletions art_internal_impl.hpp
Expand Up @@ -82,6 +82,12 @@ inline auto _mm_cmple_epu8(__m128i x, __m128i y) noexcept {
return _mm_cmpeq_epi8(_mm_max_epu8(y, x), y);
}

// Per-byte unsigned "less than" for 128-bit vectors: a result byte is 0xFF
// where the byte of x is below the matching byte of y when both are read as
// unsigned 8-bit values, and 0x00 otherwise. SSE2 only offers signed byte
// comparisons, so the sign bit of every byte is flipped first, which maps the
// unsigned ordering onto the signed one.
// Technique taken from https://stackoverflow.com/a/24234695/80458
inline auto _mm_cmplt_epu8(__m128i x, __m128i y) noexcept {
  const auto sign_bits = _mm_set1_epi8(static_cast<char>(-128));
  const auto x_biased = _mm_xor_si128(x, sign_bits);
  const auto y_biased = _mm_xor_si128(y, sign_bits);
  // x < y is the same predicate as y > x.
  return _mm_cmpgt_epi8(y_biased, x_biased);
}

#else // #ifdef __x86_64

// From public domain
Expand Down Expand Up @@ -1058,11 +1064,24 @@ class basic_inode_4 : public basic_inode_4_parent<ArtPolicy> {
const auto key_byte =
static_cast<std::uint8_t>(leaf_type::key(child.get())[depth]);

#if __x86_64
const auto replicated_insert_key =
_mm_set1_epi8(static_cast<char>(key_byte));
const auto keys_in_sse_reg =
_mm_cvtsi32_si128(static_cast<std::int32_t>(keys.integer.load()));
const auto lt_node_key_positions =
_mm_cmplt_epu8(keys_in_sse_reg, replicated_insert_key);
const auto bit_field =
static_cast<unsigned>(_mm_movemask_epi8(lt_node_key_positions)) & 0xFU;
const auto insert_pos_index =
static_cast<unsigned>(__builtin_popcount(bit_field));
#else
const auto first_lt = ((keys.integer & 0xFFU) < key_byte) ? 1 : 0;
const auto second_lt = (((keys.integer >> 8U) & 0xFFU) < key_byte) ? 1 : 0;
const auto third_lt = ((keys.integer >> 16U) & 0xFFU) < key_byte ? 1 : 0;
const auto insert_pos_index =
static_cast<unsigned>(first_lt + second_lt + third_lt);
#endif

for (typename decltype(keys.byte_array)::size_type i = children_count;
i > insert_pos_index; --i) {
Expand Down Expand Up @@ -1256,13 +1275,26 @@ class basic_inode_16 : public basic_inode_16_parent<ArtPolicy> {
const auto key_byte =
static_cast<std::uint8_t>(leaf_type::key(child.get())[depth]);

#if __x86_64
const auto replicated_insert_key =
_mm_set1_epi8(static_cast<char>(key_byte));
const auto keys_in_sse_reg = _mm_cvtsi32_si128(
static_cast<std::int32_t>(source_node->keys.integer.load()));
const auto lt_node_key_positions =
_mm_cmplt_epu8(keys_in_sse_reg, replicated_insert_key);
const auto bit_field =
static_cast<unsigned>(_mm_movemask_epi8(lt_node_key_positions)) & 0xFU;
const auto insert_pos_index =
static_cast<unsigned>(__builtin_popcount(bit_field));
#else
const auto keys_integer = source_node->keys.integer.load();
const auto first_lt = ((keys_integer & 0xFFU) < key_byte) ? 1 : 0;
const auto second_lt = (((keys_integer >> 8U) & 0xFFU) < key_byte) ? 1 : 0;
const auto third_lt = (((keys_integer >> 16U) & 0xFFU) < key_byte) ? 1 : 0;
const auto fourth_lt = (((keys_integer >> 24U) & 0xFFU) < key_byte) ? 1 : 0;
const auto insert_pos_index =
static_cast<unsigned>(first_lt + second_lt + third_lt + fourth_lt);
#endif

unsigned i = 0;
for (; i < insert_pos_index; ++i) {
Expand Down

0 comments on commit ca9033c

Please sign in to comment.