Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE] Adds missing extract implementations for AVX512. #2926

Merged
merged 1 commit into from Jan 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 10 additions & 0 deletions include/seqan3/utility/simd/algorithm.hpp
Expand Up @@ -161,6 +161,8 @@ constexpr simd_t extract_half(simd_t const & src)
return detail::extract_half_sse4<index>(src);
else if constexpr (simd_traits<simd_t>::max_length == 32) // AVX2
return detail::extract_half_avx2<index>(src);
else if constexpr (simd_traits<simd_t>::max_length == 64) // AVX512
remyschwab marked this conversation as resolved.
Show resolved Hide resolved
return detail::extract_half_avx512<index>(src);
else // Anything else
return detail::extract_impl<2>(src, index);
}
Expand Down Expand Up @@ -210,6 +212,10 @@ constexpr simd_t extract_quarter(simd_t const & src)
return detail::extract_quarter_sse4<index>(src);
else if constexpr (simd_traits<simd_t>::max_length == 32) // AVX2
return detail::extract_quarter_avx2<index>(src);
#if defined(__AVX512DQ__)
remyschwab marked this conversation as resolved.
Show resolved Hide resolved
else if constexpr (simd_traits<simd_t>::max_length == 64) // AVX512
return detail::extract_quarter_avx512<index>(src);
#endif // defined(__AVX512DQ__)
else // Anything else
return detail::extract_impl<4>(src, index);
}
Expand Down Expand Up @@ -257,6 +263,10 @@ constexpr simd_t extract_eighth(simd_t const & src)
return detail::extract_eighth_sse4<index>(src);
else if constexpr (simd_traits<simd_t>::max_length == 32) // AVX2
return detail::extract_eighth_avx2<index>(src);
#if defined(__AVX512DQ__)
remyschwab marked this conversation as resolved.
Show resolved Hide resolved
else if constexpr (simd_traits<simd_t>::max_length == 64) // AVX512
return detail::extract_eighth_avx512<index>(src);
#endif // defined(__AVX512DQ__)
else // Anything else
return detail::extract_impl<8>(src, index);
}
Expand Down
28 changes: 22 additions & 6 deletions include/seqan3/utility/simd/detail/simd_algorithm_avx512.hpp
Expand Up @@ -253,17 +253,33 @@ constexpr target_simd_t upcast_unsigned_avx512(source_simd_t const & src)
}
}

// TODO: not implemented and used yet, if you implement it don't forget to add it to seqan3::detail::extract_half
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_half_avx512(simd_t const & src);
constexpr simd_t extract_half_avx512(simd_t const & src)
{
return reinterpret_cast<simd_t>(_mm512_castsi256_si512(
_mm512_extracti64x4_epi64(reinterpret_cast<__m512i const &>(src), index)));
}

// TODO: not implemented and used yet, if you implement it don't forget to add it to seqan3::detail::extract_quarter
#if defined(__AVX512DQ__)
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_quarter_avx512(simd_t const & src);
constexpr simd_t extract_quarter_avx512(simd_t const & src)
Comment on lines +263 to +265
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are extract_quarter/eighth_avx512 wrapped in #if defined(__AVX512DQ__) but extract_half_avx512 isn't?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tests existed already (for SSE and AVX2). I ran the tests on icebear with AVX512 support to check the results.

The additional check is because the extract quarter requires an intrinsic that is only available by the AVX-512 CD subset. Not all AVX512 platforms might have this additional intrinsics subset, but still you can use other AVX512 intrinsics. The extract half doesn't need it because it only works with AVX512-F which is the foundational intrinsics set used by all that support AVX512.

{
return reinterpret_cast<simd_t>(_mm512_castsi128_si512(
_mm512_extracti64x2_epi64(reinterpret_cast<__m512i const &>(src), index)));
}

// TODO: not implemented and used yet, if you implement it don't forget to add it to seqan3::detail::extract_eighth
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_eighth_avx512(simd_t const & src);
constexpr simd_t extract_eighth_avx512(simd_t const & src)
{
__m512i tmp = reinterpret_cast<__m512i const &>(src);

// for uneven index exchange higher 64 bits with lower 64 bits for each 128 bit lane.
if constexpr (index % 2 == 1)
tmp = _mm512_shuffle_epi32(tmp, 0b0100'1110); // := [1, 0, 3, 2].

return reinterpret_cast<simd_t>(_mm512_castsi128_si512(_mm512_extracti64x2_epi64(tmp, index / 2)));
}
#endif // defined(__AVX512DQ__)

} // namespace seqan3::detail

Expand Down