Skip to content

Commit

Permalink
fallback implementation cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
ldh4 committed Nov 1, 2023
1 parent 54f2e7f commit b76e1dc
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 51 deletions.
40 changes: 31 additions & 9 deletions simd/src/Kokkos_SIMD_AVX2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1111,6 +1111,11 @@ class simd<std::int32_t, simd_abi::avx2_fixed_size<4>> {
return simd(
_mm_add_epi32(static_cast<__m128i>(lhs), static_cast<__m128i>(rhs)));
}
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*(
    simd const& lhs, simd const& rhs) noexcept {
  // Unwrap both operands to raw 128-bit registers, multiply them with the
  // packed 32-bit intrinsic, and wrap the product back into a simd.
  __m128i const left_reg  = static_cast<__m128i>(lhs);
  __m128i const right_reg = static_cast<__m128i>(rhs);
  return simd(_mm_mullo_epi32(left_reg, right_reg));
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>(
simd const& lhs, int rhs) noexcept {
Expand Down Expand Up @@ -1278,6 +1283,13 @@ class simd<std::int64_t, simd_abi::avx2_fixed_size<4>> {
_mm256_add_epi64(static_cast<__m256i>(lhs), static_cast<__m256i>(rhs)));
}

// fallback simd multiplication using generator constructor
// multiplying vectors of 64-bit signed integers is not available in AVX2
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*(
    simd const& lhs, simd const& rhs) noexcept {
  // Scalar fallback: the generator constructor is handed a callable that
  // yields the product of the matching lanes of lhs and rhs.
  auto const lane_product = [&](std::size_t lane) {
    return lhs[lane] * rhs[lane];
  };
  return simd(lane_product);
}

// AVX2 only has eq and gt comparisons for int64
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type
operator==(simd const& lhs, simd const& rhs) noexcept {
Expand Down Expand Up @@ -1306,17 +1318,19 @@ class simd<std::int64_t, simd_abi::avx2_fixed_size<4>> {
return !(lhs == rhs);
}

// fallback simd shift right arithmetic using generator constructor
// Shift right arithmetic for 64-bit packed ints is not available in AVX2
// [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd(
// simd const& lhs, int rhs) noexcept {
// return simd(_mm256_srai_epi64(static_cast<__m256i>(lhs), rhs));
// }
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>(
    simd const& lhs, int rhs) noexcept {
  // Scalar fallback: every lane of lhs is shifted right by the same count
  // rhs, and the results are gathered through the generator constructor.
  auto const lane_shift = [&](std::size_t lane) { return lhs[lane] >> rhs; };
  return simd(lane_shift);
}

// [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd(
// simd const& lhs, simd const& rhs) noexcept {
// return simd(_mm256_srav_epi64(static_cast<__m256i>(lhs),
// static_cast<__m256i>(rhs))));
// }
// fallback simd shift right arithmetic using generator constructor
// Shift right arithmetic for 64-bit packed ints is not available in AVX2
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>(
    simd const& lhs, simd const& rhs) noexcept {
  // Scalar fallback: lane k of lhs is shifted right by lane k of rhs, and
  // the results are gathered through the generator constructor.
  auto const lane_shift = [&](std::size_t lane) {
    return lhs[lane] >> rhs[lane];
  };
  return simd(lane_shift);
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<(
simd const& lhs, int rhs) noexcept {
Expand Down Expand Up @@ -1460,6 +1474,14 @@ class simd<std::uint64_t, simd_abi::avx2_fixed_size<4>> {
return simd(
_mm256_sub_epi64(static_cast<__m256i>(lhs), static_cast<__m256i>(rhs)));
}

// fallback simd multiplication using generator constructor
// multiplying vectors of 64-bit unsigned integers is not available in AVX2
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*(
    simd const& lhs, simd const& rhs) noexcept {
  // Scalar fallback: the generator constructor is handed a callable that
  // yields the product of the matching lanes of lhs and rhs.
  auto const lane_product = [&](std::size_t lane) {
    return lhs[lane] * rhs[lane];
  };
  return simd(lane_product);
}

[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>(
simd const& lhs, int rhs) noexcept {
return _mm256_srli_epi64(static_cast<__m256i>(lhs), rhs);
Expand Down
42 changes: 0 additions & 42 deletions simd/src/Kokkos_SIMD_Common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,48 +117,6 @@ template <class T>
return const_where_expression(mask, value);
}

// fallback simd multiplication using generator constructor
// At the time of this writing, this fallback is only used
// to multiply vectors of 64-bit signed integers for the AVX2 backend

template <class T, class Abi>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd<T, Abi> operator*(
    simd<T, Abi> const& lhs, simd<T, Abi> const& rhs) {
  // Generic lane-by-lane product, assembled via the generator constructor.
  using result_type       = simd<T, Abi>;
  auto const lane_product = [&](std::size_t lane) {
    return lhs[lane] * rhs[lane];
  };
  return result_type(lane_product);
}

// fallback simd shift using generator constructor
// At the time of this edit, only the fallback for shifting vectors of
// 64-bit signed integers in the AVX2 backend is used

template <typename T, typename Abi,
          typename = std::enable_if_t<std::is_integral_v<T>>>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd<T, Abi> operator>>(
    simd<T, Abi> const& lhs, int rhs) {
  // Integral element types only: shift each lane of lhs right by the single
  // scalar count rhs and collect the lanes via the generator constructor.
  using result_type     = simd<T, Abi>;
  auto const lane_shift = [&](std::size_t lane) { return lhs[lane] >> rhs; };
  return result_type(lane_shift);
}

template <typename T, typename Abi,
          typename = std::enable_if_t<std::is_integral_v<T>>>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd<T, Abi> operator<<(
    simd<T, Abi> const& lhs, int rhs) {
  // Integral element types only: shift each lane of lhs left by the single
  // scalar count rhs and collect the lanes via the generator constructor.
  using result_type     = simd<T, Abi>;
  auto const lane_shift = [&](std::size_t lane) { return lhs[lane] << rhs; };
  return result_type(lane_shift);
}

template <typename T, typename Abi,
          typename = std::enable_if_t<std::is_integral_v<T>>>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd<T, Abi> operator>>(
    simd<T, Abi> const& lhs, simd<T, Abi> const& rhs) {
  // Integral element types only: lane k of lhs is shifted right by lane k of
  // rhs; the lanes are collected via the generator constructor.
  using result_type     = simd<T, Abi>;
  auto const lane_shift = [&](std::size_t lane) {
    return lhs[lane] >> rhs[lane];
  };
  return result_type(lane_shift);
}

template <typename T, typename Abi,
          typename = std::enable_if_t<std::is_integral_v<T>>>
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd<T, Abi> operator<<(
    simd<T, Abi> const& lhs, simd<T, Abi> const& rhs) {
  // Integral element types only: lane k of lhs is shifted left by lane k of
  // rhs; the lanes are collected via the generator constructor.
  using result_type     = simd<T, Abi>;
  auto const lane_shift = [&](std::size_t lane) {
    return lhs[lane] << rhs[lane];
  };
  return result_type(lane_shift);
}

// The code below provides:
// operator@(simd<T, Abi>, Arithmetic)
// operator@(Arithmetic, simd<T, Abi>)
Expand Down

0 comments on commit b76e1dc

Please sign in to comment.