Skip to content

Commit

Permalink
Remove unnecessary __at_align32__ in int_elementwise_binary_256 (#45470)
Browse files Browse the repository at this point in the history
Summary:
The `__at_align32__` qualifiers were added in 4b3046e based on a
misunderstanding of `_mm256_storeu_si256`, but they
are actually unnecessary. The [documentation][1] of `_mm256_storeu_si256` says:

> Moves values from a integer vector to an **unaligned** memory location.

Since the store does not require an aligned destination, it's better to remove
the `__at_align32__` qualifiers to give the compiler and linker more flexibility to optimize.

[1]: https://software.intel.com/content/www/us/en/develop/documentation/cpp-compiler-developer-guide-and-reference/top/compiler-reference/intrinsics/intrinsics-for-intel-advanced-vector-extensions/intrinsics-for-load-and-store-operations-1/mm256-storeu-si256.html

Close #44810

Pull Request resolved: #45470

Reviewed By: zhangguanheng66

Differential Revision: D23980060

Pulled By: glaringlee

fbshipit-source-id: 12b3558b76c6e81d88a72081060fdb8674464768
  • Loading branch information
xuhdev authored and facebook-github-bot committed Sep 29, 2020
1 parent 6e55a26 commit 0df99ad
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions aten/src/ATen/cpu/vec256/vec256_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ class Vec256<int64_t> : public Vec256i {
}
void store(void* ptr, int count = size()) const {
if (count == size()) {
// ptr need not be aligned here. See
// https://software.intel.com/content/www/us/en/develop/documentation/cpp-compiler-developer-guide-and-reference/top/compiler-reference/intrinsics/intrinsics-for-intel-advanced-vector-extensions/intrinsics-for-load-and-store-operations-1/mm256-storeu-si256.html
_mm256_storeu_si256(reinterpret_cast<__m256i*>(ptr), values);
} else if (count > 0) {
__at_align32__ int64_t tmp_values[size()];
Expand Down Expand Up @@ -228,6 +230,8 @@ class Vec256<int32_t> : public Vec256i {
}
void store(void* ptr, int count = size()) const {
if (count == size()) {
// ptr need not be aligned here. See
// https://software.intel.com/content/www/us/en/develop/documentation/cpp-compiler-developer-guide-and-reference/top/compiler-reference/intrinsics/intrinsics-for-intel-advanced-vector-extensions/intrinsics-for-load-and-store-operations-1/mm256-storeu-si256.html
_mm256_storeu_si256(reinterpret_cast<__m256i*>(ptr), values);
} else if (count > 0) {
__at_align32__ int32_t tmp_values[size()];
Expand Down Expand Up @@ -449,6 +453,8 @@ class Vec256<int16_t> : public Vec256i {
}
void store(void* ptr, int count = size()) const {
if (count == size()) {
// ptr need not be aligned here. See
// https://software.intel.com/content/www/us/en/develop/documentation/cpp-compiler-developer-guide-and-reference/top/compiler-reference/intrinsics/intrinsics-for-intel-advanced-vector-extensions/intrinsics-for-load-and-store-operations-1/mm256-storeu-si256.html
_mm256_storeu_si256(reinterpret_cast<__m256i*>(ptr), values);
} else if (count > 0) {
__at_align32__ int16_t tmp_values[size()];
Expand Down Expand Up @@ -699,6 +705,8 @@ class Vec256<int8_t> : public Vec256i {
}
void store(void* ptr, int count = size()) const {
if (count == size()) {
// ptr need not be aligned here. See
// https://software.intel.com/content/www/us/en/develop/documentation/cpp-compiler-developer-guide-and-reference/top/compiler-reference/intrinsics/intrinsics-for-intel-advanced-vector-extensions/intrinsics-for-load-and-store-operations-1/mm256-storeu-si256.html
_mm256_storeu_si256(reinterpret_cast<__m256i*>(ptr), values);
} else if (count > 0) {
__at_align32__ int8_t tmp_values[size()];
Expand Down Expand Up @@ -879,8 +887,8 @@ Vec256<int16_t> inline operator*(const Vec256<int16_t>& a, const Vec256<int16_t>

template <typename T, typename Op>
Vec256<T> inline int_elementwise_binary_256(const Vec256<T>& a, const Vec256<T>& b, Op op) {
__at_align32__ T values_a[Vec256<T>::size()];
__at_align32__ T values_b[Vec256<T>::size()];
T values_a[Vec256<T>::size()];
T values_b[Vec256<T>::size()];
a.store(values_a);
b.store(values_b);
for (int i = 0; i != Vec256<T>::size(); i++) {
Expand Down

0 comments on commit 0df99ad

Please sign in to comment.