Fix Clang compilation error with Lib ATen for ppc64le (#106446)

This patch fixes errors encountered when compiling with Clang for ppc64le. I used Clang version 15.0.7. The errors are as follows:
```
No matching function for call to 'vec_sel'
No matching function for call to 'vec_splats'
Excess elements in scalar initializer
Use of undeclared identifier 'vec_vsubudm'
Fix for multiple errors within int64_t DEFINE_MEMBER_OP_AND_ONE
```
References:
- https://releases.llvm.org/9.0.0/tools/clang/docs/AttributeReference.html
- https://reviews.llvm.org/D81083

Co-authored-by: Nikita Shulga <2453524+malfet@users.noreply.github.com>
Pull Request resolved: #106446
Approved by: https://github.com/malfet
pytorch · Nov 9, 2023 · 4da5d4b · 4da5d4b
1 parent 289d887
commit 4da5d4b
Show file tree

Hide file tree

Showing 3 changed files with 28 additions and 7 deletions.
diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h
@@ -146,8 +146,8 @@ class Vectorized<ComplexFlt> {
     auto mask_complex = Vectorized<ComplexFlt>(
         vec_mergeh(mask._vec0, mask._vec0), vec_mergeh(mask._vec1, mask._vec1));
     return {
-        vec_sel(a._vec0, b._vec0, mask_complex._vec0),
-        vec_sel(a._vec1, b._vec1, mask_complex._vec1),
+        vec_sel(a._vec0, b._vec0, reinterpret_cast<vbool32>(mask_complex._vec0)),
+        vec_sel(a._vec1, b._vec1, reinterpret_cast<vbool32>(mask_complex._vec1)),
     };
   }
 
@@ -156,8 +156,8 @@ class Vectorized<ComplexFlt> {
       const Vectorized<ComplexFlt>& b,
       const Vectorized<ComplexFlt>& mask) {
     return {
-        vec_sel(a._vec0, b._vec0, mask._vec0),
-        vec_sel(a._vec1, b._vec1, mask._vec1),
+        vec_sel(a._vec0, b._vec0, reinterpret_cast<vbool32>(mask._vec0)),
+        vec_sel(a._vec1, b._vec1, reinterpret_cast<vbool32>(mask._vec1)),
     };
   }
 

diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h
@@ -28,6 +28,7 @@ class Vectorized<int64_t> {
   using vec_internal_type = vint64;
   using vec_internal_mask_type = vbool64;
   using size_type = int;
+  using ElementType = signed long long;
   static constexpr size_type size() {
     return 4;
   }

diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vsx_helpers.h b/aten/src/ATen/cpu/vec/vec256/vsx/vsx_helpers.h
@@ -3,6 +3,22 @@
 #include <c10/macros/Macros.h>
 #include <ATen/cpu/vec/intrinsics.h>
 
+#if defined(__clang__)
+typedef __vector __bool char vbool8;
+typedef __vector __bool short vbool16;
+typedef __vector __bool int vbool32;
+typedef __vector __bool long long vbool64;
+using vint8    = __attribute__((vector_size(16))) signed char;
+using vint16   = __attribute__((vector_size(16))) signed short;
+using vint32   = __attribute__((vector_size(16))) signed int;
+using vint64   = __attribute__((vector_size(16))) signed long long;
+using vuint8   = __attribute__((vector_size(16))) unsigned char;
+using vuint16  = __attribute__((vector_size(16))) unsigned short;
+using vuint32  = __attribute__((vector_size(16))) unsigned int;
+using vuint64  = __attribute__((vector_size(16))) unsigned long long;
+using vfloat32 = __attribute__((vector_size(16))) float;
+using vfloat64 = __attribute__((vector_size(16))) double;
+#else
 using vbool8   =  __attribute__((altivec(vector__))) __attribute__((altivec(bool__))) char;
 using vbool16  =  __attribute__((altivec(vector__))) __attribute__((altivec(bool__))) short;
 using vbool32  =  __attribute__((altivec(vector__))) __attribute__((altivec(bool__))) int;
@@ -17,6 +33,7 @@ using vuint32  =  __attribute__((altivec(vector__)))  unsigned  int;
 using vuint64  =  __attribute__((altivec(vector__)))  unsigned long long;
 using vfloat32 =  __attribute__((altivec(vector__)))  float;
 using vfloat64 =  __attribute__((altivec(vector__)))  double;
+#endif
 
 #if !defined(vec_float)
 C10_ALWAYS_INLINE vfloat32 vec_float(const vint32& vec_in) {
@@ -64,8 +81,7 @@ C10_ALWAYS_INLINE vint32 vec_neg(const vint32& vec_in) {
 }
 
 C10_ALWAYS_INLINE vint64 vec_neg(const vint64& vec_in) {
-  vint64 vint0 = {0, 0};
-  return vec_vsubudm(vint0, vec_in);
+  return -vec_in;
 }
 #endif
 
@@ -84,7 +100,11 @@ vec_sldw_aux(const vfloat32& vec_in0, const vfloat32& vec_in1) {
 #endif
 
 #define vec_not(a) vec_nor(a, a)
-
+#if defined(__clang__) && !defined(vec_splats)
+C10_ALWAYS_INLINE vint64 vec_splats(const int64_t& a) {
+  return vec_splats(a);
+}
+#endif
 // Vectorized min/max which return a if any operand is nan
 template <class T>
 C10_ALWAYS_INLINE T vec_min_nan(const T& a, const T& b) {