Skip to content

Commit

Permalink
[PowerPC] [Clang] [AltiVec] The second parameter of the vec_sr function should be modulo the number of bits in the element
Browse files Browse the repository at this point in the history

The second parameter of the vec_sr function represents the number of shift bits, and it should be taken modulo the number of bits in the element, as vec_sl already does.
This is actually required by the ABI:

Each element of the result vector is the result of logically right shifting the corresponding
element of ARG1 by the number of bits specified by the value of the corresponding
element of ARG2, modulo the number of bits in the element. The bits that are shifted out
are replaced by zeros.

Differential Revision: https://reviews.llvm.org/D54087

llvm-svn: 346471
  • Loading branch information
wuzish committed Nov 9, 2018
1 parent e2f6896 commit 71c35e1
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 57 deletions.
60 changes: 31 additions & 29 deletions clang/lib/Headers/altivec.h
Expand Up @@ -9492,49 +9492,51 @@ vec_splat_u32(signed char __a) {

/* vec_sr */

static __inline__ vector signed char __ATTRS_o_ai
vec_sr(vector signed char __a, vector unsigned char __b) {
vector unsigned char __res = (vector unsigned char)__a >> __b;
return (vector signed char)__res;
}

// vec_sr does modulo arithmetic on __b first, so __b is allowed to be more
// than the length of __a.
static __inline__ vector unsigned char __ATTRS_o_ai
vec_sr(vector unsigned char __a, vector unsigned char __b) {
return __a >> __b;
return __a >>
(__b % (vector unsigned char)(sizeof(unsigned char) * __CHAR_BIT__));
}

// Signed variant delegates to the unsigned overload so the shift count is
// likewise reduced modulo the element width before shifting.
static __inline__ vector signed char __ATTRS_o_ai
vec_sr(vector signed char __a, vector unsigned char __b) {
  return (vector signed char)vec_sr((vector unsigned char)__a, __b);
}

static __inline__ vector unsigned short __ATTRS_o_ai
vec_sr(vector unsigned short __a, vector unsigned short __b) {
  // Shift count is taken modulo the element width (16 bits), per the ABI.
  return __a >>
         (__b % (vector unsigned short)(sizeof(unsigned short) * __CHAR_BIT__));
}

static __inline__ vector signed int __ATTRS_o_ai
vec_sr(vector signed int __a, vector unsigned int __b) {
vector unsigned int __res = (vector unsigned int)__a >> __b;
return (vector signed int)__res;
static __inline__ vector short __ATTRS_o_ai vec_sr(vector short __a,
vector unsigned short __b) {
return (vector short)vec_sr((vector unsigned short)__a, __b);
}

static __inline__ vector unsigned int __ATTRS_o_ai
vec_sr(vector unsigned int __a, vector unsigned int __b) {
  // Shift count is taken modulo the element width (32 bits), per the ABI.
  return __a >>
         (__b % (vector unsigned int)(sizeof(unsigned int) * __CHAR_BIT__));
}

#ifdef __POWER8_VECTOR__
static __inline__ vector signed long long __ATTRS_o_ai
vec_sr(vector signed long long __a, vector unsigned long long __b) {
vector unsigned long long __res = (vector unsigned long long)__a >> __b;
return (vector signed long long)__res;
static __inline__ vector int __ATTRS_o_ai vec_sr(vector int __a,
vector unsigned int __b) {
return (vector int)vec_sr((vector unsigned int)__a, __b);
}

#ifdef __POWER8_VECTOR__
static __inline__ vector unsigned long long __ATTRS_o_ai
vec_sr(vector unsigned long long __a, vector unsigned long long __b) {
  // Shift count is taken modulo the element width (64 bits), per the ABI.
  return __a >> (__b % (vector unsigned long long)(sizeof(unsigned long long) *
                                                   __CHAR_BIT__));
}

static __inline__ vector long long __ATTRS_o_ai
vec_sr(vector long long __a, vector unsigned long long __b) {
  // Reuse the unsigned overload for the shift; only the result needs to be
  // cast back to the signed element type.
  vector unsigned long long __shifted =
      vec_sr((vector unsigned long long)__a, __b);
  return (vector long long)__shifted;
}
#endif

Expand All @@ -9544,12 +9546,12 @@ vec_sr(vector unsigned long long __a, vector unsigned long long __b) {

static __inline__ vector signed char __ATTRS_o_ai
vec_vsrb(vector signed char __a, vector unsigned char __b) {
  // Alias for vec_sr on byte elements; inherits the modulo-by-element-width
  // handling of the shift count.
  return vec_sr(__a, __b);
}

static __inline__ vector unsigned char __ATTRS_o_ai
vec_vsrb(vector unsigned char __a, vector unsigned char __b) {
  // Alias for vec_sr on byte elements; inherits the modulo-by-element-width
  // handling of the shift count.
  return vec_sr(__a, __b);
}

/* vec_vsrh */
Expand All @@ -9558,12 +9560,12 @@ vec_vsrb(vector unsigned char __a, vector unsigned char __b) {

static __inline__ vector short __ATTRS_o_ai
vec_vsrh(vector short __a, vector unsigned short __b) {
  // Alias for vec_sr on halfword elements; inherits the
  // modulo-by-element-width handling of the shift count.
  return vec_sr(__a, __b);
}

static __inline__ vector unsigned short __ATTRS_o_ai
vec_vsrh(vector unsigned short __a, vector unsigned short __b) {
  // Alias for vec_sr on halfword elements; inherits the
  // modulo-by-element-width handling of the shift count.
  return vec_sr(__a, __b);
}

/* vec_vsrw */
Expand All @@ -9572,12 +9574,12 @@ vec_vsrh(vector unsigned short __a, vector unsigned short __b) {

static __inline__ vector int __ATTRS_o_ai vec_vsrw(vector int __a,
                                                   vector unsigned int __b) {
  // Alias for vec_sr on word elements; inherits the modulo-by-element-width
  // handling of the shift count.
  return vec_sr(__a, __b);
}

static __inline__ vector unsigned int __ATTRS_o_ai
vec_vsrw(vector unsigned int __a, vector unsigned int __b) {
  // Alias for vec_sr on word elements; inherits the modulo-by-element-width
  // handling of the shift count.
  return vec_sr(__a, __b);
}

/* vec_sra */
Expand Down
72 changes: 48 additions & 24 deletions clang/test/CodeGen/builtins-ppc-altivec.c
Expand Up @@ -4256,52 +4256,76 @@ void test6() {

/* vec_sr */
res_vsc = vec_sr(vsc, vuc);
// CHECK: lshr <16 x i8>
// CHECK-LE: lshr <16 x i8>
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
// CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
// CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]

res_vuc = vec_sr(vuc, vuc);
// CHECK: lshr <16 x i8>
// CHECK-LE: lshr <16 x i8>
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
// CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
// CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]

res_vs = vec_sr(vs, vus);
// CHECK: lshr <8 x i16>
// CHECK-LE: lshr <8 x i16>
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
// CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
// CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]

res_vus = vec_sr(vus, vus);
// CHECK: lshr <8 x i16>
// CHECK-LE: lshr <8 x i16>
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
// CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
// CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]

res_vi = vec_sr(vi, vui);
// CHECK: lshr <4 x i32>
// CHECK-LE: lshr <4 x i32>
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
// CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
// CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]

res_vui = vec_sr(vui, vui);
// CHECK: lshr <4 x i32>
// CHECK-LE: lshr <4 x i32>
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
// CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
// CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]

res_vsc = vec_vsrb(vsc, vuc);
// CHECK: shr <16 x i8>
// CHECK-LE: shr <16 x i8>
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
// CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
// CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]

res_vuc = vec_vsrb(vuc, vuc);
// CHECK: shr <16 x i8>
// CHECK-LE: shr <16 x i8>
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
// CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
// CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]

res_vs = vec_vsrh(vs, vus);
// CHECK: shr <8 x i16>
// CHECK-LE: shr <8 x i16>
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
// CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
// CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]

res_vus = vec_vsrh(vus, vus);
// CHECK: shr <8 x i16>
// CHECK-LE: shr <8 x i16>
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
// CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
// CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]

res_vi = vec_vsrw(vi, vui);
// CHECK: shr <4 x i32>
// CHECK-LE: shr <4 x i32>
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
// CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
// CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]

res_vui = vec_vsrw(vui, vui);
// CHECK: shr <4 x i32>
// CHECK-LE: shr <4 x i32>
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
// CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
// CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]

/* vec_sra */
res_vsc = vec_sra(vsc, vuc);
Expand Down
12 changes: 8 additions & 4 deletions clang/test/CodeGen/builtins-ppc-p8vector.c
Expand Up @@ -1066,13 +1066,17 @@ void test1() {

/* vec_sr */
res_vsll = vec_sr(vsll, vull);
// CHECK: lshr <2 x i64>
// CHECK-LE: lshr <2 x i64>
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, <i64 64, i64 64>
// CHECK: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]]
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, <i64 64, i64 64>
// CHECK-LE: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]]
// CHECK-PPC: error: call to 'vec_sr' is ambiguous

res_vull = vec_sr(vull, vull);
// CHECK: lshr <2 x i64>
// CHECK-LE: lshr <2 x i64>
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, <i64 64, i64 64>
// CHECK: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]]
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, <i64 64, i64 64>
// CHECK-LE: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]]
// CHECK-PPC: error: call to 'vec_sr' is ambiguous

/* vec_sra */
Expand Down

0 comments on commit 71c35e1

Please sign in to comment.