Skip to content

Commit

Permalink
[PowerPC][altivec] Optimize codegen of vec_promote
Browse files Browse the repository at this point in the history
According to https://www.ibm.com/docs/en/xl-c-and-cpp-linux/16.1.1?topic=functions-vec-promote, elements not specified by the input index argument are undefined, so we don't need to set these elements to zero.

Reviewed By: nemanjai, #powerpc

Differential Revision: https://reviews.llvm.org/D158487
  • Loading branch information
bzEq committed Aug 24, 2023
1 parent eff105b commit 1ceaec3
Show file tree
Hide file tree
Showing 3 changed files with 315 additions and 10 deletions.
39 changes: 29 additions & 10 deletions clang/lib/Headers/altivec.h
Original file line number Diff line number Diff line change
Expand Up @@ -14647,67 +14647,86 @@ static __inline__ void __ATTRS_o_ai vec_stvrxl(vector float __a, int __b,

/* vec_promote (signed char): store __a in element (__b & 0xf) of the result.
 * Every other element is left undefined (the -1 shuffle indices mark each
 * lane as "don't care"), so callers must not rely on them being zero. */
static __inline__ vector signed char __ATTRS_o_ai vec_promote(signed char __a,
                                                              int __b) {
  const vector signed char __zero = (vector signed char)0;
  vector signed char __res =
      __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1,
                              -1, -1, -1, -1, -1, -1, -1, -1);
  /* Mask the index so it always selects one of the 16 lanes. */
  __res[__b & 0xf] = __a;
  return __res;
}

/* vec_promote (unsigned char): store __a in element (__b & 0xf) of the
 * result. Every other element is left undefined (the -1 shuffle indices mark
 * each lane as "don't care"), so callers must not rely on them being zero. */
static __inline__ vector unsigned char __ATTRS_o_ai
vec_promote(unsigned char __a, int __b) {
  const vector unsigned char __zero = (vector unsigned char)(0);
  vector unsigned char __res =
      __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1,
                              -1, -1, -1, -1, -1, -1, -1, -1);
  /* Mask the index so it always selects one of the 16 lanes. */
  __res[__b & 0xf] = __a;
  return __res;
}

/* vec_promote (short): store __a in element (__b & 0x7) of the result.
 * Every other element is left undefined (the -1 shuffle indices mark each
 * lane as "don't care"), so callers must not rely on them being zero. */
static __inline__ vector short __ATTRS_o_ai vec_promote(short __a, int __b) {
  const vector short __zero = (vector short)(0);
  vector short __res =
      __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1);
  /* Mask the index so it always selects one of the 8 lanes. */
  __res[__b & 0x7] = __a;
  return __res;
}

/* vec_promote (unsigned short): store __a in element (__b & 0x7) of the
 * result. Every other element is left undefined (the -1 shuffle indices mark
 * each lane as "don't care"), so callers must not rely on them being zero. */
static __inline__ vector unsigned short __ATTRS_o_ai
vec_promote(unsigned short __a, int __b) {
  const vector unsigned short __zero = (vector unsigned short)(0);
  vector unsigned short __res =
      __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1);
  /* Mask the index so it always selects one of the 8 lanes. */
  __res[__b & 0x7] = __a;
  return __res;
}

/* vec_promote (int): store __a in element (__b & 0x3) of the result.
 * Every other element is left undefined (the -1 shuffle indices mark each
 * lane as "don't care"), so callers must not rely on them being zero. */
static __inline__ vector int __ATTRS_o_ai vec_promote(int __a, int __b) {
  const vector int __zero = (vector int)(0);
  vector int __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
  /* Mask the index so it always selects one of the 4 lanes. */
  __res[__b & 0x3] = __a;
  return __res;
}

/* vec_promote (unsigned int): store __a in element (__b & 0x3) of the result.
 * Every other element is left undefined (the -1 shuffle indices mark each
 * lane as "don't care"), so callers must not rely on them being zero. */
static __inline__ vector unsigned int __ATTRS_o_ai vec_promote(unsigned int __a,
                                                               int __b) {
  const vector unsigned int __zero = (vector unsigned int)(0);
  vector unsigned int __res =
      __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
  /* Mask the index so it always selects one of the 4 lanes. */
  __res[__b & 0x3] = __a;
  return __res;
}

/* vec_promote (float): store __a in element (__b & 0x3) of the result.
 * Every other element is left undefined (the -1 shuffle indices mark each
 * lane as "don't care"), so callers must not rely on them being zero. */
static __inline__ vector float __ATTRS_o_ai vec_promote(float __a, int __b) {
  const vector float __zero = (vector float)(0);
  vector float __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1);
  /* Mask the index so it always selects one of the 4 lanes. */
  __res[__b & 0x3] = __a;
  return __res;
}

#ifdef __VSX__
/* vec_promote (double): store __a in element (__b & 0x1) of the result.
 * The other element is left undefined (the -1 shuffle indices mark each
 * lane as "don't care"), so callers must not rely on it being zero. */
static __inline__ vector double __ATTRS_o_ai vec_promote(double __a, int __b) {
  const vector double __zero = (vector double)(0);
  vector double __res = __builtin_shufflevector(__zero, __zero, -1, -1);
  /* Mask the index so it always selects one of the 2 lanes. */
  __res[__b & 0x1] = __a;
  return __res;
}

/* vec_promote (signed long long): store __a in element (__b & 0x1) of the
 * result. The other element is left undefined (the -1 shuffle indices mark
 * each lane as "don't care"), so callers must not rely on it being zero. */
static __inline__ vector signed long long __ATTRS_o_ai
vec_promote(signed long long __a, int __b) {
  const vector signed long long __zero = (vector signed long long)(0);
  vector signed long long __res =
      __builtin_shufflevector(__zero, __zero, -1, -1);
  /* Mask the index so it always selects one of the 2 lanes. */
  __res[__b & 0x1] = __a;
  return __res;
}

/* vec_promote (unsigned long long): store __a in element (__b & 0x1) of the
 * result. The other element is left undefined (the -1 shuffle indices mark
 * each lane as "don't care"), so callers must not rely on it being zero. */
static __inline__ vector unsigned long long __ATTRS_o_ai
vec_promote(unsigned long long __a, int __b) {
  const vector unsigned long long __zero = (vector unsigned long long)(0);
  vector unsigned long long __res =
      __builtin_shufflevector(__zero, __zero, -1, -1);
  /* Mask the index so it always selects one of the 2 lanes. */
  __res[__b & 0x1] = __a;
  return __res;
}
Expand Down
10 changes: 10 additions & 0 deletions clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c
Original file line number Diff line number Diff line change
Expand Up @@ -2232,35 +2232,45 @@ res_vuc = vec_xxsldwi(vuc, vuc, 1);

res_vd = vec_promote(d, 0);
// CHECK: store <2 x double> zeroinitializer
// CHECK: store <2 x double> poison
// CHECK: insertelement <2 x double>
// CHECK-LE: store <2 x double> zeroinitializer
// CHECK-LE: store <2 x double> poison
// CHECK-LE: insertelement <2 x double>

res_vsll = vec_promote(sll, 0);
// CHECK: store <2 x i64> zeroinitializer
// CHECK: store <2 x i64> poison
// CHECK: insertelement <2 x i64>
// CHECK-LE: store <2 x i64> zeroinitializer
// CHECK-LE: store <2 x i64> poison
// CHECK-LE: insertelement <2 x i64>

res_vull = vec_promote(ull, 0);
// CHECK: store <2 x i64> zeroinitializer
// CHECK: store <2 x i64> poison
// CHECK: insertelement <2 x i64>
// CHECK-LE: store <2 x i64> zeroinitializer
// CHECK-LE: store <2 x i64> poison
// CHECK-LE: insertelement <2 x i64>

res_vsc = vec_promote(asc[0], 8);
// CHECK: store <16 x i8> zeroinitializer
// CHECK: store <16 x i8> poison
// CHECK: [[IDX:%.*]] = and i32 {{.*}}, 15
// CHECK: insertelement <16 x i8> {{.*}}, i8 {{.*}}, i32 [[IDX]]
// CHECK-LE: store <16 x i8> zeroinitializer
// CHECK-LE: store <16 x i8> poison
// CHECK-LE: [[IDX:%.*]] = and i32 {{.*}}, 15
// CHECK-LE: insertelement <16 x i8> {{.*}}, i8 {{.*}}, i32 [[IDX]]

res_vuc = vec_promote(auc[0], 8);
// CHECK: store <16 x i8> zeroinitializer
// CHECK: store <16 x i8> poison
// CHECK: [[IDX:%.*]] = and i32 {{.*}}, 15
// CHECK: insertelement <16 x i8> {{.*}}, i8 {{.*}}, i32 [[IDX]]
// CHECK-LE: store <16 x i8> zeroinitializer
// CHECK-LE: store <16 x i8> poison
// CHECK-LE: [[IDX:%.*]] = and i32 {{.*}}, 15
// CHECK-LE: insertelement <16 x i8> {{.*}}, i8 {{.*}}, i32 [[IDX]]
}
Expand Down

0 comments on commit 1ceaec3

Please sign in to comment.