diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index 44b5a24de89f1..4971631c50f41 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -14647,67 +14647,86 @@ static __inline__ void __ATTRS_o_ai vec_stvrxl(vector float __a, int __b, static __inline__ vector signed char __ATTRS_o_ai vec_promote(signed char __a, int __b) { - vector signed char __res = (vector signed char)(0); + const vector signed char __zero = (vector signed char)0; + vector signed char __res = + __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1); __res[__b & 0xf] = __a; return __res; } static __inline__ vector unsigned char __ATTRS_o_ai vec_promote(unsigned char __a, int __b) { - vector unsigned char __res = (vector unsigned char)(0); + const vector unsigned char __zero = (vector unsigned char)(0); + vector unsigned char __res = + __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1); __res[__b & 0xf] = __a; return __res; } static __inline__ vector short __ATTRS_o_ai vec_promote(short __a, int __b) { - vector short __res = (vector short)(0); + const vector short __zero = (vector short)(0); + vector short __res = + __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1); __res[__b & 0x7] = __a; return __res; } static __inline__ vector unsigned short __ATTRS_o_ai vec_promote(unsigned short __a, int __b) { - vector unsigned short __res = (vector unsigned short)(0); + const vector unsigned short __zero = (vector unsigned short)(0); + vector unsigned short __res = + __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1); __res[__b & 0x7] = __a; return __res; } static __inline__ vector int __ATTRS_o_ai vec_promote(int __a, int __b) { - vector int __res = (vector int)(0); + const vector int __zero = (vector int)(0); + vector int __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1); __res[__b & 0x3] = __a; 
return __res; } static __inline__ vector unsigned int __ATTRS_o_ai vec_promote(unsigned int __a, int __b) { - vector unsigned int __res = (vector unsigned int)(0); + const vector unsigned int __zero = (vector unsigned int)(0); + vector unsigned int __res = + __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1); __res[__b & 0x3] = __a; return __res; } static __inline__ vector float __ATTRS_o_ai vec_promote(float __a, int __b) { - vector float __res = (vector float)(0); + const vector float __zero = (vector float)(0); + vector float __res = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1); __res[__b & 0x3] = __a; return __res; } #ifdef __VSX__ static __inline__ vector double __ATTRS_o_ai vec_promote(double __a, int __b) { - vector double __res = (vector double)(0); + const vector double __zero = (vector double)(0); + vector double __res = __builtin_shufflevector(__zero, __zero, -1, -1); __res[__b & 0x1] = __a; return __res; } static __inline__ vector signed long long __ATTRS_o_ai vec_promote(signed long long __a, int __b) { - vector signed long long __res = (vector signed long long)(0); + const vector signed long long __zero = (vector signed long long)(0); + vector signed long long __res = + __builtin_shufflevector(__zero, __zero, -1, -1); __res[__b & 0x1] = __a; return __res; } static __inline__ vector unsigned long long __ATTRS_o_ai vec_promote(unsigned long long __a, int __b) { - vector unsigned long long __res = (vector unsigned long long)(0); + const vector unsigned long long __zero = (vector unsigned long long)(0); + vector unsigned long long __res = + __builtin_shufflevector(__zero, __zero, -1, -1); __res[__b & 0x1] = __a; return __res; } diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c b/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c index cca2a8b2f55bd..1fe56a820512d 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c @@ -2232,35 +2232,45 @@ res_vuc = vec_xxsldwi(vuc, vuc, 1); 
res_vd = vec_promote(d, 0); // CHECK: store <2 x double> zeroinitializer +// CHECK: store <2 x double> poison // CHECK: insertelement <2 x double> // CHECK-LE: store <2 x double> zeroinitializer +// CHECK-LE: store <2 x double> poison // CHECK-LE: insertelement <2 x double> res_vsll = vec_promote(sll, 0); // CHECK: store <2 x i64> zeroinitializer +// CHECK: store <2 x i64> poison // CHECK: insertelement <2 x i64> // CHECK-LE: store <2 x i64> zeroinitializer +// CHECK-LE: store <2 x i64> poison // CHECK-LE: insertelement <2 x i64> res_vull = vec_promote(ull, 0); // CHECK: store <2 x i64> zeroinitializer +// CHECK: store <2 x i64> poison // CHECK: insertelement <2 x i64> // CHECK-LE: store <2 x i64> zeroinitializer +// CHECK-LE: store <2 x i64> poison // CHECK-LE: insertelement <2 x i64> res_vsc = vec_promote(asc[0], 8); // CHECK: store <16 x i8> zeroinitializer +// CHECK: store <16 x i8> poison // CHECK: [[IDX:%.*]] = and i32 {{.*}}, 15 // CHECK: insertelement <16 x i8> {{.*}}, i8 {{.*}}, i32 [[IDX]] // CHECK-LE: store <16 x i8> zeroinitializer +// CHECK-LE: store <16 x i8> poison // CHECK-LE: [[IDX:%.*]] = and i32 {{.*}}, 15 // CHECK-LE: insertelement <16 x i8> {{.*}}, i8 {{.*}}, i32 [[IDX]] res_vuc = vec_promote(auc[0], 8); // CHECK: store <16 x i8> zeroinitializer +// CHECK: store <16 x i8> poison // CHECK: [[IDX:%.*]] = and i32 {{.*}}, 15 // CHECK: insertelement <16 x i8> {{.*}}, i8 {{.*}}, i32 [[IDX]] // CHECK-LE: store <16 x i8> zeroinitializer +// CHECK-LE: store <16 x i8> poison // CHECK-LE: [[IDX:%.*]] = and i32 {{.*}}, 15 // CHECK-LE: insertelement <16 x i8> {{.*}}, i8 {{.*}}, i32 [[IDX]] } diff --git a/llvm/test/CodeGen/PowerPC/vec-promote.ll b/llvm/test/CodeGen/PowerPC/vec-promote.ll new file mode 100644 index 0000000000000..1fbb0e8f4205e --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/vec-promote.ll @@ -0,0 +1,276 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc 
-mtriple=powerpc64-unknown-unknown -verify-machineinstrs -mcpu=pwr8 \ +; RUN: < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs -mcpu=pwr8 \ +; RUN: < %s | FileCheck %s --check-prefix=CHECK-LE + +define noundef <2 x double> @vec_promote_double_zeroed(ptr nocapture noundef readonly %p) { +; CHECK-BE-LABEL: vec_promote_double_zeroed: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lfd 0, 0(3) +; CHECK-BE-NEXT: xxlxor 1, 1, 1 +; CHECK-BE-NEXT: xxmrghd 34, 0, 1 +; CHECK-BE-NEXT: blr +; +; CHECK-LE-LABEL: vec_promote_double_zeroed: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: lfd 0, 0(3) +; CHECK-LE-NEXT: xxlxor 1, 1, 1 +; CHECK-LE-NEXT: xxmrghd 34, 1, 0 +; CHECK-LE-NEXT: blr +entry: + %0 = load double, ptr %p, align 8 + %vecins.i = insertelement <2 x double> <double poison, double 0.000000e+00>, double %0, i64 0 + ret <2 x double> %vecins.i +} + +define noundef <2 x double> @vec_promote_double(ptr nocapture noundef readonly %p) { +; CHECK-BE-LABEL: vec_promote_double: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvdsx 34, 0, 3 +; CHECK-BE-NEXT: blr +; +; CHECK-LE-LABEL: vec_promote_double: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: lxvdsx 34, 0, 3 +; CHECK-LE-NEXT: blr +entry: + %0 = load double, ptr %p, align 8 + %vecins.i = insertelement <2 x double> poison, double %0, i64 0 + ret <2 x double> %vecins.i +} + +define noundef <4 x float> @vec_promote_float_zeroed(ptr nocapture noundef readonly %p) { +; CHECK-BE-LABEL: vec_promote_float_zeroed: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lfs 1, 0(3) +; CHECK-BE-NEXT: xxlxor 0, 0, 0 +; CHECK-BE-NEXT: xxspltd 2, 0, 0 +; CHECK-BE-NEXT: xxmrghd 0, 1, 0 +; CHECK-BE-NEXT: xvcvdpsp 34, 2 +; CHECK-BE-NEXT: xvcvdpsp 35, 0 +; CHECK-BE-NEXT: vmrgew 2, 3, 2 +; CHECK-BE-NEXT: blr +; +; CHECK-LE-LABEL: vec_promote_float_zeroed: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: lfs 1, 0(3) +; CHECK-LE-NEXT: xxlxor 0, 0, 0 +; CHECK-LE-NEXT: xxspltd 2, 0, 0 +; CHECK-LE-NEXT: xxmrghd 0, 0, 
1 +; CHECK-LE-NEXT: xvcvdpsp 34, 2 +; CHECK-LE-NEXT: xvcvdpsp 35, 0 +; CHECK-LE-NEXT: vmrgew 2, 2, 3 +; CHECK-LE-NEXT: blr +entry: + %0 = load float, ptr %p, align 8 + %vecins.i = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %0, i64 0 + ret <4 x float> %vecins.i +} + +define noundef <4 x float> @vec_promote_float(ptr nocapture noundef readonly %p) { +; CHECK-BE-LABEL: vec_promote_float: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lfiwzx 0, 0, 3 +; CHECK-BE-NEXT: xxspltw 34, 0, 1 +; CHECK-BE-NEXT: blr +; +; CHECK-LE-LABEL: vec_promote_float: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: lfiwzx 0, 0, 3 +; CHECK-LE-NEXT: xxspltw 34, 0, 1 +; CHECK-LE-NEXT: blr +entry: + %0 = load float, ptr %p, align 8 + %vecins.i = insertelement <4 x float> poison, float %0, i64 0 + ret <4 x float> %vecins.i +} + +define noundef <2 x i64> @vec_promote_long_long_zeroed(ptr nocapture noundef readonly %p) { +; CHECK-BE-LABEL: vec_promote_long_long_zeroed: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: ld 3, 0(3) +; CHECK-BE-NEXT: li 4, 0 +; CHECK-BE-NEXT: mtfprd 0, 4 +; CHECK-BE-NEXT: mtfprd 1, 3 +; CHECK-BE-NEXT: xxmrghd 34, 1, 0 +; CHECK-BE-NEXT: blr +; +; CHECK-LE-LABEL: vec_promote_long_long_zeroed: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: ld 3, 0(3) +; CHECK-LE-NEXT: li 4, 0 +; CHECK-LE-NEXT: mtfprd 0, 4 +; CHECK-LE-NEXT: mtfprd 1, 3 +; CHECK-LE-NEXT: xxmrghd 34, 0, 1 +; CHECK-LE-NEXT: blr +entry: + %0 = load i64, ptr %p, align 8 + %vecins.i = insertelement <2 x i64> <i64 poison, i64 0>, i64 %0, i64 0 + ret <2 x i64> %vecins.i +} + +define noundef <2 x i64> @vec_promote_long_long(ptr nocapture noundef readonly %p) { +; CHECK-BE-LABEL: vec_promote_long_long: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvdsx 34, 0, 3 +; CHECK-BE-NEXT: blr +; +; CHECK-LE-LABEL: vec_promote_long_long: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: lxvdsx 34, 0, 3 +; CHECK-LE-NEXT: blr +entry: + %0 = load i64, ptr %p, align 8 + %vecins.i = insertelement <2 x i64> poison, i64 %0, i64 0 + ret <2 x i64> 
%vecins.i +} + +define noundef <4 x i32> @vec_promote_int_zeroed(ptr nocapture noundef readonly %p) { +; CHECK-BE-LABEL: vec_promote_int_zeroed: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lwz 3, 0(3) +; CHECK-BE-NEXT: li 4, 0 +; CHECK-BE-NEXT: li 5, 0 +; CHECK-BE-NEXT: rldimi 5, 5, 32, 0 +; CHECK-BE-NEXT: rldimi 4, 3, 32, 0 +; CHECK-BE-NEXT: mtfprd 1, 5 +; CHECK-BE-NEXT: mtfprd 0, 4 +; CHECK-BE-NEXT: xxmrghd 34, 0, 1 +; CHECK-BE-NEXT: blr +; +; CHECK-LE-LABEL: vec_promote_int_zeroed: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: lwz 3, 0(3) +; CHECK-LE-NEXT: li 4, 0 +; CHECK-LE-NEXT: rldimi 3, 4, 32, 0 +; CHECK-LE-NEXT: rldimi 4, 4, 32, 0 +; CHECK-LE-NEXT: mtfprd 0, 3 +; CHECK-LE-NEXT: mtfprd 1, 4 +; CHECK-LE-NEXT: xxmrghd 34, 1, 0 +; CHECK-LE-NEXT: blr +entry: + %0 = load i32, ptr %p, align 4 + %vecins.i = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %0, i64 0 + ret <4 x i32> %vecins.i +} + +define noundef <4 x i32> @vec_promote_int(ptr nocapture noundef readonly %p) { +; CHECK-BE-LABEL: vec_promote_int: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lfiwzx 0, 0, 3 +; CHECK-BE-NEXT: xxspltw 34, 0, 1 +; CHECK-BE-NEXT: blr +; +; CHECK-LE-LABEL: vec_promote_int: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: lfiwzx 0, 0, 3 +; CHECK-LE-NEXT: xxspltw 34, 0, 1 +; CHECK-LE-NEXT: blr +entry: + %0 = load i32, ptr %p, align 4 + %vecins.i = insertelement <4 x i32> poison, i32 %0, i64 0 + ret <4 x i32> %vecins.i +} + +define noundef <8 x i16> @vec_promote_short_zeroed(ptr nocapture noundef readonly %p) { +; CHECK-BE-LABEL: vec_promote_short_zeroed: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addis 4, 2, .LCPI8_0@toc@ha +; CHECK-BE-NEXT: lhz 3, 0(3) +; CHECK-BE-NEXT: li 5, 0 +; CHECK-BE-NEXT: addi 4, 4, .LCPI8_0@toc@l +; CHECK-BE-NEXT: mtvsrwz 35, 5 +; CHECK-BE-NEXT: lxvw4x 34, 0, 4 +; CHECK-BE-NEXT: mtvsrwz 36, 3 +; CHECK-BE-NEXT: vperm 2, 4, 3, 2 +; CHECK-BE-NEXT: blr +; +; CHECK-LE-LABEL: vec_promote_short_zeroed: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: addis 4, 
2, .LCPI8_0@toc@ha +; CHECK-LE-NEXT: lhz 3, 0(3) +; CHECK-LE-NEXT: addi 4, 4, .LCPI8_0@toc@l +; CHECK-LE-NEXT: lxvd2x 0, 0, 4 +; CHECK-LE-NEXT: li 4, 0 +; CHECK-LE-NEXT: mtvsrd 36, 3 +; CHECK-LE-NEXT: mtvsrd 34, 4 +; CHECK-LE-NEXT: xxswapd 35, 0 +; CHECK-LE-NEXT: vperm 2, 2, 4, 3 +; CHECK-LE-NEXT: blr +entry: + %0 = load i16, ptr %p, align 2 + %vecins.i = insertelement <8 x i16> <i16 poison, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %0, i64 0 + ret <8 x i16> %vecins.i +} + +define noundef <8 x i16> @vec_promote_short(ptr nocapture noundef readonly %p) { +; CHECK-BE-LABEL: vec_promote_short: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lhzx 3, 0, 3 +; CHECK-BE-NEXT: mtvsrwz 34, 3 +; CHECK-BE-NEXT: vsplth 2, 2, 3 +; CHECK-BE-NEXT: blr +; +; CHECK-LE-LABEL: vec_promote_short: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: lhzx 3, 0, 3 +; CHECK-LE-NEXT: mtvsrwz 34, 3 +; CHECK-LE-NEXT: vsplth 2, 2, 3 +; CHECK-LE-NEXT: blr +entry: + %0 = load i16, ptr %p, align 2 + %vecins.i = insertelement <8 x i16> poison, i16 %0, i64 0 + ret <8 x i16> %vecins.i +} + +define noundef <16 x i8> @vec_promote_char_zeroed(ptr nocapture noundef readonly %p) { +; CHECK-BE-LABEL: vec_promote_char_zeroed: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addis 4, 2, .LCPI10_0@toc@ha +; CHECK-BE-NEXT: lbz 3, 0(3) +; CHECK-BE-NEXT: li 5, 0 +; CHECK-BE-NEXT: addi 4, 4, .LCPI10_0@toc@l +; CHECK-BE-NEXT: mtvsrwz 35, 5 +; CHECK-BE-NEXT: lxvw4x 34, 0, 4 +; CHECK-BE-NEXT: mtvsrwz 36, 3 +; CHECK-BE-NEXT: vperm 2, 4, 3, 2 +; CHECK-BE-NEXT: blr +; +; CHECK-LE-LABEL: vec_promote_char_zeroed: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: addis 4, 2, .LCPI10_0@toc@ha +; CHECK-LE-NEXT: lbz 3, 0(3) +; CHECK-LE-NEXT: addi 4, 4, .LCPI10_0@toc@l +; CHECK-LE-NEXT: lxvd2x 0, 0, 4 +; CHECK-LE-NEXT: li 4, 0 +; CHECK-LE-NEXT: mtvsrd 36, 3 +; CHECK-LE-NEXT: mtvsrd 34, 4 +; CHECK-LE-NEXT: xxswapd 35, 0 +; CHECK-LE-NEXT: vperm 2, 2, 4, 3 +; CHECK-LE-NEXT: blr +entry: + %0 = load i8, ptr %p, align 1 + %vecins.i = insertelement <16 x i8> <i8 poison, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, i8 %0, i64 0 + ret <16 
x i8> %vecins.i +} + +define noundef <16 x i8> @vec_promote_char(ptr nocapture noundef readonly %p) { +; CHECK-BE-LABEL: vec_promote_char: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lbzx 3, 0, 3 +; CHECK-BE-NEXT: mtvsrwz 34, 3 +; CHECK-BE-NEXT: vspltb 2, 2, 7 +; CHECK-BE-NEXT: blr +; +; CHECK-LE-LABEL: vec_promote_char: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: lbzx 3, 0, 3 +; CHECK-LE-NEXT: mtvsrwz 34, 3 +; CHECK-LE-NEXT: vspltb 2, 2, 7 +; CHECK-LE-NEXT: blr +entry: + %0 = load i8, ptr %p, align 1 + %vecins.i = insertelement <16 x i8> poison, i8 %0, i64 0 + ret <16 x i8> %vecins.i +}