diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index f3f8614f21ca0..ba865def5b48a 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -817,6 +817,8 @@ CUSTOM_BUILTIN(mma_lxvp, vsx_lxvp, "W256SLiW256C*", false) CUSTOM_BUILTIN(mma_stxvp, vsx_stxvp, "vW256SLiW256C*", false) CUSTOM_BUILTIN(mma_assemble_pair, vsx_assemble_pair, "vW256*VV", false) CUSTOM_BUILTIN(mma_disassemble_pair, vsx_disassemble_pair, "vv*W256*", false) +CUSTOM_BUILTIN(vsx_build_pair, vsx_assemble_pair, "vW256*VV", false) +CUSTOM_BUILTIN(mma_build_acc, mma_assemble_acc, "vW512*VVVV", false) // UNALIASED_CUSTOM_BUILTIN macro is used for built-ins that have // the same name as that of the intrinsic they generate, i.e. the diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 23ab2aee77914..cd08f5ffe6411 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -15910,6 +15910,17 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, } return Call; } + if (BuiltinID == PPC::BI__builtin_vsx_build_pair || + BuiltinID == PPC::BI__builtin_mma_build_acc) { + // Reverse the order of the operands for LE, so the + // same builtin call can be used on both LE and BE + // without the need for the programmer to swap operands. + // The operands are reversed starting from the second argument, + // the first operand is the pointer to the pair/accumulator + // that is being built. + if (getTarget().isLittleEndian()) + std::reverse(Ops.begin() + 1, Ops.end()); + } bool Accumulate; switch (BuiltinID) { #define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \ diff --git a/clang/test/CodeGen/builtins-ppc-build-pair-mma.c b/clang/test/CodeGen/builtins-ppc-build-pair-mma.c new file mode 100644 index 0000000000000..5f00e6a506f59 --- /dev/null +++ b/clang/test/CodeGen/builtins-ppc-build-pair-mma.c @@ -0,0 +1,51 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \ +// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LE +// RUN: %clang_cc1 -O3 -triple powerpc64-unknown-unknown -target-cpu pwr10 \ +// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-BE + +// CHECK-LE-LABEL: @test1( +// CHECK-LE-NEXT: entry: +// CHECK-LE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC4:%.*]], <16 x i8> [[VC3:%.*]], <16 x i8> [[VC2:%.*]], <16 x i8> [[VC1:%.*]]) +// CHECK-LE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-LE-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2:![0-9]+]] +// CHECK-LE-NEXT: ret void +// +// CHECK-BE-LABEL: @test1( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC1:%.*]], <16 x i8> [[VC2:%.*]], <16 x i8> [[VC3:%.*]], <16 x i8> [[VC4:%.*]]) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2:![0-9]+]] +// CHECK-BE-NEXT: ret void +// +void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vector unsigned char vc2, + vector unsigned char vc3, vector unsigned char vc4, unsigned char *resp) { + __vector_quad vq = *((__vector_quad *)vqp); + __vector_pair vp = *((__vector_pair *)vpp); + __vector_quad res; + __builtin_mma_build_acc(&res, vc1, vc2, vc3, vc4); + *((__vector_quad *)resp) = res; +} + +// CHECK-LE-LABEL: @test2( +// CHECK-LE-NEXT: entry: +// CHECK-LE-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC2:%.*]], <16 x i8> [[VC1:%.*]]) +// CHECK-LE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>* +// CHECK-LE-NEXT: store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa [[TBAA6:![0-9]+]] +// CHECK-LE-NEXT: ret void +// +// CHECK-BE-LABEL: @test2( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC1:%.*]], <16 x i8> [[VC2:%.*]]) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>* +// CHECK-BE-NEXT: store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa [[TBAA6:![0-9]+]] +// CHECK-BE-NEXT: ret void +// +void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, + vector unsigned char vc2, unsigned char *resp) { + __vector_quad vq = *((__vector_quad *)vqp); + __vector_pair vp = *((__vector_pair *)vpp); + __vector_pair res; + __builtin_vsx_build_pair(&res, vc1, vc2); + *((__vector_pair *)resp) = res; +} diff --git a/clang/test/Sema/ppc-pair-mma-types.c b/clang/test/Sema/ppc-pair-mma-types.c index f63a8e063f854..360d597dfaffd 100644 --- a/clang/test/Sema/ppc-pair-mma-types.c +++ b/clang/test/Sema/ppc-pair-mma-types.c @@ -249,6 +249,7 @@ void testVPLocal(int *ptr, vector unsigned char vc) { __vector_pair vp1 = *vpp; __vector_pair vp2; __builtin_vsx_assemble_pair(&vp2, vc, vc); + __builtin_vsx_build_pair(&vp2, vc, vc); __vector_pair vp3; __vector_quad vq; __builtin_mma_xvf64ger(&vq, vp3, vc); diff --git a/clang/test/SemaCXX/ppc-pair-mma-types.cpp b/clang/test/SemaCXX/ppc-pair-mma-types.cpp index 860c9d75ca7a0..085db9abd1def 100644 --- a/clang/test/SemaCXX/ppc-pair-mma-types.cpp +++ b/clang/test/SemaCXX/ppc-pair-mma-types.cpp @@ -370,6 +370,7 @@ void TestVPLambda() { return *vpp; // expected-error {{invalid use of PPC MMA type}} }; auto f3 = [](vector unsigned char vc) { __vector_pair vp; __builtin_vsx_assemble_pair(&vp, vc, vc); return vp; }; // expected-error {{invalid use of PPC MMA type}} + auto f4 = [](vector unsigned char vc) { __vector_pair vp; __builtin_vsx_build_pair(&vp, vc, vc); return vp; }; // expected-error {{invalid use of PPC MMA type}} } // cast