diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index cf8bdd2a429df..216b5fdb69ff7 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -1001,6 +1001,10 @@ TARGET_BUILTIN(__builtin_darn_32, "i", "", "isa-v30-instructions") TARGET_BUILTIN(__builtin_unpack_vector_int128, "ULLiV1LLLii", "", "vsx") TARGET_BUILTIN(__builtin_pack_vector_int128, "V1LLLiULLiULLi", "", "vsx") +// AMO builtins +TARGET_BUILTIN(__builtin_amo_lwat, "UiUi*UiIi", "", "isa-v30-instructions") +TARGET_BUILTIN(__builtin_amo_ldat, "ULiULi*ULiIi", "", "isa-v30-instructions") + // Set the floating point rounding mode BUILTIN(__builtin_setrnd, "di", "") diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 33fff7645df65..038859a513eb8 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -119,6 +119,7 @@ set(opencl_files set(ppc_files altivec.h + amo.h ) set(ppc_htm_files diff --git a/clang/lib/Headers/amo.h b/clang/lib/Headers/amo.h new file mode 100644 index 0000000000000..fda2984b97626 --- /dev/null +++ b/clang/lib/Headers/amo.h @@ -0,0 +1,97 @@ +/*===---- amo.h - PowerPC Atomic Memory Operations ------------------------===*\ + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * +\*===----------------------------------------------------------------------===*/ + +/* This header provides compatibility wrappers for GCC's AMO functions. + * The functions here call Clang's underlying AMO builtins. 
+ */ + +#ifndef _AMO_H +#define _AMO_H + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* AMO Load Operation Codes (FC values) */ +enum { + _AMO_LD_ADD = 0x00, /* Fetch and Add */ + _AMO_LD_XOR = 0x01, /* Fetch and XOR */ + _AMO_LD_IOR = 0x02, /* Fetch and OR */ + _AMO_LD_AND = 0x03, /* Fetch and AND */ + _AMO_LD_UMAX = 0x04, /* Fetch and Maximum Unsigned */ + _AMO_LD_SMAX = 0x05, /* Fetch and Maximum Signed */ + _AMO_LD_UMIN = 0x06, /* Fetch and Minimum Unsigned */ + _AMO_LD_SMIN = 0x07, /* Fetch and Minimum Signed */ + _AMO_LD_SWAP = 0x08 /* Swap */ +}; + +/* 32-bit unsigned AMO load operations */ +static inline uint32_t amo_lwat_add(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_ADD); +} + +static inline uint32_t amo_lwat_xor(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_XOR); +} + +static inline uint32_t amo_lwat_ior(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_IOR); +} + +static inline uint32_t amo_lwat_and(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_AND); +} + +static inline uint32_t amo_lwat_umax(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_UMAX); +} + +static inline uint32_t amo_lwat_umin(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_UMIN); +} + +static inline uint32_t amo_lwat_swap(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_SWAP); +} + +/* 64-bit unsigned AMO load operations */ +static inline uint64_t amo_ldat_add(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_ADD); +} + +static inline uint64_t amo_ldat_xor(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_XOR); +} + +static inline uint64_t amo_ldat_ior(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_IOR); +} + +static inline uint64_t amo_ldat_and(uint64_t *ptr, uint64_t val) { + return 
__builtin_amo_ldat(ptr, val, _AMO_LD_AND); +} + +static inline uint64_t amo_ldat_umax(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_UMAX); +} + +static inline uint64_t amo_ldat_umin(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_UMIN); +} + +static inline uint64_t amo_ldat_swap(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_SWAP); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _AMO_H */ diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp index bfa458d207b46..3d6615b6b5395 100644 --- a/clang/lib/Sema/SemaPPC.cpp +++ b/clang/lib/Sema/SemaPPC.cpp @@ -87,6 +87,8 @@ static bool isPPC_64Builtin(unsigned BuiltinID) { case PPC::BI__builtin_ppc_fetch_and_andlp: case PPC::BI__builtin_ppc_fetch_and_orlp: case PPC::BI__builtin_ppc_fetch_and_swaplp: + case PPC::BI__builtin_amo_lwat: + case PPC::BI__builtin_amo_ldat: return true; } return false; @@ -253,6 +255,19 @@ bool SemaPPC::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, case PPC::BI__builtin_##Name: \ return BuiltinPPCMMACall(TheCall, BuiltinID, Types); #include "clang/Basic/BuiltinsPPC.def" + case PPC::BI__builtin_amo_lwat: + case PPC::BI__builtin_amo_ldat: { + llvm::APSInt Result; + if (SemaRef.BuiltinConstantArg(TheCall, 2, Result)) + return true; + unsigned Val = Result.getZExtValue(); + static constexpr unsigned ValidFC[] = {0, 1, 2, 3, 4, 6, 8}; + if (llvm::is_contained(ValidFC, Val)) + return false; + Expr *Arg = TheCall->getArg(2); + return SemaRef.Diag(Arg->getBeginLoc(), diag::err_argument_invalid_range) + << toString(Result, 10) << "0-4, 6" << "8" << Arg->getSourceRange(); + } } llvm_unreachable("must return from switch"); } diff --git a/clang/test/CodeGen/PowerPC/builtins-amo-err.c b/clang/test/CodeGen/PowerPC/builtins-amo-err.c new file mode 100644 index 0000000000000..cdc14ef7f7e04 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/builtins-amo-err.c @@ -0,0 +1,18 @@ +// RUN: not %clang_cc1 -triple 
powerpc-ibm-aix -target-cpu pwr9 \ +// RUN: -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=AIX32-ERROR +// RUN: not %clang_cc1 -triple powerpc64-ibm-aix -target-cpu pwr9 \ +// RUN: -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=FC-ERROR + +void test_amo() { + unsigned int *ptr1, value1; + // AIX32-ERROR: error: this builtin is only available on 64-bit targets + __builtin_amo_lwat(ptr1, value1, 0); + // FC-ERROR: argument value 9 is outside the valid range [0-4, 6, 8] + __builtin_amo_lwat(ptr1, value1, 9); + + unsigned long int *ptr2, value2; + // AIX32-ERROR: error: this builtin is only available on 64-bit targets + __builtin_amo_ldat(ptr2, value2, 3); + // FC-ERROR: error: argument value 26 is outside the valid range [0-4, 6, 8] + __builtin_amo_ldat(ptr2, value2, 26); +} diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-amo.c b/clang/test/CodeGen/PowerPC/builtins-ppc-amo.c new file mode 100644 index 0000000000000..2975b99786869 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-amo.c @@ -0,0 +1,58 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu pwr9 \ +// RUN: -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -O3 -triple powerpc64-ibm-aix -target-cpu pwr9 \ +// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=AIX + +// CHECK-LABEL: define dso_local void @test_unsigned_lwat( +// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef zeroext [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ppc.amo.lwat(ptr [[PTR]], i32 [[VALUE]], i32 0) +// CHECK-NEXT: store i32 [[TMP0]], ptr [[RESP]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_unsigned_lwat( +// AIX-SAME: ptr noundef [[PTR:%.*]], i32 noundef zeroext 
[[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ppc.amo.lwat(ptr [[PTR]], i32 [[VALUE]], i32 0) +// AIX-NEXT: store i32 [[TMP0]], ptr [[RESP]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] +// AIX-NEXT: ret void +// +void test_unsigned_lwat(unsigned int *ptr, unsigned int value, unsigned int * resp) { + unsigned int res = __builtin_amo_lwat(ptr, value, 0); + *resp = res; +} + +// CHECK-LABEL: define dso_local void @test_unsigned_ldat( +// CHECK-SAME: ptr noundef [[PTR:%.*]], i64 noundef [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ppc.amo.ldat(ptr [[PTR]], i64 [[VALUE]], i32 3) +// CHECK-NEXT: store i64 [[TMP0]], ptr [[RESP]], align 8, !tbaa [[LONG_TBAA6:![0-9]+]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_unsigned_ldat( +// AIX-SAME: ptr noundef [[PTR:%.*]], i64 noundef [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ppc.amo.ldat(ptr [[PTR]], i64 [[VALUE]], i32 3) +// AIX-NEXT: store i64 [[TMP0]], ptr [[RESP]], align 8, !tbaa [[LONG_TBAA6:![0-9]+]] +// AIX-NEXT: ret void +// +void test_unsigned_ldat(unsigned long int *ptr, unsigned long int value, unsigned long int * resp) { + unsigned long int res = __builtin_amo_ldat(ptr, value, 3); + *resp = res; +} +//. 
+// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[LONG_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"long", [[META4]], i64 0} +//. +// AIX: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// AIX: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// AIX: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// AIX: [[META5]] = !{!"Simple C/C++ TBAA"} +// AIX: [[LONG_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// AIX: [[META7]] = !{!"long", [[META4]], i64 0} +//. diff --git a/clang/test/CodeGen/PowerPC/ppc-amo-header.c b/clang/test/CodeGen/PowerPC/ppc-amo-header.c new file mode 100644 index 0000000000000..f544cdef1e7d0 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/ppc-amo-header.c @@ -0,0 +1,91 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -target-cpu pwr9 \ +// RUN: -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64-ibm-aix -target-cpu pwr9 \ +// RUN: -emit-llvm %s -o - | FileCheck %s + +#include <amo.h> + +uint32_t test_lwat_add(uint32_t *ptr, uint32_t val) { + // CHECK-LABEL: @test_lwat_add + // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 0) + return amo_lwat_add(ptr, val); +} + +uint32_t test_lwat_xor(uint32_t *ptr, uint32_t val) { + // CHECK-LABEL: @test_lwat_xor + // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 1) + return amo_lwat_xor(ptr, val); +} + +uint32_t test_lwat_ior(uint32_t *ptr, uint32_t val) { + // CHECK-LABEL: @test_lwat_ior + // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 2) + return amo_lwat_ior(ptr, val); +} + +uint32_t test_lwat_and(uint32_t *ptr, uint32_t val) { + // CHECK-LABEL: @test_lwat_and + // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 
3) + return amo_lwat_and(ptr, val); +} + +uint32_t test_lwat_umax(uint32_t *ptr, uint32_t val) { + // CHECK-LABEL: @test_lwat_umax + // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 4) + return amo_lwat_umax(ptr, val); +} + +uint32_t test_lwat_umin(uint32_t *ptr, uint32_t val) { + // CHECK-LABEL: @test_lwat_umin + // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 6) + return amo_lwat_umin(ptr, val); +} + +uint32_t test_lwat_swap(uint32_t *ptr, uint32_t val) { + // CHECK-LABEL: @test_lwat_swap + // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 8) + return amo_lwat_swap(ptr, val); +} + +uint64_t test_ldat_add(uint64_t *ptr, uint64_t val) { + // CHECK-LABEL: @test_ldat_add + // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 0) + return amo_ldat_add(ptr, val); +} + +uint64_t test_ldat_xor(uint64_t *ptr, uint64_t val) { + // CHECK-LABEL: @test_ldat_xor + // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 1) + return amo_ldat_xor(ptr, val); +} + +uint64_t test_ldat_ior(uint64_t *ptr, uint64_t val) { + // CHECK-LABEL: @test_ldat_ior + // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 2) + return amo_ldat_ior(ptr, val); +} + +uint64_t test_ldat_and(uint64_t *ptr, uint64_t val) { + // CHECK-LABEL: @test_ldat_and + // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 3) + return amo_ldat_and(ptr, val); +} + +uint64_t test_ldat_umax(uint64_t *ptr, uint64_t val) { + // CHECK-LABEL: @test_ldat_umax + // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 4) + return amo_ldat_umax(ptr, val); +} + +uint64_t test_ldat_umin(uint64_t *ptr, uint64_t val) { + // CHECK-LABEL: @test_ldat_umin + // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 6) + return amo_ldat_umin(ptr, val); +} + +uint64_t test_ldat_swap(uint64_t *ptr, uint64_t val) { + // CHECK-LABEL: @test_ldat_swap + // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 
%{{.*}}, i32 8) + return amo_ldat_swap(ptr, val); +} diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 3907e864bed1e..446113c4670dd 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -2139,3 +2139,15 @@ let TargetPrefix = "ppc" in { Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty], [IntrArgMemOnly, IntrWriteMem, NoCapture<ArgIndex<2>>]>; } + +// AMO intrinsics +let TargetPrefix = "ppc" in { + def int_ppc_amo_lwat : ClangBuiltin<"__builtin_amo_lwat">, + DefaultAttrsIntrinsic<[llvm_i32_ty],[llvm_ptr_ty, + llvm_i32_ty, llvm_i32_ty], + [IntrArgMemOnly, ImmArg<ArgIndex<2>>]>; + def int_ppc_amo_ldat : ClangBuiltin<"__builtin_amo_ldat">, + DefaultAttrsIntrinsic<[llvm_i64_ty],[llvm_ptr_ty, + llvm_i64_ty, llvm_i32_ty], + [IntrArgMemOnly, ImmArg<ArgIndex<2>>]>; +} diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index dd233e236e17f..ec5e61b724d87 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -677,6 +677,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // To handle counter-based loop conditions. 
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom); @@ -11633,6 +11634,29 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return Flags; } +SDValue PPCTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned IntrinsicID = Op.getConstantOperandVal(1); + + SDLoc dl(Op); + switch (IntrinsicID) { + case Intrinsic::ppc_amo_lwat: + case Intrinsic::ppc_amo_ldat: + SDValue Ptr = Op.getOperand(2); + SDValue Val1 = Op.getOperand(3); + SDValue FC = Op.getOperand(4); + SDValue Ops[] = {Ptr, Val1, FC}; + bool IsLwat = IntrinsicID == Intrinsic::ppc_amo_lwat; + unsigned Opcode = IsLwat ? PPC::LWAT_PSEUDO : PPC::LDAT_PSEUDO; + MachineSDNode *MNode = DAG.getMachineNode( + Opcode, dl, {IsLwat ? MVT::i32 : MVT::i64, MVT::Other}, Ops); + SDValue Result = SDValue(MNode, 0); + SDValue OutChain = SDValue(MNode, 1); + return DAG.getMergeValues({Result, OutChain}, dl); + } + return SDValue(); +} + SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const { // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to @@ -12803,8 +12827,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerFP_ROUND(Op, DAG); case ISD::ROTL: return LowerROTL(Op, DAG); - // For counter-based loop handling. - case ISD::INTRINSIC_W_CHAIN: return SDValue(); + // For counter-based loop handling, and amo load. + case ISD::INTRINSIC_W_CHAIN: + return LowerINTRINSIC_W_CHAIN(Op, DAG); case ISD::BITCAST: return LowerBITCAST(Op, DAG); @@ -14715,6 +14740,43 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? 
RegState::Define : 0) .addImm(0) .addReg(Ptr); + } else if (MI.getOpcode() == PPC::LWAT_PSEUDO || + MI.getOpcode() == PPC::LDAT_PSEUDO) { + DebugLoc DL = MI.getDebugLoc(); + Register DstReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); + Register ValReg = MI.getOperand(2).getReg(); + unsigned FC = MI.getOperand(3).getImm(); + bool IsLwat = MI.getOpcode() == PPC::LWAT_PSEUDO; + Register Val64 = MRI.createVirtualRegister(&PPC::G8RCRegClass); + if (IsLwat) + BuildMI(*BB, MI, DL, TII->get(TargetOpcode::SUBREG_TO_REG), Val64) + .addImm(0) + .addReg(ValReg) + .addImm(PPC::sub_32); + else + Val64 = ValReg; + Register Pair = MRI.createVirtualRegister(&PPC::G8pRCRegClass); + BuildMI(*BB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Pair); + Register PairWithVal = MRI.createVirtualRegister(&PPC::G8pRCRegClass); + BuildMI(*BB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), PairWithVal) + .addReg(Pair) + .addReg(Val64) + .addImm(PPC::sub_gp8_x1); + Register PairResult = MRI.createVirtualRegister(&PPC::G8pRCRegClass); + BuildMI(*BB, MI, DL, TII->get(IsLwat ? 
PPC::LWAT : PPC::LDAT), PairResult) + .addReg(PairWithVal) + .addReg(PtrReg) + .addImm(FC); + Register Result64 = MRI.createVirtualRegister(&PPC::G8RCRegClass); + BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), Result64) + .addReg(PairResult, 0, PPC::sub_gp8_x0); + if (IsLwat) + BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg) + .addReg(Result64, 0, PPC::sub_32); + else + BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg) + .addReg(Result64); } else { llvm_unreachable("Unexpected instr type to insert"); } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index d967018982734..839f797e2dfed 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1312,6 +1312,7 @@ namespace llvm { EVT VT, SDValue V1, SDValue V2) const; SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index fdca5ebc854ba..462535601e05e 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -327,12 +327,19 @@ def LQARXL : XForm_1<31, 276, (outs g8prc:$RST), (ins (memrr $RA, $RB):$addr), "lqarx $RST, $addr, 1", IIC_LdStLQARX, []>, isPPC64, isRecordForm; -let hasExtraDefRegAllocReq = 1 in -def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$RST), (ins g8rc:$RA, u5imm:$RB), +let hasExtraDefRegAllocReq = 1, mayStore = 1 in +def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8prc:$RST), (ins g8prc:$RSTi, ptr_rc_nor0:$RA, u5imm:$RB), "ldat $RST, $RA, $RB", IIC_LdStLoad>, isPPC64, - Requires<[IsISA3_0]>; + 
Requires<[IsISA3_0]>, + RegConstraint<"$RSTi = $RST">; } +def LDAT_PSEUDO : PPCCustomInserterPseudo< + (outs g8rc:$dst), + (ins ptr_rc_nor0:$ptr, g8rc:$val, u5imm:$fc), + "#LDAT_PSEUDO", + []>; + let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { def STDCX : XForm_1_memOp<31, 214, (outs), (ins g8rc:$RST, (memrr $RA, $RB):$addr), "stdcx. $RST, $addr", IIC_LdStSTDCX, []>, isRecordForm; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index f3998113ddd52..3fddda2535673 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1863,14 +1863,21 @@ def LHARXL : XForm_1_memOp<31, 116, (outs gprc:$RST), (ins (memrr $RA, $RB):$ad def LWARXL : XForm_1_memOp<31, 20, (outs gprc:$RST), (ins (memrr $RA, $RB):$addr), "lwarx $RST, $addr, 1", IIC_LdStLWARX, []>, isRecordForm; +} // The atomic instructions use the destination register as well as the next one // or two registers in order (modulo 31). 
-let hasExtraSrcRegAllocReq = 1 in -def LWAT : X_RD5_RS5_IM5<31, 582, (outs gprc:$RST), (ins gprc:$RA, u5imm:$RB), +let hasExtraSrcRegAllocReq = 1, mayLoad = 1, mayStore = 1 in +def LWAT : X_RD5_RS5_IM5<31, 582, (outs g8prc:$RST), (ins g8prc:$RSTi, ptr_rc_nor0:$RA, u5imm:$RB), "lwat $RST, $RA, $RB", IIC_LdStLoad>, - Requires<[IsISA3_0]>; -} + Requires<[IsISA3_0]>, + RegConstraint<"$RSTi = $RST">; + +def LWAT_PSEUDO : PPCCustomInserterPseudo< + (outs gprc:$dst), + (ins ptr_rc_nor0:$ptr, gprc:$val, u5imm:$fc), + "#LWAT_PSEUDO", + []>; let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { def STBCX : XForm_1_memOp<31, 694, (outs), (ins gprc:$RST, (memrr $RA, $RB):$addr), diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index 65d0484805b95..a5b02907db976 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -501,7 +501,7 @@ def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> { // Similarly, we have an AltOrder for 64-bit ELF ABI which r2 is allocated // at last. 
def G8pRC : - RegisterClass<"PPC", [i128], 128, + RegisterClass<"PPC", [untyped], 128, (add (sequence "G8p%u", 1, 5), (sequence "G8p%u", 14, 7), G8p15, G8p6, G8p0)> { diff --git a/llvm/test/CodeGen/PowerPC/amo-enable.ll b/llvm/test/CodeGen/PowerPC/amo-enable.ll new file mode 100644 index 0000000000000..33739cca12492 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/amo-enable.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names < %s \ +; RUN: | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-BE + +define void @test_us_lwat(ptr noundef %ptr, i32 noundef %value, ptr nocapture %resp) { +; CHECK-LABEL: test_us_lwat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mr r7, r4 +; CHECK-NEXT: lwat r6, r3, 0 +; CHECK-NEXT: stw r6, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_us_lwat: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mr r7, r4 +; CHECK-BE-NEXT: lwat r6, r3, 0 +; CHECK-BE-NEXT: stw r6, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %0 = tail call i32 @llvm.ppc.amo.lwat(ptr %ptr, i32 %value, i32 0) + store i32 %0, ptr %resp, align 4 + ret void +} + +define void @test_us_ldat(ptr noundef %ptr, i64 noundef %value, ptr nocapture %resp) { +; CHECK-LABEL: test_us_ldat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mr r7, r4 +; CHECK-NEXT: ldat r6, r3, 3 +; CHECK-NEXT: std r6, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_us_ldat: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mr r7, r4 +; CHECK-BE-NEXT: ldat r6, r3, 3 +; CHECK-BE-NEXT: std r6, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %0 = tail call i64 @llvm.ppc.amo.ldat(ptr %ptr, i64 %value, i32 3) + store i64 %0, ptr %resp, align 8 + ret void +} + +declare i64 @llvm.ppc.amo.ldat(ptr, i64, i32 immarg) +declare i32 
@llvm.ppc.amo.lwat(ptr, i32, i32 immarg) +